]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs3.patch
- update 3.3 options/patches
[packages/kernel.git] / kernel-aufs3.patch
CommitLineData
f6c5ef8b 1aufs3.2 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
9dbd164d 4index 5f4c45d..357a8a6 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
1e00d052 7@@ -215,6 +215,7 @@ source "fs/pstore/Kconfig"
7f207e10
AM
8 source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
9dbd164d 16index d2c3353..680ad8a 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
1e00d052 19@@ -123,3 +123,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
9dbd164d 20 obj-y += exofs/ # Multiple modules
7f207e10 21 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 22 obj-$(CONFIG_PSTORE) += pstore/
2cbb1c4b 23+obj-$(CONFIG_AUFS_FS) += aufs/
7f207e10 24diff --git a/include/linux/Kbuild b/include/linux/Kbuild
1e00d052 25index 619b565..29f386b 100644
7f207e10
AM
26--- a/include/linux/Kbuild
27+++ b/include/linux/Kbuild
2cbb1c4b 28@@ -65,6 +65,7 @@ header-y += atmppp.h
7f207e10
AM
29 header-y += atmsap.h
30 header-y += atmsvc.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs.h
34 header-y += auto_fs4.h
35 header-y += auxvec.h
f6c5ef8b 36aufs3.2 base patch
7f207e10
AM
37
38diff --git a/fs/namei.c b/fs/namei.c
9dbd164d 39index 5008f01..4cc94cf 100644
7f207e10
AM
40--- a/fs/namei.c
41+++ b/fs/namei.c
9dbd164d 42@@ -1753,7 +1753,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
7f207e10
AM
43 * needs parent already locked. Doesn't follow mounts.
44 * SMP-safe.
45 */
46-static struct dentry *lookup_hash(struct nameidata *nd)
47+struct dentry *lookup_hash(struct nameidata *nd)
48 {
7f207e10
AM
49 return __lookup_hash(&nd->last, nd->path.dentry, nd);
50 }
7f207e10 51diff --git a/fs/splice.c b/fs/splice.c
1e00d052 52index fa2defa..e3569b0 100644
7f207e10
AM
53--- a/fs/splice.c
54+++ b/fs/splice.c
2cbb1c4b 55@@ -1085,8 +1085,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
56 /*
57 * Attempt to initiate a splice from pipe to file.
58 */
59-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
60- loff_t *ppos, size_t len, unsigned int flags)
61+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
62+ loff_t *ppos, size_t len, unsigned int flags)
63 {
64 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
65 loff_t *, size_t, unsigned int);
2cbb1c4b 66@@ -1113,9 +1113,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
67 /*
68 * Attempt to initiate a splice from a file to a pipe.
69 */
70-static long do_splice_to(struct file *in, loff_t *ppos,
71- struct pipe_inode_info *pipe, size_t len,
72- unsigned int flags)
73+long do_splice_to(struct file *in, loff_t *ppos,
74+ struct pipe_inode_info *pipe, size_t len,
75+ unsigned int flags)
76 {
77 ssize_t (*splice_read)(struct file *, loff_t *,
78 struct pipe_inode_info *, size_t, unsigned int);
79diff --git a/include/linux/namei.h b/include/linux/namei.h
9dbd164d 80index ffc0213..ef35a31 100644
7f207e10
AM
81--- a/include/linux/namei.h
82+++ b/include/linux/namei.h
9dbd164d 83@@ -85,6 +85,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
7f207e10
AM
84 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
85 int (*open)(struct inode *, struct file *));
86
87+extern struct dentry *lookup_hash(struct nameidata *nd);
7f207e10
AM
88 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
89
027c5e7a 90 extern int follow_down_one(struct path *);
1e00d052
AM
91diff --git a/include/linux/splice.h b/include/linux/splice.h
92index 26e5b61..3ffef2f 100644
93--- a/include/linux/splice.h
94+++ b/include/linux/splice.h
95@@ -91,4 +91,10 @@ extern void splice_shrink_spd(struct pipe_inode_info *,
4b3da204
AM
96 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
97
98 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
99+
100+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
101+ loff_t *ppos, size_t len, unsigned int flags);
102+extern long do_splice_to(struct file *in, loff_t *ppos,
103+ struct pipe_inode_info *pipe, size_t len,
104+ unsigned int flags);
105 #endif
f6c5ef8b 106aufs3.2 standalone patch
7f207e10
AM
107
108diff --git a/fs/file_table.c b/fs/file_table.c
1e00d052 109index c322794..2aad244 100644
7f207e10
AM
110--- a/fs/file_table.c
111+++ b/fs/file_table.c
2cbb1c4b 112@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file)
7f207e10
AM
113 }
114 }
115
116+EXPORT_SYMBOL(file_sb_list_del);
1facf9fc 117+
7f207e10
AM
118 #ifdef CONFIG_SMP
119
120 /*
1e00d052 121diff --git a/fs/inode.c b/fs/inode.c
9dbd164d 122index ee4e66b..728042b 100644
1e00d052
AM
123--- a/fs/inode.c
124+++ b/fs/inode.c
125@@ -65,6 +65,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
4b3da204 126 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
2cbb1c4b
JR
127
128 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
2cbb1c4b 129+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
130
131 /*
4b3da204 132 * Empty aops. Can be used for the cases where the user does not
7f207e10 133diff --git a/fs/namei.c b/fs/namei.c
9dbd164d 134index 4cc94cf..af19e30 100644
7f207e10
AM
135--- a/fs/namei.c
136+++ b/fs/namei.c
9dbd164d 137@@ -1757,6 +1757,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
027c5e7a 138 {
7f207e10
AM
139 return __lookup_hash(&nd->last, nd->path.dentry, nd);
140 }
141+EXPORT_SYMBOL(lookup_hash);
142
7f207e10
AM
143 /**
144 * lookup_one_len - filesystem helper to lookup single pathname component
145diff --git a/fs/namespace.c b/fs/namespace.c
f6c5ef8b 146index cfc6d44..173d15a 100644
7f207e10
AM
147--- a/fs/namespace.c
148+++ b/fs/namespace.c
f6c5ef8b 149@@ -1506,6 +1506,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
150 }
151 return 0;
152 }
153+EXPORT_SYMBOL(iterate_mounts);
154
155 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
156 {
157diff --git a/fs/notify/group.c b/fs/notify/group.c
1e00d052 158index 63fc294..6f4adca 100644
7f207e10
AM
159--- a/fs/notify/group.c
160+++ b/fs/notify/group.c
161@@ -22,6 +22,7 @@
162 #include <linux/srcu.h>
163 #include <linux/rculist.h>
164 #include <linux/wait.h>
165+#include <linux/module.h>
166
167 #include <linux/fsnotify_backend.h>
168 #include "fsnotify.h"
169@@ -70,6 +71,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
170 if (atomic_dec_and_test(&group->refcnt))
171 fsnotify_destroy_group(group);
172 }
173+EXPORT_SYMBOL(fsnotify_put_group);
174
175 /*
176 * Create a new fsnotify_group and hold a reference for the group returned.
177@@ -102,3 +104,4 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
178
179 return group;
180 }
181+EXPORT_SYMBOL(fsnotify_alloc_group);
182diff --git a/fs/notify/mark.c b/fs/notify/mark.c
1e00d052 183index e14587d..be6533b 100644
7f207e10
AM
184--- a/fs/notify/mark.c
185+++ b/fs/notify/mark.c
2cbb1c4b 186@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10
AM
187 if (atomic_dec_and_test(&mark->refcnt))
188 mark->free_mark(mark);
189 }
190+EXPORT_SYMBOL(fsnotify_put_mark);
191
192 /*
193 * Any time a mark is getting freed we end up here.
2cbb1c4b 194@@ -189,6 +190,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
7f207e10
AM
195 if (unlikely(atomic_dec_and_test(&group->num_marks)))
196 fsnotify_final_destroy_group(group);
197 }
198+EXPORT_SYMBOL(fsnotify_destroy_mark);
199
200 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
201 {
2cbb1c4b 202@@ -276,6 +278,7 @@ err:
7f207e10
AM
203
204 return ret;
205 }
206+EXPORT_SYMBOL(fsnotify_add_mark);
207
208 /*
209 * clear any marks in a group in which mark->flags & flags is true
2cbb1c4b 210@@ -331,6 +334,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
211 atomic_set(&mark->refcnt, 1);
212 mark->free_mark = free_mark;
213 }
214+EXPORT_SYMBOL(fsnotify_init_mark);
215
216 static int fsnotify_mark_destroy(void *ignored)
217 {
218diff --git a/fs/open.c b/fs/open.c
9dbd164d 219index 22c41b5..33b4033 100644
7f207e10
AM
220--- a/fs/open.c
221+++ b/fs/open.c
222@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
223 mutex_unlock(&dentry->d_inode->i_mutex);
224 return ret;
225 }
226+EXPORT_SYMBOL(do_truncate);
227
228 static long do_sys_truncate(const char __user *pathname, loff_t length)
229 {
230diff --git a/fs/splice.c b/fs/splice.c
1e00d052 231index e3569b0..9dc07b7 100644
7f207e10
AM
232--- a/fs/splice.c
233+++ b/fs/splice.c
2cbb1c4b 234@@ -1109,6 +1109,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
235
236 return splice_write(pipe, out, ppos, len, flags);
237 }
238+EXPORT_SYMBOL(do_splice_from);
239
240 /*
241 * Attempt to initiate a splice from a file to a pipe.
2cbb1c4b 242@@ -1135,6 +1136,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
243
244 return splice_read(in, ppos, pipe, len, flags);
245 }
246+EXPORT_SYMBOL(do_splice_to);
247
248 /**
249 * splice_direct_to_actor - splices data directly between two non-pipes
250diff --git a/security/commoncap.c b/security/commoncap.c
9dbd164d 251index ee4f848..611fd70 100644
7f207e10
AM
252--- a/security/commoncap.c
253+++ b/security/commoncap.c
9dbd164d 254@@ -975,3 +975,4 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
94337f0d 255 }
7f207e10
AM
256 return ret;
257 }
258+EXPORT_SYMBOL(cap_file_mmap);
259diff --git a/security/device_cgroup.c b/security/device_cgroup.c
f6c5ef8b 260index 4450fbe..bc94175 100644
7f207e10
AM
261--- a/security/device_cgroup.c
262+++ b/security/device_cgroup.c
f6c5ef8b
AM
263@@ -7,6 +7,7 @@
264 #include <linux/device_cgroup.h>
265 #include <linux/cgroup.h>
266 #include <linux/ctype.h>
267+#include <linux/export.h>
268 #include <linux/list.h>
269 #include <linux/uaccess.h>
270 #include <linux/seq_file.h>
271@@ -500,6 +501,7 @@ found:
7f207e10
AM
272
273 return -EPERM;
274 }
2cbb1c4b 275+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
276
277 int devcgroup_inode_mknod(int mode, dev_t dev)
278 {
279diff --git a/security/security.c b/security/security.c
f6c5ef8b 280index e2f684a..892000c 100644
7f207e10
AM
281--- a/security/security.c
282+++ b/security/security.c
9dbd164d 283@@ -411,6 +411,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
284 return 0;
285 return security_ops->path_rmdir(dir, dentry);
286 }
287+EXPORT_SYMBOL(security_path_rmdir);
288
289 int security_path_unlink(struct path *dir, struct dentry *dentry)
290 {
9dbd164d 291@@ -427,6 +428,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
292 return 0;
293 return security_ops->path_symlink(dir, dentry, old_name);
294 }
295+EXPORT_SYMBOL(security_path_symlink);
296
297 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
298 struct dentry *new_dentry)
9dbd164d 299@@ -435,6 +437,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
300 return 0;
301 return security_ops->path_link(old_dentry, new_dir, new_dentry);
302 }
303+EXPORT_SYMBOL(security_path_link);
304
305 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
306 struct path *new_dir, struct dentry *new_dentry)
9dbd164d 307@@ -453,6 +456,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
308 return 0;
309 return security_ops->path_truncate(path);
310 }
311+EXPORT_SYMBOL(security_path_truncate);
312
313 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
314 mode_t mode)
9dbd164d 315@@ -461,6 +465,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
7f207e10
AM
316 return 0;
317 return security_ops->path_chmod(dentry, mnt, mode);
318 }
319+EXPORT_SYMBOL(security_path_chmod);
320
321 int security_path_chown(struct path *path, uid_t uid, gid_t gid)
322 {
9dbd164d 323@@ -468,6 +473,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
7f207e10
AM
324 return 0;
325 return security_ops->path_chown(path, uid, gid);
326 }
327+EXPORT_SYMBOL(security_path_chown);
328
329 int security_path_chroot(struct path *path)
330 {
9dbd164d 331@@ -544,6 +550,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
332 return 0;
333 return security_ops->inode_readlink(dentry);
334 }
335+EXPORT_SYMBOL(security_inode_readlink);
336
337 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
338 {
9dbd164d 339@@ -558,6 +565,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 340 return 0;
1e00d052 341 return security_ops->inode_permission(inode, mask);
7f207e10
AM
342 }
343+EXPORT_SYMBOL(security_inode_permission);
344
1e00d052 345 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 346 {
9dbd164d 347@@ -673,6 +681,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
348
349 return fsnotify_perm(file, mask);
350 }
351+EXPORT_SYMBOL(security_file_permission);
352
353 int security_file_alloc(struct file *file)
354 {
9dbd164d 355@@ -700,6 +709,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
7f207e10
AM
356 return ret;
357 return ima_file_mmap(file, prot);
358 }
359+EXPORT_SYMBOL(security_file_mmap);
360
361 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
362 unsigned long prot)
363diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
364--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 365+++ linux/Documentation/ABI/testing/debugfs-aufs 2012-02-13 21:54:56.963104881 +0100
7f207e10
AM
366@@ -0,0 +1,37 @@
367+What: /debug/aufs/si_<id>/
368+Date: March 2009
369+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
370+Description:
371+ Under /debug/aufs, a directory named si_<id> is created
372+ per aufs mount, where <id> is a unique id generated
373+ internally.
1facf9fc 374+
7f207e10
AM
375+What: /debug/aufs/si_<id>/xib
376+Date: March 2009
377+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
378+Description:
379+ It shows the consumed blocks by xib (External Inode Number
380+ Bitmap), its block size and file size.
381+ When the aufs mount option 'noxino' is specified, it
382+ will be empty. About XINO files, see the aufs manual.
383+
384+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
385+Date: March 2009
386+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
387+Description:
388+ It shows the consumed blocks by xino (External Inode Number
389+ Translation Table), its link count, block size and file
390+ size.
391+ When the aufs mount option 'noxino' is specified, it
392+ will be empty. About XINO files, see the aufs manual.
393+
394+What: /debug/aufs/si_<id>/xigen
395+Date: March 2009
396+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
397+Description:
398+ It shows the consumed blocks by xigen (External Inode
399+ Generation Table), its block size and file size.
400+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
401+ be created.
402+ When the aufs mount option 'noxino' is specified, it
403+ will be empty. About XINO files, see the aufs manual.
404diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
405--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 406+++ linux/Documentation/ABI/testing/sysfs-aufs 2012-02-13 21:54:56.963104881 +0100
7f207e10
AM
407@@ -0,0 +1,24 @@
408+What: /sys/fs/aufs/si_<id>/
409+Date: March 2009
410+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
411+Description:
412+ Under /sys/fs/aufs, a directory named si_<id> is created
413+ per aufs mount, where <id> is a unique id generated
414+ internally.
415+
416+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
417+Date: March 2009
418+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
419+Description:
420+ It shows the absolute path of a member directory (which
421+ is called branch) in aufs, and its permission.
422+
423+What: /sys/fs/aufs/si_<id>/xi_path
424+Date: March 2009
425+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
426+Description:
427+ It shows the absolute path of XINO (External Inode Number
428+ Bitmap, Translation Table and Generation Table) file
429+ even if it is the default path.
430+ When the aufs mount option 'noxino' is specified, it
431+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
432diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
433--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 434+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2012-02-13 21:54:56.963104881 +0100
53392da6
AM
435@@ -0,0 +1,162 @@
436+
437+# Copyright (C) 2005-2011 Junjiro R. Okajima
438+#
439+# This program is free software; you can redistribute it and/or modify
440+# it under the terms of the GNU General Public License as published by
441+# the Free Software Foundation; either version 2 of the License, or
442+# (at your option) any later version.
443+#
444+# This program is distributed in the hope that it will be useful,
445+# but WITHOUT ANY WARRANTY; without even the implied warranty of
446+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
447+# GNU General Public License for more details.
448+#
449+# You should have received a copy of the GNU General Public License
450+# along with this program; if not, write to the Free Software
451+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
452+
453+Introduction
454+----------------------------------------
455+
456+aufs [ei ju: ef es] | [a u f s]
457+1. abbrev. for "advanced multi-layered unification filesystem".
458+2. abbrev. for "another unionfs".
459+3. abbrev. for "auf das" in German which means "on the" in English.
460+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
461+ But "Filesystem aufs Filesystem" is hard to understand.
462+
463+AUFS is a filesystem with features:
464+- multi layered stackable unification filesystem, the member directory
465+ is called as a branch.
466+- branch permission and attribute, 'readonly', 'real-readonly',
467+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
468+ combination.
469+- internal "file copy-on-write".
470+- logical deletion, whiteout.
471+- dynamic branch manipulation, adding, deleting and changing permission.
472+- allow bypassing aufs, user's direct branch access.
473+- external inode number translation table and bitmap which maintains the
474+ persistent aufs inode number.
475+- seekable directory, including NFS readdir.
476+- file mapping, mmap and sharing pages.
477+- pseudo-link, hardlink over branches.
478+- loopback mounted filesystem as a branch.
479+- several policies to select one among multiple writable branches.
480+- revert a single systemcall when an error occurs in aufs.
481+- and more...
482+
483+
484+Multi Layered Stackable Unification Filesystem
485+----------------------------------------------------------------------
486+Most people already know what it is.
487+It is a filesystem which unifies several directories and provides a
488+merged single directory. When users access a file, the access will be
489+passed/re-directed/converted (sorry, I am not sure which English word is
490+correct) to the real file on the member filesystem. The member
491+filesystem is called 'lower filesystem' or 'branch' and has a mode
492+'readonly' and 'readwrite.' And the deletion for a file on the lower
493+readonly branch is handled by creating 'whiteout' on the upper writable
494+branch.
495+
496+On LKML, there have been discussions about UnionMount (Jan Blunck,
497+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
498+different approaches to implement the merged-view.
499+The former tries putting it into VFS, and the latter implements as a
500+separate filesystem.
501+(If I misunderstand about these implementations, please let me know and
502+I shall correct it. Because it is a long time ago when I read their
503+source files last time).
504+
505+UnionMount's approach will be able to be small, but may be hard to share
506+branches between several UnionMount since the whiteout in it is
507+implemented in the inode on branch filesystem and always
508+shared. According to Bharata's post, readdir does not seem to be
509+finished yet.
510+There are several missing features known in this implementation such as
511+- for users, the inode number may change silently. eg. copy-up.
512+- link(2) may break by copy-up.
513+- read(2) may get an obsoleted filedata (fstat(2) too).
514+- fcntl(F_SETLK) may be broken by copy-up.
515+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
516+ open(O_RDWR).
517+
518+Unionfs has a longer history. When I started implementing a stacking filesystem
519+(Aug 2005), it already existed. It has virtual super_block, inode,
520+dentry and file objects and they have an array pointing lower same kind
521+objects. After contributing many patches for Unionfs, I re-started my
522+project AUFS (Jun 2006).
523+
524+In AUFS, the structure of the filesystem resembles Unionfs, but I
525+implemented my own ideas, approaches and enhancements and it became
526+a totally different one.
527+
528+Comparing DM snapshot and fs based implementation
529+- the number of bytes to be copied between devices is much smaller.
530+- the type of filesystem must be one and only.
531+- the fs must be writable, no readonly fs, even for the lower original
532+ device. so the compression fs will not be usable. but if we use
533+ loopback mount, we may address this issue.
534+ for instance,
535+ mount /cdrom/squashfs.img /sq
536+ losetup /sq/ext2.img
537+ losetup /somewhere/cow
538+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
539+- it will be difficult (or needs more operations) to extract the
540+ difference between the original device and COW.
541+- DM snapshot-merge may help a lot when users try merging. in the
542+ fs-layer union, users will use rsync(1).
543+
544+
545+Several characters/aspects of aufs
546+----------------------------------------------------------------------
547+
548+Aufs has several characters or aspects.
549+1. a filesystem, callee of VFS helper
550+2. sub-VFS, caller of VFS helper for branches
551+3. a virtual filesystem which maintains persistent inode number
552+4. reader/writer of files on branches such like an application
553+
554+1. Callee of VFS Helper
555+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
556+unlink(2) from an application reaches sys_unlink() kernel function and
557+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
558+calls filesystem specific unlink operation. Actually aufs implements the
559+unlink operation but it behaves like a redirector.
560+
561+2. Caller of VFS Helper for Branches
562+aufs_unlink() passes the unlink request to the branch filesystem as if
563+it were called from VFS. So the called unlink operation of the branch
564+filesystem acts as usual. As a caller of VFS helper, aufs should handle
565+every necessary pre/post operation for the branch filesystem.
566+- acquire the lock for the parent dir on a branch
567+- lookup in a branch
568+- revalidate dentry on a branch
569+- mnt_want_write() for a branch
570+- vfs_unlink() for a branch
571+- mnt_drop_write() for a branch
572+- release the lock on a branch
573+
574+3. Persistent Inode Number
575+One of the most important issues for a filesystem is to maintain inode
576+numbers. This is particularly important to support exporting a
577+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
578+backend block device for its own. But some storage is necessary to
579+maintain inode number. It may be a large space and may not suit to keep
580+in memory. Aufs rents some space from its first writable branch
581+filesystem (by default) and creates file(s) on it. These files are
582+created by aufs internally and removed soon (currently) keeping opened.
583+Note: Because these files are removed, they are totally gone after
584+ unmounting aufs. It means the inode numbers are not persistent
585+ across unmount or reboot. I have a plan to make them really
586+ persistent which will be important for aufs on NFS server.
587+
588+4. Read/Write Files Internally (copy-on-write)
589+Because a branch can be readonly, when you write a file on it, aufs will
590+"copy-up" it to the upper writable branch internally. And then write the
591+originally requested thing to the file. Generally kernel doesn't
592+open/read/write file actively. In aufs, even a single write may cause an
593+internal "file copy". This behaviour is very similar to cp(1) command.
594+
595+Some people may think it is better to pass such work to user space
596+helper, instead of doing in kernel space. Actually I am still thinking
597+about it. But currently I have implemented it in kernel space.
598diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
599--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 600+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2012-02-13 21:54:56.963104881 +0100
53392da6
AM
601@@ -0,0 +1,226 @@
602+
603+# Copyright (C) 2005-2011 Junjiro R. Okajima
604+#
605+# This program is free software; you can redistribute it and/or modify
606+# it under the terms of the GNU General Public License as published by
607+# the Free Software Foundation; either version 2 of the License, or
608+# (at your option) any later version.
609+#
610+# This program is distributed in the hope that it will be useful,
611+# but WITHOUT ANY WARRANTY; without even the implied warranty of
612+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
613+# GNU General Public License for more details.
614+#
615+# You should have received a copy of the GNU General Public License
616+# along with this program; if not, write to the Free Software
617+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
618+
619+Basic Aufs Internal Structure
620+
621+Superblock/Inode/Dentry/File Objects
622+----------------------------------------------------------------------
623+Like an ordinary filesystem, aufs has its own
624+superblock/inode/dentry/file objects. All these objects have a
625+dynamically allocated array and store the same kind of pointers to the
626+lower filesystem, branch.
627+For example, when you build a union with one readwrite branch and one
628+readonly, mounted /au, /rw and /ro respectively.
629+- /au = /rw + /ro
630+- /ro/fileA exists but /rw/fileA does not
631+
632+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
633+pointers are stored in an aufs dentry. The array in aufs dentry will be,
634+- [0] = NULL
635+- [1] = /ro/fileA
636+
637+This style of an array is essentially same to the aufs
638+superblock/inode/dentry/file objects.
639+
640+Because aufs supports manipulating branches, ie. add/delete/change
641+dynamically, each of these objects has its own generation. When branches are
642+changed, the generation in the aufs superblock is incremented. And the
643+generation in each other object is compared with it when that object is accessed.
644+When the generation in an object is obsolete, aufs refreshes the
645+internal array.
646+
647+
648+Superblock
649+----------------------------------------------------------------------
650+Additionally aufs superblock has some data for policies to select one
651+among multiple writable branches, XIB files, pseudo-links and kobject.
652+See below in detail.
653+About the policies which support copy-down of a directory, see policy.txt
654+too.
655+
656+
657+Branch and XINO(External Inode Number Translation Table)
658+----------------------------------------------------------------------
659+Every branch has its own xino (external inode number translation table)
660+file. The xino file is created and unlinked by aufs internally. When two
661+members of a union exist on the same filesystem, they share the single
662+xino file.
663+The struct of a xino file is simple, just a sequence of aufs inode
664+numbers which is indexed by the lower inode number.
665+In the above sample, assume the inode number of /ro/fileA is i111 and
666+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
667+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
668+
669+When the inode numbers are not contiguous, the xino file will be sparse
670+which has a hole in it and doesn't consume as much disk space as it
671+might appear. If your branch filesystem consumes disk space for such
672+holes, then you should specify 'xino=' option at mounting aufs.
673+
674+Also a writable branch has three kinds of "whiteout bases". All these
675+exist when the branch is joined to aufs and the names are
676+whiteout-ed doubly, so that users will never see their names in aufs
677+hierarchy.
678+1. a regular file which will be linked to all whiteouts.
679+2. a directory to store a pseudo-link.
680+3. a directory to store an "orphan-ed" file temporarily.
681+
682+1. Whiteout Base
683+ When you remove a file on a readonly branch, aufs handles it as a
684+ logical deletion and creates a whiteout on the upper writable branch
685+ as a hardlink of this file in order not to consume inode on the
686+ writable branch.
687+2. Pseudo-link Dir
688+ See below, Pseudo-link.
689+3. Step-Parent Dir
690+ When "fileC" exists on the lower readonly branch only and it is
691+ opened and removed with its parent dir, and then user writes
692+ something into it, then aufs copies-up fileC to this
693+ directory. Because there is no other dir to store fileC. After
694+ creating a file under this dir, the file is unlinked.
695+
696+Because aufs supports manipulating branches, ie. add/delete/change
697+dynamically, a branch has its own id. When the branch order changes, aufs
698+finds the new index by searching the branch id.
699+
700+
701+Pseudo-link
702+----------------------------------------------------------------------
703+Assume "fileA" exists on the lower readonly branch only and it is
704+hardlinked to "fileB" on the branch. When you write something to fileA,
705+aufs copies-up it to the upper writable branch. Additionally aufs
706+creates a hardlink under the Pseudo-link Directory of the writable
707+branch. The inode of a pseudo-link is kept in aufs super_block as a
708+simple list. If fileB is read after unlinking fileA, aufs returns
709+filedata from the pseudo-link instead of the lower readonly
710+branch. Because the pseudo-link is based upon the inode, to keep the
711+inode number by xino (see above) is important.
712+
713+All the hardlinks under the Pseudo-link Directory of the writable branch
714+should be restored in a proper location later. Aufs provides a utility
715+to do this. The userspace helpers are executed at remounting and unmounting
716+aufs by default.
717+While this utility is running, it puts aufs into the pseudo-link
718+maintenance mode. In this mode, only the process which began the
719+maintenance mode (and its child processes) is allowed to operate in
720+aufs. Some other processes which are not related to the pseudo-link will
721+be allowed to run too, but the rest have to return an error or wait
722+until the maintenance mode ends. If a process already acquires an inode
723+mutex (in VFS), it has to return an error.
724+
725+
726+XIB(external inode number bitmap)
727+----------------------------------------------------------------------
728+In addition to the xino file per branch, aufs has an external inode number
729+bitmap in a superblock object. It is also a file such like a xino file.
730+It is a simple bitmap to mark whether the aufs inode number is in-use or
731+not.
732+To reduce the file I/O, aufs prepares a single memory page to cache xib.
733+
734+Aufs implements a feature to truncate/refresh both of xino and xib to
735+reduce the number of consumed disk blocks for these files.
736+
737+
738+Virtual or Vertical Dir, and Readdir in Userspace
739+----------------------------------------------------------------------
740+In order to support multiple layers (branches), aufs readdir operation
741+constructs a virtual dir block on memory. For readdir, aufs calls
742+vfs_readdir() internally for each dir on branches, merges their entries
743+with eliminating the whiteout-ed ones, and sets it to file (dir)
744+object. So the file object has its entry list until it is closed. The
745+entry list will be updated when the file position is zero and becomes
746+old. This decision is made in aufs automatically.
747+
748+The dynamically allocated memory block for the name of entries has a
749+unit of 512 bytes (by default) and stores the names contiguously (no
750+padding). Another block for each entry is handled by kmem_cache too.
751+While building dir blocks, aufs creates a hash list and judges whether
752+the entry is whiteouted by its upper branch or already listed.
753+The merged result is cached in the corresponding inode object and
754+maintained by a customizable life-time option.
755+
756+Some people may call it can be a security hole or invite DoS attack
757+since the opened and once readdir-ed dir (file object) holds its entry
758+list and becomes a pressure for system memory. But I'd say it is similar
759+to files under /proc or /sys. The virtual files in them also hold a
760+memory page (generally) while they are opened. When an idea to reduce
761+memory for them is introduced, it will be applied to aufs too.
762+For those who really hate this situation, I've developed readdir(3)
763+library which operates this merging in userspace. You just need to set
764+LD_PRELOAD environment variable, and aufs will not consume any memory in
765+kernel space for readdir(3).
766+
767+
768+Workqueue
769+----------------------------------------------------------------------
770+Aufs sometimes requires privilege access to a branch. For instance,
771+in copy-up/down operation. When a user process is going to make changes
772+to a file which exists in the lower readonly branch only, and the mode
773+of one of ancestor directories may not be writable by a user
774+process. Here aufs copies-up the file with its ancestors and they may
775+require privilege to set its owner/group/mode/etc.
776+This is a typical case of an application character of aufs (see
777+Introduction).
778+
779+Aufs uses workqueue synchronously for this case. It creates its own
780+workqueue. The workqueue is a kernel thread and has privilege. Aufs
781+passes the request to call mkdir or write (for example), and wait for
782+its completion. This approach solves a problem of a signal handler
783+simply.
784+If aufs didn't adopt the workqueue and changed the privilege of the
785+process, and if the mkdir/write call raises SIGXFSZ or other signal,
786+then the user process might gain a privilege or the generated core file
787+was owned by a superuser.
788+
789+Also aufs uses the system global workqueue ("events" kernel thread) too
790+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
791+whiteout base and etc. This is unrelated to a privilege.
792+Most of aufs operation tries acquiring a rw_semaphore for aufs
793+superblock at the beginning, at the same time waits for the completion
794+of all queued asynchronous tasks.
795+
796+
797+Whiteout
798+----------------------------------------------------------------------
799+The whiteout in aufs is very similar to Unionfs's. That is represented
800+by its filename. UnionMount takes an approach of a file mode, but I am
801+afraid several utilities (find(1) or something) will have to support it.
802+
803+Basically the whiteout represents "logical deletion" which stops aufs to
804+lookup further, but also it represents "dir is opaque" which also stop
805+lookup.
806+
807+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
808+In order to make several functions in a single systemcall to be
809+revertible, aufs adopts an approach to rename a directory to a temporary
810+unique whiteouted name.
811+For example, in rename(2) dir where the target dir already existed, aufs
812+renames the target dir to a temporary unique whiteouted name before the
813+actual rename on a branch and then handles other actions (make it opaque,
814+update the attributes, etc). If an error happens in these actions, aufs
815+simply renames the whiteouted name back and returns an error. If all are
816+succeeded, aufs registers a function to remove the whiteouted unique
817+temporary name completely and asynchronously to the system global
818+workqueue.
819+
820+
821+Copy-up
822+----------------------------------------------------------------------
823+It is a well-known feature or concept.
824+When a user modifies a file on a readonly branch, aufs operates "copy-up"
825+internally and makes change to the new file on the upper writable branch.
826+When the trigger systemcall does not update the timestamps of the parent
827+dir, aufs reverts it after copy-up.
828diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
829--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 830+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2012-02-13 21:54:56.963104881 +0100
53392da6
AM
831@@ -0,0 +1,106 @@
832+
833+# Copyright (C) 2005-2011 Junjiro R. Okajima
834+#
835+# This program is free software; you can redistribute it and/or modify
836+# it under the terms of the GNU General Public License as published by
837+# the Free Software Foundation; either version 2 of the License, or
838+# (at your option) any later version.
839+#
840+# This program is distributed in the hope that it will be useful,
841+# but WITHOUT ANY WARRANTY; without even the implied warranty of
842+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
843+# GNU General Public License for more details.
844+#
845+# You should have received a copy of the GNU General Public License
846+# along with this program; if not, write to the Free Software
847+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
848+
849+Lookup in a Branch
850+----------------------------------------------------------------------
851+Since aufs has a character of sub-VFS (see Introduction), it operates
852+lookup for branches as VFS does. It may be a heavy work. Generally
853+speaking struct nameidata is a bigger structure and includes many
854+information. But almost all lookup operation in aufs is the simplest
855+case, ie. lookup only an entry directly connected to its parent. Digging
856+down the directory hierarchy is unnecessary.
857+
858+VFS has a function lookup_one_len() for that use, but it is not usable
859+for a branch filesystem which requires struct nameidata. So aufs
860+implements a simple lookup wrapper function. When a branch filesystem
861+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
862+a simplest nameidata and calls lookup_hash().
863+Here aufs applies "a principle in NFSD", ie. if the filesystem supports
864+NFS-export, then it has to support NULL as a nameidata parameter for
865+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
866+aufs tests if ->s_export_op in the branch is NULL or not.
867+
868+When a branch is a remote filesystem, aufs basically trusts its
869+->d_revalidate(), also aufs forces the hardest revalidate tests for
870+them.
871+For d_revalidate, aufs implements three levels of revalidate tests. See
872+"Revalidate Dentry and UDBA" in detail.
873+
874+
875+Loopback Mount
876+----------------------------------------------------------------------
877+Basically aufs supports any type of filesystem and block device for a
878+branch (actually there are some exceptions). But it is prohibited to add
879+a loopback mounted one whose backend file exists in a filesystem which is
880+already added to aufs. The reason is to protect aufs from a recursive
881+lookup. If it was allowed, the aufs lookup operation might re-enter a
882+lookup for the loopback mounted branch in the same context, and will
883+cause a deadlock.
884+
885+
886+Revalidate Dentry and UDBA (User's Direct Branch Access)
887+----------------------------------------------------------------------
888+Generally VFS helpers re-validate a dentry as a part of lookup.
889+0. digging down the directory hierarchy.
890+1. lock the parent dir by its i_mutex.
891+2. lookup the final (child) entry.
892+3. revalidate it.
893+4. call the actual operation (create, unlink, etc.)
894+5. unlock the parent dir
895+
896+If the filesystem implements its ->d_revalidate() (step 3), then it is
897+called. Actually aufs implements it and checks the dentry on a branch is
898+still valid.
899+But it is not enough. Because aufs has to release the lock for the
900+parent dir on a branch at the end of ->lookup() (step 2) and
901+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
902+held by VFS.
903+If the file on a branch is changed directly, eg. bypassing aufs, after
904+aufs released the lock, then the subsequent operation may cause
905+some unpleasant result.
906+
907+This situation is a result of VFS architecture, ->lookup() and
908+->d_revalidate() is separated. But I never say it is wrong. It is a good
909+design from VFS's point of view. It is just not suitable for sub-VFS
910+character in aufs.
911+
912+Aufs supports such case by three level of revalidation which is
913+selectable by user.
914+1. Simple Revalidate
915+ Addition to the native flow in VFS's, confirm the child-parent
916+ relationship on the branch just after locking the parent dir on the
917+ branch in the "actual operation" (step 4). When this validation
918+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
919+ checks the validation of the dentry on branches.
920+2. Monitor Changes Internally by Inotify/Fsnotify
921+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
922+ the dentry on the branch, and returns EBUSY if it finds different
923+ dentry.
924+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
925+ during it is in cache. When the event is notified, aufs registers a
926+ function to kernel 'events' thread by schedule_work(). And the
927+ function sets some special status to the cached aufs dentry and inode
928+ private data. If they are not cached, then aufs has nothing to
929+ do. When the same file is accessed through aufs (step 0-3) later,
930+ aufs will detect the status and refresh all necessary data.
931+ In this mode, aufs has to ignore the event which is fired by aufs
932+ itself.
933+3. No Extra Validation
934+ This is the simplest test and doesn't add any additional revalidation
935+ test, and skip the revalidation in step 4. It is useful and improves
936+ aufs performance when system surely hide the aufs branches from user,
937+ by over-mounting something (or another method).
938diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
939--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 940+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
941@@ -0,0 +1,76 @@
942+
943+# Copyright (C) 2005-2011 Junjiro R. Okajima
944+#
945+# This program is free software; you can redistribute it and/or modify
946+# it under the terms of the GNU General Public License as published by
947+# the Free Software Foundation; either version 2 of the License, or
948+# (at your option) any later version.
949+#
950+# This program is distributed in the hope that it will be useful,
951+# but WITHOUT ANY WARRANTY; without even the implied warranty of
952+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
953+# GNU General Public License for more details.
954+#
955+# You should have received a copy of the GNU General Public License
956+# along with this program; if not, write to the Free Software
957+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
958+
959+Branch Manipulation
960+
961+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
962+and changing its permission/attribute, there are a lot of works to do.
963+
964+
965+Add a Branch
966+----------------------------------------------------------------------
967+o Confirm the adding dir exists outside of aufs, including loopback
968+ mount.
969+- and other various attributes...
970+o Initialize the xino file and whiteout bases if necessary.
971+ See struct.txt.
972+
973+o Check the owner/group/mode of the directory
974+ When the owner/group/mode of the adding directory differs from the
975+ existing branch, aufs issues a warning because it may impose a
976+ security risk.
977+ For example, when a upper writable branch has a world writable empty
978+ top directory, a malicious user can create any files on the writable
979+ branch directly, like copy-up and modify manually. If something like
980+ /etc/{passwd,shadow} exists on the lower readonly branch but not on the upper
981+ writable branch, and the writable branch is world-writable, then a
982+ malicious guy may create /etc/passwd on the writable branch directly
983+ and the infected file will be valid in aufs.
984+ I am afraid it can be a security issue, but nothing to do except
985+ producing a warning.
986+
987+
988+Delete a Branch
989+----------------------------------------------------------------------
990+o Confirm the deleting branch is not busy
991+ To be general, there is one merit to adopt "remount" interface to
992+ manipulate branches. It is to discard caches. At deleting a branch,
993+ aufs checks the still cached (and connected) dentries and inodes. If
994+ there are any, then they are all in-use. An inode without its
995+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
996+
997+ For the cached one, aufs checks whether the same named entry exists on
998+ other branches.
999+ If the cached one is a directory, because aufs provides a merged view
1000+ to users, as long as one dir is left on any branch aufs can show the
1001+ dir to users. In this case, the branch can be removed from aufs.
1002+ Otherwise aufs rejects deleting the branch.
1003+
1004+ If any file on the deleting branch is opened by aufs, then aufs
1005+ rejects deleting.
1006+
1007+
1008+Modify the Permission of a Branch
1009+----------------------------------------------------------------------
1010+o Re-initialize or remove the xino file and whiteout bases if necessary.
1011+ See struct.txt.
1012+
1013+o rw --> ro: Confirm the modifying branch is not busy
1014+ Aufs rejects the request if any of these conditions are true.
1015+ - a file on the branch is mmap-ed.
1016+ - a regular file on the branch is opened for write and there is no
1017+ same named entry on the upper branch.
1018diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1019--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1020+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1021@@ -0,0 +1,65 @@
1022+
1023+# Copyright (C) 2005-2011 Junjiro R. Okajima
1024+#
1025+# This program is free software; you can redistribute it and/or modify
1026+# it under the terms of the GNU General Public License as published by
1027+# the Free Software Foundation; either version 2 of the License, or
1028+# (at your option) any later version.
1029+#
1030+# This program is distributed in the hope that it will be useful,
1031+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1032+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1033+# GNU General Public License for more details.
1034+#
1035+# You should have received a copy of the GNU General Public License
1036+# along with this program; if not, write to the Free Software
1037+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1038+
1039+Policies to Select One among Multiple Writable Branches
1040+----------------------------------------------------------------------
1041+When the number of writable branch is more than one, aufs has to decide
1042+the target branch for file creation or copy-up. By default, the highest
1043+writable branch which has the parent (or ancestor) dir of the target
1044+file is chosen (top-down-parent policy).
1045+By user's request, aufs implements some other policies to select the
1046+writable branch, for file creation two policies, round-robin and
1047+most-free-space policies. For copy-up three policies, top-down-parent,
1048+bottom-up-parent and bottom-up policies.
1049+
1050+As expected, the round-robin policy selects the branch in circular. When
1051+you have two writable branches and creates 10 new files, 5 files will be
1052+created for each branch. mkdir(2) systemcall is an exception. When you
1053+create 10 new directories, all will be created on the same branch.
1054+And the most-free-space policy selects the one which has most free
1055+space among the writable branches. The amount of free space will be
1056+checked by aufs internally, and users can specify its time interval.
1057+
1058+The policies for copy-up is more simple,
1059+top-down-parent is equivalent to the same named one in create policy,
1060+bottom-up-parent selects the writable branch where the parent dir
1061+exists and the nearest upper one from the copyup-source,
1062+bottom-up selects the nearest upper writable branch from the
1063+copyup-source, regardless the existence of the parent dir.
1064+
1065+There are some rules or exceptions to apply these policies.
1066+- If there is a readonly branch above the policy-selected branch and
1067+ the parent dir is marked as opaque (a variation of whiteout), or the
1068+ target (creating) file is whiteout-ed on the upper readonly branch,
1069+ then the result of the policy is ignored and the target file will be
1070+ created on the nearest upper writable branch than the readonly branch.
1071+- If there is a writable branch above the policy-selected branch and
1072+ the parent dir is marked as opaque or the target file is whiteouted
1073+ on the branch, then the result of the policy is ignored and the target
1074+ file will be created on the highest one among the upper writable
1075+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1076+ it as usual.
1077+- link(2) and rename(2) systemcalls are exceptions in every policy.
1078+ They try selecting the branch where the source exists as possible
1079+ since copyup a large file will take long time. If it can't be,
1080+ ie. the branch where the source exists is readonly, then they will
1081+ follow the copyup policy.
1082+- There is an exception for rename(2) when the target exists.
1083+ If the rename target exists, aufs compares the index of the branches
1084+ where the source and the target exists and selects the higher
1085+ one. If the selected branch is readonly, then aufs follows the
1086+ copyup policy.
1087diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1088--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1089+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1090@@ -0,0 +1,47 @@
1091+
1092+# Copyright (C) 2005-2011 Junjiro R. Okajima
1093+#
1094+# This program is free software; you can redistribute it and/or modify
1095+# it under the terms of the GNU General Public License as published by
1096+# the Free Software Foundation; either version 2 of the License, or
1097+# (at your option) any later version.
1098+#
1099+# This program is distributed in the hope that it will be useful,
1100+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1101+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1102+# GNU General Public License for more details.
1103+#
1104+# You should have received a copy of the GNU General Public License
1105+# along with this program; if not, write to the Free Software
1106+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1107+
1108+mmap(2) -- File Memory Mapping
1109+----------------------------------------------------------------------
1110+In aufs, the file-mapped pages are handled by a branch fs directly, no
1111+interaction with aufs. It means aufs_mmap() calls the branch fs's
1112+->mmap().
1113+This approach is simple and good, but there is one problem.
1114+Under /proc, several entries show the mmap-ped files by its path (with
1115+device and inode number), and the printed path will be the path on the
1116+branch fs's instead of virtual aufs's.
1117+This is not a problem in most cases, but some utilities lsof(1) (and its
1118+user) may expect the path on aufs.
1119+
1120+To address this issue, aufs adds a new member called vm_prfile in struct
1121+vm_area_struct (and struct vm_region). The original vm_file points to
1122+the file on the branch fs in order to handle everything correctly as
1123+usual. The new vm_prfile points to a virtual file in aufs, and the
1124+show-functions in procfs refers to vm_prfile if it is set.
1125+Also we need to maintain several other places where touching vm_file
1126+such like
1127+- fork()/clone() copies vma and the reference count of vm_file is
1128+ incremented.
1129+- merging vma maintains the ref count too.
1130+
1131+This is not a good approach. It is just faking the printed path. But it
1132+leaves all behaviour around f_mapping unchanged. This is surely an
1133+advantage.
1134+Actually aufs had adopted another complicated approach which calls
1135+generic_file_mmap() and handles struct vm_operations_struct. In this
1136+approach, aufs met a hard problem and I could not solve it without
1137+switching the approach.
1138diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
1139--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1140+++ linux/Documentation/filesystems/aufs/design/07export.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1141@@ -0,0 +1,59 @@
1142+
1143+# Copyright (C) 2005-2011 Junjiro R. Okajima
1144+#
1145+# This program is free software; you can redistribute it and/or modify
1146+# it under the terms of the GNU General Public License as published by
1147+# the Free Software Foundation; either version 2 of the License, or
1148+# (at your option) any later version.
1149+#
1150+# This program is distributed in the hope that it will be useful,
1151+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1152+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1153+# GNU General Public License for more details.
1154+#
1155+# You should have received a copy of the GNU General Public License
1156+# along with this program; if not, write to the Free Software
1157+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1158+
1159+Export Aufs via NFS
1160+----------------------------------------------------------------------
1161+Here is an approach.
1162+- like xino/xib, add a new file 'xigen' which stores aufs inode
1163+ generation.
1164+- iget_locked(): initialize aufs inode generation for a new inode, and
1165+ store it in xigen file.
1166+- destroy_inode(): increment aufs inode generation and store it in xigen
1167+ file. it is necessary even if it is not unlinked, because any data of
1168+ inode may be changed by UDBA.
1169+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1170+ build file handle by
1171+ + branch id (4 bytes)
1172+ + superblock generation (4 bytes)
1173+ + inode number (4 or 8 bytes)
1174+ + parent dir inode number (4 or 8 bytes)
1175+ + inode generation (4 bytes)
1176+ + return value of exportfs_encode_fh() for the parent on a branch (4
1177+ bytes)
1178+ + file handle for a branch (by exportfs_encode_fh())
1179+- fh_to_dentry():
1180+ + find the index of a branch from its id in handle, and check it
1181+ still exists in aufs.
1182+ + 1st level: get the inode number from handle and search it in cache.
1183+ + 2nd level: if not found, get the parent inode number from handle and
1184+ search it in cache. and then open the parent dir, find the matching
1185+ inode number by vfs_readdir() and get its name, and call
1186+ lookup_one_len() for the target dentry.
1187+ + 3rd level: if the parent dir is not cached, call
1188+ exportfs_decode_fh() for a branch and get the parent on a branch,
1189+ build a pathname of it, convert it to a pathname in aufs, call
1190+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1191+ the 2nd level.
1192+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1193+ for every branch, but not itself. to get this, (currently) aufs
1194+ searches in current->nsproxy->mnt_ns list. it may not be a good
1195+ idea, but I didn't get other approach.
1196+ + test the generation of the gotten inode.
1197+- every inode operation: they may get EBUSY due to UDBA. in this case,
1198+ convert it into ESTALE for NFSD.
1199+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1200+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
1201diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
1202--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1203+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1204@@ -0,0 +1,53 @@
1205+
1206+# Copyright (C) 2005-2011 Junjiro R. Okajima
1207+#
1208+# This program is free software; you can redistribute it and/or modify
1209+# it under the terms of the GNU General Public License as published by
1210+# the Free Software Foundation; either version 2 of the License, or
1211+# (at your option) any later version.
1212+#
1213+# This program is distributed in the hope that it will be useful,
1214+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1215+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1216+# GNU General Public License for more details.
1217+#
1218+# You should have received a copy of the GNU General Public License
1219+# along with this program; if not, write to the Free Software
1220+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1221+
1222+Show Whiteout Mode (shwh)
1223+----------------------------------------------------------------------
1224+Generally aufs hides the name of whiteouts. But in some cases, to show
1225+them is very useful for users. For instance, creating a new middle layer
1226+(branch) by merging existing layers.
1227+
1228+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1229+When you have three branches,
1230+- Bottom: 'system', squashfs (underlying base system), read-only
1231+- Middle: 'mods', squashfs, read-only
1232+- Top: 'overlay', ram (tmpfs), read-write
1233+
1234+The top layer is loaded at boot time and saved at shutdown, to preserve
1235+the changes made to the system during the session.
1236+When larger changes have been made, or smaller changes have accumulated,
1237+the size of the saved top layer data grows. At this point, it would be
1238+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1239+and rewrite the 'mods' squashfs, clearing the top layer and thus
1240+restoring save and load speed.
1241+
1242+This merging is simplified by the use of another aufs mount, of just the
1243+two overlay branches using the 'shwh' option.
1244+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1245+ aufs /livesys/merge_union
1246+
1247+A merged view of these two branches is then available at
1248+/livesys/merge_union, and the new feature is that the whiteouts are
1249+visible!
1250+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1251+writing to all branches. Also the default mode for all branches is 'ro'.
1252+It is now possible to save the combined contents of the two overlay
1253+branches to a new squashfs, e.g.:
1254+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1255+
1256+This new squashfs archive can be stored on the boot device and the
1257+initramfs will use it to replace the old one at the next boot.
1258diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
1259--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1260+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1261@@ -0,0 +1,47 @@
1262+
1263+# Copyright (C) 2010-2011 Junjiro R. Okajima
1264+#
1265+# This program is free software; you can redistribute it and/or modify
1266+# it under the terms of the GNU General Public License as published by
1267+# the Free Software Foundation; either version 2 of the License, or
1268+# (at your option) any later version.
1269+#
1270+# This program is distributed in the hope that it will be useful,
1271+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1272+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1273+# GNU General Public License for more details.
1274+#
1275+# You should have received a copy of the GNU General Public License
1276+# along with this program; if not, write to the Free Software
1277+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1278+
1279+Dynamically customizable FS operations
1280+----------------------------------------------------------------------
1281+Generally FS operations (struct inode_operations, struct
1282+address_space_operations, struct file_operations, etc.) are defined as
1283+"static const", but it never means that FS have only one set of
1284+operation. Some FS have multiple sets of them. For instance, ext2 has
1285+three sets, one for XIP, for NOBH, and for normal.
1286+Since aufs overrides and redirects these operations, sometimes aufs has
1287+to change its behaviour according to the branch FS type. More importantly
1288+VFS acts differently if a function (member in the struct) is set or
1289+not. It means aufs should have several sets of operations and select one
1290+among them according to the branch FS definition.
1291+
1292+In order to solve this problem and not to affect the behaviour of VFS,
1293+aufs defines these operations dynamically. For instance, aufs defines
1294+aio_read function for struct file_operations, but it may not be set to
1295+the file_operations. When the branch FS doesn't have it, aufs doesn't
1296+set it to its file_operations while the function definition itself is
1297+still alive. So the behaviour of io_submit(2) will not change, and it
1298+will return an error when aio_read is not defined.
1299+
1300+The lifetime of these dynamically generated operation object is
1301+maintained by aufs branch object. When the branch is removed from aufs,
1302+the reference counter of the object is decremented. When it reaches
1303+zero, the dynamically generated operation object will be freed.
1304+
1305+This approach is designed to support AIO (io_submit), Direct I/O and
1306+XIP mainly.
1307+Currently this approach is applied to file_operations and
1308+vm_operations_struct for regular files only.
1309diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt linux/Documentation/filesystems/aufs/design/99plan.txt
1310--- /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1311+++ linux/Documentation/filesystems/aufs/design/99plan.txt 2012-02-13 21:54:56.966438287 +0100
53392da6
AM
1312@@ -0,0 +1,96 @@
1313+
1314+# Copyright (C) 2005-2011 Junjiro R. Okajima
1315+#
1316+# This program is free software; you can redistribute it and/or modify
1317+# it under the terms of the GNU General Public License as published by
1318+# the Free Software Foundation; either version 2 of the License, or
1319+# (at your option) any later version.
1320+#
1321+# This program is distributed in the hope that it will be useful,
1322+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1323+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1324+# GNU General Public License for more details.
1325+#
1326+# You should have received a copy of the GNU General Public License
1327+# along with this program; if not, write to the Free Software
1328+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1329+
1330+Plan
1331+
1332+Restoring some features which were implemented in aufs1.
1333+They were dropped in aufs2 in order to make source files simpler and
1334+easier to be reviewed.
1335+
1336+
1337+Test Only the Highest One for the Directory Permission (dirperm1 option)
1338+----------------------------------------------------------------------
1339+Let's try case study.
1340+- aufs has two branches, upper readwrite and lower readonly.
1341+ /au = /rw + /ro
1342+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1343+- user invoked "chmod a+rx /au/dirA"
1344+- then "dirA" becomes world readable?
1345+
1346+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1347+or it may be a natively readonly filesystem. If aufs respects the lower
1348+branch, it should not respond readdir request from other users. But user
1349+allowed it by chmod. Should aufs really reject showing the entries
1350+under /ro/dirA?
1351+
1352+To be honest, I don't have a best solution for this case. So I
1353+implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to
1354+users.
1355+When dirperm1 is specified, aufs checks only the highest one for the
1356+directory permission, and shows the entries. Otherwise, as usual, checks
1357+every dir existing on all branches and rejects the request.
1358+
1359+As a side effect, dirperm1 option improves the performance of aufs
1360+because the number of permission check is reduced.
1361+
1362+
1363+Being Another Aufs's Readonly Branch (robr)
1364+----------------------------------------------------------------------
1365+Aufs1 allows aufs to be another aufs's readonly branch.
1366+This feature was developed by a user's request. But it may not be used
1367+currently.
1368+
1369+
1370+Copy-up on Open (coo=)
1371+----------------------------------------------------------------------
1372+By default the internal copy-up is executed when it is really necessary.
1373+It is not done when a file is opened for writing, but when write(2) is
1374+done. Users who have many (over 100) branches want to know and analyse
1375+when and what file is copied-up. To insert a new upper branch which
1376+contains such files only may improve the performance of aufs.
1377+
1378+Aufs1 implemented "coo=none | leaf | all" option.
1379+
1380+
1381+Refresh the Opened File (refrof)
1382+----------------------------------------------------------------------
1383+This option is implemented in aufs1 but incomplete.
1384+
1385+When user reads from a file, he expects to get its latest filedata
1386+generally. If the file is removed and a new same named file is created,
1387+the content he gets is unchanged, ie. the unlinked filedata.
1388+
1389+Let's try case study again.
1390+- aufs has two branches.
1391+ /au = /rw + /ro
1392+- "fileA" exists under /ro, but not under /rw.
1393+- user opened "/au/fileA".
1394+- he or someone else inserts a branch (/new) between /rw and /ro.
1395+ /au = /rw + /new + /ro
1396+- the new branch has "fileA".
1397+- user reads from the opened "fileA"
1398+- which filedata should aufs return, from /ro or /new?
1399+
1400+Some people say it has to be "from /ro" and it is the semantics of Unix.
1401+The others say it should be "from /new" because the file is not removed
1402+and it is equivalent to the case of someone else modifies the file.
1403+
1404+Here again I don't have a best and final answer. I got an idea to
1405+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
1406+Opened File) is specified (by default), aufs returns the filedata from
1407+/new.
1408+Otherwise from /ro.
1409diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
1410--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1411+++ linux/Documentation/filesystems/aufs/README 2012-02-13 21:54:56.963104881 +0100
9dbd164d 1412@@ -0,0 +1,328 @@
53392da6
AM
1413+
1414+Aufs3 -- advanced multi layered unification filesystem version 3.x
1415+http://aufs.sf.net
1416+Junjiro R. Okajima
1417+
1418+
1419+0. Introduction
1420+----------------------------------------
1421+In the early days, aufs was entirely re-designed and re-implemented
1422+Unionfs Version 1.x series. After many original ideas, approaches,
1423+improvements and implementations, it becomes totally different from
1424+Unionfs while keeping the basic features.
1425+Recently, Unionfs Version 2.x series began taking some of the same
1426+approaches as aufs1.
1427+Unionfs is being developed by Professor Erez Zadok at Stony Brook
1428+University and his team.
1429+
1430+Aufs3 supports linux-3.0 and later.
1431+If you want older kernel version support, try aufs2-2.6.git or
1432+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
1433+
1434+Note: it becomes clear that "Aufs was rejected. Let's give it up."
1435+According to Christoph Hellwig, linux rejects all union-type filesystems
1436+but UnionMount.
1437+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
1438+
1439+
1440+1. Features
1441+----------------------------------------
1442+- unite several directories into a single virtual filesystem. The member
1443+ directory is called a branch.
1444+- you can specify the permission flags to the branch, which are 'readonly',
1445+ 'readwrite' and 'whiteout-able.'
1446+- by upper writable branch, internal copyup and whiteout, files/dirs on
1447+ readonly branch are modifiable logically.
1448+- dynamic branch manipulation, add, del.
1449+- etc...
1450+
1451+Also there are many enhancements in aufs1, such as:
1452+- readdir(3) in userspace.
1453+- keep inode number by external inode number table
1454+- keep the timestamps of file/dir in internal copyup operation
1455+- seekable directory, supporting NFS readdir.
1456+- whiteout is hardlinked in order to reduce the consumption of inodes
1457+ on branch
1458+- do not copyup, nor create a whiteout when it is unnecessary
1459+- revert a single systemcall when an error occurs in aufs
1460+- remount interface instead of ioctl
1461+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
1462+- loopback mounted filesystem as a branch
1463+- kernel thread for removing the dir which has plenty of whiteouts
1464+- support copyup sparse file (a file which has a 'hole' in it)
1465+- default permission flags for branches
1466+- selectable permission flags for ro branch, whether whiteout can
1467+ exist or not
1468+- export via NFS.
1469+- support <sysfs>/fs/aufs and <debugfs>/aufs.
1470+- support multiple writable branches, some policies to select one
1471+ among multiple writable branches.
1472+- a new semantics for link(2) and rename(2) to support multiple
1473+ writable branches.
1474+- no glibc changes are required.
1475+- pseudo hardlink (hardlink over branches)
1476+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
1477+ including NFS or remote filesystem branch.
1478+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
1479+- and more...
1480+
1481+Currently these features are dropped temporarily from aufs3.
1482+See design/99plan.txt for details.
1483+- test only the highest one for the directory permission (dirperm1)
1484+- copyup on open (coo=)
1485+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
1486+ (robr)
1487+- statistics of aufs thread (/sys/fs/aufs/stat)
1488+- delegation mode (dlgt)
1489+ a delegation of the internal branch access to support task I/O
1490+ accounting, which also supports Linux Security Modules (LSM) mainly
1491+ for Suse AppArmor.
1492+- intent.open/create (file open in a single lookup)
1493+
1494+Features or just an idea in the future (see also design/*.txt),
1495+- reorder the branch index without del/re-add.
1496+- permanent xino files for NFSD
1497+- an option for refreshing the opened files after add/del branches
1498+- 'move' policy for copy-up between two writable branches, after
1499+ checking free space.
1500+- light version, without branch manipulation. (unnecessary?)
1501+- copyup in userspace
1502+- inotify in userspace
1503+- readv/writev
1504+- xattr, acl
1505+
1506+
1507+2. Download
1508+----------------------------------------
1e00d052
AM
1509+There are three GIT trees for aufs3, aufs3-linux.git,
1510+aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in
1511+"aufs-util.git."
1512+While the aufs-util is always necessary, you need either of aufs3-linux
1513+or aufs3-standalone.
1514+
1515+The aufs3-linux tree includes the whole linux mainline GIT tree,
1516+git://git.kernel.org/.../torvalds/linux.git.
1517+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
1518+build aufs3 as an external kernel module.
1519+
1520+On the other hand, the aufs3-standalone tree has only aufs source files
53392da6
AM
1521+and necessary patches, and you can select CONFIG_AUFS_FS=m.
1522+
1523+You will find GIT branches whose name is in form of "aufs3.x" where "x"
1524+represents the linux kernel version, "linux-3.x". For instance,
1e00d052
AM
1525+"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use
1526+"aufs3.x-rcN" branch.
1527+
1528+o aufs3-linux tree
1529+$ git clone --reference /your/linux/git/tree \
1530+ git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-linux.git \
1531+ aufs3-linux.git
1532+- if you don't have linux GIT tree, then remove "--reference ..."
1533+$ cd aufs3-linux.git
1534+$ git checkout origin/aufs3.0
53392da6
AM
1535+
1536+o aufs3-standalone tree
1537+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \
1538+ aufs3-standalone.git
1539+$ cd aufs3-standalone.git
1540+$ git checkout origin/aufs3.0
1541+
1542+o aufs-util tree
1543+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \
1544+ aufs-util.git
1545+$ cd aufs-util.git
1546+$ git checkout origin/aufs3.0
1547+
9dbd164d
AM
1548+Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY.
1549+The minor version number, 'x' in '3.x', of aufs may not always
1550+follow the minor version number of the kernel.
1551+Because changes in the kernel that cause the use of a new
1552+minor version number do not always require changes to aufs-util.
1553+
1554+Since aufs-util has its own minor version number, you may not be
1555+able to find a GIT branch in aufs-util for your kernel's
1556+exact minor version number.
1557+In this case, you should git-checkout the branch for the
53392da6 1558+nearest lower number.
9dbd164d
AM
1559+
1560+For (an unreleased) example:
1561+If you are using "linux-3.10" and the "aufs3.10" branch
1562+does not exist in aufs-util repository, then "aufs3.9", "aufs3.8"
1563+or something numerically smaller is the branch for your kernel.
1564+
53392da6
AM
1565+Also you can view all branches by
1566+ $ git branch -a
1567+
1568+
1569+3. Configuration and Compilation
1570+----------------------------------------
1571+Make sure you have git-checkout'ed the correct branch.
1572+
1e00d052
AM
1573+For aufs3-linux tree,
1574+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
1575+- set other aufs configurations if necessary.
1576+
53392da6
AM
1577+For aufs3-standalone tree,
1578+There are several ways to build.
1579+
1580+1.
1581+- apply ./aufs3-kbuild.patch to your kernel source files.
1582+- apply ./aufs3-base.patch too.
1583+- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and
1584+ others including lsof(1)) show the file path on aufs instead of the
1585+ path on the branch fs.
1586+- apply ./aufs3-standalone.patch too, if you have a plan to set
1587+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch.
1588+- copy ./{Documentation,fs,include/linux/aufs_type.h} files to your
1589+ kernel source tree. Never copy ./include/linux/Kbuild.
1590+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
1591+ =m or =y.
1592+- and build your kernel as usual.
1593+- install the built kernel.
1594+- install the header files too by "make headers_install".
1595+- and reboot your system.
1596+
1597+2.
1598+- module only (CONFIG_AUFS_FS=m).
1599+- apply ./aufs3-base.patch to your kernel source files.
1600+- apply ./aufs3-proc_map.patch too to your kernel source files,
1601+ if you want to make /proc/PID/maps (and others including lsof(1)) show
1602+ the file path on aufs instead of the path on the branch fs.
1603+- apply ./aufs3-standalone.patch too.
1604+- build your kernel, don't forget "make headers_install", and reboot.
1605+- edit ./config.mk and set other aufs configurations if necessary.
1606+ Note: You should read ./fs/aufs/Kconfig carefully which describes
1607+ every aufs configurations.
1608+- build the module by simple "make".
1609+- you can specify ${KDIR} make variable which points to your kernel
1610+ source tree.
1611+- install the files
1612+ + run "make install" to install the aufs module, or copy the built
1613+ ./aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
1614+ + run "make headers_install" to install the aufs header file (you can
1615+ specify DESTDIR), or copy ./usr/include/linux/aufs_type.h to
1616+ /usr/include/linux or wherever you like.
1617+- no need to apply aufs3-kbuild.patch, nor copying source files to your
1618+ kernel source tree.
1619+
1620+Note: The header file aufs_type.h is necessary to build aufs-util
1621+ as well as "make headers_install" in the kernel source tree.
1622+ headers_install is subject to be forgotten, but it is essentially
1623+ necessary, not only for building aufs-util.
1624+ You may not meet problems without headers_install in some older
1625+ version though.
1626+
1627+And then,
1628+- read README in aufs-util, build and install it
9dbd164d
AM
1629+- note that your distribution may contain an obsoleted version of
1630+ aufs_type.h in /usr/include/linux or something. When you build aufs
1631+ utilities, make sure that your compiler refers to the correct aufs header
1632+ file which is built by "make headers_install."
53392da6
AM
1633+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
1634+ then run "make install_ulib" too. And refer to the aufs manual in
1635+ detail.
1636+
1637+
1638+4. Usage
1639+----------------------------------------
1640+At first, make sure aufs-util is installed, and please read the aufs
1641+manual, aufs.5 in aufs-util.git tree.
1642+$ man -l aufs.5
1643+
1644+And then,
1645+$ mkdir /tmp/rw /tmp/aufs
1646+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
1647+
1648+Here is another example. The result is equivalent.
1649+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
1650+ Or
1651+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
1652+# mount -o remount,append:${HOME} /tmp/aufs
1653+
1654+Then, you can see whole tree of your home dir through /tmp/aufs. If
1655+you modify a file under /tmp/aufs, the one on your home directory is
1656+not affected, instead the same named file will be newly created under
1657+/tmp/rw. And all of your modification to a file will be applied to
1658+the one under /tmp/rw. This is called the file based Copy on Write
1659+(COW) method.
1660+Aufs mount options are described in aufs.5.
1661+If you run chroot or something and make your aufs as a root directory,
1662+then you need to customize the shutdown script. See the aufs manual in
1663+detail.
1664+
1665+Additionally, there are some sample usages of aufs which are a
1666+diskless system with network booting, and LiveCD over NFS.
1667+See sample dir in CVS tree on SourceForge.
1668+
1669+
1670+5. Contact
1671+----------------------------------------
1672+When you have any problems or strange behaviour in aufs, please let me
1673+know with:
1674+- /proc/mounts (instead of the output of mount(8))
1675+- /sys/module/aufs/*
1676+- /sys/fs/aufs/* (if you have them)
1677+- /debug/aufs/* (if you have them)
1678+- linux kernel version
1679+ if your kernel is not plain, for example modified by distributor,
1680+ the url where i can download its source is necessary too.
1681+- aufs version which was printed at loading the module or booting the
1682+ system, instead of the date you downloaded.
1683+- configuration (define/undefine CONFIG_AUFS_xxx)
1684+- kernel configuration or /proc/config.gz (if you have it)
1685+- behaviour which you think to be incorrect
1686+- actual operation, reproducible one is better
1687+- mailto: aufs-users at lists.sourceforge.net
1688+
1689+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
1690+and Feature Requests) on SourceForge. Please join and write to
1691+aufs-users ML.
1692+
1693+
1694+6. Acknowledgements
1695+----------------------------------------
1696+Thanks to everyone who has tried and is using aufs, and to whoever
1697+has reported a bug or given any feedback.
1698+
1699+Especially donators:
1700+Tomas Matejicek(slax.org) made a donation (much more than once).
1701+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
1702+ scripts) is making "doubling" donations.
1703+ Unfortunately I cannot list all of the donators, but I really
1704+ appreciate it.
1705+ It ends Aug 2010, but the ordinary donation URL is still available.
1706+ <http://sourceforge.net/donate/index.php?group_id=167503>
1707+Dai Itasaka made a donation (2007/8).
1708+Chuck Smith made a donation (2008/4, 10 and 12).
1709+Henk Schoneveld made a donation (2008/9).
1710+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
1711+Francois Dupoux made a donation (2008/11).
1712+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
1713+ aufs2 GIT tree (2009/2).
1714+William Grant made a donation (2009/3).
1715+Patrick Lane made a donation (2009/4).
1716+The Mail Archive (mail-archive.com) made donations (2009/5).
1717+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
1718+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
1719+Pavel Pronskiy made a donation (2011/2).
1720+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
1721+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
f6c5ef8b 1722+Max Lekomcev (DOM-TV project) made a donation (2011/7 and 12).
1e00d052 1723+Sam Liddicott made a donation (2011/9).
53392da6
AM
1724+
1725+Thank you very much.
1726+Donations are always, including future donations, very important and
1727+helpful for me to keep on developing aufs.
1728+
1729+
1730+7.
1731+----------------------------------------
1732+If you are an experienced user, no explanation is needed. Aufs is
1733+just a linux filesystem.
1734+
1735+
1736+Enjoy!
1737+
1738+# Local variables: ;
1739+# mode: text;
1740+# End: ;
7f207e10
AM
1741diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
1742--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 1743+++ linux/fs/aufs/aufs.h 2012-02-13 21:54:56.966438287 +0100
2cbb1c4b 1744@@ -0,0 +1,60 @@
7f207e10 1745+/*
f6c5ef8b 1746+ * Copyright (C) 2005-2012 Junjiro R. Okajima
7f207e10
AM
1747+ *
1748+ * This program, aufs is free software; you can redistribute it and/or modify
1749+ * it under the terms of the GNU General Public License as published by
1750+ * the Free Software Foundation; either version 2 of the License, or
1751+ * (at your option) any later version.
1752+ *
1753+ * This program is distributed in the hope that it will be useful,
1754+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1755+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1756+ * GNU General Public License for more details.
1757+ *
1758+ * You should have received a copy of the GNU General Public License
1759+ * along with this program; if not, write to the Free Software
1760+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1761+ */
1762+
1763+/*
1764+ * all header files
1765+ */
1766+
1767+#ifndef __AUFS_H__
1768+#define __AUFS_H__
1769+
1770+#ifdef __KERNEL__
1771+
1772+#define AuStub(type, name, body, ...) \
1773+ static inline type name(__VA_ARGS__) { body; } /* emits a static inline 'name' whose entire body is 'body' */
1774+
1775+#define AuStubVoid(name, ...) \
1776+ AuStub(void, name, , __VA_ARGS__) /* void stub with an empty body */
1777+#define AuStubInt0(name, ...) \
1778+ AuStub(int, name, return 0, __VA_ARGS__) /* int stub that only returns 0 */
1780+#include "debug.h"
1781+
1782+#include "branch.h"
1783+#include "cpup.h"
1784+#include "dcsub.h"
1785+#include "dbgaufs.h"
1786+#include "dentry.h"
1787+#include "dir.h"
1788+#include "dynop.h"
1789+#include "file.h"
1790+#include "fstype.h"
1791+#include "inode.h"
1792+#include "loop.h"
1793+#include "module.h"
7f207e10
AM
1794+#include "opts.h"
1795+#include "rwsem.h"
1796+#include "spl.h"
1797+#include "super.h"
1798+#include "sysaufs.h"
1799+#include "vfsub.h"
1800+#include "whout.h"
1801+#include "wkq.h"
1802+
1803+#endif /* __KERNEL__ */
1804+#endif /* __AUFS_H__ */
1805diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
1806--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
1807+++ linux/fs/aufs/branch.c 2012-02-13 21:54:56.966438287 +0100
1808@@ -0,0 +1,1169 @@
7f207e10 1809+/*
f6c5ef8b 1810+ * Copyright (C) 2005-2012 Junjiro R. Okajima
7f207e10
AM
1811+ *
1812+ * This program, aufs is free software; you can redistribute it and/or modify
1813+ * it under the terms of the GNU General Public License as published by
1814+ * the Free Software Foundation; either version 2 of the License, or
1815+ * (at your option) any later version.
1816+ *
1817+ * This program is distributed in the hope that it will be useful,
1818+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1819+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1820+ * GNU General Public License for more details.
1821+ *
1822+ * You should have received a copy of the GNU General Public License
1823+ * along with this program; if not, write to the Free Software
1824+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1825+ */
1826+
1827+/*
1828+ * branch management
1829+ */
1830+
027c5e7a 1831+#include <linux/compat.h>
7f207e10
AM
1832+#include <linux/statfs.h>
1833+#include "aufs.h"
1834+
1835+/*
1836+ * free a single branch @br: finish its hnotify part, drop the xino file, the whiteout dentries, the br_dykey entries and the mount, then free the wbr and @br themselves
1facf9fc 1837+ */
1838+static void au_br_do_free(struct au_branch *br)
1839+{
1840+ int i;
1841+ struct au_wbr *wbr;
4a4d8108 1842+ struct au_dykey **key;
1facf9fc 1843+
027c5e7a
AM
1844+ au_hnotify_fin_br(br);
1845+
1facf9fc 1846+ if (br->br_xino.xi_file)
1847+ fput(br->br_xino.xi_file);
1848+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
1849+
1850+ AuDebugOn(atomic_read(&br->br_count)); /* br_count is expected to be zero here */
1851+
1852+ wbr = br->br_wbr; /* may be NULL; kfree(NULL) below is harmless */
1853+ if (wbr) {
1854+ for (i = 0; i < AuBrWh_Last; i++)
1855+ dput(wbr->wbr_wh[i]);
1856+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 1857+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 1858+ }
1859+
4a4d8108
AM
1860+ key = br->br_dykey;
1861+ for (i = 0; i < AuBrDynOp; i++, key++)
1862+ if (*key)
1863+ au_dy_put(*key);
1864+ else
1865+ break; /* stop at the first empty slot */
1866+
1facf9fc 1867+ mntput(br->br_mnt);
1facf9fc 1868+ kfree(wbr);
1869+ kfree(br);
1870+}
1871+
1872+/*
1873+ * frees all branches, i.e. entries 0..si_bend of @sbinfo->si_branch
1874+ */
1875+void au_br_free(struct au_sbinfo *sbinfo)
1876+{
1877+ aufs_bindex_t bmax;
1878+ struct au_branch **br;
1879+
dece6358
AM
1880+ AuRwMustWriteLock(&sbinfo->si_rwsem); /* presumably asserts the write lock is held -- confirm */
1881+
1facf9fc 1882+ bmax = sbinfo->si_bend + 1; /* number of entries to free */
1883+ br = sbinfo->si_branch;
1884+ while (bmax--)
1885+ au_br_do_free(*br++);
1886+}
1887+
1888+/*
1889+ * find the index of a branch which is specified by @br_id; returns -1 when no branch of @sb has that id.
1890+ */
1891+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1892+{
1893+ aufs_bindex_t bindex, bend;
1894+
1895+ bend = au_sbend(sb);
1896+ for (bindex = 0; bindex <= bend; bindex++) /* bend is inclusive */
1897+ if (au_sbr_id(sb, bindex) == br_id)
1898+ return bindex;
1899+ return -1;
1900+}
1901+
1902+/* ---------------------------------------------------------------------- */
1903+
1904+/*
1905+ * add a branch
1906+ */
1907+
b752ccd1
AM
1908+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
1909+ struct dentry *h_root)
1facf9fc 1910+{
b752ccd1
AM
1911+ if (unlikely(h_adding == h_root
1912+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 1913+ return 1; /* the very same dentry, or au_test_loopback_overlap() reported a hit */
b752ccd1
AM
1914+ if (h_adding->d_sb != h_root->d_sb)
1915+ return 0; /* dentries on different superblocks are not compared further */
1916+ return au_test_subdir(h_adding, h_root)
1917+ || au_test_subdir(h_root, h_adding); /* checked in both directions */
1facf9fc 1918+}
1919+
1920+/*
1921+ * returns a newly allocated branch, or ERR_PTR() on failure (never NULL).
1922+ * @new_nbranch: number of branches after adding; @perm: branch permission.
1923+ */
1924+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1925+ int perm)
1926+{
1927+ struct au_branch *add_branch;
1928+ struct dentry *root;
4a4d8108 1929+ int err;
1facf9fc 1930+
4a4d8108 1931+ err = -ENOMEM;
1facf9fc 1932+ root = sb->s_root;
1933+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1934+ if (unlikely(!add_branch))
1935+ goto out;
1936+
027c5e7a
AM
1937+ err = au_hnotify_init_br(add_branch, perm);
1938+ if (unlikely(err))
1939+ goto out_br;
1940+ err = -ENOMEM; /* err is 0 here; without this a failed kmalloc below would return ERR_PTR(0), i.e. NULL */
1facf9fc 1941+ add_branch->br_wbr = NULL;
1942+ if (au_br_writable(perm)) {
1943+ /* may be freed separately at changing the branch permission */
1944+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1945+ GFP_NOFS);
1946+ if (unlikely(!add_branch->br_wbr))
027c5e7a 1947+ goto out_hnotify;
1facf9fc 1948+ }
1949+
4a4d8108
AM
1950+ err = au_sbr_realloc(au_sbi(sb), new_nbranch); /* grow the per-sb, per-dentry and per-inode arrays in turn */
1951+ if (!err)
1952+ err = au_di_realloc(au_di(root), new_nbranch);
1953+ if (!err)
1954+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch);
1955+ if (!err)
1956+ return add_branch; /* success */
1facf9fc 1957+
1facf9fc 1958+ kfree(add_branch->br_wbr);
4a4d8108 1959+
027c5e7a
AM
1960+out_hnotify:
1961+ au_hnotify_fin_br(add_branch);
4f0767ce 1962+out_br:
1facf9fc 1963+ kfree(add_branch);
4f0767ce 1964+out:
4a4d8108 1965+ return ERR_PTR(err);
1facf9fc 1966+}
1967+
1968+/*
1969+ * test if the branch permission is legal or not: returns 0 if so, or -EINVAL (after logging @path) when @brperm is writable but @inode is readonly.
1970+ */
1971+static int test_br(struct inode *inode, int brperm, char *path)
1972+{
1973+ int err;
1974+
4a4d8108
AM
1975+ err = (au_br_writable(brperm) && IS_RDONLY(inode)); /* non-zero only for the illegal combination */
1976+ if (!err)
1977+ goto out;
1facf9fc 1978+
4a4d8108
AM
1979+ err = -EINVAL;
1980+ pr_err("write permission for readonly mount or inode, %s\n", path);
1981+
4f0767ce 1982+out:
1facf9fc 1983+ return err;
1984+}
1985+
1986+/*
1987+ * checks whether the branch described by @add may be added to @sb; returns:
1988+ * 0: success, the caller will add it
1989+ * plus: success, it is already unified (only when @remount is set), the caller should ignore it
1990+ * minus: error
1991+ */
1992+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1993+{
1994+ int err;
1995+ aufs_bindex_t bend, bindex;
1996+ struct dentry *root;
1997+ struct inode *inode, *h_inode;
1998+
1999+ root = sb->s_root;
2000+ bend = au_sbend(sb);
2001+ if (unlikely(bend >= 0
2002+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
2003+ err = 1; /* duplicate: tolerated on remount, an error otherwise */
2004+ if (!remount) {
2005+ err = -EINVAL;
4a4d8108 2006+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 2007+ }
2008+ goto out;
2009+ }
2010+
2011+ err = -ENOSPC; /* -E2BIG; */
2012+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
2013+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 2014+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 2015+ goto out;
2016+ }
2017+
2018+ err = -EDOM;
2019+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { /* valid indices are 0..bend+1 */
4a4d8108 2020+ pr_err("bad index %d\n", add->bindex);
1facf9fc 2021+ goto out;
2022+ }
2023+
2024+ inode = add->path.dentry->d_inode;
2025+ err = -ENOENT;
2026+ if (unlikely(!inode->i_nlink)) {
4a4d8108 2027+ pr_err("no existence %s\n", add->pathname);
1facf9fc 2028+ goto out;
2029+ }
2030+
2031+ err = -EINVAL; /* shared by the next two checks */
2032+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 2033+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 2034+ goto out;
2035+ }
2036+
2037+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2038+ pr_err("unsupported filesystem, %s (%s)\n",
2039+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2040+ goto out;
2041+ }
2042+
2043+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
2044+ if (unlikely(err))
2045+ goto out;
2046+
2047+ if (bend < 0)
2048+ return 0; /* success: the overlap and uid/gid/perm checks below are skipped */
2049+
2050+ err = -EINVAL;
2051+ for (bindex = 0; bindex <= bend; bindex++)
2052+ if (unlikely(test_overlap(sb, add->path.dentry,
2053+ au_h_dptr(root, bindex)))) {
4a4d8108 2054+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 2055+ goto out;
2056+ }
2057+
2058+ err = 0; /* from here on a mismatch is only warned about */
2059+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
2060+ h_inode = au_h_dptr(root, 0)->d_inode; /* compare against the branch at index 0 */
2061+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
2062+ || h_inode->i_uid != inode->i_uid
2063+ || h_inode->i_gid != inode->i_gid)
4a4d8108
AM
2064+ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2065+ add->pathname,
2066+ inode->i_uid, inode->i_gid,
2067+ (inode->i_mode & S_IALLUGO),
2068+ h_inode->i_uid, h_inode->i_gid,
2069+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 2070+ }
2071+
4f0767ce 2072+out:
1facf9fc 2073+ return err;
2074+}
2075+
2076+/*
2077+ * initialize or clean the whiteouts for an adding branch: runs au_wh_init() on @h_root with @new_perm temporarily set in @br, and frees br->br_wbr on success when @new_perm is not writable
2078+ */
2079+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
2080+ int new_perm, struct dentry *h_root)
2081+{
2082+ int err, old_perm;
2083+ aufs_bindex_t bindex;
2084+ struct mutex *h_mtx;
2085+ struct au_wbr *wbr;
2086+ struct au_hinode *hdir;
2087+
2088+ wbr = br->br_wbr;
2089+ old_perm = br->br_perm;
2090+ br->br_perm = new_perm; /* only for the au_wh_init() call; restored below */
2091+ hdir = NULL;
2092+ h_mtx = NULL;
2093+ bindex = au_br_index(sb, br->br_id);
2094+ if (0 <= bindex) { /* @br is already listed in @sb: lock through the root's hinode */
2095+ hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 2096+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 2097+ } else { /* not listed in @sb: lock the i_mutex of @h_root's inode directly */
2098+ h_mtx = &h_root->d_inode->i_mutex;
2099+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
2100+ }
2101+ if (!wbr)
2102+ err = au_wh_init(h_root, br, sb);
2103+ else {
2104+ wbr_wh_write_lock(wbr);
2105+ err = au_wh_init(h_root, br, sb);
2106+ wbr_wh_write_unlock(wbr);
2107+ }
2108+ if (hdir) /* release whichever lock was taken above */
4a4d8108 2109+ au_hn_imtx_unlock(hdir);
1facf9fc 2110+ else
2111+ mutex_unlock(h_mtx);
2112+ br->br_perm = old_perm;
2113+
2114+ if (!err && wbr && !au_br_writable(new_perm)) { /* the wbr is not kept for a non-writable perm */
2115+ kfree(wbr);
2116+ br->br_wbr = NULL;
2117+ }
2118+
2119+ return err;
2120+}
2121+
2122+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
2123+ int perm, struct path *path)
2124+{ /* resets the fields of br->br_wbr, then initializes the whiteouts via au_br_init_wh(); br->br_wbr is dereferenced unconditionally, so it is assumed to be allocated already */
2125+ int err;
4a4d8108 2126+ struct kstatfs kst;
1facf9fc 2127+ struct au_wbr *wbr;
4a4d8108 2128+ struct dentry *h_dentry;
1facf9fc 2129+
2130+ wbr = br->br_wbr;
dece6358 2131+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 2132+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
2133+ atomic_set(&wbr->wbr_wh_running, 0);
2134+ wbr->wbr_bytes = 0;
2135+
4a4d8108
AM
2136+ /*
2137+ * a limit for rmdir/rename a dir: the fs at @path must report f_namelen >= NAME_MAX
2138+ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
2139+ */
7f207e10 2140+ err = vfs_statfs(path, &kst);
4a4d8108
AM
2141+ if (unlikely(err))
2142+ goto out;
2143+ err = -EINVAL; /* kept when f_namelen is too short */
7f207e10 2144+ h_dentry = path->dentry;
4a4d8108
AM
2145+ if (kst.f_namelen >= NAME_MAX)
2146+ err = au_br_init_wh(sb, br, perm, h_dentry);
2147+ else
2148+ pr_err("%.*s(%s), unsupported namelen %ld\n",
2149+ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
2150+ kst.f_namelen);
1facf9fc 2151+
4f0767ce 2152+out:
1facf9fc 2153+ return err;
2154+}
2155+
2156+/* initialize a new branch */
2157+static int au_br_init(struct au_branch *br, struct super_block *sb,
2158+ struct au_opt_add *add)
2159+{
2160+ int err;
2161+
2162+ err = 0;
2163+ memset(&br->br_xino, 0, sizeof(br->br_xino));
2164+ mutex_init(&br->br_xino.xi_nondir_mtx);
2165+ br->br_perm = add->perm;
2166+ br->br_mnt = add->path.mnt; /* set first, mntget() later */
4a4d8108
AM
2167+ spin_lock_init(&br->br_dykey_lock);
2168+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 2169+ atomic_set(&br->br_count, 0);
2170+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
2171+ atomic_set(&br->br_xino_running, 0);
2172+ br->br_id = au_new_br_id(sb);
7f207e10 2173+ AuDebugOn(br->br_id < 0);
1facf9fc 2174+
2175+ if (au_br_writable(add->perm)) {
2176+ err = au_wbr_init(br, sb, add->perm, &add->path);
2177+ if (unlikely(err))
b752ccd1 2178+ goto out_err;
1facf9fc 2179+ }
2180+
2181+ if (au_opt_test(au_mntflags(sb), XINO)) {
2182+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
2183+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2184+ if (unlikely(err)) {
2185+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 2186+ goto out_err;
1facf9fc 2187+ }
2188+ }
2189+
2190+ sysaufs_br_init(br);
2191+ mntget(add->path.mnt);
b752ccd1 2192+ goto out; /* success */
1facf9fc 2193+
4f0767ce 2194+out_err:
b752ccd1 2195+ br->br_mnt = NULL;
4f0767ce 2196+out:
1facf9fc 2197+ return err;
2198+}
2199+
2200+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
2201+ struct au_branch *br, aufs_bindex_t bend,
2202+ aufs_bindex_t amount)
2203+{
2204+ struct au_branch **brp;
2205+
dece6358
AM
2206+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2207+
1facf9fc 2208+ brp = sbinfo->si_branch + bindex;
2209+ memmove(brp + 1, brp, sizeof(*brp) * amount);
2210+ *brp = br;
2211+ sbinfo->si_bend++;
2212+ if (unlikely(bend < 0))
2213+ sbinfo->si_bend = 0;
2214+}
2215+
2216+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
2217+ aufs_bindex_t bend, aufs_bindex_t amount)
2218+{
2219+ struct au_hdentry *hdp;
2220+
1308ab2a 2221+ AuRwMustWriteLock(&dinfo->di_rwsem);
2222+
1facf9fc 2223+ hdp = dinfo->di_hdentry + bindex;
2224+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
2225+ au_h_dentry_init(hdp);
2226+ dinfo->di_bend++;
2227+ if (unlikely(bend < 0))
2228+ dinfo->di_bstart = 0;
2229+}
2230+
2231+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
2232+ aufs_bindex_t bend, aufs_bindex_t amount)
2233+{
2234+ struct au_hinode *hip;
2235+
1308ab2a 2236+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2237+
1facf9fc 2238+ hip = iinfo->ii_hinode + bindex;
2239+ memmove(hip + 1, hip, sizeof(*hip) * amount);
2240+ hip->hi_inode = NULL;
4a4d8108 2241+ au_hn_init(hip);
1facf9fc 2242+ iinfo->ii_bend++;
2243+ if (unlikely(bend < 0))
2244+ iinfo->ii_bstart = 0;
2245+}
2246+
2247+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2248+ struct au_branch *br, aufs_bindex_t bindex)
2249+{
2250+ struct dentry *root;
2251+ struct inode *root_inode;
2252+ aufs_bindex_t bend, amount;
2253+
2254+ root = sb->s_root;
2255+ root_inode = root->d_inode;
1facf9fc 2256+ bend = au_sbend(sb);
2257+ amount = bend + 1 - bindex;
53392da6 2258+ au_sbilist_lock();
1facf9fc 2259+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2260+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2261+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2262+ au_set_h_dptr(root, bindex, dget(h_dentry));
2263+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2264+ /*flags*/0);
53392da6 2265+ au_sbilist_unlock();
1facf9fc 2266+}
2267+
2268+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2269+{
2270+ int err;
1facf9fc 2271+ aufs_bindex_t bend, add_bindex;
2272+ struct dentry *root, *h_dentry;
2273+ struct inode *root_inode;
2274+ struct au_branch *add_branch;
2275+
2276+ root = sb->s_root;
2277+ root_inode = root->d_inode;
2278+ IMustLock(root_inode);
2279+ err = test_add(sb, add, remount);
2280+ if (unlikely(err < 0))
2281+ goto out;
2282+ if (err) {
2283+ err = 0;
2284+ goto out; /* success */
2285+ }
2286+
2287+ bend = au_sbend(sb);
2288+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
2289+ err = PTR_ERR(add_branch);
2290+ if (IS_ERR(add_branch))
2291+ goto out;
2292+
2293+ err = au_br_init(add_branch, sb, add);
2294+ if (unlikely(err)) {
2295+ au_br_do_free(add_branch);
2296+ goto out;
2297+ }
2298+
2299+ add_bindex = add->bindex;
2300+ h_dentry = add->path.dentry;
2301+ if (!remount)
2302+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2303+ else {
2304+ sysaufs_brs_del(sb, add_bindex);
2305+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2306+ sysaufs_brs_add(sb, add_bindex);
2307+ }
2308+
1308ab2a 2309+ if (!add_bindex) {
1facf9fc 2310+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2311+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2312+ } else
1facf9fc 2313+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 2314+
2315+ /*
4a4d8108 2316+ * this test/set prevents aufs from handling unnecessary notify events
027c5e7a 2317+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 2318+ * once detached from aufs.
2319+ */
2320+ if (au_xino_brid(sb) < 0
2321+ && au_br_writable(add_branch->br_perm)
2322+ && !au_test_fs_bad_xino(h_dentry->d_sb)
2323+ && add_branch->br_xino.xi_file
2324+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2325+ au_xino_brid_set(sb, add_branch->br_id);
2326+
4f0767ce 2327+out:
1facf9fc 2328+ return err;
2329+}
2330+
2331+/* ---------------------------------------------------------------------- */
2332+
2333+/*
2334+ * delete a branch
2335+ */
2336+
2337+/* to show the line number, do not make it an inline function */
4a4d8108 2338+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 2339+ if (do_info) \
4a4d8108 2340+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 2341+} while (0)
2342+
027c5e7a
AM
2343+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
2344+ aufs_bindex_t bend)
2345+{
2346+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
2347+}
2348+
2349+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
2350+ aufs_bindex_t bend)
2351+{
2352+ return au_test_ibusy(dentry->d_inode, bstart, bend);
2353+}
2354+
1facf9fc 2355+/*
2356+ * test if the branch is deletable or not.
2357+ */
2358+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 2359+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2360+{
2361+ int err, i, j, ndentry;
2362+ aufs_bindex_t bstart, bend;
1facf9fc 2363+ struct au_dcsub_pages dpages;
2364+ struct au_dpage *dpage;
2365+ struct dentry *d;
1facf9fc 2366+
2367+ err = au_dpages_init(&dpages, GFP_NOFS);
2368+ if (unlikely(err))
2369+ goto out;
2370+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
2371+ if (unlikely(err))
2372+ goto out_dpages;
2373+
1facf9fc 2374+ for (i = 0; !err && i < dpages.ndpage; i++) {
2375+ dpage = dpages.dpages + i;
2376+ ndentry = dpage->ndentry;
2377+ for (j = 0; !err && j < ndentry; j++) {
2378+ d = dpage->dentries[j];
027c5e7a
AM
2379+ AuDebugOn(!d->d_count);
2380+ if (!au_digen_test(d, sigen)) {
1facf9fc 2381+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
2382+ if (unlikely(au_dbrange_test(d))) {
2383+ di_read_unlock(d, AuLock_IR);
2384+ continue;
2385+ }
2386+ } else {
1facf9fc 2387+ di_write_lock_child(d);
027c5e7a
AM
2388+ if (unlikely(au_dbrange_test(d))) {
2389+ di_write_unlock(d);
2390+ continue;
2391+ }
1facf9fc 2392+ err = au_reval_dpath(d, sigen);
2393+ if (!err)
2394+ di_downgrade_lock(d, AuLock_IR);
2395+ else {
2396+ di_write_unlock(d);
2397+ break;
2398+ }
2399+ }
2400+
027c5e7a 2401+ /* AuDbgDentry(d); */
1facf9fc 2402+ bstart = au_dbstart(d);
2403+ bend = au_dbend(d);
2404+ if (bstart <= bindex
2405+ && bindex <= bend
2406+ && au_h_dptr(d, bindex)
027c5e7a 2407+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 2408+ err = -EBUSY;
2409+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
027c5e7a 2410+ AuDbgDentry(d);
1facf9fc 2411+ }
2412+ di_read_unlock(d, AuLock_IR);
2413+ }
2414+ }
2415+
4f0767ce 2416+out_dpages:
1facf9fc 2417+ au_dpages_free(&dpages);
4f0767ce 2418+out:
1facf9fc 2419+ return err;
2420+}
2421+
2422+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 2423+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2424+{
2425+ int err;
7f207e10
AM
2426+ unsigned long long max, ull;
2427+ struct inode *i, **array;
1facf9fc 2428+ aufs_bindex_t bstart, bend;
1facf9fc 2429+
7f207e10
AM
2430+ array = au_iarray_alloc(sb, &max);
2431+ err = PTR_ERR(array);
2432+ if (IS_ERR(array))
2433+ goto out;
2434+
1facf9fc 2435+ err = 0;
7f207e10
AM
2436+ AuDbg("b%d\n", bindex);
2437+ for (ull = 0; !err && ull < max; ull++) {
2438+ i = array[ull];
2439+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 2440+ continue;
2441+
7f207e10 2442+ /* AuDbgInode(i); */
1facf9fc 2443+ if (au_iigen(i) == sigen)
2444+ ii_read_lock_child(i);
2445+ else {
2446+ ii_write_lock_child(i);
027c5e7a
AM
2447+ err = au_refresh_hinode_self(i);
2448+ au_iigen_dec(i);
1facf9fc 2449+ if (!err)
2450+ ii_downgrade_lock(i);
2451+ else {
2452+ ii_write_unlock(i);
2453+ break;
2454+ }
2455+ }
2456+
2457+ bstart = au_ibstart(i);
2458+ bend = au_ibend(i);
2459+ if (bstart <= bindex
2460+ && bindex <= bend
2461+ && au_h_iptr(i, bindex)
027c5e7a 2462+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 2463+ err = -EBUSY;
2464+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 2465+ AuDbgInode(i);
1facf9fc 2466+ }
2467+ ii_read_unlock(i);
2468+ }
7f207e10 2469+ au_iarray_free(array, max);
1facf9fc 2470+
7f207e10 2471+out:
1facf9fc 2472+ return err;
2473+}
2474+
b752ccd1
AM
2475+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
2476+ const unsigned int verbose)
1facf9fc 2477+{
2478+ int err;
2479+ unsigned int sigen;
2480+
2481+ sigen = au_sigen(root->d_sb);
2482+ DiMustNoWaiters(root);
2483+ IiMustNoWaiters(root->d_inode);
2484+ di_write_unlock(root);
b752ccd1 2485+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 2486+ if (!err)
b752ccd1 2487+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 2488+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2489+
2490+ return err;
2491+}
2492+
2493+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2494+ const aufs_bindex_t bindex,
2495+ const aufs_bindex_t bend)
2496+{
2497+ struct au_branch **brp, **p;
2498+
dece6358
AM
2499+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2500+
1facf9fc 2501+ brp = sbinfo->si_branch + bindex;
2502+ if (bindex < bend)
2503+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2504+ sbinfo->si_branch[0 + bend] = NULL;
2505+ sbinfo->si_bend--;
2506+
53392da6 2507+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2508+ if (p)
2509+ sbinfo->si_branch = p;
4a4d8108 2510+ /* harmless error */
1facf9fc 2511+}
2512+
2513+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2514+ const aufs_bindex_t bend)
2515+{
2516+ struct au_hdentry *hdp, *p;
2517+
1308ab2a 2518+ AuRwMustWriteLock(&dinfo->di_rwsem);
2519+
4a4d8108 2520+ hdp = dinfo->di_hdentry;
1facf9fc 2521+ if (bindex < bend)
4a4d8108
AM
2522+ memmove(hdp + bindex, hdp + bindex + 1,
2523+ sizeof(*hdp) * (bend - bindex));
2524+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 2525+ dinfo->di_bend--;
2526+
53392da6 2527+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2528+ if (p)
2529+ dinfo->di_hdentry = p;
4a4d8108 2530+ /* harmless error */
1facf9fc 2531+}
2532+
2533+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2534+ const aufs_bindex_t bend)
2535+{
2536+ struct au_hinode *hip, *p;
2537+
1308ab2a 2538+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2539+
1facf9fc 2540+ hip = iinfo->ii_hinode + bindex;
2541+ if (bindex < bend)
2542+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2543+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 2544+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 2545+ iinfo->ii_bend--;
2546+
53392da6 2547+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2548+ if (p)
2549+ iinfo->ii_hinode = p;
4a4d8108 2550+ /* harmless error */
1facf9fc 2551+}
2552+
2553+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2554+ struct au_branch *br)
2555+{
2556+ aufs_bindex_t bend;
2557+ struct au_sbinfo *sbinfo;
53392da6
AM
2558+ struct dentry *root, *h_root;
2559+ struct inode *inode, *h_inode;
2560+ struct au_hinode *hinode;
1facf9fc 2561+
dece6358
AM
2562+ SiMustWriteLock(sb);
2563+
1facf9fc 2564+ root = sb->s_root;
2565+ inode = root->d_inode;
1facf9fc 2566+ sbinfo = au_sbi(sb);
2567+ bend = sbinfo->si_bend;
2568+
53392da6
AM
2569+ h_root = au_h_dptr(root, bindex);
2570+ hinode = au_hi(inode, bindex);
2571+ h_inode = au_igrab(hinode->hi_inode);
2572+ au_hiput(hinode);
1facf9fc 2573+
53392da6 2574+ au_sbilist_lock();
1facf9fc 2575+ au_br_do_del_brp(sbinfo, bindex, bend);
2576+ au_br_do_del_hdp(au_di(root), bindex, bend);
2577+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
2578+ au_sbilist_unlock();
2579+
2580+ dput(h_root);
2581+ iput(h_inode);
2582+ au_br_do_free(br);
1facf9fc 2583+}
2584+
2585+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2586+{
2587+ int err, rerr, i;
2588+ unsigned int mnt_flags;
2589+ aufs_bindex_t bindex, bend, br_id;
2590+ unsigned char do_wh, verbose;
2591+ struct au_branch *br;
2592+ struct au_wbr *wbr;
2593+
2594+ err = 0;
2595+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2596+ if (bindex < 0) {
2597+ if (remount)
2598+ goto out; /* success */
2599+ err = -ENOENT;
4a4d8108 2600+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 2601+ goto out;
2602+ }
2603+ AuDbg("bindex b%d\n", bindex);
2604+
2605+ err = -EBUSY;
2606+ mnt_flags = au_mntflags(sb);
2607+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2608+ bend = au_sbend(sb);
2609+ if (unlikely(!bend)) {
2610+ AuVerbose(verbose, "no more branches left\n");
2611+ goto out;
2612+ }
2613+ br = au_sbr(sb, bindex);
2614+ i = atomic_read(&br->br_count);
2615+ if (unlikely(i)) {
2616+ AuVerbose(verbose, "%d file(s) opened\n", i);
e49829fe 2617+ goto out;
1facf9fc 2618+ }
2619+
2620+ wbr = br->br_wbr;
2621+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2622+ if (do_wh) {
1308ab2a 2623+ /* instead of WbrWhMustWriteLock(wbr) */
2624+ SiMustWriteLock(sb);
1facf9fc 2625+ for (i = 0; i < AuBrWh_Last; i++) {
2626+ dput(wbr->wbr_wh[i]);
2627+ wbr->wbr_wh[i] = NULL;
2628+ }
2629+ }
2630+
b752ccd1 2631+ err = test_children_busy(sb->s_root, bindex, verbose);
1facf9fc 2632+ if (unlikely(err)) {
2633+ if (do_wh)
2634+ goto out_wh;
2635+ goto out;
2636+ }
2637+
2638+ err = 0;
2639+ br_id = br->br_id;
2640+ if (!remount)
2641+ au_br_do_del(sb, bindex, br);
2642+ else {
2643+ sysaufs_brs_del(sb, bindex);
2644+ au_br_do_del(sb, bindex, br);
2645+ sysaufs_brs_add(sb, bindex);
2646+ }
2647+
1308ab2a 2648+ if (!bindex) {
1facf9fc 2649+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
1308ab2a 2650+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2651+ } else
1facf9fc 2652+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2653+ if (au_opt_test(mnt_flags, PLINK))
2654+ au_plink_half_refresh(sb, br_id);
2655+
b752ccd1 2656+ if (au_xino_brid(sb) == br_id)
1facf9fc 2657+ au_xino_brid_set(sb, -1);
2658+ goto out; /* success */
2659+
4f0767ce 2660+out_wh:
1facf9fc 2661+ /* revert */
2662+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2663+ if (rerr)
4a4d8108
AM
2664+ pr_warning("failed re-creating base whiteout, %s. (%d)\n",
2665+ del->pathname, rerr);
4f0767ce 2666+out:
1facf9fc 2667+ return err;
2668+}
2669+
2670+/* ---------------------------------------------------------------------- */
2671+
027c5e7a
AM
2672+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
2673+{
2674+ int err;
2675+ aufs_bindex_t bstart, bend;
2676+ struct aufs_ibusy ibusy;
2677+ struct inode *inode, *h_inode;
2678+
2679+ err = -EPERM;
2680+ if (unlikely(!capable(CAP_SYS_ADMIN)))
2681+ goto out;
2682+
2683+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
2684+ if (!err)
2685+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
2686+ if (unlikely(err)) {
2687+ err = -EFAULT;
2688+ AuTraceErr(err);
2689+ goto out;
2690+ }
2691+
2692+ err = -EINVAL;
2693+ si_read_lock(sb, AuLock_FLUSH);
2694+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
2695+ goto out_unlock;
2696+
2697+ err = 0;
2698+ ibusy.h_ino = 0; /* invalid */
2699+ inode = ilookup(sb, ibusy.ino);
2700+ if (!inode
2701+ || inode->i_ino == AUFS_ROOT_INO
2702+ || is_bad_inode(inode))
2703+ goto out_unlock;
2704+
2705+ ii_read_lock_child(inode);
2706+ bstart = au_ibstart(inode);
2707+ bend = au_ibend(inode);
2708+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
2709+ h_inode = au_h_iptr(inode, ibusy.bindex);
2710+ if (h_inode && au_test_ibusy(inode, bstart, bend))
2711+ ibusy.h_ino = h_inode->i_ino;
2712+ }
2713+ ii_read_unlock(inode);
2714+ iput(inode);
2715+
2716+out_unlock:
2717+ si_read_unlock(sb);
2718+ if (!err) {
2719+ err = __put_user(ibusy.h_ino, &arg->h_ino);
2720+ if (unlikely(err)) {
2721+ err = -EFAULT;
2722+ AuTraceErr(err);
2723+ }
2724+ }
2725+out:
2726+ return err;
2727+}
2728+
2729+long au_ibusy_ioctl(struct file *file, unsigned long arg)
2730+{
2731+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg);
2732+}
2733+
2734+#ifdef CONFIG_COMPAT
2735+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
2736+{
2737+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg));
2738+}
2739+#endif
2740+
2741+/* ---------------------------------------------------------------------- */
2742+
1facf9fc 2743+/*
2744+ * change a branch permission
2745+ */
2746+
dece6358
AM
2747+static void au_warn_ima(void)
2748+{
2749+#ifdef CONFIG_IMA
1308ab2a 2750+ /* since it doesn't support mark_files_ro() */
027c5e7a 2751+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
2752+#endif
2753+}
2754+
1facf9fc 2755+static int do_need_sigen_inc(int a, int b)
2756+{
2757+ return au_br_whable(a) && !au_br_whable(b);
2758+}
2759+
2760+static int need_sigen_inc(int old, int new)
2761+{
2762+ return do_need_sigen_inc(old, new)
2763+ || do_need_sigen_inc(new, old);
2764+}
2765+
7f207e10
AM
2766+static unsigned long long au_farray_cb(void *a,
2767+ unsigned long long max __maybe_unused,
2768+ void *arg)
2769+{
2770+ unsigned long long n;
2771+ struct file **p, *f;
2772+ struct super_block *sb = arg;
2773+
2774+ n = 0;
2775+ p = a;
2776+ lg_global_lock(files_lglock);
2777+ do_file_list_for_each_entry(sb, f) {
2778+ if (au_fi(f)
027c5e7a 2779+ && file_count(f)
7f207e10
AM
2780+ && !special_file(f->f_dentry->d_inode->i_mode)) {
2781+ get_file(f);
2782+ *p++ = f;
2783+ n++;
2784+ AuDebugOn(n > max);
2785+ }
2786+ } while_file_list_for_each_entry;
2787+ lg_global_unlock(files_lglock);
2788+
2789+ return n;
2790+}
2791+
2792+static struct file **au_farray_alloc(struct super_block *sb,
2793+ unsigned long long *max)
2794+{
2795+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
2796+ return au_array_alloc(max, au_farray_cb, sb);
2797+}
2798+
2799+static void au_farray_free(struct file **a, unsigned long long max)
2800+{
2801+ unsigned long long ull;
2802+
2803+ for (ull = 0; ull < max; ull++)
2804+ if (a[ull])
2805+ fput(a[ull]);
2806+ au_array_free(a);
2807+}
2808+
1facf9fc 2809+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2810+{
7f207e10 2811+ int err, do_warn;
027c5e7a 2812+ unsigned int mnt_flags;
7f207e10 2813+ unsigned long long ull, max;
e49829fe 2814+ aufs_bindex_t br_id;
027c5e7a 2815+ unsigned char verbose;
7f207e10 2816+ struct file *file, *hf, **array;
e49829fe
JR
2817+ struct inode *inode;
2818+ struct au_hfile *hfile;
1facf9fc 2819+
027c5e7a
AM
2820+ mnt_flags = au_mntflags(sb);
2821+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2822+
7f207e10
AM
2823+ array = au_farray_alloc(sb, &max);
2824+ err = PTR_ERR(array);
2825+ if (IS_ERR(array))
1facf9fc 2826+ goto out;
2827+
7f207e10 2828+ do_warn = 0;
e49829fe 2829+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
2830+ for (ull = 0; ull < max; ull++) {
2831+ file = array[ull];
1facf9fc 2832+
7f207e10 2833+ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */
1facf9fc 2834+ fi_read_lock(file);
2835+ if (unlikely(au_test_mmapped(file))) {
2836+ err = -EBUSY;
027c5e7a
AM
2837+ AuVerbose(verbose, "mmapped %.*s\n",
2838+ AuDLNPair(file->f_dentry));
7f207e10 2839+ AuDbgFile(file);
1facf9fc 2840+ FiMustNoWaiters(file);
2841+ fi_read_unlock(file);
7f207e10 2842+ goto out_array;
1facf9fc 2843+ }
2844+
027c5e7a 2845+ inode = file->f_dentry->d_inode;
e49829fe
JR
2846+ hfile = &au_fi(file)->fi_htop;
2847+ hf = hfile->hf_file;
2848+ if (!S_ISREG(inode->i_mode)
1facf9fc 2849+ || !(file->f_mode & FMODE_WRITE)
e49829fe 2850+ || hfile->hf_br->br_id != br_id
7f207e10
AM
2851+ || !(hf->f_mode & FMODE_WRITE))
2852+ array[ull] = NULL;
2853+ else {
2854+ do_warn = 1;
2855+ get_file(file);
1facf9fc 2856+ }
2857+
1facf9fc 2858+ FiMustNoWaiters(file);
2859+ fi_read_unlock(file);
7f207e10
AM
2860+ fput(file);
2861+ }
1facf9fc 2862+
2863+ err = 0;
7f207e10 2864+ if (do_warn)
dece6358 2865+ au_warn_ima();
7f207e10
AM
2866+
2867+ for (ull = 0; ull < max; ull++) {
2868+ file = array[ull];
2869+ if (!file)
2870+ continue;
2871+
1facf9fc 2872+ /* todo: already flushed? */
2873+ /* cf. fs/super.c:mark_files_ro() */
7f207e10
AM
2874+ /* fi_read_lock(file); */
2875+ hfile = &au_fi(file)->fi_htop;
2876+ hf = hfile->hf_file;
2877+ /* fi_read_unlock(file); */
027c5e7a 2878+ spin_lock(&hf->f_lock);
1facf9fc 2879+ hf->f_mode &= ~FMODE_WRITE;
027c5e7a 2880+ spin_unlock(&hf->f_lock);
1facf9fc 2881+ if (!file_check_writeable(hf)) {
2882+ file_release_write(hf);
2883+ mnt_drop_write(hf->f_vfsmnt);
2884+ }
2885+ }
2886+
7f207e10
AM
2887+out_array:
2888+ au_farray_free(array, max);
4f0767ce 2889+out:
7f207e10 2890+ AuTraceErr(err);
1facf9fc 2891+ return err;
2892+}
2893+
2894+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 2895+ int *do_refresh)
1facf9fc 2896+{
2897+ int err, rerr;
2898+ aufs_bindex_t bindex;
1308ab2a 2899+ struct path path;
1facf9fc 2900+ struct dentry *root;
2901+ struct au_branch *br;
2902+
2903+ root = sb->s_root;
1facf9fc 2904+ bindex = au_find_dbindex(root, mod->h_root);
2905+ if (bindex < 0) {
2906+ if (remount)
2907+ return 0; /* success */
2908+ err = -ENOENT;
4a4d8108 2909+ pr_err("%s no such branch\n", mod->path);
1facf9fc 2910+ goto out;
2911+ }
2912+ AuDbg("bindex b%d\n", bindex);
2913+
2914+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2915+ if (unlikely(err))
2916+ goto out;
2917+
2918+ br = au_sbr(sb, bindex);
2919+ if (br->br_perm == mod->perm)
2920+ return 0; /* success */
2921+
2922+ if (au_br_writable(br->br_perm)) {
2923+ /* remove whiteout base */
2924+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2925+ if (unlikely(err))
2926+ goto out;
2927+
2928+ if (!au_br_writable(mod->perm)) {
2929+ /* rw --> ro, file might be mmapped */
2930+ DiMustNoWaiters(root);
2931+ IiMustNoWaiters(root->d_inode);
2932+ di_write_unlock(root);
2933+ err = au_br_mod_files_ro(sb, bindex);
2934+ /* aufs_write_lock() calls ..._child() */
2935+ di_write_lock_child(root);
2936+
2937+ if (unlikely(err)) {
2938+ rerr = -ENOMEM;
2939+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2940+ GFP_NOFS);
1308ab2a 2941+ if (br->br_wbr) {
2942+ path.mnt = br->br_mnt;
2943+ path.dentry = mod->h_root;
2944+ rerr = au_wbr_init(br, sb, br->br_perm,
2945+ &path);
2946+ }
1facf9fc 2947+ if (unlikely(rerr)) {
2948+ AuIOErr("nested error %d (%d)\n",
2949+ rerr, err);
2950+ br->br_perm = mod->perm;
2951+ }
2952+ }
2953+ }
2954+ } else if (au_br_writable(mod->perm)) {
2955+ /* ro --> rw */
2956+ err = -ENOMEM;
2957+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2958+ if (br->br_wbr) {
1308ab2a 2959+ path.mnt = br->br_mnt;
2960+ path.dentry = mod->h_root;
1facf9fc 2961+ err = au_wbr_init(br, sb, mod->perm, &path);
2962+ if (unlikely(err)) {
2963+ kfree(br->br_wbr);
2964+ br->br_wbr = NULL;
2965+ }
2966+ }
2967+ }
2968+
2969+ if (!err) {
7f207e10 2970+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
1facf9fc 2971+ br->br_perm = mod->perm;
2972+ }
2973+
4f0767ce 2974+out:
7f207e10 2975+ AuTraceErr(err);
1facf9fc 2976+ return err;
2977+}
7f207e10
AM
2978diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
2979--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
2980+++ linux/fs/aufs/branch.h 2012-02-13 21:54:56.966438287 +0100
2981@@ -0,0 +1,230 @@
1facf9fc 2982+/*
f6c5ef8b 2983+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 2984+ *
2985+ * This program, aufs is free software; you can redistribute it and/or modify
2986+ * it under the terms of the GNU General Public License as published by
2987+ * the Free Software Foundation; either version 2 of the License, or
2988+ * (at your option) any later version.
dece6358
AM
2989+ *
2990+ * This program is distributed in the hope that it will be useful,
2991+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2992+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2993+ * GNU General Public License for more details.
2994+ *
2995+ * You should have received a copy of the GNU General Public License
2996+ * along with this program; if not, write to the Free Software
2997+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2998+ */
2999+
3000+/*
3001+ * branch filesystems and xino for them
3002+ */
3003+
3004+#ifndef __AUFS_BRANCH_H__
3005+#define __AUFS_BRANCH_H__
3006+
3007+#ifdef __KERNEL__
3008+
1facf9fc 3009+#include <linux/mount.h>
4a4d8108 3010+#include "dynop.h"
1facf9fc 3011+#include "rwsem.h"
3012+#include "super.h"
3013+
3014+/* ---------------------------------------------------------------------- */
3015+
3016+/* a xino file */
3017+struct au_xino_file {
3018+ struct file *xi_file;
3019+ struct mutex xi_nondir_mtx;
3020+
3021+ /* todo: make xino files an array to support huge inode number */
3022+
3023+#ifdef CONFIG_DEBUG_FS
3024+ struct dentry *xi_dbgaufs;
3025+#endif
3026+};
3027+
3028+/* members for writable branch only */
3029+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
3030+struct au_wbr {
dece6358 3031+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 3032+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 3033+ atomic_t wbr_wh_running;
1facf9fc 3034+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
3035+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
3036+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
3037+
3038+ /* mfs mode */
3039+ unsigned long long wbr_bytes;
3040+};
3041+
4a4d8108
AM
3042+/* ext2 has 3 types of operations at least, ext3 has 4 */
3043+#define AuBrDynOp (AuDyLast * 4)
3044+
1facf9fc 3045+/* protected by superblock rwsem */
3046+struct au_branch {
3047+ struct au_xino_file br_xino;
3048+
3049+ aufs_bindex_t br_id;
3050+
3051+ int br_perm;
3052+ struct vfsmount *br_mnt;
4a4d8108
AM
3053+ spinlock_t br_dykey_lock;
3054+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 3055+ atomic_t br_count;
3056+
3057+ struct au_wbr *br_wbr;
3058+
3059+ /* xino truncation */
3060+ blkcnt_t br_xino_upper; /* watermark in blocks */
3061+ atomic_t br_xino_running;
3062+
027c5e7a
AM
3063+#ifdef CONFIG_AUFS_HFSNOTIFY
3064+ struct fsnotify_group *br_hfsn_group;
3065+ struct fsnotify_ops br_hfsn_ops;
3066+#endif
3067+
1facf9fc 3068+#ifdef CONFIG_SYSFS
3069+ /* an entry under sysfs per mount-point */
3070+ char br_name[8];
3071+ struct attribute br_attr;
3072+#endif
3073+};
3074+
3075+/* ---------------------------------------------------------------------- */
3076+
1e00d052
AM
3077+/* branch permissions and attributes */
3078+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
3079+#define AuBrPerm_RO (1 << 1) /* readonly */
3080+#define AuBrPerm_RR (1 << 2) /* natively readonly */
3081+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
1facf9fc 3082+
1e00d052 3083+#define AuBrRAttr_WH (1 << 3) /* whiteout-able */
1facf9fc 3084+
1e00d052 3085+#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */
1facf9fc 3086+
3087+static inline int au_br_writable(int brperm)
3088+{
1e00d052 3089+ return brperm & AuBrPerm_RW;
1facf9fc 3090+}
3091+
3092+static inline int au_br_whable(int brperm)
3093+{
1e00d052
AM
3094+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
3095+}
3096+
3097+static inline int au_br_wh_linkable(int brperm)
3098+{
3099+ return !(brperm & AuBrWAttr_NoLinkWH);
1facf9fc 3100+}
3101+
3102+static inline int au_br_rdonly(struct au_branch *br)
3103+{
3104+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
3105+ || !au_br_writable(br->br_perm))
3106+ ? -EROFS : 0;
3107+}
3108+
4a4d8108 3109+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 3110+{
4a4d8108 3111+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 3112+ return !(brperm & AuBrPerm_RR);
1facf9fc 3113+#else
3114+ return 0;
3115+#endif
3116+}
3117+
3118+/* ---------------------------------------------------------------------- */
3119+
3120+/* branch.c */
3121+struct au_sbinfo;
3122+void au_br_free(struct au_sbinfo *sinfo);
3123+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
3124+struct au_opt_add;
3125+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
3126+struct au_opt_del;
3127+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
3128+long au_ibusy_ioctl(struct file *file, unsigned long arg);
3129+#ifdef CONFIG_COMPAT
3130+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
3131+#endif
1facf9fc 3132+struct au_opt_mod;
3133+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 3134+ int *do_refresh);
1facf9fc 3135+
3136+/* xino.c */
3137+static const loff_t au_loff_max = LLONG_MAX;
3138+
3139+int au_xib_trunc(struct super_block *sb);
3140+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
3141+ loff_t *pos);
3142+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
3143+ loff_t *pos);
3144+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
3145+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
3146+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 3147+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 3148+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3149+ ino_t ino);
3150+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3151+ ino_t *ino);
3152+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
3153+ struct file *base_file, int do_test);
3154+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
3155+
3156+struct au_opt_xino;
3157+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
3158+void au_xino_clr(struct super_block *sb);
3159+struct file *au_xino_def(struct super_block *sb);
3160+int au_xino_path(struct seq_file *seq, struct file *file);
3161+
3162+/* ---------------------------------------------------------------------- */
3163+
3164+/* Superblock to branch */
3165+static inline
3166+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
3167+{
3168+ return au_sbr(sb, bindex)->br_id;
3169+}
3170+
3171+static inline
3172+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
3173+{
3174+ return au_sbr(sb, bindex)->br_mnt;
3175+}
3176+
3177+static inline
3178+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
3179+{
3180+ return au_sbr_mnt(sb, bindex)->mnt_sb;
3181+}
3182+
3183+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
3184+{
e49829fe 3185+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 3186+}
3187+
3188+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
3189+{
3190+ return au_sbr(sb, bindex)->br_perm;
3191+}
3192+
3193+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
3194+{
3195+ return au_br_whable(au_sbr_perm(sb, bindex));
3196+}
3197+
3198+/* ---------------------------------------------------------------------- */
3199+
3200+/*
3201+ * wbr_wh_read_lock, wbr_wh_write_lock
3202+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
3203+ */
3204+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
3205+
dece6358
AM
3206+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
3207+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
3208+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
3209+
1facf9fc 3210+#endif /* __KERNEL__ */
3211+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
3212diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
3213--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 3214+++ linux/fs/aufs/conf.mk 2012-02-13 21:54:56.966438287 +0100
2cbb1c4b 3215@@ -0,0 +1,38 @@
4a4d8108
AM
3216+
3217+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
3218+
3219+define AuConf
3220+ifdef ${1}
3221+AuConfStr += ${1}=${${1}}
3222+endif
3223+endef
3224+
b752ccd1 3225+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 3226+ SBILIST \
7f207e10 3227+ HNOTIFY HFSNOTIFY \
4a4d8108
AM
3228+ EXPORT INO_T_64 \
3229+ RDU \
2cbb1c4b 3230+ PROC_MAP \
4a4d8108
AM
3231+ SP_IATTR \
3232+ SHWH \
3233+ BR_RAMFS \
3234+ BR_FUSE POLL \
3235+ BR_HFSPLUS \
3236+ BDEV_LOOP \
b752ccd1
AM
3237+ DEBUG MAGIC_SYSRQ
3238+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
3239+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
3240+
3241+AuConfName = ${obj}/conf.str
3242+${AuConfName}.tmp: FORCE
3243+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
3244+${AuConfName}: ${AuConfName}.tmp
3245+ @diff -q $< $@ > /dev/null 2>&1 || { \
3246+ echo ' GEN ' $@; \
3247+ cp -p $< $@; \
3248+ }
3249+FORCE:
3250+clean-files += ${AuConfName} ${AuConfName}.tmp
3251+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
3252+
3253+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
3254diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
3255--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
3256+++ linux/fs/aufs/cpup.c 2012-02-13 21:54:56.966438287 +0100
3257@@ -0,0 +1,1079 @@
1facf9fc 3258+/*
f6c5ef8b 3259+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 3260+ *
3261+ * This program, aufs is free software; you can redistribute it and/or modify
3262+ * it under the terms of the GNU General Public License as published by
3263+ * the Free Software Foundation; either version 2 of the License, or
3264+ * (at your option) any later version.
dece6358
AM
3265+ *
3266+ * This program is distributed in the hope that it will be useful,
3267+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3268+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3269+ * GNU General Public License for more details.
3270+ *
3271+ * You should have received a copy of the GNU General Public License
3272+ * along with this program; if not, write to the Free Software
3273+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3274+ */
3275+
3276+/*
3277+ * copy-up functions, see wbr_policy.c for copy-down
3278+ */
3279+
3280+#include <linux/fs_stack.h>
dece6358 3281+#include <linux/mm.h>
1facf9fc 3282+#include "aufs.h"
3283+
3284+void au_cpup_attr_flags(struct inode *dst, struct inode *src)
3285+{
3286+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
3287+ | S_NOATIME | S_NOCMTIME;
3288+
3289+ dst->i_flags |= src->i_flags & ~mask;
3290+ if (au_test_fs_notime(dst->i_sb))
3291+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
3292+}
3293+
3294+void au_cpup_attr_timesizes(struct inode *inode)
3295+{
3296+ struct inode *h_inode;
3297+
3298+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3299+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 3300+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 3301+}
3302+
3303+void au_cpup_attr_nlink(struct inode *inode, int force)
3304+{
3305+ struct inode *h_inode;
3306+ struct super_block *sb;
3307+ aufs_bindex_t bindex, bend;
3308+
3309+ sb = inode->i_sb;
3310+ bindex = au_ibstart(inode);
3311+ h_inode = au_h_iptr(inode, bindex);
3312+ if (!force
3313+ && !S_ISDIR(h_inode->i_mode)
3314+ && au_opt_test(au_mntflags(sb), PLINK)
3315+ && au_plink_test(inode))
3316+ return;
3317+
9dbd164d 3318+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 3319+
3320+ /*
3321+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
3322+ * it may include the whplink directory.
3323+ */
3324+ if (S_ISDIR(h_inode->i_mode)) {
3325+ bend = au_ibend(inode);
3326+ for (bindex++; bindex <= bend; bindex++) {
3327+ h_inode = au_h_iptr(inode, bindex);
3328+ if (h_inode)
3329+ au_add_nlink(inode, h_inode);
3330+ }
3331+ }
3332+}
3333+
3334+void au_cpup_attr_changeable(struct inode *inode)
3335+{
3336+ struct inode *h_inode;
3337+
3338+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3339+ inode->i_mode = h_inode->i_mode;
3340+ inode->i_uid = h_inode->i_uid;
3341+ inode->i_gid = h_inode->i_gid;
3342+ au_cpup_attr_timesizes(inode);
3343+ au_cpup_attr_flags(inode, h_inode);
3344+}
3345+
3346+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
3347+{
3348+ struct au_iinfo *iinfo = au_ii(inode);
3349+
1308ab2a 3350+ IiMustWriteLock(inode);
3351+
1facf9fc 3352+ iinfo->ii_higen = h_inode->i_generation;
3353+ iinfo->ii_hsb1 = h_inode->i_sb;
3354+}
3355+
3356+void au_cpup_attr_all(struct inode *inode, int force)
3357+{
3358+ struct inode *h_inode;
3359+
3360+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3361+ au_cpup_attr_changeable(inode);
3362+ if (inode->i_nlink > 0)
3363+ au_cpup_attr_nlink(inode, force);
3364+ inode->i_rdev = h_inode->i_rdev;
3365+ inode->i_blkbits = h_inode->i_blkbits;
3366+ au_cpup_igen(inode, h_inode);
3367+}
3368+
3369+/* ---------------------------------------------------------------------- */
3370+
3371+/* Note: dt_dentry and dt_h_path are stored as-is, no reference is taken */
3372+
3373+/* keep the timestamps of the parent dir when cpup */
3374+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3375+ struct path *h_path)
3376+{
3377+ struct inode *h_inode;
3378+
3379+ dt->dt_dentry = dentry;
3380+ dt->dt_h_path = *h_path;
3381+ h_inode = h_path->dentry->d_inode;
3382+ dt->dt_atime = h_inode->i_atime;
3383+ dt->dt_mtime = h_inode->i_mtime;
3384+ /* smp_mb(); */
3385+}
3386+
3387+void au_dtime_revert(struct au_dtime *dt)
3388+{
3389+ struct iattr attr;
3390+ int err;
3391+
3392+ attr.ia_atime = dt->dt_atime;
3393+ attr.ia_mtime = dt->dt_mtime;
3394+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
3395+ | ATTR_ATIME | ATTR_ATIME_SET;
3396+
3397+ err = vfsub_notify_change(&dt->dt_h_path, &attr);
3398+ if (unlikely(err))
4a4d8108 3399+ pr_warning("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 3400+}
3401+
3402+/* ---------------------------------------------------------------------- */
3403+
3404+static noinline_for_stack
3405+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
3406+{
3407+ int err, sbits;
3408+ struct iattr ia;
3409+ struct path h_path;
1308ab2a 3410+ struct inode *h_isrc, *h_idst;
1facf9fc 3411+
3412+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 3413+ h_idst = h_path.dentry->d_inode;
1facf9fc 3414+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
3415+ h_isrc = h_src->d_inode;
1308ab2a 3416+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 3417+ | ATTR_ATIME | ATTR_MTIME
3418+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
1facf9fc 3419+ ia.ia_uid = h_isrc->i_uid;
3420+ ia.ia_gid = h_isrc->i_gid;
3421+ ia.ia_atime = h_isrc->i_atime;
3422+ ia.ia_mtime = h_isrc->i_mtime;
1308ab2a 3423+ if (h_idst->i_mode != h_isrc->i_mode
3424+ && !S_ISLNK(h_idst->i_mode)) {
3425+ ia.ia_valid |= ATTR_MODE;
3426+ ia.ia_mode = h_isrc->i_mode;
3427+ }
3428+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
3429+ au_cpup_attr_flags(h_idst, h_isrc);
1facf9fc 3430+ err = vfsub_notify_change(&h_path, &ia);
3431+
3432+ /* is this nfs only? */
3433+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
3434+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
3435+ ia.ia_mode = h_isrc->i_mode;
3436+ err = vfsub_notify_change(&h_path, &ia);
3437+ }
3438+
3439+ return err;
3440+}
3441+
3442+/* ---------------------------------------------------------------------- */
3443+
3444+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
3445+ char *buf, unsigned long blksize)
3446+{
3447+ int err;
3448+ size_t sz, rbytes, wbytes;
3449+ unsigned char all_zero;
3450+ char *p, *zp;
3451+ struct mutex *h_mtx;
3452+ /* reduce stack usage */
3453+ struct iattr *ia;
3454+
3455+ zp = page_address(ZERO_PAGE(0));
3456+ if (unlikely(!zp))
3457+ return -ENOMEM; /* possible? */
3458+
3459+ err = 0;
3460+ all_zero = 0;
3461+ while (len) {
3462+ AuDbg("len %lld\n", len);
3463+ sz = blksize;
3464+ if (len < blksize)
3465+ sz = len;
3466+
3467+ rbytes = 0;
3468+ /* todo: signal_pending? */
3469+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
3470+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3471+ err = rbytes;
3472+ }
3473+ if (unlikely(err < 0))
3474+ break;
3475+
3476+ all_zero = 0;
3477+ if (len >= rbytes && rbytes == blksize)
3478+ all_zero = !memcmp(buf, zp, rbytes);
3479+ if (!all_zero) {
3480+ wbytes = rbytes;
3481+ p = buf;
3482+ while (wbytes) {
3483+ size_t b;
3484+
3485+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3486+ err = b;
3487+ /* todo: signal_pending? */
3488+ if (unlikely(err == -EAGAIN || err == -EINTR))
3489+ continue;
3490+ if (unlikely(err < 0))
3491+ break;
3492+ wbytes -= b;
3493+ p += b;
3494+ }
3495+ } else {
3496+ loff_t res;
3497+
3498+ AuLabel(hole);
3499+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3500+ err = res;
3501+ if (unlikely(res < 0))
3502+ break;
3503+ }
3504+ len -= rbytes;
3505+ err = 0;
3506+ }
3507+
3508+ /* the last block may be a hole */
3509+ if (!err && all_zero) {
3510+ AuLabel(last hole);
3511+
3512+ err = 1;
3513+ if (au_test_nfs(dst->f_dentry->d_sb)) {
3514+ /* nfs requires this step to make last hole */
3515+ /* is this only nfs? */
3516+ do {
3517+ /* todo: signal_pending? */
3518+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3519+ } while (err == -EAGAIN || err == -EINTR);
3520+ if (err == 1)
3521+ dst->f_pos--;
3522+ }
3523+
3524+ if (err == 1) {
3525+ ia = (void *)buf;
3526+ ia->ia_size = dst->f_pos;
3527+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3528+ ia->ia_file = dst;
3529+ h_mtx = &dst->f_dentry->d_inode->i_mutex;
3530+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3531+ err = vfsub_notify_change(&dst->f_path, ia);
3532+ mutex_unlock(h_mtx);
3533+ }
3534+ }
3535+
3536+ return err;
3537+}
3538+
3539+int au_copy_file(struct file *dst, struct file *src, loff_t len)
3540+{
3541+ int err;
3542+ unsigned long blksize;
3543+ unsigned char do_kfree;
3544+ char *buf;
3545+
3546+ err = -ENOMEM;
3547+ blksize = dst->f_dentry->d_sb->s_blocksize;
3548+ if (!blksize || PAGE_SIZE < blksize)
3549+ blksize = PAGE_SIZE;
3550+ AuDbg("blksize %lu\n", blksize);
3551+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
3552+ if (do_kfree)
3553+ buf = kmalloc(blksize, GFP_NOFS);
3554+ else
3555+ buf = (void *)__get_free_page(GFP_NOFS);
3556+ if (unlikely(!buf))
3557+ goto out;
3558+
3559+ if (len > (1 << 22))
3560+ AuDbg("copying a large file %lld\n", (long long)len);
3561+
3562+ src->f_pos = 0;
3563+ dst->f_pos = 0;
3564+ err = au_do_copy_file(dst, src, len, buf, blksize);
3565+ if (do_kfree)
3566+ kfree(buf);
3567+ else
3568+ free_page((unsigned long)buf);
3569+
4f0767ce 3570+out:
1facf9fc 3571+ return err;
3572+}
3573+
3574+/*
3575+ * to support a sparse file which is opened with O_APPEND,
3576+ * we need to close the file.
3577+ */
3578+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
4a4d8108 3579+ aufs_bindex_t bsrc, loff_t len)
1facf9fc 3580+{
3581+ int err, i;
3582+ enum { SRC, DST };
3583+ struct {
3584+ aufs_bindex_t bindex;
3585+ unsigned int flags;
3586+ struct dentry *dentry;
3587+ struct file *file;
3588+ void *label, *label_file;
3589+ } *f, file[] = {
3590+ {
3591+ .bindex = bsrc,
3592+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3593+ .file = NULL,
3594+ .label = &&out,
3595+ .label_file = &&out_src
3596+ },
3597+ {
3598+ .bindex = bdst,
3599+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3600+ .file = NULL,
3601+ .label = &&out_src,
3602+ .label_file = &&out_dst
3603+ }
3604+ };
3605+ struct super_block *sb;
3606+
3607+ /* bsrc branch can be ro/rw. */
3608+ sb = dentry->d_sb;
3609+ f = file;
3610+ for (i = 0; i < 2; i++, f++) {
3611+ f->dentry = au_h_dptr(dentry, f->bindex);
3612+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3613+ err = PTR_ERR(f->file);
3614+ if (IS_ERR(f->file))
3615+ goto *f->label;
3616+ err = -EINVAL;
3617+ if (unlikely(!f->file->f_op))
3618+ goto *f->label_file;
3619+ }
3620+
3621+ /* try stopping to update while we copyup */
3622+ IMustLock(file[SRC].dentry->d_inode);
3623+ err = au_copy_file(file[DST].file, file[SRC].file, len);
3624+
4f0767ce 3625+out_dst:
1facf9fc 3626+ fput(file[DST].file);
3627+ au_sbr_put(sb, file[DST].bindex);
4f0767ce 3628+out_src:
1facf9fc 3629+ fput(file[SRC].file);
3630+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 3631+out:
1facf9fc 3632+ return err;
3633+}
3634+
3635+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3636+ aufs_bindex_t bsrc, loff_t len,
3637+ struct inode *h_dir, struct path *h_path)
3638+{
3639+ int err, rerr;
3640+ loff_t l;
3641+
3642+ err = 0;
3643+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3644+ if (len == -1 || l < len)
3645+ len = l;
3646+ if (len)
3647+ err = au_cp_regular(dentry, bdst, bsrc, len);
3648+ if (!err)
3649+ goto out; /* success */
3650+
3651+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3652+ if (rerr) {
3653+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3654+ AuDLNPair(h_path->dentry), err, rerr);
3655+ err = -EIO;
3656+ }
3657+
4f0767ce 3658+out:
1facf9fc 3659+ return err;
3660+}
3661+
3662+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3663+ struct inode *h_dir)
3664+{
3665+ int err, symlen;
3666+ mm_segment_t old_fs;
b752ccd1
AM
3667+ union {
3668+ char *k;
3669+ char __user *u;
3670+ } sym;
1facf9fc 3671+
3672+ err = -ENOSYS;
3673+ if (unlikely(!h_src->d_inode->i_op->readlink))
3674+ goto out;
3675+
3676+ err = -ENOMEM;
b752ccd1
AM
3677+ sym.k = __getname_gfp(GFP_NOFS);
3678+ if (unlikely(!sym.k))
1facf9fc 3679+ goto out;
3680+
9dbd164d 3681+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 3682+ old_fs = get_fs();
3683+ set_fs(KERNEL_DS);
b752ccd1 3684+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 3685+ err = symlen;
3686+ set_fs(old_fs);
3687+
3688+ if (symlen > 0) {
b752ccd1
AM
3689+ sym.k[symlen] = 0;
3690+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 3691+ }
b752ccd1 3692+ __putname(sym.k);
1facf9fc 3693+
4f0767ce 3694+out:
1facf9fc 3695+ return err;
3696+}
3697+
3698+/* return with the lower dst inode is locked */
3699+static noinline_for_stack
3700+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3701+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3702+ struct dentry *dst_parent)
3703+{
3704+ int err;
3705+ umode_t mode;
3706+ unsigned int mnt_flags;
3707+ unsigned char isdir;
3708+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3709+ struct au_dtime dt;
3710+ struct path h_path;
3711+ struct dentry *h_src, *h_dst, *h_parent;
3712+ struct inode *h_inode, *h_dir;
3713+ struct super_block *sb;
3714+
3715+ /* bsrc branch can be ro/rw. */
3716+ h_src = au_h_dptr(dentry, bsrc);
3717+ h_inode = h_src->d_inode;
3718+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3719+
3720+ /* try stopping to be referenced while we are creating */
3721+ h_dst = au_h_dptr(dentry, bdst);
3722+ h_parent = h_dst->d_parent; /* dir inode is locked */
3723+ h_dir = h_parent->d_inode;
3724+ IMustLock(h_dir);
3725+ AuDebugOn(h_parent != h_dst->d_parent);
3726+
3727+ sb = dentry->d_sb;
3728+ h_path.mnt = au_sbr_mnt(sb, bdst);
3729+ if (do_dt) {
3730+ h_path.dentry = h_parent;
3731+ au_dtime_store(&dt, dst_parent, &h_path);
3732+ }
3733+ h_path.dentry = h_dst;
3734+
3735+ isdir = 0;
3736+ mode = h_inode->i_mode;
3737+ switch (mode & S_IFMT) {
3738+ case S_IFREG:
3739+ /* try stopping to update while we are referencing */
3740+ IMustLock(h_inode);
3741+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3742+ if (!err)
3743+ err = au_do_cpup_regular
3744+ (dentry, bdst, bsrc, len,
3745+ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3746+ break;
3747+ case S_IFDIR:
3748+ isdir = 1;
3749+ err = vfsub_mkdir(h_dir, &h_path, mode);
3750+ if (!err) {
3751+ /*
3752+ * strange behaviour from the user's view,
3753+ * particularly in the setattr case
3754+ */
3755+ if (au_ibstart(dst_parent->d_inode) == bdst)
3756+ au_cpup_attr_nlink(dst_parent->d_inode,
3757+ /*force*/1);
3758+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3759+ }
3760+ break;
3761+ case S_IFLNK:
3762+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3763+ break;
3764+ case S_IFCHR:
3765+ case S_IFBLK:
3766+ AuDebugOn(!capable(CAP_MKNOD));
3767+ /*FALLTHROUGH*/
3768+ case S_IFIFO:
3769+ case S_IFSOCK:
3770+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3771+ break;
3772+ default:
3773+ AuIOErr("Unknown inode type 0%o\n", mode);
3774+ err = -EIO;
3775+ }
3776+
3777+ mnt_flags = au_mntflags(sb);
3778+ if (!au_opt_test(mnt_flags, UDBA_NONE)
3779+ && !isdir
3780+ && au_opt_test(mnt_flags, XINO)
3781+ && h_inode->i_nlink == 1
3782+ /* todo: unnecessary? */
3783+ /* && dentry->d_inode->i_nlink == 1 */
3784+ && bdst < bsrc
3785+ && !au_ftest_cpup(flags, KEEPLINO))
1308ab2a 3786+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 3787+ /* ignore this error */
3788+
3789+ if (do_dt)
3790+ au_dtime_revert(&dt);
3791+ return err;
3792+}
3793+
3794+/*
3795+ * copyup the @dentry from @bsrc to @bdst.
3796+ * the caller must set both of the lower dentries.
3797+ * @len is for truncating; when it is -1, copy up the entire file.
3798+ * in link/rename cases, @dst_parent may be different from the real one.
3799+ */
3800+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3801+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3802+ struct dentry *dst_parent)
3803+{
3804+ int err, rerr;
3805+ aufs_bindex_t old_ibstart;
3806+ unsigned char isdir, plink;
3807+ struct au_dtime dt;
3808+ struct path h_path;
3809+ struct dentry *h_src, *h_dst, *h_parent;
3810+ struct inode *dst_inode, *h_dir, *inode;
3811+ struct super_block *sb;
3812+
3813+ AuDebugOn(bsrc <= bdst);
3814+
3815+ sb = dentry->d_sb;
3816+ h_path.mnt = au_sbr_mnt(sb, bdst);
3817+ h_dst = au_h_dptr(dentry, bdst);
3818+ h_parent = h_dst->d_parent; /* dir inode is locked */
3819+ h_dir = h_parent->d_inode;
3820+ IMustLock(h_dir);
3821+
3822+ h_src = au_h_dptr(dentry, bsrc);
3823+ inode = dentry->d_inode;
3824+
3825+ if (!dst_parent)
3826+ dst_parent = dget_parent(dentry);
3827+ else
3828+ dget(dst_parent);
3829+
3830+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
3831+ dst_inode = au_h_iptr(inode, bdst);
3832+ if (dst_inode) {
3833+ if (unlikely(!plink)) {
3834+ err = -EIO;
027c5e7a
AM
3835+ AuIOErr("hi%lu(i%lu) exists on b%d "
3836+ "but plink is disabled\n",
3837+ dst_inode->i_ino, inode->i_ino, bdst);
1facf9fc 3838+ goto out;
3839+ }
3840+
3841+ if (dst_inode->i_nlink) {
3842+ const int do_dt = au_ftest_cpup(flags, DTIME);
3843+
3844+ h_src = au_plink_lkup(inode, bdst);
3845+ err = PTR_ERR(h_src);
3846+ if (IS_ERR(h_src))
3847+ goto out;
3848+ if (unlikely(!h_src->d_inode)) {
3849+ err = -EIO;
3850+ AuIOErr("i%lu exists on a upper branch "
027c5e7a
AM
3851+ "but not pseudo-linked\n",
3852+ inode->i_ino);
1facf9fc 3853+ dput(h_src);
3854+ goto out;
3855+ }
3856+
3857+ if (do_dt) {
3858+ h_path.dentry = h_parent;
3859+ au_dtime_store(&dt, dst_parent, &h_path);
3860+ }
3861+ h_path.dentry = h_dst;
3862+ err = vfsub_link(h_src, h_dir, &h_path);
3863+ if (do_dt)
3864+ au_dtime_revert(&dt);
3865+ dput(h_src);
3866+ goto out;
3867+ } else
3868+ /* todo: cpup_wh_file? */
3869+ /* udba work */
4a4d8108 3870+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 3871+ }
3872+
3873+ old_ibstart = au_ibstart(inode);
3874+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3875+ if (unlikely(err))
3876+ goto out;
3877+ dst_inode = h_dst->d_inode;
3878+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3879+
3880+ err = cpup_iattr(dentry, bdst, h_src);
3881+ isdir = S_ISDIR(dst_inode->i_mode);
3882+ if (!err) {
4a4d8108
AM
3883+ if (bdst < old_ibstart) {
3884+ if (S_ISREG(inode->i_mode)) {
3885+ err = au_dy_iaop(inode, bdst, dst_inode);
3886+ if (unlikely(err))
3887+ goto out_rev;
3888+ }
1facf9fc 3889+ au_set_ibstart(inode, bdst);
4a4d8108 3890+ }
1facf9fc 3891+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3892+ au_hi_flags(inode, isdir));
3893+ mutex_unlock(&dst_inode->i_mutex);
3894+ if (!isdir
3895+ && h_src->d_inode->i_nlink > 1
3896+ && plink)
3897+ au_plink_append(inode, bdst, h_dst);
3898+ goto out; /* success */
3899+ }
3900+
3901+ /* revert */
4a4d8108 3902+out_rev:
1facf9fc 3903+ h_path.dentry = h_parent;
3904+ mutex_unlock(&dst_inode->i_mutex);
3905+ au_dtime_store(&dt, dst_parent, &h_path);
3906+ h_path.dentry = h_dst;
3907+ if (!isdir)
3908+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3909+ else
3910+ rerr = vfsub_rmdir(h_dir, &h_path);
3911+ au_dtime_revert(&dt);
3912+ if (rerr) {
3913+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3914+ err = -EIO;
3915+ }
3916+
4f0767ce 3917+out:
1facf9fc 3918+ dput(dst_parent);
3919+ return err;
3920+}
3921+
3922+struct au_cpup_single_args {
3923+ int *errp;
3924+ struct dentry *dentry;
3925+ aufs_bindex_t bdst, bsrc;
3926+ loff_t len;
3927+ unsigned int flags;
3928+ struct dentry *dst_parent;
3929+};
3930+
3931+static void au_call_cpup_single(void *args)
3932+{
3933+ struct au_cpup_single_args *a = args;
3934+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3935+ a->flags, a->dst_parent);
3936+}
3937+
53392da6
AM
3938+/*
3939+ * prevent SIGXFSZ in copy-up.
3940+ * testing CAP_MKNOD is for generic fs,
3941+ * but CAP_FSETID is for xfs only, currently.
3942+ */
3943+static int au_cpup_sio_test(struct super_block *sb, umode_t mode)
3944+{
3945+ int do_sio;
3946+
3947+ do_sio = 0;
3948+ if (!au_wkq_test()
3949+ && (!au_sbi(sb)->si_plink_maint_pid
3950+ || au_plink_maint(sb, AuLock_NOPLM))) {
3951+ switch (mode & S_IFMT) {
3952+ case S_IFREG:
3953+ /* no condition about RLIMIT_FSIZE and the file size */
3954+ do_sio = 1;
3955+ break;
3956+ case S_IFCHR:
3957+ case S_IFBLK:
3958+ do_sio = !capable(CAP_MKNOD);
3959+ break;
3960+ }
3961+ if (!do_sio)
3962+ do_sio = ((mode & (S_ISUID | S_ISGID))
3963+ && !capable(CAP_FSETID));
3964+ }
3965+
3966+ return do_sio;
3967+}
3968+
1facf9fc 3969+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3970+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3971+ struct dentry *dst_parent)
3972+{
3973+ int err, wkq_err;
1facf9fc 3974+ struct dentry *h_dentry;
3975+
3976+ h_dentry = au_h_dptr(dentry, bsrc);
53392da6 3977+ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode))
1facf9fc 3978+ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3979+ dst_parent);
3980+ else {
3981+ struct au_cpup_single_args args = {
3982+ .errp = &err,
3983+ .dentry = dentry,
3984+ .bdst = bdst,
3985+ .bsrc = bsrc,
3986+ .len = len,
3987+ .flags = flags,
3988+ .dst_parent = dst_parent
3989+ };
3990+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3991+ if (unlikely(wkq_err))
3992+ err = wkq_err;
3993+ }
3994+
3995+ return err;
3996+}
3997+
3998+/*
3999+ * copyup the @dentry from the first active lower branch to @bdst,
4000+ * using au_cpup_single().
4001+ */
4002+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4003+ unsigned int flags)
4004+{
4005+ int err;
4006+ aufs_bindex_t bsrc, bend;
4007+
4008+ bend = au_dbend(dentry);
4009+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
4010+ if (au_h_dptr(dentry, bsrc))
4011+ break;
4012+
4013+ err = au_lkup_neg(dentry, bdst);
4014+ if (!err) {
4015+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
4016+ if (!err)
4017+ return 0; /* success */
4018+
4019+ /* revert */
4020+ au_set_h_dptr(dentry, bdst, NULL);
4021+ au_set_dbstart(dentry, bsrc);
4022+ }
4023+
4024+ return err;
4025+}
4026+
4027+struct au_cpup_simple_args {
4028+ int *errp;
4029+ struct dentry *dentry;
4030+ aufs_bindex_t bdst;
4031+ loff_t len;
4032+ unsigned int flags;
4033+};
4034+
4035+static void au_call_cpup_simple(void *args)
4036+{
4037+ struct au_cpup_simple_args *a = args;
4038+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
4039+}
4040+
4041+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4042+ unsigned int flags)
4043+{
4044+ int err, wkq_err;
1facf9fc 4045+ struct dentry *parent;
4046+ struct inode *h_dir;
4047+
4048+ parent = dget_parent(dentry);
4049+ h_dir = au_h_iptr(parent->d_inode, bdst);
53392da6
AM
4050+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
4051+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4052+ err = au_cpup_simple(dentry, bdst, len, flags);
4053+ else {
4054+ struct au_cpup_simple_args args = {
4055+ .errp = &err,
4056+ .dentry = dentry,
4057+ .bdst = bdst,
4058+ .len = len,
4059+ .flags = flags
4060+ };
4061+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
4062+ if (unlikely(wkq_err))
4063+ err = wkq_err;
4064+ }
4065+
4066+ dput(parent);
4067+ return err;
4068+}
4069+
4070+/* ---------------------------------------------------------------------- */
4071+
4072+/*
4073+ * copyup the deleted file for writing.
4074+ */
4075+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
4076+ struct dentry *wh_dentry, struct file *file,
4077+ loff_t len)
4078+{
4079+ int err;
4080+ aufs_bindex_t bstart;
4081+ struct au_dinfo *dinfo;
4082+ struct dentry *h_d_dst, *h_d_start;
4a4d8108 4083+ struct au_hdentry *hdp;
1facf9fc 4084+
4085+ dinfo = au_di(dentry);
1308ab2a 4086+ AuRwMustWriteLock(&dinfo->di_rwsem);
4087+
1facf9fc 4088+ bstart = dinfo->di_bstart;
4a4d8108
AM
4089+ hdp = dinfo->di_hdentry;
4090+ h_d_dst = hdp[0 + bdst].hd_dentry;
1facf9fc 4091+ dinfo->di_bstart = bdst;
4a4d8108 4092+ hdp[0 + bdst].hd_dentry = wh_dentry;
027c5e7a
AM
4093+ if (file) {
4094+ h_d_start = hdp[0 + bstart].hd_dentry;
4a4d8108 4095+ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry;
027c5e7a 4096+ }
1facf9fc 4097+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
4098+ /*h_parent*/NULL);
027c5e7a
AM
4099+ if (file) {
4100+ if (!err)
4101+ err = au_reopen_nondir(file);
4a4d8108 4102+ hdp[0 + bstart].hd_dentry = h_d_start;
1facf9fc 4103+ }
4a4d8108 4104+ hdp[0 + bdst].hd_dentry = h_d_dst;
1facf9fc 4105+ dinfo->di_bstart = bstart;
4106+
4107+ return err;
4108+}
4109+
4110+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4111+ struct file *file)
4112+{
4113+ int err;
4114+ struct au_dtime dt;
4115+ struct dentry *parent, *h_parent, *wh_dentry;
4116+ struct au_branch *br;
4117+ struct path h_path;
4118+
4119+ br = au_sbr(dentry->d_sb, bdst);
4120+ parent = dget_parent(dentry);
4121+ h_parent = au_h_dptr(parent, bdst);
4122+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
4123+ err = PTR_ERR(wh_dentry);
4124+ if (IS_ERR(wh_dentry))
4125+ goto out;
4126+
4127+ h_path.dentry = h_parent;
4128+ h_path.mnt = br->br_mnt;
4129+ au_dtime_store(&dt, parent, &h_path);
4130+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
4131+ if (unlikely(err))
4132+ goto out_wh;
4133+
4134+ dget(wh_dentry);
4135+ h_path.dentry = wh_dentry;
4a4d8108
AM
4136+ if (!S_ISDIR(wh_dentry->d_inode->i_mode))
4137+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
4138+ else
4139+ err = vfsub_rmdir(h_parent->d_inode, &h_path);
1facf9fc 4140+ if (unlikely(err)) {
4141+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
4142+ AuDLNPair(wh_dentry), err);
4143+ err = -EIO;
4144+ }
4145+ au_dtime_revert(&dt);
4146+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
4147+
4f0767ce 4148+out_wh:
1facf9fc 4149+ dput(wh_dentry);
4f0767ce 4150+out:
1facf9fc 4151+ dput(parent);
4152+ return err;
4153+}
4154+
4155+struct au_cpup_wh_args {
4156+ int *errp;
4157+ struct dentry *dentry;
4158+ aufs_bindex_t bdst;
4159+ loff_t len;
4160+ struct file *file;
4161+};
4162+
4163+static void au_call_cpup_wh(void *args)
4164+{
4165+ struct au_cpup_wh_args *a = args;
4166+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
4167+}
4168+
4169+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4170+ struct file *file)
4171+{
4172+ int err, wkq_err;
4173+ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
4174+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
4175+ struct au_wbr *wbr;
4176+
4177+ parent = dget_parent(dentry);
4178+ dir = parent->d_inode;
4179+ h_orph = NULL;
4180+ h_parent = NULL;
4181+ h_dir = au_igrab(au_h_iptr(dir, bdst));
4182+ h_tmpdir = h_dir;
4183+ if (!h_dir->i_nlink) {
4184+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
4185+ h_orph = wbr->wbr_orph;
4186+
4187+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 4188+ au_set_h_dptr(parent, bdst, dget(h_orph));
4189+ h_tmpdir = h_orph->d_inode;
1facf9fc 4190+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
4191+
4192+ /* this temporary unlock is safe */
4193+ if (file)
4a4d8108 4194+ h_dentry = au_hf_top(file)->f_dentry;
1facf9fc 4195+ else
4196+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
4197+ h_inode = h_dentry->d_inode;
4198+ IMustLock(h_inode);
4199+ mutex_unlock(&h_inode->i_mutex);
dece6358 4200+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
1facf9fc 4201+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108 4202+ /* todo: au_h_open_pre()? */
1facf9fc 4203+ }
4204+
53392da6
AM
4205+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
4206+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4207+ err = au_cpup_wh(dentry, bdst, len, file);
4208+ else {
4209+ struct au_cpup_wh_args args = {
4210+ .errp = &err,
4211+ .dentry = dentry,
4212+ .bdst = bdst,
4213+ .len = len,
4214+ .file = file
4215+ };
4216+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
4217+ if (unlikely(wkq_err))
4218+ err = wkq_err;
4219+ }
4220+
4221+ if (h_orph) {
4222+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 4223+ /* todo: au_h_open_post()? */
1facf9fc 4224+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 4225+ au_set_h_dptr(parent, bdst, h_parent);
4226+ }
4227+ iput(h_dir);
4228+ dput(parent);
4229+
4230+ return err;
4231+}
4232+
4233+/* ---------------------------------------------------------------------- */
4234+
4235+/*
4236+ * generic routine for both copy-up and copy-down.
4237+ */
4238+/* cf. revalidate function in file.c */
4239+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4240+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4241+ struct dentry *h_parent, void *arg),
4242+ void *arg)
4243+{
4244+ int err;
4245+ struct au_pin pin;
4246+ struct dentry *d, *parent, *h_parent, *real_parent;
4247+
4248+ err = 0;
4249+ parent = dget_parent(dentry);
4250+ if (IS_ROOT(parent))
4251+ goto out;
4252+
4253+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
4254+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
4255+
4256+ /* do not use au_dpage */
4257+ real_parent = parent;
4258+ while (1) {
4259+ dput(parent);
4260+ parent = dget_parent(dentry);
4261+ h_parent = au_h_dptr(parent, bdst);
4262+ if (h_parent)
4263+ goto out; /* success */
4264+
4265+ /* find top dir which is necessary to cpup */
4266+ do {
4267+ d = parent;
4268+ dput(parent);
4269+ parent = dget_parent(d);
4270+ di_read_lock_parent3(parent, !AuLock_IR);
4271+ h_parent = au_h_dptr(parent, bdst);
4272+ di_read_unlock(parent, !AuLock_IR);
4273+ } while (!h_parent);
4274+
4275+ if (d != real_parent)
4276+ di_write_lock_child3(d);
4277+
4278+ /* somebody else might create while we were sleeping */
4279+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
4280+ if (au_h_dptr(d, bdst))
4281+ au_update_dbstart(d);
4282+
4283+ au_pin_set_dentry(&pin, d);
4284+ err = au_do_pin(&pin);
4285+ if (!err) {
4286+ err = cp(d, bdst, h_parent, arg);
4287+ au_unpin(&pin);
4288+ }
4289+ }
4290+
4291+ if (d != real_parent)
4292+ di_write_unlock(d);
4293+ if (unlikely(err))
4294+ break;
4295+ }
4296+
4f0767ce 4297+out:
1facf9fc 4298+ dput(parent);
4299+ return err;
4300+}
4301+
4302+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
4303+ struct dentry *h_parent __maybe_unused ,
4304+ void *arg __maybe_unused)
4305+{ /* au_cp_dirs() callback used by au_cpup_dirs(); h_parent and arg are ignored */
4306+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME); /* len is passed as -1 */
4307+}
4308+
4309+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4310+{ /* au_cp_dirs() with au_cpup_dir() as the per-directory callback and no extra argument */
4311+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
4312+}
4313+
4314+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4315+{ /* call au_cpup_dirs() only when the parent's inode has no lower inode on bdst; returns 0 or its error */
4316+ int err;
4317+ struct dentry *parent;
4318+ struct inode *dir;
4319+
4320+ parent = dget_parent(dentry);
4321+ dir = parent->d_inode;
4322+ err = 0;
4323+ if (au_h_iptr(dir, bdst))
4324+ goto out;
4325+
4326+ di_read_unlock(parent, AuLock_IR); /* NOTE(review): implies the caller holds the parent's read lock -- confirm */
4327+ di_write_lock_parent(parent);
4328+ /* someone else might change our inode while we were sleeping */
4329+ if (!au_h_iptr(dir, bdst))
4330+ err = au_cpup_dirs(dentry, bdst);
4331+ di_downgrade_lock(parent, AuLock_IR); /* runs on success and on error alike */
4332+
4f0767ce 4333+out:
1facf9fc 4334+ dput(parent);
4335+ return err;
4336+}
7f207e10
AM
4337diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
4338--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
4339+++ linux/fs/aufs/cpup.h 2012-02-13 21:54:56.966438287 +0100
4340@@ -0,0 +1,81 @@
1facf9fc 4341+/*
f6c5ef8b 4342+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 4343+ *
4344+ * This program, aufs is free software; you can redistribute it and/or modify
4345+ * it under the terms of the GNU General Public License as published by
4346+ * the Free Software Foundation; either version 2 of the License, or
4347+ * (at your option) any later version.
dece6358
AM
4348+ *
4349+ * This program is distributed in the hope that it will be useful,
4350+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4351+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4352+ * GNU General Public License for more details.
4353+ *
4354+ * You should have received a copy of the GNU General Public License
4355+ * along with this program; if not, write to the Free Software
4356+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4357+ */
4358+
4359+/*
4360+ * copy-up/down functions
4361+ */
4362+
4363+#ifndef __AUFS_CPUP_H__
4364+#define __AUFS_CPUP_H__
4365+
4366+#ifdef __KERNEL__
4367+
dece6358 4368+#include <linux/path.h>
1facf9fc 4369+
dece6358
AM
4370+struct inode;
4371+struct file;
4372+
1facf9fc 4373+void au_cpup_attr_flags(struct inode *dst, struct inode *src);
4374+void au_cpup_attr_timesizes(struct inode *inode);
4375+void au_cpup_attr_nlink(struct inode *inode, int force);
4376+void au_cpup_attr_changeable(struct inode *inode);
4377+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
4378+void au_cpup_attr_all(struct inode *inode, int force);
4379+
4380+/* ---------------------------------------------------------------------- */
4381+
4382+/* cpup flags: bit values for an 'unsigned int flags' word; the au_f{test,set,clr}_cpup() helpers take the name without the AuCpup_ prefix */
4383+#define AuCpup_DTIME 1 /* do dtime_store/revert */
4384+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
4385+ for link(2) */
4386+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
4387+#define au_fset_cpup(flags, name) \
4388+ do { (flags) |= AuCpup_##name; } while (0)
4389+#define au_fclr_cpup(flags, name) \
4390+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 4391+
4392+int au_copy_file(struct file *dst, struct file *src, loff_t len);
4393+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
4394+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
4395+ struct dentry *dst_parent);
4396+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4397+ unsigned int flags);
4398+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4399+ struct file *file);
4400+
4401+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4402+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4403+ struct dentry *h_parent, void *arg),
4404+ void *arg);
4405+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4406+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4407+
4408+/* ---------------------------------------------------------------------- */
4409+
4410+/* keep timestamps across a copy-up; handled through au_dtime_store() and au_dtime_revert() */
4411+struct au_dtime {
4412+ struct dentry *dt_dentry;
4413+ struct path dt_h_path;
4414+ struct timespec dt_atime, dt_mtime; /* the saved access and modification times */
4415+};
4416+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4417+ struct path *h_path);
4418+void au_dtime_revert(struct au_dtime *dt);
4419+
4420+#endif /* __KERNEL__ */
4421+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
4422diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
4423--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 4424+++ linux/fs/aufs/dbgaufs.c 2012-02-13 21:54:56.966438287 +0100
4a4d8108 4425@@ -0,0 +1,334 @@
1facf9fc 4426+/*
f6c5ef8b 4427+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 4428+ *
4429+ * This program, aufs is free software; you can redistribute it and/or modify
4430+ * it under the terms of the GNU General Public License as published by
4431+ * the Free Software Foundation; either version 2 of the License, or
4432+ * (at your option) any later version.
dece6358
AM
4433+ *
4434+ * This program is distributed in the hope that it will be useful,
4435+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4436+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4437+ * GNU General Public License for more details.
4438+ *
4439+ * You should have received a copy of the GNU General Public License
4440+ * along with this program; if not, write to the Free Software
4441+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4442+ */
4443+
4444+/*
4445+ * debugfs interface
4446+ */
4447+
4448+#include <linux/debugfs.h>
4449+#include "aufs.h"
4450+
4451+#ifndef CONFIG_SYSFS
4452+#error DEBUG_FS depends upon SYSFS
4453+#endif
4454+
4455+static struct dentry *dbgaufs; /* top debugfs dir, set by dbgaufs_init() */
4456+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; /* read-only for user, group and others */
4457+
4458+/* 20 is max digits length of ulong 64 */
4459+struct dbgaufs_arg {
4460+ int n; /* length of the text in a[], as returned by snprintf() */
4461+ char a[20 * 4]; /* text built at open time and handed out by dbgaufs_xi_read() */
4462+};
4463+
4464+/*
4465+ * common function for all XINO files
4466+ */
4467+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
4468+ struct file *file)
4469+{
4470+ kfree(file->private_data); /* the struct dbgaufs_arg attached by dbgaufs_xi_open() */
4471+ return 0;
4472+}
4473+
4474+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
4475+{ /* allocate a dbgaufs_arg, hang it on file->private_data and fill it with a one-line stat of xf; fails only with -ENOMEM */
4476+ int err;
4477+ struct kstat st;
4478+ struct dbgaufs_arg *p;
4479+
4480+ err = -ENOMEM;
4481+ p = kmalloc(sizeof(*p), GFP_NOFS);
4482+ if (unlikely(!p))
4483+ goto out;
4484+
4485+ err = 0;
4486+ p->n = 0;
4487+ file->private_data = p;
4488+ if (!xf)
4489+ goto out; /* no file to stat: open succeeds with an empty text */
4490+
4491+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4492+ if (!err) {
4493+ if (do_fcnt) /* prefix the line with file_count(xf) */
4494+ p->n = snprintf
4495+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4496+ (long)file_count(xf), st.blocks, st.blksize,
4497+ (long long)st.size);
4498+ else
4499+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4500+ st.blocks, st.blksize,
4501+ (long long)st.size);
4502+ AuDebugOn(p->n >= sizeof(p->a)); /* the text must not have been truncated */
4503+ } else {
4504+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4505+ err = 0; /* the getattr error is reported as text, the open itself succeeds */
4506+ }
4507+
4f0767ce 4508+out:
1facf9fc 4509+ return err;
4510+
4511+}
4512+
4513+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4514+ size_t count, loff_t *ppos)
4515+{ /* copy out the text prepared at open time */
4516+ struct dbgaufs_arg *p;
4517+
4518+ p = file->private_data;
4519+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4520+}
4521+
4522+/* ---------------------------------------------------------------------- */
4523+
4524+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4525+{ /* open handler of the "xib" file: report sbinfo->si_xib, without a file count */
4526+ int err;
4527+ struct au_sbinfo *sbinfo;
4528+ struct super_block *sb;
4529+
4530+ sbinfo = inode->i_private; /* the sbinfo given to debugfs_create_file() in dbgaufs_si_init() */
4531+ sb = sbinfo->si_sb;
4532+ si_noflush_read_lock(sb);
4533+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4534+ si_read_unlock(sb);
4535+ return err;
4536+}
4537+
4538+static const struct file_operations dbgaufs_xib_fop = { /* operations of the "xib" file created in dbgaufs_si_init() */
4a4d8108 4539+ .owner = THIS_MODULE,
1facf9fc 4540+ .open = dbgaufs_xib_open,
4541+ .release = dbgaufs_xi_release,
4542+ .read = dbgaufs_xi_read
4543+};
4544+
4545+/* ---------------------------------------------------------------------- */
4546+
4547+#define DbgaufsXi_PREFIX "xi" /* name prefix of the per-branch files; dbgaufs_brs_add() appends the branch index */
4548+
4549+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4550+{ /* open handler of an "xi<N>" file: N is parsed back from the file name and selects the branch */
4551+ int err;
4552+ long l;
4553+ struct au_sbinfo *sbinfo;
4554+ struct super_block *sb;
4555+ struct file *xf;
4556+ struct qstr *name;
4557+
4558+ err = -ENOENT;
4559+ xf = NULL;
4560+ name = &file->f_dentry->d_name;
4561+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4562+ || memcmp(name->name, DbgaufsXi_PREFIX,
4563+ sizeof(DbgaufsXi_PREFIX) - 1))) /* sizeof() counts the NUL, so at least one char must follow the prefix */
4564+ goto out;
9dbd164d 4565+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 4566+ if (unlikely(err))
4567+ goto out;
4568+
4569+ sbinfo = inode->i_private;
4570+ sb = sbinfo->si_sb;
4571+ si_noflush_read_lock(sb);
4572+ if (l <= au_sbend(sb)) { /* NOTE(review): only the upper bound is checked; a negative l is not rejected here */
4573+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4574+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4575+ } else
4576+ err = -ENOENT;
4577+ si_read_unlock(sb);
4578+
4f0767ce 4579+out:
1facf9fc 4580+ return err;
4581+}
4582+
4583+static const struct file_operations dbgaufs_xino_fop = { /* operations of the per-branch files created in dbgaufs_brs_add() */
4a4d8108 4584+ .owner = THIS_MODULE,
1facf9fc 4585+ .open = dbgaufs_xino_open,
4586+ .release = dbgaufs_xi_release,
4587+ .read = dbgaufs_xi_read
4588+};
4589+
4590+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4591+{ /* remove the debugfs entries of branches bindex .. au_sbend(sb) */
4592+ aufs_bindex_t bend;
4593+ struct au_branch *br;
4594+ struct au_xino_file *xi;
4595+
4596+ if (!au_sbi(sb)->si_dbgaufs)
4597+ return; /* this sb has no debugfs dir */
4598+
4599+ bend = au_sbend(sb);
4600+ for (; bindex <= bend; bindex++) {
4601+ br = au_sbr(sb, bindex);
4602+ xi = &br->br_xino;
4603+ if (xi->xi_dbgaufs) {
4604+ debugfs_remove(xi->xi_dbgaufs);
4605+ xi->xi_dbgaufs = NULL; /* dbgaufs_brs_add() expects NULL here */
4606+ }
4607+ }
4608+}
4609+
4610+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4611+{ /* create one "xi<bindex>" file for each branch bindex .. au_sbend(sb) */
4612+ struct au_sbinfo *sbinfo;
4613+ struct dentry *parent;
4614+ struct au_branch *br;
4615+ struct au_xino_file *xi;
4616+ aufs_bindex_t bend;
4617+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" + up to 5 digits of bindex + terminating NUL */
4618+
4619+ sbinfo = au_sbi(sb);
4620+ parent = sbinfo->si_dbgaufs;
4621+ if (!parent)
4622+ return; /* this sb has no debugfs dir */
4623+
4624+ bend = au_sbend(sb);
4625+ for (; bindex <= bend; bindex++) {
4626+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4627+ br = au_sbr(sb, bindex);
4628+ xi = &br->br_xino;
4629+ AuDebugOn(xi->xi_dbgaufs);
4630+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4631+ sbinfo, &dbgaufs_xino_fop);
4632+ /* ignore an error */
4633+ if (unlikely(!xi->xi_dbgaufs))
4634+ AuWarn1("failed %s under debugfs\n", name);
4635+ }
4636+}
4637+
4638+/* ---------------------------------------------------------------------- */
4639+
4640+#ifdef CONFIG_AUFS_EXPORT
4641+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4642+{ /* open handler of the "xigen" file: report sbinfo->si_xigen, without a file count */
4643+ int err;
4644+ struct au_sbinfo *sbinfo;
4645+ struct super_block *sb;
4646+
4647+ sbinfo = inode->i_private; /* the sbinfo given to debugfs_create_file() in dbgaufs_xigen_init() */
4648+ sb = sbinfo->si_sb;
4649+ si_noflush_read_lock(sb);
4650+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4651+ si_read_unlock(sb);
4652+ return err;
4653+}
4654+
4655+static const struct file_operations dbgaufs_xigen_fop = { /* operations of the "xigen" file created in dbgaufs_xigen_init() */
4a4d8108 4656+ .owner = THIS_MODULE,
1facf9fc 4657+ .open = dbgaufs_xigen_open,
4658+ .release = dbgaufs_xi_release,
4659+ .read = dbgaufs_xi_read
4660+};
4661+
4662+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4663+{ /* create the "xigen" file in this sb's debugfs dir; returns 0 or -EIO */
4664+ int err;
4665+
dece6358
AM
4666+ /*
4667+ * This function is a dynamic '__init' function actually,
4668+ * so the tiny check for si_rwsem is unnecessary.
4669+ */
4670+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4671+
1facf9fc 4672+ err = -EIO;
4673+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
4674+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4675+ &dbgaufs_xigen_fop);
4676+ if (sbinfo->si_dbgaufs_xigen)
4677+ err = 0;
4678+
4679+ return err;
4680+}
4681+#else
4682+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4683+{ /* variant built without CONFIG_AUFS_EXPORT: no "xigen" file, always succeeds */
4684+ return 0;
4685+}
4686+#endif /* CONFIG_AUFS_EXPORT */
4687+
4688+/* ---------------------------------------------------------------------- */
4689+
4690+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4691+{ /* remove this sb's debugfs dir with everything below it and drop the kobject reference */
dece6358
AM
4692+ /*
4693+ * This function is a dynamic '__init' function actually,
4694+ * so the tiny check for si_rwsem is unnecessary.
4695+ */
4696+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4697+
1facf9fc 4698+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
4699+ sbinfo->si_dbgaufs = NULL;
4700+ kobject_put(&sbinfo->si_kobj); /* pairs with kobject_get() in dbgaufs_si_init() */
4701+}
4702+
4703+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4704+{ /* create this sb's dir under the top aufs debugfs dir, with "xib" and whatever dbgaufs_xigen_init() adds; returns 0, -ENOENT or -EIO */
4705+ int err;
4706+ char name[SysaufsSiNameLen];
4707+
dece6358
AM
4708+ /*
4709+ * This function is a dynamic '__init' function actually,
4710+ * so the tiny check for si_rwsem is unnecessary.
4711+ */
4712+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4713+
1facf9fc 4714+ err = -ENOENT;
4715+ if (!dbgaufs) {
4716+ AuErr1("/debug/aufs is uninitialized\n");
4717+ goto out;
4718+ }
4719+
4720+ err = -EIO;
4721+ sysaufs_name(sbinfo, name);
4722+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4723+ if (unlikely(!sbinfo->si_dbgaufs))
4724+ goto out;
4725+ kobject_get(&sbinfo->si_kobj); /* dropped again by dbgaufs_si_fin() */
4726+
4727+ sbinfo->si_dbgaufs_xib = debugfs_create_file
4728+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4729+ &dbgaufs_xib_fop);
4730+ if (unlikely(!sbinfo->si_dbgaufs_xib))
4731+ goto out_dir;
4732+
4733+ err = dbgaufs_xigen_init(sbinfo);
4734+ if (!err)
4735+ goto out; /* success */
4736+
4f0767ce 4737+out_dir:
1facf9fc 4738+ dbgaufs_si_fin(sbinfo); /* also reached by falling through when dbgaufs_xigen_init() failed */
4f0767ce 4739+out:
1facf9fc 4740+ return err;
4741+}
4742+
4743+/* ---------------------------------------------------------------------- */
4744+
4745+void dbgaufs_fin(void)
4746+{ /* remove the top dir created by dbgaufs_init() */
4747+ debugfs_remove(dbgaufs);
4748+}
4749+
4750+int __init dbgaufs_init(void)
4751+{ /* create the top debugfs dir named AUFS_NAME; returns 0, or -EIO when no dentry came back */
4752+ int err;
4753+
4754+ err = -EIO;
4755+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4756+ if (dbgaufs)
4757+ err = 0;
4758+ return err;
4759+}
7f207e10
AM
4760diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
4761--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
4762+++ linux/fs/aufs/dbgaufs.h 2012-02-13 21:54:56.966438287 +0100
4763@@ -0,0 +1,49 @@
1facf9fc 4764+/*
f6c5ef8b 4765+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 4766+ *
4767+ * This program, aufs is free software; you can redistribute it and/or modify
4768+ * it under the terms of the GNU General Public License as published by
4769+ * the Free Software Foundation; either version 2 of the License, or
4770+ * (at your option) any later version.
dece6358
AM
4771+ *
4772+ * This program is distributed in the hope that it will be useful,
4773+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4774+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4775+ * GNU General Public License for more details.
4776+ *
4777+ * You should have received a copy of the GNU General Public License
4778+ * along with this program; if not, write to the Free Software
4779+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4780+ */
4781+
4782+/*
4783+ * debugfs interface
4784+ */
4785+
4786+#ifndef __DBGAUFS_H__
4787+#define __DBGAUFS_H__
4788+
4789+#ifdef __KERNEL__
4790+
dece6358 4791+struct super_block;
1facf9fc 4792+struct au_sbinfo;
dece6358 4793+
1facf9fc 4794+#ifdef CONFIG_DEBUG_FS
4795+/* dbgaufs.c */
4796+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4797+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4798+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4799+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4800+void dbgaufs_fin(void);
4801+int __init dbgaufs_init(void);
1facf9fc 4802+#else
4a4d8108
AM
4803+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
4804+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
4805+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
4806+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
4807+AuStubVoid(dbgaufs_fin, void)
4808+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 4809+#endif /* CONFIG_DEBUG_FS */
4810+
4811+#endif /* __KERNEL__ */
4812+#endif /* __DBGAUFS_H__ */
7f207e10
AM
4813diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
4814--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 4815+++ linux/fs/aufs/dcsub.c 2012-02-13 21:54:56.966438287 +0100
027c5e7a 4816@@ -0,0 +1,243 @@
1facf9fc 4817+/*
f6c5ef8b 4818+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 4819+ *
4820+ * This program, aufs is free software; you can redistribute it and/or modify
4821+ * it under the terms of the GNU General Public License as published by
4822+ * the Free Software Foundation; either version 2 of the License, or
4823+ * (at your option) any later version.
dece6358
AM
4824+ *
4825+ * This program is distributed in the hope that it will be useful,
4826+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4827+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4828+ * GNU General Public License for more details.
4829+ *
4830+ * You should have received a copy of the GNU General Public License
4831+ * along with this program; if not, write to the Free Software
4832+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4833+ */
4834+
4835+/*
4836+ * sub-routines for dentry cache
4837+ */
4838+
4839+#include "aufs.h"
4840+
4841+static void au_dpage_free(struct au_dpage *dpage)
4842+{ /* dput() each of the ndentry stored dentries, then free the page that held the pointers */
4843+ int i;
4844+ struct dentry **p;
4845+
4846+ p = dpage->dentries;
4847+ for (i = 0; i < dpage->ndentry; i++)
4848+ dput(*p++);
4849+ free_page((unsigned long)dpage->dentries);
4850+}
4851+
4852+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4853+{ /* set up dpages with a one-element array and one empty page; returns 0 or -ENOMEM */
4854+ int err;
4855+ void *p;
4856+
4857+ err = -ENOMEM;
4858+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4859+ if (unlikely(!dpages->dpages))
4860+ goto out;
4861+
4862+ p = (void *)__get_free_page(gfp);
4863+ if (unlikely(!p))
4864+ goto out_dpages;
4865+
4866+ dpages->dpages[0].ndentry = 0;
4867+ dpages->dpages[0].dentries = p;
4868+ dpages->ndpage = 1;
4869+ return 0; /* success */
4870+
4f0767ce 4871+out_dpages:
1facf9fc 4872+ kfree(dpages->dpages); /* dpages->dpages itself is left pointing at the freed array */
4f0767ce 4873+out:
1facf9fc 4874+ return err;
4875+}
4876+
4877+void au_dpages_free(struct au_dcsub_pages *dpages)
4878+{ /* release every page together with the dentries it holds, then the page array */
4879+ int i;
4880+ struct au_dpage *p;
4881+
4882+ p = dpages->dpages;
4883+ for (i = 0; i < dpages->ndpage; i++)
4884+ au_dpage_free(p++);
4885+ kfree(dpages->dpages);
4886+}
4887+
4888+static int au_dpages_append(struct au_dcsub_pages *dpages,
4889+ struct dentry *dentry, gfp_t gfp)
4890+{ /* store a new reference to dentry in the last page, adding a page first when that one is full; returns 0 or -ENOMEM */
4891+ int err, sz;
4892+ struct au_dpage *dpage;
4893+ void *p;
4894+
4895+ dpage = dpages->dpages + dpages->ndpage - 1;
4896+ sz = PAGE_SIZE / sizeof(dentry); /* number of dentry pointers that fit in one page */
4897+ if (unlikely(dpage->ndentry >= sz)) {
4898+ AuLabel(new dpage);
4899+ err = -ENOMEM;
4900+ sz = dpages->ndpage * sizeof(*dpages->dpages); /* sz is reused: current byte size of the array */
4901+ p = au_kzrealloc(dpages->dpages, sz,
4902+ sz + sizeof(*dpages->dpages), gfp);
4903+ if (unlikely(!p))
4904+ goto out;
4905+
4906+ dpages->dpages = p;
4907+ dpage = dpages->dpages + dpages->ndpage;
4908+ p = (void *)__get_free_page(gfp);
4909+ if (unlikely(!p))
4910+ goto out; /* the grown array is kept, ndpage is not advanced */
4911+
4912+ dpage->ndentry = 0;
4913+ dpage->dentries = p;
4914+ dpages->ndpage++;
4915+ }
4916+
027c5e7a
AM
4917+ AuDebugOn(!dentry->d_count);
4918+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); /* the callers in this file hold dentry->d_lock */
1facf9fc 4919+ return 0; /* success */
4920+
4f0767ce 4921+out:
1facf9fc 4922+ return err;
4923+}
4924+
4925+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4926+ au_dpages_test test, void *arg)
4927+{ /* walk the dentry tree below root and append to dpages the dentries that have a non-zero d_count, belong to root's sb, have au_di() and pass test(); returns 0 or the append error */
4928+ int err;
027c5e7a 4929+ struct dentry *this_parent;
1facf9fc 4930+ struct list_head *next;
4931+ struct super_block *sb = root->d_sb;
4932+
4933+ err = 0;
027c5e7a
AM
4934+ write_seqlock(&rename_lock); /* held for the whole walk */
4935+ this_parent = root;
4936+ spin_lock(&this_parent->d_lock);
4f0767ce 4937+repeat:
1facf9fc 4938+ next = this_parent->d_subdirs.next;
4f0767ce 4939+resume: /* NOTE(review): the test below also runs when the walk comes back up via 'goto resume', so a directory may be appended more than once -- confirm this is intended */
1facf9fc 4940+ if (this_parent->d_sb == sb
4941+ && !IS_ROOT(this_parent)
027c5e7a
AM
4942+ && au_di(this_parent)
4943+ && this_parent->d_count
1facf9fc 4944+ && (!test || test(this_parent, arg))) {
4945+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); /* GFP_ATOMIC: d_lock is held */
4946+ if (unlikely(err))
4947+ goto out;
4948+ }
4949+
4950+ while (next != &this_parent->d_subdirs) {
4951+ struct list_head *tmp = next;
4952+ struct dentry *dentry = list_entry(tmp, struct dentry,
4953+ d_u.d_child);
027c5e7a 4954+
1facf9fc 4955+ next = tmp->next;
027c5e7a
AM
4956+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
4957+ if (dentry->d_count) { /* a child with d_count == 0 is skipped together with its subtree */
4958+ if (!list_empty(&dentry->d_subdirs)) {
4959+ spin_unlock(&this_parent->d_lock);
4960+ spin_release(&dentry->d_lock.dep_map, 1,
4961+ _RET_IP_);
4962+ this_parent = dentry; /* descend; the child's d_lock stays held */
4963+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1,
4964+ _RET_IP_);
4965+ goto repeat;
4966+ }
4967+ if (dentry->d_sb == sb
4968+ && au_di(dentry)
4969+ && (!test || test(dentry, arg)))
4970+ err = au_dpages_append(dpages, dentry,
4971+ GFP_ATOMIC);
1facf9fc 4972+ }
027c5e7a
AM
4973+ spin_unlock(&dentry->d_lock);
4974+ if (unlikely(err))
4975+ goto out;
1facf9fc 4976+ }
4977+
4978+ if (this_parent != root) { /* all children done: go back up one level */
027c5e7a
AM
4979+ struct dentry *tmp;
4980+ struct dentry *child;
4981+
4982+ tmp = this_parent->d_parent;
4983+ rcu_read_lock();
4984+ spin_unlock(&this_parent->d_lock);
4985+ child = this_parent;
4986+ this_parent = tmp;
4987+ spin_lock(&this_parent->d_lock);
4988+ rcu_read_unlock();
4989+ next = child->d_u.d_child.next; /* continue with the sibling after the finished child */
1facf9fc 4990+ goto resume;
4991+ }
027c5e7a 4992+
4f0767ce 4993+out:
027c5e7a
AM
4994+ spin_unlock(&this_parent->d_lock);
4995+ write_sequnlock(&rename_lock);
1facf9fc 4996+ return err;
4997+}
4998+
4999+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
5000+ int do_include, au_dpages_test test, void *arg)
5001+{ /* append dentry itself (only if do_include) and then each ancestor up to the root, keeping those with a non-zero d_count that pass test(); returns 0 or the append error */
5002+ int err;
5003+
5004+ err = 0;
027c5e7a
AM
5005+ write_seqlock(&rename_lock);
5006+ spin_lock(&dentry->d_lock);
5007+ if (do_include
5008+ && dentry->d_count
5009+ && (!test || test(dentry, arg)))
1facf9fc 5010+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
5011+ spin_unlock(&dentry->d_lock);
5012+ if (unlikely(err))
5013+ goto out;
5014+
5015+ /*
5016+ * vfsmount_lock is unnecessary since this is a traverse in a single
5017+ * mount
5018+ */
1facf9fc 5019+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
5020+ dentry = dentry->d_parent; /* rename_lock is locked */
5021+ spin_lock(&dentry->d_lock);
5022+ if (dentry->d_count
5023+ && (!test || test(dentry, arg)))
1facf9fc 5024+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
5025+ spin_unlock(&dentry->d_lock);
5026+ if (unlikely(err))
5027+ break;
1facf9fc 5028+ }
5029+
4f0767ce 5030+out:
027c5e7a 5031+ write_sequnlock(&rename_lock);
1facf9fc 5032+ return err;
5033+}
5034+
027c5e7a
AM
5035+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
5036+{ /* au_dpages_test callback: true when dentry has au_di() and its sb equals arg */
5037+ return au_di(dentry) && dentry->d_sb == arg;
5038+}
5039+
5040+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5041+ struct dentry *dentry, int do_include)
5042+{ /* au_dcsub_pages_rev() restricted to dentries that have au_di() and share dentry's sb */
5043+ return au_dcsub_pages_rev(dpages, dentry, do_include,
5044+ au_dcsub_dpages_aufs, dentry->d_sb);
5045+}
5046+
4a4d8108 5047+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 5048+{ /* returns path_is_under() for d1 against d2 */
4a4d8108
AM
5049+ struct path path[2] = { /* only .dentry is initialized; .mnt is left zeroed in both entries */
5050+ {
5051+ .dentry = d1
5052+ },
5053+ {
5054+ .dentry = d2
5055+ }
5056+ };
1facf9fc 5057+
4a4d8108 5058+ return path_is_under(path + 0, path + 1);
1facf9fc 5059+}
7f207e10
AM
5060diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
5061--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
5062+++ linux/fs/aufs/dcsub.h 2012-02-13 21:54:56.966438287 +0100
5063@@ -0,0 +1,94 @@
1facf9fc 5064+/*
f6c5ef8b 5065+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 5066+ *
5067+ * This program, aufs is free software; you can redistribute it and/or modify
5068+ * it under the terms of the GNU General Public License as published by
5069+ * the Free Software Foundation; either version 2 of the License, or
5070+ * (at your option) any later version.
dece6358
AM
5071+ *
5072+ * This program is distributed in the hope that it will be useful,
5073+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5074+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5075+ * GNU General Public License for more details.
5076+ *
5077+ * You should have received a copy of the GNU General Public License
5078+ * along with this program; if not, write to the Free Software
5079+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5080+ */
5081+
5082+/*
5083+ * sub-routines for dentry cache
5084+ */
5085+
5086+#ifndef __AUFS_DCSUB_H__
5087+#define __AUFS_DCSUB_H__
5088+
5089+#ifdef __KERNEL__
5090+
7f207e10 5091+#include <linux/dcache.h>
027c5e7a 5092+#include <linux/fs.h>
dece6358
AM
5093+
5094+struct dentry;
1facf9fc 5095+
5096+struct au_dpage {
5097+ int ndentry; /* number of pointers stored in dentries[] */
5098+ struct dentry **dentries; /* one page used as an array of dentry pointers */
5099+};
5100+
5101+struct au_dcsub_pages {
5102+ int ndpage; /* number of elements in dpages[] */
5103+ struct au_dpage *dpages; /* array of pages, grown one element at a time */
5104+};
5105+
5106+/* ---------------------------------------------------------------------- */
5107+
7f207e10 5108+/* dcsub.c */
1facf9fc 5109+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
5110+void au_dpages_free(struct au_dcsub_pages *dpages);
5111+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
5112+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
5113+ au_dpages_test test, void *arg);
5114+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
5115+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
5116+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5117+ struct dentry *dentry, int do_include);
4a4d8108 5118+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 5119+
7f207e10
AM
5120+/* ---------------------------------------------------------------------- */
5121+
027c5e7a
AM
5122+static inline int au_d_hashed_positive(struct dentry *d)
5123+{ /* 0 when d is hashed and has an inode with a non-zero i_nlink, else -ENOENT */
5124+ int err;
5125+ struct inode *inode = d->d_inode;
5126+ err = 0;
5127+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink))
5128+ err = -ENOENT;
5129+ return err;
5130+}
5131+
5132+static inline int au_d_alive(struct dentry *d)
5133+{ /* like au_d_hashed_positive(), except that a root dentry is tested with d_unlinked() instead of d_unhashed() */
5134+ int err;
5135+ struct inode *inode;
5136+ err = 0;
5137+ if (!IS_ROOT(d))
5138+ err = au_d_hashed_positive(d);
5139+ else {
5140+ inode = d->d_inode;
5141+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink))
5142+ err = -ENOENT;
5143+ }
5144+ return err;
5145+}
5146+
5147+static inline int au_alive_dir(struct dentry *d)
7f207e10 5148+{ /* 0 when au_d_alive(d) passes and the inode is not IS_DEADDIR, else -ENOENT */
027c5e7a
AM
5149+ int err;
5150+ err = au_d_alive(d);
5151+ if (unlikely(err || IS_DEADDIR(d->d_inode))) /* IS_DEADDIR() is evaluated only when err is 0 */
5152+ err = -ENOENT;
5153+ return err;
7f207e10
AM
5154+}
1facf9fc 5156+#endif /* __KERNEL__ */
5157+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
5158diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
5159--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
5160+++ linux/fs/aufs/debug.c 2012-02-13 21:54:56.966438287 +0100
5161@@ -0,0 +1,489 @@
1facf9fc 5162+/*
f6c5ef8b 5163+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 5164+ *
5165+ * This program, aufs is free software; you can redistribute it and/or modify
5166+ * it under the terms of the GNU General Public License as published by
5167+ * the Free Software Foundation; either version 2 of the License, or
5168+ * (at your option) any later version.
dece6358
AM
5169+ *
5170+ * This program is distributed in the hope that it will be useful,
5171+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5172+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5173+ * GNU General Public License for more details.
5174+ *
5175+ * You should have received a copy of the GNU General Public License
5176+ * along with this program; if not, write to the Free Software
5177+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5178+ */
5179+
5180+/*
5181+ * debug print functions
5182+ */
5183+
7f207e10 5184+#include <linux/vt_kern.h>
1facf9fc 5185+#include "aufs.h"
5186+
5187+int aufs_debug; /* exposed as the module parameter "debug" */
5188+MODULE_PARM_DESC(debug, "debug print");
5189+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
5190+
5191+char *au_plevel = KERN_DEBUG; /* level string that dpri() puts in front of every message */
e49829fe
JR
5192+#define dpri(fmt, ...) do { \
5193+ if ((au_plevel \
5194+ && strcmp(au_plevel, KERN_DEBUG)) \
5195+ || au_debug_test()) \
5196+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 5197+} while (0) /* prints when au_plevel is set and differs from KERN_DEBUG, or when au_debug_test() is true */
5198+
5199+/* ---------------------------------------------------------------------- */
5200+
5201+void au_dpri_whlist(struct au_nhash *whlist)
5202+{ /* print branch index, name and name length of every entry in each of the nh_num hash lists */
5203+ unsigned long ul, n;
5204+ struct hlist_head *head;
5205+ struct au_vdir_wh *tpos;
5206+ struct hlist_node *pos;
5207+
5208+ n = whlist->nh_num;
5209+ head = whlist->nh_head;
5210+ for (ul = 0; ul < n; ul++) {
5211+ hlist_for_each_entry(tpos, pos, head, wh_hash)
5212+ dpri("b%d, %.*s, %d\n",
5213+ tpos->wh_bindex,
5214+ tpos->wh_str.len, tpos->wh_str.name,
5215+ tpos->wh_str.len);
5216+ head++;
5217+ }
5218+}
5219+
5220+void au_dpri_vdir(struct au_vdir *vdir)
5221+{ /* print the header fields of vdir, then the address of each of its vd_nblk blocks */
5222+ unsigned long ul;
5223+ union au_vdir_deblk_p p;
5224+ unsigned char *o;
5225+
5226+ if (!vdir || IS_ERR(vdir)) {
5227+ dpri("err %ld\n", PTR_ERR(vdir)); /* prints 0 for a NULL vdir */
5228+ return;
5229+ }
5230+
5231+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
5232+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
5233+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
5234+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
5235+ p.deblk = vdir->vd_deblk[ul];
5236+ o = p.deblk;
5237+ dpri("[%lu]: %p\n", ul, o);
5238+ }
5239+}
5240+
53392da6 5241+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 5242+ struct dentry *wh)
5243+{ /* print one line describing inode, plus the name of wh when given; returns -1 for a NULL or ERR_PTR inode, else 0 */
5244+ char *n = NULL;
5245+ int l = 0;
5246+
5247+ if (!inode || IS_ERR(inode)) {
5248+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
5249+ return -1;
5250+ }
5251+
5252+ /* the type of i_blocks depends upon CONFIG_LSF */
5253+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
5254+ && sizeof(inode->i_blocks) != sizeof(u64));
5255+ if (wh) {
5256+ n = (void *)wh->d_name.name;
5257+ l = wh->d_name.len;
5258+ }
5259+
53392da6
AM
5260+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
5261+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
5262+ bindex, inode,
1facf9fc 5263+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
5264+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
5265+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 5266+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, /* only the low 16 bits of the ctime in ns */
1facf9fc 5267+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
5268+ inode->i_state, inode->i_flags, inode->i_version,
5269+ inode->i_generation,
1facf9fc 5270+ l ? ", wh " : "", l, n); /* with l == 0 the precision is 0 and n stays NULL */
5271+ return 0;
5272+}
5273+
5274+void au_dpri_inode(struct inode *inode)
5275+{
5276+ struct au_iinfo *iinfo;
5277+ aufs_bindex_t bindex;
53392da6 5278+ int err, hn;
1facf9fc 5279+
53392da6 5280+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 5281+ if (err || !au_test_aufs(inode->i_sb))
5282+ return;
5283+
5284+ iinfo = au_ii(inode);
5285+ if (!iinfo)
5286+ return;
5287+ dpri("i-1: bstart %d, bend %d, gen %d\n",
5288+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
5289+ if (iinfo->ii_bstart < 0)
5290+ return;
53392da6
AM
5291+ hn = 0;
5292+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
5293+ hn = !!au_hn(iinfo->ii_hinode + bindex);
5294+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 5295+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 5296+ }
1facf9fc 5297+}
5298+
2cbb1c4b
JR
5299+void au_dpri_dalias(struct inode *inode)
5300+{
5301+ struct dentry *d;
5302+
5303+ spin_lock(&inode->i_lock);
5304+ list_for_each_entry(d, &inode->i_dentry, d_alias)
5305+ au_dpri_dentry(d);
5306+ spin_unlock(&inode->i_lock);
5307+}
5308+
1facf9fc 5309+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
5310+{
5311+ struct dentry *wh = NULL;
53392da6 5312+ int hn;
1facf9fc 5313+
5314+ if (!dentry || IS_ERR(dentry)) {
5315+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
5316+ return -1;
5317+ }
5318+ /* do not call dget_parent() here */
027c5e7a 5319+ /* note: access d_xxx without d_lock */
1facf9fc 5320+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
5321+ bindex,
5322+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
5323+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
027c5e7a 5324+ dentry->d_count, dentry->d_flags);
53392da6 5325+ hn = -1;
1facf9fc 5326+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
5327+ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
53392da6
AM
5328+ if (iinfo) {
5329+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 5330+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 5331+ }
1facf9fc 5332+ }
53392da6 5333+ do_pri_inode(bindex, dentry->d_inode, hn, wh);
1facf9fc 5334+ return 0;
5335+}
5336+
5337+void au_dpri_dentry(struct dentry *dentry)
5338+{
5339+ struct au_dinfo *dinfo;
5340+ aufs_bindex_t bindex;
5341+ int err;
4a4d8108 5342+ struct au_hdentry *hdp;
1facf9fc 5343+
5344+ err = do_pri_dentry(-1, dentry);
5345+ if (err || !au_test_aufs(dentry->d_sb))
5346+ return;
5347+
5348+ dinfo = au_di(dentry);
5349+ if (!dinfo)
5350+ return;
5351+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
5352+ dinfo->di_bstart, dinfo->di_bend,
5353+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
5354+ if (dinfo->di_bstart < 0)
5355+ return;
4a4d8108 5356+ hdp = dinfo->di_hdentry;
1facf9fc 5357+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 5358+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 5359+}
5360+
5361+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
5362+{
5363+ char a[32];
5364+
5365+ if (!file || IS_ERR(file)) {
5366+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
5367+ return -1;
5368+ }
5369+ a[0] = 0;
5370+ if (bindex < 0
5371+ && file->f_dentry
5372+ && au_test_aufs(file->f_dentry->d_sb)
5373+ && au_fi(file))
e49829fe 5374+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 5375+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 5376+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 5377+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 5378+ file->f_version, file->f_pos, a);
1facf9fc 5379+ if (file->f_dentry)
5380+ do_pri_dentry(bindex, file->f_dentry);
5381+ return 0;
5382+}
5383+
5384+void au_dpri_file(struct file *file)
5385+{
5386+ struct au_finfo *finfo;
4a4d8108
AM
5387+ struct au_fidir *fidir;
5388+ struct au_hfile *hfile;
1facf9fc 5389+ aufs_bindex_t bindex;
5390+ int err;
5391+
5392+ err = do_pri_file(-1, file);
5393+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
5394+ return;
5395+
5396+ finfo = au_fi(file);
5397+ if (!finfo)
5398+ return;
4a4d8108 5399+ if (finfo->fi_btop < 0)
1facf9fc 5400+ return;
4a4d8108
AM
5401+ fidir = finfo->fi_hdir;
5402+ if (!fidir)
5403+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
5404+ else
e49829fe
JR
5405+ for (bindex = finfo->fi_btop;
5406+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
5407+ bindex++) {
5408+ hfile = fidir->fd_hfile + bindex;
5409+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
5410+ }
1facf9fc 5411+}
5412+
5413+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
5414+{
5415+ struct vfsmount *mnt;
5416+ struct super_block *sb;
5417+
5418+ if (!br || IS_ERR(br))
5419+ goto out;
5420+ mnt = br->br_mnt;
5421+ if (!mnt || IS_ERR(mnt))
5422+ goto out;
5423+ sb = mnt->mnt_sb;
5424+ if (!sb || IS_ERR(sb))
5425+ goto out;
5426+
1e00d052 5427+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 5428+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 5429+ "xino %d\n",
1e00d052
AM
5430+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
5431+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 5432+ sb->s_flags, sb->s_count,
1facf9fc 5433+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
5434+ return 0;
5435+
4f0767ce 5436+out:
1facf9fc 5437+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
5438+ return -1;
5439+}
5440+
5441+void au_dpri_sb(struct super_block *sb)
5442+{
5443+ struct au_sbinfo *sbinfo;
5444+ aufs_bindex_t bindex;
5445+ int err;
5446+ /* to reuduce stack size */
5447+ struct {
5448+ struct vfsmount mnt;
5449+ struct au_branch fake;
5450+ } *a;
5451+
5452+ /* this function can be called from magic sysrq */
5453+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
5454+ if (unlikely(!a)) {
5455+ dpri("no memory\n");
5456+ return;
5457+ }
5458+
5459+ a->mnt.mnt_sb = sb;
5460+ a->fake.br_perm = 0;
5461+ a->fake.br_mnt = &a->mnt;
5462+ a->fake.br_xino.xi_file = NULL;
5463+ atomic_set(&a->fake.br_count, 0);
5464+ smp_mb(); /* atomic_set */
5465+ err = do_pri_br(-1, &a->fake);
5466+ kfree(a);
5467+ dpri("dev 0x%x\n", sb->s_dev);
5468+ if (err || !au_test_aufs(sb))
5469+ return;
5470+
5471+ sbinfo = au_sbi(sb);
5472+ if (!sbinfo)
5473+ return;
5474+ dpri("nw %d, gen %u, kobj %d\n",
5475+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
5476+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5477+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
5478+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
5479+}
5480+
5481+/* ---------------------------------------------------------------------- */
5482+
/*
 * sleep uninterruptibly, calling schedule_timeout_uninterruptible() again
 * with its own return value until it returns zero.
 */
void au_dbg_sleep_jiffy(int jiffy)
{
	for (; jiffy; jiffy = schedule_timeout_uninterruptible(jiffy))
		; /* nothing else to do */
}
5488+
5489+void au_dbg_iattr(struct iattr *ia)
5490+{
5491+#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \
5492+ dpri(#name "\n")
5493+ AuBit(MODE);
5494+ AuBit(UID);
5495+ AuBit(GID);
5496+ AuBit(SIZE);
5497+ AuBit(ATIME);
5498+ AuBit(MTIME);
5499+ AuBit(CTIME);
5500+ AuBit(ATIME_SET);
5501+ AuBit(MTIME_SET);
5502+ AuBit(FORCE);
5503+ AuBit(ATTR_FLAG);
5504+ AuBit(KILL_SUID);
5505+ AuBit(KILL_SGID);
5506+ AuBit(FILE);
5507+ AuBit(KILL_PRIV);
5508+ AuBit(OPEN);
5509+ AuBit(TIMES_SET);
5510+#undef AuBit
5511+ dpri("ia_file %p\n", ia->ia_file);
5512+}
5513+
5514+/* ---------------------------------------------------------------------- */
5515+
027c5e7a
AM
5516+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
5517+{
5518+ struct inode *h_inode, *inode = dentry->d_inode;
5519+ struct dentry *h_dentry;
5520+ aufs_bindex_t bindex, bend, bi;
5521+
5522+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
5523+ return;
5524+
5525+ bend = au_dbend(dentry);
5526+ bi = au_ibend(inode);
5527+ if (bi < bend)
5528+ bend = bi;
5529+ bindex = au_dbstart(dentry);
5530+ bi = au_ibstart(inode);
5531+ if (bi > bindex)
5532+ bindex = bi;
5533+
5534+ for (; bindex <= bend; bindex++) {
5535+ h_dentry = au_h_dptr(dentry, bindex);
5536+ if (!h_dentry)
5537+ continue;
5538+ h_inode = au_h_iptr(inode, bindex);
5539+ if (unlikely(h_inode != h_dentry->d_inode)) {
5540+ int old = au_debug_test();
5541+ if (!old)
5542+ au_debug(1);
5543+ AuDbg("b%d, %s:%d\n", bindex, func, line);
5544+ AuDbgDentry(dentry);
5545+ AuDbgInode(inode);
5546+ if (!old)
5547+ au_debug(0);
5548+ BUG();
5549+ }
5550+ }
5551+}
5552+
1facf9fc 5553+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
5554+{
5555+ struct dentry *parent;
5556+
5557+ parent = dget_parent(dentry);
027c5e7a
AM
5558+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
5559+ AuDebugOn(IS_ROOT(dentry));
5560+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5561+ dput(parent);
5562+}
5563+
5564+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
5565+{
5566+ struct dentry *parent;
027c5e7a 5567+ struct inode *inode;
1facf9fc 5568+
5569+ parent = dget_parent(dentry);
027c5e7a
AM
5570+ inode = dentry->d_inode;
5571+ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode));
5572+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5573+ dput(parent);
5574+}
5575+
5576+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5577+{
5578+ int err, i, j;
5579+ struct au_dcsub_pages dpages;
5580+ struct au_dpage *dpage;
5581+ struct dentry **dentries;
5582+
5583+ err = au_dpages_init(&dpages, GFP_NOFS);
5584+ AuDebugOn(err);
027c5e7a 5585+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 5586+ AuDebugOn(err);
5587+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
5588+ dpage = dpages.dpages + i;
5589+ dentries = dpage->dentries;
5590+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 5591+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 5592+ }
5593+ au_dpages_free(&dpages);
5594+}
5595+
/* call au_dbg_blocked() when au_wkq_test() is true */
void au_dbg_verify_kthread(void)
{
	if (!au_wkq_test())
		return;

	au_dbg_blocked();
	/*
	 * It may be recursive, but udba=notify between two aufs mounts,
	 * where a single ro branch is shared, is not a problem.
	 */
	/* WARN_ON(1); */
}
5607+
5608+/* ---------------------------------------------------------------------- */
5609+
/*
 * override some settings of @sbinfo at compile time.
 * each AuForceXxx macro, when defined, clears or sets the matching mount
 * flag or zeroes si_rdblk/si_rdhash. when none is defined, this function
 * does nothing (hence __maybe_unused).
 */
void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
{
#ifdef AuForceNoPlink
	au_opt_clr(sbinfo->si_mntflags, PLINK);
#endif
#ifdef AuForceNoXino
	au_opt_clr(sbinfo->si_mntflags, XINO);
#endif
#ifdef AuForceNoRefrof
	au_opt_clr(sbinfo->si_mntflags, REFROF);
#endif
#ifdef AuForceHnotify
	au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY);
#endif
#ifdef AuForceRd0
	sbinfo->si_rdblk = 0;
	sbinfo->si_rdhash = 0;
#endif
}
5629+
5630+int __init au_debug_init(void)
5631+{
5632+ aufs_bindex_t bindex;
5633+ struct au_vdir_destr destr;
5634+
5635+ bindex = -1;
5636+ AuDebugOn(bindex >= 0);
5637+
5638+ destr.len = -1;
5639+ AuDebugOn(destr.len < NAME_MAX);
5640+
5641+#ifdef CONFIG_4KSTACKS
4a4d8108 5642+ pr_warning("CONFIG_4KSTACKS is defined.\n");
1facf9fc 5643+#endif
5644+
5645+#ifdef AuForceNoBrs
5646+ sysaufs_brs = 0;
5647+#endif
5648+
5649+ return 0;
5650+}
7f207e10
AM
5651diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
5652--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
5653+++ linux/fs/aufs/debug.h 2012-02-13 21:54:56.966438287 +0100
5654@@ -0,0 +1,243 @@
1facf9fc 5655+/*
f6c5ef8b 5656+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 5657+ *
5658+ * This program, aufs is free software; you can redistribute it and/or modify
5659+ * it under the terms of the GNU General Public License as published by
5660+ * the Free Software Foundation; either version 2 of the License, or
5661+ * (at your option) any later version.
dece6358
AM
5662+ *
5663+ * This program is distributed in the hope that it will be useful,
5664+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5665+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5666+ * GNU General Public License for more details.
5667+ *
5668+ * You should have received a copy of the GNU General Public License
5669+ * along with this program; if not, write to the Free Software
5670+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5671+ */
5672+
5673+/*
5674+ * debug print functions
5675+ */
5676+
5677+#ifndef __AUFS_DEBUG_H__
5678+#define __AUFS_DEBUG_H__
5679+
5680+#ifdef __KERNEL__
5681+
1308ab2a 5682+#include <asm/system.h>
4a4d8108
AM
5683+#include <linux/module.h>
5684+#include <linux/kallsyms.h>
1facf9fc 5685+#include <linux/sysrq.h>
4a4d8108 5686+
#ifdef CONFIG_AUFS_DEBUG
/* assertion, BUG_ON() in the debug build */
#define AuDebugOn(a)		BUG_ON(a)

/* module parameter */
extern int aufs_debug;
/* set the debug switch to @n, followed by a memory barrier */
static inline void au_debug(int n)
{
	aufs_debug = n;
	smp_mb();
}

/* returns the current value of the debug switch */
static inline int au_debug_test(void)
{
	return aufs_debug;
}
#else
/* without CONFIG_AUFS_DEBUG, the assertion expands to an empty statement */
#define AuDebugOn(a)		do {} while (0)
AuStubVoid(au_debug, int n)
AuStubInt0(au_debug_test, void)
#endif /* CONFIG_AUFS_DEBUG */
5707+
5708+/* ---------------------------------------------------------------------- */
5709+
5710+/* debug print */
5711+
/* pr_debug() with a "DEBUG: " prefix, only when the debug switch is on */
#define AuDbg(fmt, ...) do { \
	if (au_debug_test()) \
		pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
} while (0)
/* print the token @l itself as a debug message */
#define AuLabel(l)		AuDbg(#l "\n")
/* pr_err() with an "I/O Error, " prefix */
#define AuIOErr(fmt, ...)	pr_err("I/O Error, " fmt, ##__VA_ARGS__)
/*
 * the XXX1() variants print on the first hit of each call site.
 * NOTE(review): the per-site counter is an unsigned char, so it wraps and
 * the message is printed again on every 256th hit -- confirm it is intended.
 */
#define AuWarn1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		pr_warning(fmt, ##__VA_ARGS__); \
} while (0)

#define AuErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		pr_err(fmt, ##__VA_ARGS__); \
} while (0)

#define AuIOErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		AuIOErr(fmt, ##__VA_ARGS__); \
} while (0)

#define AuUnsupportMsg	"This operation is not supported." \
			" Please report this application to aufs-users ML."
/* print AuUnsupportMsg and the given message as an error, then the stack */
#define AuUnsupport(fmt, ...) do { \
	pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
	dump_stack(); \
} while (0)

/* debug-print a negative error code @e */
#define AuTraceErr(e) do { \
	if (unlikely((e) < 0)) \
		AuDbg("err %d\n", (int)(e)); \
} while (0)

/* debug-print the error code carried by the error pointer @p */
#define AuTraceErrPtr(p) do { \
	if (IS_ERR(p)) \
		AuDbg("err %ld\n", PTR_ERR(p)); \
} while (0)

/*
 * dirty macros for debug print, use with "%.*s" and caution.
 * each of them expands to TWO printf arguments (length, name), and
 * evaluates its parameter twice.
 */
#define AuLNPair(qstr)		(qstr)->len, (qstr)->name
#define AuDLNPair(d)		AuLNPair(&(d)->d_name)
5756+
5757+/* ---------------------------------------------------------------------- */
5758+
5759+struct au_sbinfo;
5760+struct au_finfo;
dece6358 5761+struct dentry;
1facf9fc 5762+#ifdef CONFIG_AUFS_DEBUG
/* printk level string used by the print functions in debug.c */
extern char *au_plevel;

/* dump functions, one per object type */
struct au_nhash;
void au_dpri_whlist(struct au_nhash *whlist);
struct au_vdir;
void au_dpri_vdir(struct au_vdir *vdir);
struct inode;
void au_dpri_inode(struct inode *inode);
void au_dpri_dalias(struct inode *inode);
void au_dpri_dentry(struct dentry *dentry);
struct file;
void au_dpri_file(struct file *filp);
struct super_block;
void au_dpri_sb(struct super_block *sb);

void au_dbg_sleep_jiffy(int jiffy);
struct iattr;
void au_dbg_iattr(struct iattr *ia);

/* consistency checks; the macro passes the caller's function name and line */
#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
void au_dbg_verify_kthread(void);

int __init au_debug_init(void);
void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
/*
 * wrappers of the dump functions.
 * each of them debug-prints the text of its argument first, then dumps
 * the object.
 */
#define AuDbgWhlist(w) do { \
	AuDbg(#w "\n"); \
	au_dpri_whlist(w); \
} while (0)

#define AuDbgVdir(v) do { \
	AuDbg(#v "\n"); \
	au_dpri_vdir(v); \
} while (0)

#define AuDbgInode(i) do { \
	AuDbg(#i "\n"); \
	au_dpri_inode(i); \
} while (0)

#define AuDbgDAlias(i) do { \
	AuDbg(#i "\n"); \
	au_dpri_dalias(i); \
} while (0)

#define AuDbgDentry(d) do { \
	AuDbg(#d "\n"); \
	au_dpri_dentry(d); \
} while (0)

#define AuDbgFile(f) do { \
	AuDbg(#f "\n"); \
	au_dpri_file(f); \
} while (0)

#define AuDbgSb(sb) do { \
	AuDbg(#sb "\n"); \
	au_dpri_sb(sb); \
} while (0)

/*
 * announce, then sleep.
 * the argument is parenthesized where it is passed as a variadic argument,
 * so an expression containing a comma operator stays a single argument.
 * note that it may be evaluated twice.
 */
#define AuDbgSleep(sec) do { \
	AuDbg("sleep %d sec\n", (sec)); \
	ssleep(sec); \
} while (0)

#define AuDbgSleepJiffy(jiffy) do { \
	AuDbg("sleep %d jiffies\n", (jiffy)); \
	au_dbg_sleep_jiffy(jiffy); \
} while (0)

#define AuDbgIAttr(ia) do { \
	AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
	au_dbg_iattr(ia); \
} while (0)

/*
 * print the symbol for the address @addr.
 * @addr is parenthesized so that the cast applies to the whole expression,
 * and the buffer has a name which is unlikely to capture a variable used
 * in @addr.
 */
#define AuDbgSym(addr) do { \
	char au_sym_buf__[KSYM_SYMBOL_LEN]; \
	sprint_symbol(au_sym_buf__, (unsigned long)(addr)); \
	AuDbg("%s\n", au_sym_buf__); \
} while (0)

#define AuInfoSym(addr) do { \
	char au_sym_buf__[KSYM_SYMBOL_LEN]; \
	sprint_symbol(au_sym_buf__, (unsigned long)(addr)); \
	AuInfo("%s\n", au_sym_buf__); \
} while (0)
1facf9fc 5851+#else
/*
 * without CONFIG_AUFS_DEBUG: the verify/init functions are declared through
 * the AuStubVoid()/AuStubInt0() helpers (defined outside this header), and
 * the dump macros expand to empty statements which do not evaluate their
 * arguments.
 */
AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
	   unsigned int sigen)
AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
AuStubVoid(au_dbg_verify_kthread, void)
AuStubInt0(__init au_debug_init, void)
AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)

#define AuDbgWhlist(w)		do {} while (0)
#define AuDbgVdir(v)		do {} while (0)
#define AuDbgInode(i)		do {} while (0)
#define AuDbgDAlias(i)		do {} while (0)
#define AuDbgDentry(d)		do {} while (0)
#define AuDbgFile(f)		do {} while (0)
#define AuDbgSb(sb)		do {} while (0)
#define AuDbgSleep(sec)		do {} while (0)
#define AuDbgSleepJiffy(jiffy)	do {} while (0)
#define AuDbgIAttr(ia)		do {} while (0)
#define AuDbgSym(addr)		do {} while (0)
#define AuInfoSym(addr)		do {} while (0)
1facf9fc 5873+#endif /* CONFIG_AUFS_DEBUG */
5874+
5875+/* ---------------------------------------------------------------------- */
5876+
#ifdef CONFIG_AUFS_MAGIC_SYSRQ
int __init au_sysrq_init(void);
void au_sysrq_fin(void);

#ifdef CONFIG_HW_CONSOLE
/* warn with a stack trace, then trigger the 'w' key of magic sysrq */
#define au_dbg_blocked() do { \
	WARN_ON(1); \
	handle_sysrq('w'); \
} while (0)
#else
AuStubVoid(au_dbg_blocked, void)
#endif

#else
/* without CONFIG_AUFS_MAGIC_SYSRQ, all three are stubs */
AuStubInt0(__init au_sysrq_init, void)
AuStubVoid(au_sysrq_fin, void)
AuStubVoid(au_dbg_blocked, void)
#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5895+
5896+#endif /* __KERNEL__ */
5897+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
5898diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
5899--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 5900+++ linux/fs/aufs/dentry.c 2012-02-13 21:54:56.969771692 +0100
027c5e7a 5901@@ -0,0 +1,1140 @@
1facf9fc 5902+/*
f6c5ef8b 5903+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 5904+ *
5905+ * This program, aufs is free software; you can redistribute it and/or modify
5906+ * it under the terms of the GNU General Public License as published by
5907+ * the Free Software Foundation; either version 2 of the License, or
5908+ * (at your option) any later version.
dece6358
AM
5909+ *
5910+ * This program is distributed in the hope that it will be useful,
5911+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5912+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5913+ * GNU General Public License for more details.
5914+ *
5915+ * You should have received a copy of the GNU General Public License
5916+ * along with this program; if not, write to the Free Software
5917+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5918+ */
5919+
5920+/*
5921+ * lookup and dentry operations
5922+ */
5923+
dece6358 5924+#include <linux/namei.h>
1facf9fc 5925+#include "aufs.h"
5926+
5927+static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5928+{
5929+ if (nd) {
5930+ *h_nd = *nd;
5931+
5932+ /*
5933+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5934+ * due to whiteout and branch permission.
5935+ */
5936+ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
b752ccd1 5937+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
1facf9fc 5938+ /* unnecessary? */
5939+ h_nd->intent.open.file = NULL;
5940+ } else
5941+ memset(h_nd, 0, sizeof(*h_nd));
5942+}
5943+
/*
 * arguments of au_lkup_one(), packed for au_call_lkup_one().
 * the result of the lookup is stored to *errp.
 */
struct au_lkup_one_args {
	struct dentry **errp;
	struct qstr *name;
	struct dentry *h_parent;
	struct au_branch *br;
	struct nameidata *nd;
};
5951+
5952+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5953+ struct au_branch *br, struct nameidata *nd)
5954+{
5955+ struct dentry *h_dentry;
5956+ int err;
5957+ struct nameidata h_nd;
5958+
5959+ if (au_test_fs_null_nd(h_parent->d_sb))
5960+ return vfsub_lookup_one_len(name->name, h_parent, name->len);
5961+
5962+ au_h_nd(&h_nd, nd);
5963+ h_nd.path.dentry = h_parent;
5964+ h_nd.path.mnt = br->br_mnt;
5965+
2cbb1c4b 5966+ err = vfsub_name_hash(name->name, &h_nd.last, name->len);
1facf9fc 5967+ h_dentry = ERR_PTR(err);
5968+ if (!err) {
5969+ path_get(&h_nd.path);
5970+ h_dentry = vfsub_lookup_hash(&h_nd);
5971+ path_put(&h_nd.path);
5972+ }
5973+
4a4d8108 5974+ AuTraceErrPtr(h_dentry);
1facf9fc 5975+ return h_dentry;
5976+}
5977+
5978+static void au_call_lkup_one(void *args)
5979+{
5980+ struct au_lkup_one_args *a = args;
5981+ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5982+}
5983+
/* flags for au_do_lookup(), and the macros to test/set/clear them by name */
#define AuLkup_ALLOW_NEG	1
#define au_ftest_lkup(flags, name)	((flags) & AuLkup_##name)
#define au_fset_lkup(flags, name) \
	do { (flags) |= AuLkup_##name; } while (0)
#define au_fclr_lkup(flags, name) \
	do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 5990+
/* parameters of au_do_lookup() which are shared by all branches */
struct au_do_lookup_args {
	unsigned int		flags;	/* AuLkup_xxx */
	mode_t			type;	/* expected S_IFMT bits, 0 for any */
	struct nameidata	*nd;
};
5996+
/*
 * lookup @dentry under @h_parent on the branch @bindex, testing the
 * whiteout @wh_name first when the branch permission allows whiteouts.
 * returns positive/negative dentry, NULL or an error.
 * NULL means whiteout-ed or not-found.
 * on success, the range and the lower dentry of @dentry are updated, and
 * so are its whiteout/opaque indices when a whiteout or an opaque
 * directory is found.
 */
static struct dentry*
au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
	     aufs_bindex_t bindex, struct qstr *wh_name,
	     struct au_do_lookup_args *args)
{
	struct dentry *h_dentry;
	struct inode *h_inode, *inode;
	struct au_branch *br;
	int wh_found, opq;
	unsigned char wh_able;
	const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);

	wh_found = 0;
	br = au_sbr(dentry->d_sb, bindex);
	wh_able = !!au_br_whable(br->br_perm);
	if (wh_able)
		wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
	/* meaningful only when wh_found is negative, ie. the test failed */
	h_dentry = ERR_PTR(wh_found);
	if (!wh_found)
		goto real_lookup;
	if (unlikely(wh_found < 0))
		goto out;

	/* We found a whiteout */
	/* au_set_dbend(dentry, bindex); */
	au_set_dbwh(dentry, bindex);
	if (!allow_neg)
		return NULL; /* success */

real_lookup:
	h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd);
	if (IS_ERR(h_dentry))
		goto out;

	/*
	 * drop the result when it is negative but negative is not allowed,
	 * when it is positive but whiteout-ed, or when its type differs from
	 * the expected one.
	 */
	h_inode = h_dentry->d_inode;
	if (!h_inode) {
		if (!allow_neg)
			goto out_neg;
	} else if (wh_found
		   || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
		goto out_neg;

	/* extend the range of @dentry to cover @bindex, and record the result */
	if (au_dbend(dentry) <= bindex)
		au_set_dbend(dentry, bindex);
	if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
		au_set_dbstart(dentry, bindex);
	au_set_h_dptr(dentry, bindex, h_dentry);

	/* the opaque test is for a lower dir on a whiteout-able branch only */
	inode = dentry->d_inode;
	if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
	    || (inode && !S_ISDIR(inode->i_mode)))
		goto out; /* success */

	mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
	opq = au_diropq_test(h_dentry, br);
	mutex_unlock(&h_inode->i_mutex);
	if (opq > 0)
		au_set_dbdiropq(dentry, bindex);
	else if (unlikely(opq < 0)) {
		/* the test failed, forget the lower dentry and return the error */
		au_set_h_dptr(dentry, bindex, NULL);
		h_dentry = ERR_PTR(opq);
	}
	goto out;

out_neg:
	dput(h_dentry);
	h_dentry = NULL;
out:
	return h_dentry;
}
6071+
dece6358
AM
6072+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
6073+{
6074+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
6075+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
6076+ return -EPERM;
6077+ return 0;
6078+}
6079+
/*
 * lookup @dentry on the branches from @bstart to the tail index of its
 * parent, skipping the branches where it already has a lower dentry.
 * @type is the expected S_IFMT bits, and @nd is handed to the lower lookup.
 * returns the number of lower positive dentries,
 * otherwise an error.
 * can be called at unlinking with @type is zero.
 */
int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
		   struct nameidata *nd)
{
	int npositive, err;
	aufs_bindex_t bindex, btail, bdiropq;
	unsigned char isdir;
	struct qstr whname;
	struct au_do_lookup_args args = {
		.flags	= 0,
		.type	= type,
		.nd	= nd
	};
	const struct qstr *name = &dentry->d_name;
	struct dentry *parent;
	struct inode *inode;

	err = au_test_shwh(dentry->d_sb, name);
	if (unlikely(err))
		goto out;

	/* whname.name is freed at out_parent */
	err = au_wh_name_alloc(&whname, name);
	if (unlikely(err))
		goto out;

	inode = dentry->d_inode;
	isdir = !!(inode && S_ISDIR(inode->i_mode));
	/* a negative result is allowed only when no type is expected */
	if (!type)
		au_fset_lkup(args.flags, ALLOW_NEG);

	npositive = 0;
	parent = dget_parent(dentry);
	btail = au_dbtaildir(parent);
	for (bindex = bstart; bindex <= btail; bindex++) {
		struct dentry *h_parent, *h_dentry;
		struct inode *h_inode, *h_dir;

		/* already looked up on this branch */
		h_dentry = au_h_dptr(dentry, bindex);
		if (h_dentry) {
			if (h_dentry->d_inode)
				npositive++;
			if (type != S_IFDIR)
				break;
			continue;
		}
		/* skip the branch where the parent is not a lower dir */
		h_parent = au_h_dptr(parent, bindex);
		if (!h_parent)
			continue;
		h_dir = h_parent->d_inode;
		if (!h_dir || !S_ISDIR(h_dir->i_mode))
			continue;

		mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
		h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
					&args);
		mutex_unlock(&h_dir->i_mutex);
		err = PTR_ERR(h_dentry);
		if (IS_ERR(h_dentry))
			goto out_parent;
		/* the flag is cleared after the first lookup which did not fail */
		au_fclr_lkup(args.flags, ALLOW_NEG);

		/* stop at a whiteout */
		if (au_dbwh(dentry) >= 0)
			break;
		if (!h_dentry)
			continue;
		h_inode = h_dentry->d_inode;
		if (!h_inode)
			continue;
		npositive++;
		/* the first positive one decides the type when it was not given */
		if (!args.type)
			args.type = h_inode->i_mode & S_IFMT;
		if (args.type != S_IFDIR)
			break;
		else if (isdir) {
			/* the type of lower may be different */
			bdiropq = au_dbdiropq(dentry);
			if (bdiropq >= 0 && bdiropq <= bindex)
				break;
		}
	}

	if (npositive) {
		AuLabel(positive);
		au_update_dbstart(dentry);
	}
	err = npositive;
	if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
		     && au_dbstart(dentry) < 0)) {
		err = -EIO;
		AuIOErr("both of real entry and whiteout found, %.*s, err %d\n",
			AuDLNPair(dentry), err);
	}

out_parent:
	dput(parent);
	kfree(whname.name);
out:
	return err;
}
6183+
6184+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6185+ struct au_branch *br)
6186+{
6187+ struct dentry *dentry;
6188+ int wkq_err;
6189+
6190+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
6191+ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
6192+ else {
6193+ struct au_lkup_one_args args = {
6194+ .errp = &dentry,
6195+ .name = name,
6196+ .h_parent = parent,
6197+ .br = br,
6198+ .nd = NULL
6199+ };
6200+
6201+ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
6202+ if (unlikely(wkq_err))
6203+ dentry = ERR_PTR(wkq_err);
6204+ }
6205+
6206+ return dentry;
6207+}
6208+
6209+/*
6210+ * lookup @dentry on @bindex which should be negative.
6211+ */
6212+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
6213+{
6214+ int err;
6215+ struct dentry *parent, *h_parent, *h_dentry;
1facf9fc 6216+
1facf9fc 6217+ parent = dget_parent(dentry);
6218+ h_parent = au_h_dptr(parent, bindex);
4a4d8108 6219+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent,
1facf9fc 6220+ au_sbr(dentry->d_sb, bindex));
6221+ err = PTR_ERR(h_dentry);
6222+ if (IS_ERR(h_dentry))
6223+ goto out;
6224+ if (unlikely(h_dentry->d_inode)) {
6225+ err = -EIO;
027c5e7a
AM
6226+ AuIOErr("%.*s should be negative on b%d.\n",
6227+ AuDLNPair(h_dentry), bindex);
1facf9fc 6228+ dput(h_dentry);
6229+ goto out;
6230+ }
6231+
4a4d8108 6232+ err = 0;
1facf9fc 6233+ if (bindex < au_dbstart(dentry))
6234+ au_set_dbstart(dentry, bindex);
6235+ if (au_dbend(dentry) < bindex)
6236+ au_set_dbend(dentry, bindex);
6237+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 6238+
4f0767ce 6239+out:
1facf9fc 6240+ dput(parent);
6241+ return err;
6242+}
6243+
6244+/* ---------------------------------------------------------------------- */
6245+
/*
 * subset of struct inode.
 * filled by au_iattr_save() and compared by au_iattr_test().
 * the commented-out members are not saved or compared.
 */
struct au_iattr {
	unsigned long		i_ino;
	/* unsigned int		i_nlink; */
	uid_t			i_uid;
	gid_t			i_gid;
	u64			i_version;
/*
	loff_t			i_size;
	blkcnt_t		i_blocks;
*/
	umode_t			i_mode;	/* S_IFMT bits only */
};
6259+
6260+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
6261+{
6262+ ia->i_ino = h_inode->i_ino;
6263+ /* ia->i_nlink = h_inode->i_nlink; */
6264+ ia->i_uid = h_inode->i_uid;
6265+ ia->i_gid = h_inode->i_gid;
6266+ ia->i_version = h_inode->i_version;
6267+/*
6268+ ia->i_size = h_inode->i_size;
6269+ ia->i_blocks = h_inode->i_blocks;
6270+*/
6271+ ia->i_mode = (h_inode->i_mode & S_IFMT);
6272+}
6273+
6274+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
6275+{ /* returns non-zero when any saved field differs from @h_inode, zero when all match */
6276+ return ia->i_ino != h_inode->i_ino
6277+ /* || ia->i_nlink != h_inode->i_nlink */
6278+ || ia->i_uid != h_inode->i_uid
6279+ || ia->i_gid != h_inode->i_gid
6280+ || ia->i_version != h_inode->i_version
6281+/*
6282+ || ia->i_size != h_inode->i_size
6283+ || ia->i_blocks != h_inode->i_blocks
6284+*/
6285+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
6286+}
6287+
6288+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
6289+ struct au_branch *br)
6290+{
6291+ int err;
6292+ struct au_iattr ia;
6293+ struct inode *h_inode;
6294+ struct dentry *h_d;
6295+ struct super_block *h_sb;
6296+ /* look the name of @h_dentry up again under @h_parent; returns 0 if nothing changed, the lookup error, or au_busy_or_stale() on a mismatch */
6297+ err = 0;
6298+ memset(&ia, -1, sizeof(ia));
6299+ h_sb = h_dentry->d_sb;
6300+ h_inode = h_dentry->d_inode;
6301+ if (h_inode)
6302+ au_iattr_save(&ia, h_inode);
6303+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
6304+ /* nfs d_revalidate may return 0 for a negative dentry */
6305+ /* fuse d_revalidate always returns 0 for a negative dentry */
6306+ goto out;
6307+
6308+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
6309+ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
6310+ err = PTR_ERR(h_d);
6311+ if (IS_ERR(h_d))
6312+ goto out;
6313+
6314+ err = 0; /* mismatch if the dentry, its inode, or the saved attributes differ */
6315+ if (unlikely(h_d != h_dentry
6316+ || h_d->d_inode != h_inode
6317+ || (h_inode && au_iattr_test(&ia, h_inode))))
6318+ err = au_busy_or_stale();
6319+ dput(h_d);
6320+
4f0767ce 6321+out:
1facf9fc 6322+ AuTraceErr(err);
6323+ return err;
6324+}
6325+
6326+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6327+ struct dentry *h_parent, struct au_branch *br)
6328+{
6329+ int err;
6330+ /* verify @h_dentry according to @udba; 0 means acceptable, and with AuOpt_UDBA_NONE nothing is checked */
6331+ err = 0;
027c5e7a
AM
6332+ if (udba == AuOpt_UDBA_REVAL
6333+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 6334+ IMustLock(h_dir);
6335+ err = (h_dentry->d_parent->d_inode != h_dir); /* NOTE: 0 or 1 here, not a negative errno */
027c5e7a 6336+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 6337+ err = au_h_verify_dentry(h_dentry, h_parent, br);
6338+
6339+ return err;
6340+}
6341+
6342+/* ---------------------------------------------------------------------- */
6343+
027c5e7a 6344+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 6345+{
027c5e7a 6346+ int err;
1facf9fc 6347+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
6348+ struct au_hdentry tmp, *p, *q;
6349+ struct au_dinfo *dinfo;
6350+ struct super_block *sb;
1facf9fc 6351+ /* re-sort the lower dentry array of @dentry by the index au_br_index() gives for each hd_id, then recompute di_bwh, di_bdiropq, di_bstart and di_bend; returns 0, or -EIO when no lower dentry is left */
027c5e7a 6352+ DiMustWriteLock(dentry);
1308ab2a 6353+
027c5e7a
AM
6354+ sb = dentry->d_sb;
6355+ dinfo = au_di(dentry);
1facf9fc 6356+ bend = dinfo->di_bend;
6357+ bwh = dinfo->di_bwh;
6358+ bdiropq = dinfo->di_bdiropq;
027c5e7a 6359+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 6360+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 6361+ if (!p->hd_dentry)
1facf9fc 6362+ continue;
6363+
027c5e7a
AM
6364+ new_bindex = au_br_index(sb, p->hd_id); /* presumably the current index of the branch with this id, negative if none -- TODO confirm */
6365+ if (new_bindex == bindex)
1facf9fc 6366+ continue;
1facf9fc 6367+
1facf9fc 6368+ if (dinfo->di_bwh == bindex)
6369+ bwh = new_bindex;
6370+ if (dinfo->di_bdiropq == bindex)
6371+ bdiropq = new_bindex;
6372+ if (new_bindex < 0) {
6373+ au_hdput(p); /* no index for this id: release the lower dentry and empty the slot */
6374+ p->hd_dentry = NULL;
6375+ continue;
6376+ }
6377+
6378+ /* swap two lower dentries, and loop again */
6379+ q = dinfo->di_hdentry + new_bindex;
6380+ tmp = *q;
6381+ *q = *p;
6382+ *p = tmp;
6383+ if (tmp.hd_dentry) { /* the slot now holds another lower dentry: step back so it is examined too */
6384+ bindex--;
6385+ p--;
6386+ }
6387+ }
6388+
1facf9fc 6389+ dinfo->di_bwh = -1; /* kept only if still within the branch range and au_sbr_whable() */
6390+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
6391+ dinfo->di_bwh = bwh;
6392+
6393+ dinfo->di_bdiropq = -1; /* same rule as di_bwh above */
6394+ if (bdiropq >= 0
6395+ && bdiropq <= au_sbend(sb)
6396+ && au_sbr_whable(sb, bdiropq))
6397+ dinfo->di_bdiropq = bdiropq;
6398+
027c5e7a
AM
6399+ err = -EIO; /* stays -EIO unless a lower dentry is found below */
6400+ dinfo->di_bstart = -1;
6401+ dinfo->di_bend = -1;
1facf9fc 6402+ bend = au_dbend(parent); /* scan only up to the parent's bend */
6403+ p = dinfo->di_hdentry;
6404+ for (bindex = 0; bindex <= bend; bindex++, p++)
6405+ if (p->hd_dentry) {
6406+ dinfo->di_bstart = bindex; /* first occupied slot */
6407+ break;
6408+ }
6409+
027c5e7a
AM
6410+ if (dinfo->di_bstart >= 0) {
6411+ p = dinfo->di_hdentry + bend;
6412+ for (bindex = bend; bindex >= 0; bindex--, p--)
6413+ if (p->hd_dentry) {
6414+ dinfo->di_bend = bindex; /* last occupied slot */
6415+ err = 0;
6416+ break;
6417+ }
6418+ }
6419+
6420+ return err;
1facf9fc 6421+}
6422+
027c5e7a 6423+static void au_do_hide(struct dentry *dentry)
1facf9fc 6424+{
027c5e7a 6425+ struct inode *inode;
1facf9fc 6426+ /* unhash @dentry and, if it is positive, lower the link count of its inode */
027c5e7a
AM
6427+ inode = dentry->d_inode;
6428+ if (inode) {
6429+ if (!S_ISDIR(inode->i_mode)) {
6430+ if (inode->i_nlink && !d_unhashed(dentry))
6431+ drop_nlink(inode); /* non-dir: one link less, only if still hashed and nlink is non-zero */
6432+ } else {
6433+ clear_nlink(inode); /* dir: nlink is zeroed unconditionally */
6434+ /* stop next lookup */
6435+ inode->i_flags |= S_DEAD;
6436+ }
6437+ smp_mb(); /* necessary? */
6438+ }
6439+ d_drop(dentry);
6440+}
1308ab2a 6441+
027c5e7a
AM
6442+static int au_hide_children(struct dentry *parent)
6443+{
6444+ int err, i, j, ndentry;
6445+ struct au_dcsub_pages dpages;
6446+ struct au_dpage *dpage;
6447+ struct dentry *dentry;
1facf9fc 6448+ /* apply au_do_hide() to every dentry au_dcsub_pages() collects for @parent, except @parent itself; returns 0 or the collection error */
027c5e7a 6449+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 6450+ if (unlikely(err))
6451+ goto out;
027c5e7a
AM
6452+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
6453+ if (unlikely(err))
6454+ goto out_dpages;
1facf9fc 6455+
027c5e7a
AM
6456+ /* in reverse order */
6457+ for (i = dpages.ndpage - 1; i >= 0; i--) {
6458+ dpage = dpages.dpages + i;
6459+ ndentry = dpage->ndentry;
6460+ for (j = ndentry - 1; j >= 0; j--) {
6461+ dentry = dpage->dentries[j];
6462+ if (dentry != parent) /* the caller hides @parent itself */
6463+ au_do_hide(dentry);
6464+ }
6465+ }
1facf9fc 6466+
027c5e7a
AM
6467+out_dpages:
6468+ au_dpages_free(&dpages);
4f0767ce 6469+out:
027c5e7a 6470+ return err;
1facf9fc 6471+}
6472+
027c5e7a 6473+static void au_hide(struct dentry *dentry)
1facf9fc 6474+{
027c5e7a
AM
6475+ int err;
6476+ struct inode *inode;
1facf9fc 6477+ /* hide @dentry; for a directory its children are hidden first, best effort */
027c5e7a
AM
6478+ AuDbgDentry(dentry);
6479+ inode = dentry->d_inode;
6480+ if (inode && S_ISDIR(inode->i_mode)) {
6481+ /* shrink_dcache_parent(dentry); */
6482+ err = au_hide_children(dentry);
6483+ if (unlikely(err)) /* only reported, @dentry is hidden anyway */
6484+ AuIOErr("%.*s, failed hiding children, ignored %d\n",
6485+ AuDLNPair(dentry), err);
6486+ }
6487+ au_do_hide(dentry);
6488+}
1facf9fc 6489+
027c5e7a
AM
6490+/*
6491+ * By adding a dirty branch, a cached dentry may be affected in various ways.
6492+ *
6493+ * a dirty branch is added
6494+ * - on the top of layers
6495+ * - in the middle of layers
6496+ * - to the bottom of layers
6497+ *
6498+ * on the added branch there exists
6499+ * - a whiteout
6500+ * - a diropq
6501+ * - a same named entry
6502+ * + exist
6503+ * * negative --> positive
6504+ * * positive --> positive
6505+ * - type is unchanged
6506+ * - type is changed
6507+ * + doesn't exist
6508+ * * negative --> negative
6509+ * * positive --> negative (rejected by au_br_del() for non-dir case)
6510+ * - none
6511+ */
6512+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
6513+ struct au_dinfo *tmp)
6514+{
6515+ int err;
6516+ aufs_bindex_t bindex, bend;
6517+ struct {
6518+ struct dentry *dentry;
6519+ struct inode *inode;
6520+ mode_t mode;
6521+ } orig_h, tmp_h;
6522+ struct au_hdentry *hd;
6523+ struct inode *inode, *h_inode;
6524+ struct dentry *h_dentry;
6525+ /* compare the topmost lower dentry of @dinfo (orig_h) with that of @tmp (tmp_h, all zero when @tmp is empty) and update @dentry for the transition found; au_refresh_dentry() passes the result of a fresh lookup as @tmp */
6526+ err = 0;
6527+ AuDebugOn(dinfo->di_bstart < 0);
6528+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
6529+ orig_h.inode = orig_h.dentry->d_inode;
6530+ orig_h.mode = 0;
6531+ if (orig_h.inode)
6532+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
6533+ memset(&tmp_h, 0, sizeof(tmp_h));
6534+ if (tmp->di_bstart >= 0) {
6535+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
6536+ tmp_h.inode = tmp_h.dentry->d_inode;
6537+ if (tmp_h.inode)
6538+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
6539+ }
6540+
6541+ inode = dentry->d_inode;
6542+ if (!orig_h.inode) {
6543+ AuDbg("nagative originally\n");
6544+ if (inode) { /* lower was negative but the aufs dentry is positive: just hide it */
6545+ au_hide(dentry);
6546+ goto out;
6547+ }
6548+ AuDebugOn(inode);
6549+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6550+ AuDebugOn(dinfo->di_bdiropq != -1);
6551+
6552+ if (!tmp_h.inode) {
6553+ AuDbg("negative --> negative\n");
6554+ /* should have only one negative lower */
6555+ if (tmp->di_bstart >= 0
6556+ && tmp->di_bstart < dinfo->di_bstart) { /* the new negative lower sits on an upper branch: adopt it */
6557+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
6558+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6559+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
6560+ au_di_cp(dinfo, tmp);
6561+ hd = tmp->di_hdentry + tmp->di_bstart;
6562+ au_set_h_dptr(dentry, tmp->di_bstart,
6563+ dget(hd->hd_dentry));
6564+ }
6565+ au_dbg_verify_dinode(dentry);
6566+ } else {
6567+ AuDbg("negative --> positive\n");
6568+ /*
6569+ * similar to the behaviour of creating with bypassing
6570+ * aufs.
6571+ * unhash it in order to force an error in the
6572+ * succeeding create operation.
6573+ * we should not set S_DEAD here.
6574+ */
6575+ d_drop(dentry);
6576+ /* au_di_swap(tmp, dinfo); */
6577+ au_dbg_verify_dinode(dentry);
6578+ }
6579+ } else {
6580+ AuDbg("positive originally\n");
6581+ /* inode may be NULL */
6582+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
6583+ if (!tmp_h.inode) {
6584+ AuDbg("positive --> negative\n");
6585+ /* or bypassing aufs */
6586+ au_hide(dentry);
6587+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
6588+ dinfo->di_bwh = tmp->di_bwh;
6589+ if (inode)
6590+ err = au_refresh_hinode_self(inode);
6591+ au_dbg_verify_dinode(dentry);
6592+ } else if (orig_h.mode == tmp_h.mode) {
6593+ AuDbg("positive --> positive, same type\n");
6594+ if (!S_ISDIR(orig_h.mode)
6595+ && dinfo->di_bstart > tmp->di_bstart) {
6596+ /*
6597+ * similar to the behaviour of removing and
6598+ * creating.
6599+ */
6600+ au_hide(dentry);
6601+ if (inode)
6602+ err = au_refresh_hinode_self(inode);
6603+ au_dbg_verify_dinode(dentry);
6604+ } else {
6605+ /* fill empty slots */
6606+ if (dinfo->di_bstart > tmp->di_bstart)
6607+ dinfo->di_bstart = tmp->di_bstart;
6608+ if (dinfo->di_bend < tmp->di_bend)
6609+ dinfo->di_bend = tmp->di_bend;
6610+ dinfo->di_bwh = tmp->di_bwh;
6611+ dinfo->di_bdiropq = tmp->di_bdiropq;
6612+ hd = tmp->di_hdentry;
6613+ bend = dinfo->di_bend;
6614+ for (bindex = tmp->di_bstart; bindex <= bend;
6615+ bindex++) {
6616+ if (au_h_dptr(dentry, bindex)) /* slot already filled, keep it */
6617+ continue;
6618+ h_dentry = hd[bindex].hd_dentry;
6619+ if (!h_dentry)
6620+ continue;
6621+ h_inode = h_dentry->d_inode;
6622+ AuDebugOn(!h_inode);
6623+ AuDebugOn(orig_h.mode
6624+ != (h_inode->i_mode
6625+ & S_IFMT));
6626+ au_set_h_dptr(dentry, bindex,
6627+ dget(h_dentry)); /* extra reference: @tmp keeps its own */
6628+ }
6629+ err = au_refresh_hinode(inode, dentry);
6630+ au_dbg_verify_dinode(dentry);
6631+ }
6632+ } else {
6633+ AuDbg("positive --> positive, different type\n");
6634+ /* similar to the behaviour of removing and creating */
6635+ au_hide(dentry);
6636+ if (inode)
6637+ err = au_refresh_hinode_self(inode);
6638+ au_dbg_verify_dinode(dentry);
6639+ }
6640+ }
6641+
6642+out:
6643+ return err;
6644+}
6645+
6646+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
6647+{
6648+ int err, ebrange;
6649+ unsigned int sigen;
6650+ struct au_dinfo *dinfo, *tmp;
6651+ struct super_block *sb;
6652+ struct inode *inode;
6653+ /* bring the dinfo of @dentry up to date and, on success, call au_update_digen(); @dentry must be write-locked and not the root; returns 0 or a negative errno */
6654+ DiMustWriteLock(dentry);
6655+ AuDebugOn(IS_ROOT(dentry));
6656+ AuDebugOn(!parent->d_inode);
6657+
6658+ sb = dentry->d_sb;
6659+ inode = dentry->d_inode;
6660+ sigen = au_sigen(sb);
6661+ err = au_digen_test(parent, sigen); /* fail early if this test on @parent returns non-zero */
6662+ if (unlikely(err))
6663+ goto out;
6664+
6665+ dinfo = au_di(dentry);
6666+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
6667+ if (unlikely(err))
6668+ goto out;
6669+ ebrange = au_dbrange_test(dentry);
6670+ if (!ebrange)
6671+ ebrange = au_do_refresh_hdentry(dentry, parent);
6672+
6673+ if (d_unhashed(dentry) || ebrange) { /* unhashed or no usable lower range: refresh the inode only, no new lookup */
6674+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
6675+ if (inode)
6676+ err = au_refresh_hinode_self(inode);
6677+ au_dbg_verify_dinode(dentry);
6678+ if (!err)
6679+ goto out_dgen; /* success */
6680+ goto out;
6681+ }
6682+
6683+ /* temporary dinfo */
6684+ AuDbgDentry(dentry);
6685+ err = -ENOMEM;
6686+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
6687+ if (unlikely(!tmp))
6688+ goto out;
6689+ au_di_swap(tmp, dinfo); /* swapped for the lookup, swapped back right after it */
6690+ /* returns the number of positive dentries */
6691+ /*
6692+ * if current working dir is removed, it returns an error.
6693+ * but the dentry is legal.
6694+ */
6695+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL);
6696+ AuDbgDentry(dentry);
6697+ au_di_swap(tmp, dinfo);
6698+ if (err == -ENOENT)
6699+ err = 0;
6700+ if (err >= 0) {
6701+ /* compare/refresh by dinfo */
6702+ AuDbgDentry(dentry);
6703+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
6704+ au_dbg_verify_dinode(dentry);
6705+ AuTraceErr(err);
6706+ }
6707+ au_rw_write_unlock(&tmp->di_rwsem); /* unlocked here before freeing; presumably au_di_alloc() returned it write-locked -- TODO confirm */
6708+ au_di_free(tmp);
6709+ if (unlikely(err))
6710+ goto out;
6711+
6712+out_dgen:
6713+ au_update_digen(dentry);
6714+out:
6715+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { /* errors on DCACHE_NFSFS_RENAMED dentries are not logged */
6716+ AuIOErr("failed refreshing %.*s, %d\n",
6717+ AuDLNPair(dentry), err);
6718+ AuDbgDentry(dentry);
6719+ }
6720+ AuTraceErr(err);
6721+ return err;
6722+}
6723+
6724+static noinline_for_stack
6725+int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
6726+ struct dentry *dentry, aufs_bindex_t bindex)
6727+{
6728+ int err, valid;
6729+ int (*reval)(struct dentry *, struct nameidata *);
6730+ /* call ->d_revalidate of the lower @h_dentry on @bindex, if it has one; returns 0 when valid, -EINVAL when the lower fs says invalid, or the lower error */
6731+ err = 0;
6732+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
6733+ goto out;
6734+ reval = h_dentry->d_op->d_revalidate;
6735+
6736+ AuDbg("b%d\n", bindex);
6737+ if (au_test_fs_null_nd(h_dentry->d_sb))
6738+ /* it may return tri-state */
6739+ valid = reval(h_dentry, NULL);
6740+ else {
6741+ struct nameidata h_nd;
6742+ int locked;
1facf9fc 6743+ struct dentry *parent;
6744+
6745+ au_h_nd(&h_nd, nd);
6746+ parent = nd->path.dentry; /* NOTE(review): nd is dereferenced here although the next line tests it for NULL -- confirm nd cannot be NULL on this path */
6747+ locked = (nd && nd->path.dentry != dentry);
6748+ if (locked)
6749+ di_read_lock_parent(parent, AuLock_IR);
6750+ BUG_ON(bindex > au_dbend(parent));
6751+ h_nd.path.dentry = au_h_dptr(parent, bindex);
6752+ BUG_ON(!h_nd.path.dentry);
6753+ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
6754+ path_get(&h_nd.path); /* reference held across the lower call only */
6755+ valid = reval(h_dentry, &h_nd);
6756+ path_put(&h_nd.path);
6757+ if (locked)
6758+ di_read_unlock(parent, AuLock_IR);
6759+ }
6760+
6761+ if (unlikely(valid < 0))
6762+ err = valid;
6763+ else if (!valid)
6764+ err = -EINVAL;
6765+
4f0767ce 6766+out:
1facf9fc 6767+ AuTraceErr(err);
6768+ return err;
6769+}
6770+
6771+/* todo: remove this */
6772+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
6773+ struct nameidata *nd, int do_udba)
6774+{
6775+ int err;
6776+ umode_t mode, h_mode;
6777+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
6778+ unsigned char plus, unhashed, is_root, h_plus;
4a4d8108 6779+ struct inode *h_inode, *h_cached_inode;
1facf9fc 6780+ struct dentry *h_dentry;
6781+ struct qstr *name, *h_name;
6782+ /* revalidate the lower dentries of @dentry (only the topmost unless it is a dir); with @do_udba also compare name, hashed state, type, nlink and cached lower inode; returns 0, -EINVAL, or the lower d_revalidate error */
6783+ err = 0;
6784+ plus = 0;
6785+ mode = 0;
1facf9fc 6786+ ibs = -1;
6787+ ibe = -1;
6788+ unhashed = !!d_unhashed(dentry);
6789+ is_root = !!IS_ROOT(dentry);
6790+ name = &dentry->d_name;
6791+
6792+ /*
7f207e10
AM
6793+ * Theoretically, REVAL test should be unnecessary in case of
6794+ * {FS,I}NOTIFY.
6795+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 6796+ * IN_ATTRIB for atime/nlink/pageio
6797+ * IN_DELETE for NFS dentry
6798+ * Let's do REVAL test too.
6799+ */
6800+ if (do_udba && inode) {
6801+ mode = (inode->i_mode & S_IFMT);
6802+ plus = (inode->i_nlink > 0);
1facf9fc 6803+ ibs = au_ibstart(inode);
6804+ ibe = au_ibend(inode);
6805+ }
6806+
6807+ bstart = au_dbstart(dentry);
6808+ btail = bstart;
6809+ if (inode && S_ISDIR(inode->i_mode))
6810+ btail = au_dbtaildir(dentry);
6811+ for (bindex = bstart; bindex <= btail; bindex++) {
6812+ h_dentry = au_h_dptr(dentry, bindex);
6813+ if (!h_dentry)
6814+ continue;
6815+
6816+ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
027c5e7a 6817+ spin_lock(&h_dentry->d_lock); /* d_lock is held while the lower name and hashed state are compared */
1facf9fc 6818+ h_name = &h_dentry->d_name;
6819+ if (unlikely(do_udba
6820+ && !is_root
6821+ && (unhashed != !!d_unhashed(h_dentry)
6822+ || name->len != h_name->len
6823+ || memcmp(name->name, h_name->name, name->len))
6824+ )) {
6825+ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6826+ unhashed, d_unhashed(h_dentry),
6827+ AuDLNPair(dentry), AuDLNPair(h_dentry));
027c5e7a 6828+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6829+ goto err;
6830+ }
027c5e7a 6831+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6832+
6833+ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6834+ if (unlikely(err))
6835+ /* do not goto err, to keep the errno */
6836+ break;
6837+
6838+ /* todo: plink too? */
6839+ if (!do_udba)
6840+ continue;
6841+
6842+ /* UDBA tests */
6843+ h_inode = h_dentry->d_inode;
6844+ if (unlikely(!!inode != !!h_inode)) /* one is positive and the other negative */
6845+ goto err;
6846+
6847+ h_plus = plus; /* defaults make the comparison below pass when there is no lower inode */
6848+ h_mode = mode;
6849+ h_cached_inode = h_inode;
6850+ if (h_inode) {
6851+ h_mode = (h_inode->i_mode & S_IFMT);
6852+ h_plus = (h_inode->i_nlink > 0);
6853+ }
6854+ if (inode && ibs <= bindex && bindex <= ibe)
6855+ h_cached_inode = au_h_iptr(inode, bindex);
6856+
6857+ if (unlikely(plus != h_plus
6858+ || mode != h_mode
6859+ || h_cached_inode != h_inode))
6860+ goto err;
6861+ continue;
6862+
6863+ err: /* reached only by goto from inside the loop body */
6864+ err = -EINVAL;
6865+ break;
6866+ }
6867+
6868+ return err;
6869+}
6870+
027c5e7a 6871+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 6872+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6873+{
6874+ int err;
6875+ struct dentry *parent;
1facf9fc 6876+ /* refresh @dentry alone against its read-locked parent; the parent is expected to pass au_digen_test() already */
027c5e7a 6877+ if (!au_digen_test(dentry, sigen))
1facf9fc 6878+ return 0; /* test returned 0: nothing to refresh */
6879+
6880+ parent = dget_parent(dentry);
6881+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6882+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 6883+ au_dbg_verify_gen(parent, sigen);
027c5e7a 6884+ err = au_refresh_dentry(dentry, parent);
1facf9fc 6885+ di_read_unlock(parent, AuLock_IR);
6886+ dput(parent);
027c5e7a 6887+ AuTraceErr(err);
1facf9fc 6888+ return err;
6889+}
6890+
6891+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6892+{
6893+ int err;
6894+ struct dentry *d, *parent;
6895+ struct inode *inode;
6896+ /* refresh @dentry against @sigen; when the FAILED_REFRESH_DIR flag is set, ancestors failing au_digen_test() are refreshed first, topmost one first; returns 0 or the first refresh error */
027c5e7a 6897+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 6898+ return simple_reval_dpath(dentry, sigen);
6899+
6900+ /* slow loop, keep it simple and stupid */
6901+ /* cf: au_cpup_dirs() */
6902+ err = 0;
6903+ parent = NULL;
027c5e7a 6904+ while (au_digen_test(dentry, sigen)) {
1facf9fc 6905+ d = dentry;
6906+ while (1) { /* climb until the parent of d passes the test */
6907+ dput(parent);
6908+ parent = dget_parent(d);
027c5e7a 6909+ if (!au_digen_test(parent, sigen))
1facf9fc 6910+ break;
6911+ d = parent;
6912+ }
6913+
6914+ inode = d->d_inode;
6915+ if (d != dentry) /* @dentry itself is not locked here; presumably the caller holds its lock -- TODO confirm */
027c5e7a 6916+ di_write_lock_child2(d);
1facf9fc 6917+
6918+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
6919+ if (au_digen_test(d, sigen)) {
6920+ /*
6921+ * todo: consolidate with simple_reval_dpath(),
6922+ * do_refresh() and au_reval_for_attr().
6923+ */
1facf9fc 6924+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6925+ err = au_refresh_dentry(d, parent);
1facf9fc 6926+ di_read_unlock(parent, AuLock_IR);
6927+ }
6928+
6929+ if (d != dentry)
6930+ di_write_unlock(d);
6931+ dput(parent);
6932+ if (unlikely(err))
6933+ break;
6934+ }
6935+
6936+ return err;
6937+}
6938+
6939+/*
6940+ * returns 1 if valid, 0 if invalid (the dentry is then unhashed by d_drop()), or a negative errno: -ECHILD under LOOKUP_RCU, or the error from aufs_read_lock().
6941+ */
6942+static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6943+{
6944+ int valid, err;
6945+ unsigned int sigen;
6946+ unsigned char do_udba;
6947+ struct super_block *sb;
6948+ struct inode *inode;
6949+
027c5e7a
AM
6950+ /* todo: support rcu-walk? */
6951+ if (nd && (nd->flags & LOOKUP_RCU))
6952+ return -ECHILD;
6953+
6954+ valid = 0;
6955+ if (unlikely(!au_di(dentry))) /* no dinfo attached: invalid */
6956+ goto out;
6957+
6958+ inode = dentry->d_inode;
6959+ if (inode && is_bad_inode(inode))
6960+ goto out;
6961+
e49829fe 6962+ valid = 1;
1facf9fc 6963+ sb = dentry->d_sb;
e49829fe
JR
6964+ /*
6965+ * todo: very ugly
6966+ * i_mutex of parent dir may be held,
6967+ * but we should not return 'invalid' due to busy.
6968+ */
6969+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
6970+ if (unlikely(err)) {
6971+ valid = err; /* the errno is returned as is, skipping d_drop() at "out" */
027c5e7a 6972+ AuTraceErr(err);
e49829fe
JR
6973+ goto out;
6974+ }
027c5e7a
AM
6975+ if (unlikely(au_dbrange_test(dentry))) {
6976+ err = -EINVAL;
6977+ AuTraceErr(err);
6978+ goto out_dgrade;
1facf9fc 6979+ }
027c5e7a
AM
6980+
6981+ sigen = au_sigen(sb);
6982+ if (au_digen_test(dentry, sigen)) { /* test failed: refresh before going on */
1facf9fc 6983+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
6984+ err = au_reval_dpath(dentry, sigen);
6985+ if (unlikely(err)) {
6986+ AuTraceErr(err);
1facf9fc 6987+ goto out_dgrade;
027c5e7a 6988+ }
1facf9fc 6989+ }
6990+ di_downgrade_lock(dentry, AuLock_IR);
6991+
1facf9fc 6992+ err = -EINVAL;
027c5e7a
AM
6993+ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink))
6994+ goto out_inval;
6995+
1facf9fc 6996+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6997+ if (do_udba && inode) {
6998+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 6999+ struct inode *h_inode;
1facf9fc 7000+
027c5e7a
AM
7001+ if (bstart >= 0) {
7002+ h_inode = au_h_iptr(inode, bstart);
7003+ if (h_inode && au_test_higen(inode, h_inode))
7004+ goto out_inval;
7005+ }
1facf9fc 7006+ }
7007+
7008+ err = h_d_revalidate(dentry, inode, nd, do_udba);
027c5e7a 7009+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 7010+ err = -EIO;
027c5e7a
AM
7011+ AuDbg("both of real entry and whiteout found, %.*s, err %d\n",
7012+ AuDLNPair(dentry), err);
7013+ }
e49829fe 7014+ goto out_inval; /* skip the downgrade below, it was already done above */
1facf9fc 7015+
4f0767ce 7016+out_dgrade:
1facf9fc 7017+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 7018+out_inval:
1facf9fc 7019+ aufs_read_unlock(dentry, AuLock_IR);
7020+ AuTraceErr(err);
7021+ valid = !err; /* any error on the locked path is reported as 0 (invalid), not as the errno */
e49829fe 7022+out:
027c5e7a 7023+ if (!valid) {
e49829fe 7024+ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid);
027c5e7a
AM
7025+ d_drop(dentry);
7026+ }
1facf9fc 7027+ return valid;
7028+}
7029+
7030+static void aufs_d_release(struct dentry *dentry)
7031+{
027c5e7a 7032+ if (au_di(dentry)) { /* only when a dinfo is attached (d_fsdata non-NULL) */
4a4d8108
AM
7033+ au_di_fin(dentry);
7034+ au_hn_di_reinit(dentry); /* resets d_fsdata to NULL when CONFIG_AUFS_HNOTIFY is set */
1facf9fc 7035+ }
1facf9fc 7036+}
7037+
4a4d8108 7038+const struct dentry_operations aufs_dop = { /* dentry operations of aufs: only revalidate and release are provided */
1facf9fc 7039+ .d_revalidate = aufs_d_revalidate,
7040+ .d_release = aufs_d_release
7041+};
7f207e10
AM
7042diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
7043--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
7044+++ linux/fs/aufs/dentry.h 2012-02-13 21:54:56.969771692 +0100
7045@@ -0,0 +1,237 @@
1facf9fc 7046+/*
f6c5ef8b 7047+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 7048+ *
7049+ * This program, aufs is free software; you can redistribute it and/or modify
7050+ * it under the terms of the GNU General Public License as published by
7051+ * the Free Software Foundation; either version 2 of the License, or
7052+ * (at your option) any later version.
dece6358
AM
7053+ *
7054+ * This program is distributed in the hope that it will be useful,
7055+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7056+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7057+ * GNU General Public License for more details.
7058+ *
7059+ * You should have received a copy of the GNU General Public License
7060+ * along with this program; if not, write to the Free Software
7061+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7062+ */
7063+
7064+/*
7065+ * lookup and dentry operations
7066+ */
7067+
7068+#ifndef __AUFS_DENTRY_H__
7069+#define __AUFS_DENTRY_H__
7070+
7071+#ifdef __KERNEL__
7072+
dece6358 7073+#include <linux/dcache.h>
1facf9fc 7074+#include "rwsem.h"
7075+
1facf9fc 7076+struct au_hdentry { /* one slot of au_dinfo.di_hdentry[], indexed by branch index */
7077+ struct dentry *hd_dentry; /* lower dentry, NULL when the slot is empty */
027c5e7a 7078+ aufs_bindex_t hd_id; /* passed to au_br_index() in dentry.c; presumably a branch id -- TODO confirm */
1facf9fc 7079+};
7080+
7081+struct au_dinfo { /* aufs private data of a dentry, reached via d_fsdata (see au_di()) */
7082+ atomic_t di_generation; /* read by au_digen() */
7083+
dece6358 7084+ struct au_rwsem di_rwsem; /* the lock checked by the DiMust*() macros */
1facf9fc 7085+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; /* branch indices; au_di_alloc() starts all four at -1 */
7086+ struct au_hdentry *di_hdentry; /* array of lower dentries */
4a4d8108 7087+} ____cacheline_aligned_in_smp;
1facf9fc 7088+
7089+/* ---------------------------------------------------------------------- */
7090+
7091+/* dentry.c */
4a4d8108 7092+extern const struct dentry_operations aufs_dop;
1facf9fc 7093+struct au_branch;
7094+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
7095+ struct au_branch *br, struct nameidata *nd);
7096+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
7097+ struct au_branch *br);
7098+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7099+ struct dentry *h_parent, struct au_branch *br);
7100+
7101+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
7102+ struct nameidata *nd);
7103+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
027c5e7a 7104+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 7105+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
7106+
7107+/* dinfo.c */
4a4d8108 7108+void au_di_init_once(void *_di);
027c5e7a
AM
7109+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
7110+void au_di_free(struct au_dinfo *dinfo);
7111+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
7112+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
7113+int au_di_init(struct dentry *dentry);
7114+void au_di_fin(struct dentry *dentry);
1facf9fc 7115+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
7116+
7117+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
7118+void di_read_unlock(struct dentry *d, int flags);
7119+void di_downgrade_lock(struct dentry *d, int flags);
7120+void di_write_lock(struct dentry *d, unsigned int lsc);
7121+void di_write_unlock(struct dentry *d);
7122+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
7123+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
7124+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
7125+
7126+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 7127+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 7128+aufs_bindex_t au_dbtail(struct dentry *dentry);
7129+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
7130+
7131+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7132+ struct dentry *h_dentry);
027c5e7a
AM
7133+int au_digen_test(struct dentry *dentry, unsigned int sigen);
7134+int au_dbrange_test(struct dentry *dentry);
1facf9fc 7135+void au_update_digen(struct dentry *dentry);
7136+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
7137+void au_update_dbstart(struct dentry *dentry);
7138+void au_update_dbend(struct dentry *dentry);
7139+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
7140+
7141+/* ---------------------------------------------------------------------- */
7142+
7143+static inline struct au_dinfo *au_di(struct dentry *dentry)
7144+{
7145+ return dentry->d_fsdata; /* may be NULL, callers such as aufs_d_release() test it */
7146+}
7147+
7148+/* ---------------------------------------------------------------------- */
7149+
7150+/* lock subclass for dinfo, passed as the lsc argument of di_read_lock()/di_write_lock() */
7151+enum {
7152+ AuLsc_DI_CHILD, /* child first */
4a4d8108 7153+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 7154+ AuLsc_DI_CHILD3, /* copyup dirs */
7155+ AuLsc_DI_PARENT,
7156+ AuLsc_DI_PARENT2,
027c5e7a
AM
7157+ AuLsc_DI_PARENT3,
7158+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 7159+};
7160+
7161+/* the macros below generate these inline wrappers, each calling di_read_lock()/di_write_lock() with a fixed AuLsc_DI_* subclass:
7162+ * di_read_lock_child, di_write_lock_child,
7163+ * di_read_lock_child2, di_write_lock_child2,
7164+ * di_read_lock_child3, di_write_lock_child3,
7165+ * di_read_lock_parent, di_write_lock_parent,
7166+ * di_read_lock_parent2, di_write_lock_parent2,
7167+ * di_read_lock_parent3, di_write_lock_parent3,
7168+ */
7169+#define AuReadLockFunc(name, lsc) \
7170+static inline void di_read_lock_##name(struct dentry *d, int flags) \
7171+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
7172+
7173+#define AuWriteLockFunc(name, lsc) \
7174+static inline void di_write_lock_##name(struct dentry *d) \
7175+{ di_write_lock(d, AuLsc_DI_##lsc); }
7176+
7177+#define AuRWLockFuncs(name, lsc) \
7178+ AuReadLockFunc(name, lsc) \
7179+ AuWriteLockFunc(name, lsc)
7180+
7181+AuRWLockFuncs(child, CHILD);
7182+AuRWLockFuncs(child2, CHILD2);
7183+AuRWLockFuncs(child3, CHILD3);
7184+AuRWLockFuncs(parent, PARENT);
7185+AuRWLockFuncs(parent2, PARENT2);
7186+AuRWLockFuncs(parent3, PARENT3);
7187+ /* the generator macros are not needed past this point */
7188+#undef AuReadLockFunc
7189+#undef AuWriteLockFunc
7190+#undef AuRWLockFuncs
7191+
7192+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) /* the DiMust*() macros forward to the AuRwMust*() checks on the dinfo rwsem */
dece6358
AM
7193+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
7194+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 7195+
7196+/* ---------------------------------------------------------------------- */
7197+
7198+/* todo: memory barrier? */
7199+static inline unsigned int au_digen(struct dentry *d)
7200+{
7201+ return atomic_read(&au_di(d)->di_generation); /* current generation of the dinfo of @d; no lock assertion here */
7202+}
7203+
7204+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
7205+{
7206+ hdentry->hd_dentry = NULL; /* marks the slot empty; hd_id is left untouched */
7207+}
7208+
7209+static inline void au_hdput(struct au_hdentry *hd)
7210+{
4a4d8108
AM
7211+ if (hd) /* a NULL @hd is accepted */
7212+ dput(hd->hd_dentry); /* hd_dentry itself is not cleared, the caller does that if needed */
1facf9fc 7213+}
7214+
7215+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
7216+{
1308ab2a 7217+ DiMustAnyLock(dentry); /* presumably asserts that di_rwsem is held for read or write -- TODO confirm */
1facf9fc 7218+ return au_di(dentry)->di_bstart;
7219+}
7220+
7221+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
7222+{
1308ab2a 7223+ DiMustAnyLock(dentry); /* presumably asserts that di_rwsem is held for read or write -- TODO confirm */
1facf9fc 7224+ return au_di(dentry)->di_bend;
7225+}
7226+
7227+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
7228+{
1308ab2a 7229+ DiMustAnyLock(dentry); /* presumably asserts that di_rwsem is held for read or write -- TODO confirm */
1facf9fc 7230+ return au_di(dentry)->di_bwh;
7231+}
7232+
7233+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
7234+{
1308ab2a 7235+ DiMustAnyLock(dentry); /* presumably asserts that di_rwsem is held for read or write -- TODO confirm */
1facf9fc 7236+ return au_di(dentry)->di_bdiropq;
7237+}
7238+
7239+/* todo: hard/soft set? */
7240+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
7241+{
1308ab2a 7242+ DiMustWriteLock(dentry); /* presumably asserts that di_rwsem is write-locked -- TODO confirm */
1facf9fc 7243+ au_di(dentry)->di_bstart = bindex; /* stored as given, no range check */
7244+}
7245+
7246+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
7247+{
1308ab2a 7248+ DiMustWriteLock(dentry); /* presumably asserts that di_rwsem is write-locked -- TODO confirm */
1facf9fc 7249+ au_di(dentry)->di_bend = bindex; /* stored as given, no range check */
7250+}
7251+
7252+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
7253+{
1308ab2a 7254+ DiMustWriteLock(dentry); /* presumably asserts that di_rwsem is write-locked -- TODO confirm */
1facf9fc 7255+ /* dbwh can be outside of bstart - bend range */
7256+ au_di(dentry)->di_bwh = bindex;
7257+}
7258+
7259+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
7260+{
1308ab2a 7261+ DiMustWriteLock(dentry); /* presumably asserts that di_rwsem is write-locked -- TODO confirm */
1facf9fc 7262+ au_di(dentry)->di_bdiropq = bindex; /* stored as given, no range check */
7263+}
7264+
7265+/* ---------------------------------------------------------------------- */
7266+
4a4d8108 7267+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 7268+static inline void au_digen_dec(struct dentry *d)
7269+{
e49829fe 7270+ atomic_dec(&au_di(d)->di_generation); /* defined only with CONFIG_AUFS_HNOTIFY, there is no stub in the #else branch */
1facf9fc 7271+}
7272+
4a4d8108 7273+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 7274+{
7275+ dentry->d_fsdata = NULL; /* after this au_di(dentry) returns NULL */
7276+}
7277+#else /* presumably AuStubVoid() expands to an empty inline function -- TODO confirm */
4a4d8108
AM
7278+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
7279+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 7280+
7281+#endif /* __KERNEL__ */
7282+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
7283diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
7284--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 7285+++ linux/fs/aufs/dinfo.c 2012-02-13 21:54:56.969771692 +0100
2cbb1c4b 7286@@ -0,0 +1,543 @@
1facf9fc 7287+/*
f6c5ef8b 7288+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 7289+ *
7290+ * This program, aufs is free software; you can redistribute it and/or modify
7291+ * it under the terms of the GNU General Public License as published by
7292+ * the Free Software Foundation; either version 2 of the License, or
7293+ * (at your option) any later version.
dece6358
AM
7294+ *
7295+ * This program is distributed in the hope that it will be useful,
7296+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7297+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7298+ * GNU General Public License for more details.
7299+ *
7300+ * You should have received a copy of the GNU General Public License
7301+ * along with this program; if not, write to the Free Software
7302+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7303+ */
7304+
7305+/*
7306+ * dentry private data
7307+ */
7308+
7309+#include "aufs.h"
7310+
e49829fe 7311+void au_di_init_once(void *_dinfo)
4a4d8108 7312+{
e49829fe
JR
7313+ struct au_dinfo *dinfo = _dinfo;
7314+ static struct lock_class_key aufs_di;
4a4d8108 7315+
e49829fe
JR
7316+ au_rw_init(&dinfo->di_rwsem);
7317+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
7318+}
7319+
027c5e7a 7320+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 7321+{
7322+ struct au_dinfo *dinfo;
027c5e7a 7323+ int nbr, i;
1facf9fc 7324+
7325+ dinfo = au_cache_alloc_dinfo();
7326+ if (unlikely(!dinfo))
7327+ goto out;
7328+
1facf9fc 7329+ nbr = au_sbend(sb) + 1;
7330+ if (nbr <= 0)
7331+ nbr = 1;
7332+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
7333+ if (dinfo->di_hdentry) {
7334+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
7335+ dinfo->di_bstart = -1;
7336+ dinfo->di_bend = -1;
7337+ dinfo->di_bwh = -1;
7338+ dinfo->di_bdiropq = -1;
7339+ for (i = 0; i < nbr; i++)
7340+ dinfo->di_hdentry[i].hd_id = -1;
7341+ goto out;
7342+ }
1facf9fc 7343+
1facf9fc 7344+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
7345+ dinfo = NULL;
7346+
4f0767ce 7347+out:
027c5e7a 7348+ return dinfo;
1facf9fc 7349+}
7350+
027c5e7a 7351+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 7352+{
4a4d8108
AM
7353+ struct au_hdentry *p;
7354+ aufs_bindex_t bend, bindex;
7355+
7356+ /* dentry may not be revalidated */
027c5e7a 7357+ bindex = dinfo->di_bstart;
4a4d8108 7358+ if (bindex >= 0) {
027c5e7a
AM
7359+ bend = dinfo->di_bend;
7360+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
7361+ while (bindex++ <= bend)
7362+ au_hdput(p++);
7363+ }
027c5e7a
AM
7364+ kfree(dinfo->di_hdentry);
7365+ au_cache_free_dinfo(dinfo);
7366+}
7367+
7368+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
7369+{
7370+ struct au_hdentry *p;
7371+ aufs_bindex_t bi;
7372+
7373+ AuRwMustWriteLock(&a->di_rwsem);
7374+ AuRwMustWriteLock(&b->di_rwsem);
7375+
7376+#define DiSwap(v, name) \
7377+ do { \
7378+ v = a->di_##name; \
7379+ a->di_##name = b->di_##name; \
7380+ b->di_##name = v; \
7381+ } while (0)
7382+
7383+ DiSwap(p, hdentry);
7384+ DiSwap(bi, bstart);
7385+ DiSwap(bi, bend);
7386+ DiSwap(bi, bwh);
7387+ DiSwap(bi, bdiropq);
7388+ /* smp_mb(); */
7389+
7390+#undef DiSwap
7391+}
7392+
7393+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
7394+{
7395+ AuRwMustWriteLock(&dst->di_rwsem);
7396+ AuRwMustWriteLock(&src->di_rwsem);
7397+
7398+ dst->di_bstart = src->di_bstart;
7399+ dst->di_bend = src->di_bend;
7400+ dst->di_bwh = src->di_bwh;
7401+ dst->di_bdiropq = src->di_bdiropq;
7402+ /* smp_mb(); */
7403+}
7404+
7405+int au_di_init(struct dentry *dentry)
7406+{
7407+ int err;
7408+ struct super_block *sb;
7409+ struct au_dinfo *dinfo;
7410+
7411+ err = 0;
7412+ sb = dentry->d_sb;
7413+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
7414+ if (dinfo) {
7415+ atomic_set(&dinfo->di_generation, au_sigen(sb));
7416+ /* smp_mb(); */ /* atomic_set */
7417+ dentry->d_fsdata = dinfo;
7418+ } else
7419+ err = -ENOMEM;
7420+
7421+ return err;
7422+}
7423+
7424+void au_di_fin(struct dentry *dentry)
7425+{
7426+ struct au_dinfo *dinfo;
7427+
7428+ dinfo = au_di(dentry);
7429+ AuRwDestroy(&dinfo->di_rwsem);
7430+ au_di_free(dinfo);
4a4d8108
AM
7431+}
7432+
1facf9fc 7433+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
7434+{
7435+ int err, sz;
7436+ struct au_hdentry *hdp;
7437+
1308ab2a 7438+ AuRwMustWriteLock(&dinfo->di_rwsem);
7439+
1facf9fc 7440+ err = -ENOMEM;
7441+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
7442+ if (!sz)
7443+ sz = sizeof(*hdp);
7444+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
7445+ if (hdp) {
7446+ dinfo->di_hdentry = hdp;
7447+ err = 0;
7448+ }
7449+
7450+ return err;
7451+}
7452+
7453+/* ---------------------------------------------------------------------- */
7454+
7455+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
7456+{
7457+ switch (lsc) {
7458+ case AuLsc_DI_CHILD:
7459+ ii_write_lock_child(inode);
7460+ break;
7461+ case AuLsc_DI_CHILD2:
7462+ ii_write_lock_child2(inode);
7463+ break;
7464+ case AuLsc_DI_CHILD3:
7465+ ii_write_lock_child3(inode);
7466+ break;
7467+ case AuLsc_DI_PARENT:
7468+ ii_write_lock_parent(inode);
7469+ break;
7470+ case AuLsc_DI_PARENT2:
7471+ ii_write_lock_parent2(inode);
7472+ break;
7473+ case AuLsc_DI_PARENT3:
7474+ ii_write_lock_parent3(inode);
7475+ break;
7476+ default:
7477+ BUG();
7478+ }
7479+}
7480+
7481+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
7482+{
7483+ switch (lsc) {
7484+ case AuLsc_DI_CHILD:
7485+ ii_read_lock_child(inode);
7486+ break;
7487+ case AuLsc_DI_CHILD2:
7488+ ii_read_lock_child2(inode);
7489+ break;
7490+ case AuLsc_DI_CHILD3:
7491+ ii_read_lock_child3(inode);
7492+ break;
7493+ case AuLsc_DI_PARENT:
7494+ ii_read_lock_parent(inode);
7495+ break;
7496+ case AuLsc_DI_PARENT2:
7497+ ii_read_lock_parent2(inode);
7498+ break;
7499+ case AuLsc_DI_PARENT3:
7500+ ii_read_lock_parent3(inode);
7501+ break;
7502+ default:
7503+ BUG();
7504+ }
7505+}
7506+
7507+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
7508+{
dece6358 7509+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7510+ if (d->d_inode) {
7511+ if (au_ftest_lock(flags, IW))
7512+ do_ii_write_lock(d->d_inode, lsc);
7513+ else if (au_ftest_lock(flags, IR))
7514+ do_ii_read_lock(d->d_inode, lsc);
7515+ }
7516+}
7517+
7518+void di_read_unlock(struct dentry *d, int flags)
7519+{
7520+ if (d->d_inode) {
027c5e7a
AM
7521+ if (au_ftest_lock(flags, IW)) {
7522+ au_dbg_verify_dinode(d);
1facf9fc 7523+ ii_write_unlock(d->d_inode);
027c5e7a
AM
7524+ } else if (au_ftest_lock(flags, IR)) {
7525+ au_dbg_verify_dinode(d);
1facf9fc 7526+ ii_read_unlock(d->d_inode);
027c5e7a 7527+ }
1facf9fc 7528+ }
dece6358 7529+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 7530+}
7531+
7532+void di_downgrade_lock(struct dentry *d, int flags)
7533+{
1facf9fc 7534+ if (d->d_inode && au_ftest_lock(flags, IR))
7535+ ii_downgrade_lock(d->d_inode);
dece6358 7536+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 7537+}
7538+
7539+void di_write_lock(struct dentry *d, unsigned int lsc)
7540+{
dece6358 7541+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7542+ if (d->d_inode)
7543+ do_ii_write_lock(d->d_inode, lsc);
7544+}
7545+
7546+void di_write_unlock(struct dentry *d)
7547+{
027c5e7a 7548+ au_dbg_verify_dinode(d);
1facf9fc 7549+ if (d->d_inode)
7550+ ii_write_unlock(d->d_inode);
dece6358 7551+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 7552+}
7553+
7554+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
7555+{
7556+ AuDebugOn(d1 == d2
7557+ || d1->d_inode == d2->d_inode
7558+ || d1->d_sb != d2->d_sb);
7559+
7560+ if (isdir && au_test_subdir(d1, d2)) {
7561+ di_write_lock_child(d1);
7562+ di_write_lock_child2(d2);
7563+ } else {
7564+ /* there should be no races */
7565+ di_write_lock_child(d2);
7566+ di_write_lock_child2(d1);
7567+ }
7568+}
7569+
7570+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
7571+{
7572+ AuDebugOn(d1 == d2
7573+ || d1->d_inode == d2->d_inode
7574+ || d1->d_sb != d2->d_sb);
7575+
7576+ if (isdir && au_test_subdir(d1, d2)) {
7577+ di_write_lock_parent(d1);
7578+ di_write_lock_parent2(d2);
7579+ } else {
7580+ /* there should be no races */
7581+ di_write_lock_parent(d2);
7582+ di_write_lock_parent2(d1);
7583+ }
7584+}
7585+
7586+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
7587+{
7588+ di_write_unlock(d1);
7589+ if (d1->d_inode == d2->d_inode)
dece6358 7590+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 7591+ else
7592+ di_write_unlock(d2);
7593+}
7594+
7595+/* ---------------------------------------------------------------------- */
7596+
7597+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
7598+{
7599+ struct dentry *d;
7600+
1308ab2a 7601+ DiMustAnyLock(dentry);
7602+
1facf9fc 7603+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7604+ return NULL;
7605+ AuDebugOn(bindex < 0);
7606+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
027c5e7a 7607+ AuDebugOn(d && d->d_count <= 0);
1facf9fc 7608+ return d;
7609+}
7610+
2cbb1c4b
JR
7611+/*
7612+ * extended version of au_h_dptr().
7613+ * returns a hashed and positive h_dentry in bindex, NULL, or error.
7614+ */
7615+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
7616+{
7617+ struct dentry *h_dentry;
7618+ struct inode *inode, *h_inode;
7619+
7620+ inode = dentry->d_inode;
7621+ AuDebugOn(!inode);
7622+
7623+ h_dentry = NULL;
7624+ if (au_dbstart(dentry) <= bindex
7625+ && bindex <= au_dbend(dentry))
7626+ h_dentry = au_h_dptr(dentry, bindex);
7627+ if (h_dentry && !au_d_hashed_positive(h_dentry)) {
7628+ dget(h_dentry);
7629+ goto out; /* success */
7630+ }
7631+
7632+ AuDebugOn(bindex < au_ibstart(inode));
7633+ AuDebugOn(au_ibend(inode) < bindex);
7634+ h_inode = au_h_iptr(inode, bindex);
7635+ h_dentry = d_find_alias(h_inode);
7636+ if (h_dentry) {
7637+ if (!IS_ERR(h_dentry)) {
7638+ if (!au_d_hashed_positive(h_dentry))
7639+ goto out; /* success */
7640+ dput(h_dentry);
7641+ } else
7642+ goto out;
7643+ }
7644+
7645+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
7646+ h_dentry = au_plink_lkup(inode, bindex);
7647+ AuDebugOn(!h_dentry);
7648+ if (!IS_ERR(h_dentry)) {
7649+ if (!au_d_hashed_positive(h_dentry))
7650+ goto out; /* success */
7651+ dput(h_dentry);
7652+ h_dentry = NULL;
7653+ }
7654+ }
7655+
7656+out:
7657+ AuDbgDentry(h_dentry);
7658+ return h_dentry;
7659+}
7660+
1facf9fc 7661+aufs_bindex_t au_dbtail(struct dentry *dentry)
7662+{
7663+ aufs_bindex_t bend, bwh;
7664+
7665+ bend = au_dbend(dentry);
7666+ if (0 <= bend) {
7667+ bwh = au_dbwh(dentry);
7668+ if (!bwh)
7669+ return bwh;
7670+ if (0 < bwh && bwh < bend)
7671+ return bwh - 1;
7672+ }
7673+ return bend;
7674+}
7675+
7676+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
7677+{
7678+ aufs_bindex_t bend, bopq;
7679+
7680+ bend = au_dbtail(dentry);
7681+ if (0 <= bend) {
7682+ bopq = au_dbdiropq(dentry);
7683+ if (0 <= bopq && bopq < bend)
7684+ bend = bopq;
7685+ }
7686+ return bend;
7687+}
7688+
7689+/* ---------------------------------------------------------------------- */
7690+
7691+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7692+ struct dentry *h_dentry)
7693+{
7694+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 7695+ struct au_branch *br;
1facf9fc 7696+
1308ab2a 7697+ DiMustWriteLock(dentry);
7698+
4a4d8108 7699+ au_hdput(hd);
1facf9fc 7700+ hd->hd_dentry = h_dentry;
027c5e7a
AM
7701+ if (h_dentry) {
7702+ br = au_sbr(dentry->d_sb, bindex);
7703+ hd->hd_id = br->br_id;
7704+ }
7705+}
7706+
7707+int au_dbrange_test(struct dentry *dentry)
7708+{
7709+ int err;
7710+ aufs_bindex_t bstart, bend;
7711+
7712+ err = 0;
7713+ bstart = au_dbstart(dentry);
7714+ bend = au_dbend(dentry);
7715+ if (bstart >= 0)
7716+ AuDebugOn(bend < 0 && bstart > bend);
7717+ else {
7718+ err = -EIO;
7719+ AuDebugOn(bend >= 0);
7720+ }
7721+
7722+ return err;
7723+}
7724+
7725+int au_digen_test(struct dentry *dentry, unsigned int sigen)
7726+{
7727+ int err;
7728+
7729+ err = 0;
7730+ if (unlikely(au_digen(dentry) != sigen
7731+ || au_iigen_test(dentry->d_inode, sigen)))
7732+ err = -EIO;
7733+
7734+ return err;
1facf9fc 7735+}
7736+
7737+void au_update_digen(struct dentry *dentry)
7738+{
7739+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
7740+ /* smp_mb(); */ /* atomic_set */
7741+}
7742+
7743+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
7744+{
7745+ struct au_dinfo *dinfo;
7746+ struct dentry *h_d;
4a4d8108 7747+ struct au_hdentry *hdp;
1facf9fc 7748+
1308ab2a 7749+ DiMustWriteLock(dentry);
7750+
1facf9fc 7751+ dinfo = au_di(dentry);
7752+ if (!dinfo || dinfo->di_bstart < 0)
7753+ return;
7754+
4a4d8108 7755+ hdp = dinfo->di_hdentry;
1facf9fc 7756+ if (do_put_zero) {
7757+ aufs_bindex_t bindex, bend;
7758+
7759+ bend = dinfo->di_bend;
7760+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 7761+ h_d = hdp[0 + bindex].hd_dentry;
1facf9fc 7762+ if (h_d && !h_d->d_inode)
7763+ au_set_h_dptr(dentry, bindex, NULL);
7764+ }
7765+ }
7766+
7767+ dinfo->di_bstart = -1;
7768+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 7769+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 7770+ break;
7771+ if (dinfo->di_bstart > dinfo->di_bend) {
7772+ dinfo->di_bstart = -1;
7773+ dinfo->di_bend = -1;
7774+ return;
7775+ }
7776+
7777+ dinfo->di_bend++;
7778+ while (0 <= --dinfo->di_bend)
4a4d8108 7779+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 7780+ break;
7781+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
7782+}
7783+
7784+void au_update_dbstart(struct dentry *dentry)
7785+{
7786+ aufs_bindex_t bindex, bend;
7787+ struct dentry *h_dentry;
7788+
7789+ bend = au_dbend(dentry);
7790+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
7791+ h_dentry = au_h_dptr(dentry, bindex);
7792+ if (!h_dentry)
7793+ continue;
7794+ if (h_dentry->d_inode) {
7795+ au_set_dbstart(dentry, bindex);
7796+ return;
7797+ }
7798+ au_set_h_dptr(dentry, bindex, NULL);
7799+ }
7800+}
7801+
7802+void au_update_dbend(struct dentry *dentry)
7803+{
7804+ aufs_bindex_t bindex, bstart;
7805+ struct dentry *h_dentry;
7806+
7807+ bstart = au_dbstart(dentry);
7f207e10 7808+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 7809+ h_dentry = au_h_dptr(dentry, bindex);
7810+ if (!h_dentry)
7811+ continue;
7812+ if (h_dentry->d_inode) {
7813+ au_set_dbend(dentry, bindex);
7814+ return;
7815+ }
7816+ au_set_h_dptr(dentry, bindex, NULL);
7817+ }
7818+}
7819+
7820+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
7821+{
7822+ aufs_bindex_t bindex, bend;
7823+
7824+ bend = au_dbend(dentry);
7825+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
7826+ if (au_h_dptr(dentry, bindex) == h_dentry)
7827+ return bindex;
7828+ return -1;
7829+}
7f207e10
AM
7830diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
7831--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
7832+++ linux/fs/aufs/dir.c 2012-02-13 21:54:56.969771692 +0100
7833@@ -0,0 +1,634 @@
1facf9fc 7834+/*
f6c5ef8b 7835+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 7836+ *
7837+ * This program, aufs is free software; you can redistribute it and/or modify
7838+ * it under the terms of the GNU General Public License as published by
7839+ * the Free Software Foundation; either version 2 of the License, or
7840+ * (at your option) any later version.
dece6358
AM
7841+ *
7842+ * This program is distributed in the hope that it will be useful,
7843+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7844+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7845+ * GNU General Public License for more details.
7846+ *
7847+ * You should have received a copy of the GNU General Public License
7848+ * along with this program; if not, write to the Free Software
7849+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7850+ */
7851+
7852+/*
7853+ * directory operations
7854+ */
7855+
7856+#include <linux/fs_stack.h>
7857+#include "aufs.h"
7858+
7859+void au_add_nlink(struct inode *dir, struct inode *h_dir)
7860+{
9dbd164d
AM
7861+ unsigned int nlink;
7862+
1facf9fc 7863+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7864+
9dbd164d
AM
7865+ nlink = dir->i_nlink;
7866+ nlink += h_dir->i_nlink - 2;
1facf9fc 7867+ if (h_dir->i_nlink < 2)
9dbd164d
AM
7868+ nlink += 2;
7869+ set_nlink(dir, nlink);
1facf9fc 7870+}
7871+
7872+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
7873+{
9dbd164d
AM
7874+ unsigned int nlink;
7875+
1facf9fc 7876+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7877+
9dbd164d
AM
7878+ nlink = dir->i_nlink;
7879+ nlink -= h_dir->i_nlink - 2;
1facf9fc 7880+ if (h_dir->i_nlink < 2)
9dbd164d
AM
7881+ nlink -= 2;
7882+ set_nlink(dir, nlink);
1facf9fc 7883+}
7884+
1308ab2a 7885+loff_t au_dir_size(struct file *file, struct dentry *dentry)
7886+{
7887+ loff_t sz;
7888+ aufs_bindex_t bindex, bend;
7889+ struct file *h_file;
7890+ struct dentry *h_dentry;
7891+
7892+ sz = 0;
7893+ if (file) {
7894+ AuDebugOn(!file->f_dentry);
7895+ AuDebugOn(!file->f_dentry->d_inode);
7896+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
7897+
4a4d8108 7898+ bend = au_fbend_dir(file);
1308ab2a 7899+ for (bindex = au_fbstart(file);
7900+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7901+ bindex++) {
4a4d8108 7902+ h_file = au_hf_dir(file, bindex);
1308ab2a 7903+ if (h_file
7904+ && h_file->f_dentry
7905+ && h_file->f_dentry->d_inode)
7906+ sz += i_size_read(h_file->f_dentry->d_inode);
7907+ }
7908+ } else {
7909+ AuDebugOn(!dentry);
7910+ AuDebugOn(!dentry->d_inode);
7911+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
7912+
7913+ bend = au_dbtaildir(dentry);
7914+ for (bindex = au_dbstart(dentry);
7915+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7916+ bindex++) {
7917+ h_dentry = au_h_dptr(dentry, bindex);
7918+ if (h_dentry && h_dentry->d_inode)
7919+ sz += i_size_read(h_dentry->d_inode);
7920+ }
7921+ }
7922+ if (sz < KMALLOC_MAX_SIZE)
7923+ sz = roundup_pow_of_two(sz);
7924+ if (sz > KMALLOC_MAX_SIZE)
7925+ sz = KMALLOC_MAX_SIZE;
7926+ else if (sz < NAME_MAX) {
7927+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
7928+ sz = AUFS_RDBLK_DEF;
7929+ }
7930+ return sz;
7931+}
7932+
1facf9fc 7933+/* ---------------------------------------------------------------------- */
7934+
7935+static int reopen_dir(struct file *file)
7936+{
7937+ int err;
7938+ unsigned int flags;
7939+ aufs_bindex_t bindex, btail, bstart;
7940+ struct dentry *dentry, *h_dentry;
7941+ struct file *h_file;
7942+
7943+ /* open all lower dirs */
7944+ dentry = file->f_dentry;
7945+ bstart = au_dbstart(dentry);
7946+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
7947+ au_set_h_fptr(file, bindex, NULL);
7948+ au_set_fbstart(file, bstart);
7949+
7950+ btail = au_dbtaildir(dentry);
4a4d8108 7951+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 7952+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 7953+ au_set_fbend_dir(file, btail);
1facf9fc 7954+
4a4d8108 7955+ flags = vfsub_file_flags(file);
1facf9fc 7956+ for (bindex = bstart; bindex <= btail; bindex++) {
7957+ h_dentry = au_h_dptr(dentry, bindex);
7958+ if (!h_dentry)
7959+ continue;
4a4d8108 7960+ h_file = au_hf_dir(file, bindex);
1facf9fc 7961+ if (h_file)
7962+ continue;
7963+
7964+ h_file = au_h_open(dentry, bindex, flags, file);
7965+ err = PTR_ERR(h_file);
7966+ if (IS_ERR(h_file))
7967+ goto out; /* close all? */
7968+ au_set_h_fptr(file, bindex, h_file);
7969+ }
7970+ au_update_figen(file);
7971+ /* todo: necessary? */
7972+ /* file->f_ra = h_file->f_ra; */
7973+ err = 0;
7974+
4f0767ce 7975+out:
1facf9fc 7976+ return err;
7977+}
7978+
7979+static int do_open_dir(struct file *file, int flags)
7980+{
7981+ int err;
7982+ aufs_bindex_t bindex, btail;
7983+ struct dentry *dentry, *h_dentry;
7984+ struct file *h_file;
7985+
1308ab2a 7986+ FiMustWriteLock(file);
7987+
1facf9fc 7988+ dentry = file->f_dentry;
027c5e7a
AM
7989+ err = au_alive_dir(dentry);
7990+ if (unlikely(err))
7991+ goto out;
7992+
1facf9fc 7993+ file->f_version = dentry->d_inode->i_version;
7994+ bindex = au_dbstart(dentry);
7995+ au_set_fbstart(file, bindex);
7996+ btail = au_dbtaildir(dentry);
4a4d8108 7997+ au_set_fbend_dir(file, btail);
1facf9fc 7998+ for (; !err && bindex <= btail; bindex++) {
7999+ h_dentry = au_h_dptr(dentry, bindex);
8000+ if (!h_dentry)
8001+ continue;
8002+
8003+ h_file = au_h_open(dentry, bindex, flags, file);
8004+ if (IS_ERR(h_file)) {
8005+ err = PTR_ERR(h_file);
8006+ break;
8007+ }
8008+ au_set_h_fptr(file, bindex, h_file);
8009+ }
8010+ au_update_figen(file);
8011+ /* todo: necessary? */
8012+ /* file->f_ra = h_file->f_ra; */
8013+ if (!err)
8014+ return 0; /* success */
8015+
8016+ /* close all */
8017+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
8018+ au_set_h_fptr(file, bindex, NULL);
8019+ au_set_fbstart(file, -1);
4a4d8108
AM
8020+ au_set_fbend_dir(file, -1);
8021+
027c5e7a 8022+out:
1facf9fc 8023+ return err;
8024+}
8025+
8026+static int aufs_open_dir(struct inode *inode __maybe_unused,
8027+ struct file *file)
8028+{
4a4d8108
AM
8029+ int err;
8030+ struct super_block *sb;
8031+ struct au_fidir *fidir;
8032+
8033+ err = -ENOMEM;
8034+ sb = file->f_dentry->d_sb;
8035+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 8036+ fidir = au_fidir_alloc(sb);
4a4d8108
AM
8037+ if (fidir) {
8038+ err = au_do_open(file, do_open_dir, fidir);
8039+ if (unlikely(err))
8040+ kfree(fidir);
8041+ }
8042+ si_read_unlock(sb);
8043+ return err;
1facf9fc 8044+}
8045+
8046+static int aufs_release_dir(struct inode *inode __maybe_unused,
8047+ struct file *file)
8048+{
8049+ struct au_vdir *vdir_cache;
4a4d8108
AM
8050+ struct au_finfo *finfo;
8051+ struct au_fidir *fidir;
8052+ aufs_bindex_t bindex, bend;
1facf9fc 8053+
4a4d8108
AM
8054+ finfo = au_fi(file);
8055+ fidir = finfo->fi_hdir;
8056+ if (fidir) {
0c5527e5
AM
8057+ /* remove me from sb->s_files */
8058+ file_sb_list_del(file);
8059+
4a4d8108
AM
8060+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
8061+ if (vdir_cache)
8062+ au_vdir_free(vdir_cache);
8063+
8064+ bindex = finfo->fi_btop;
8065+ if (bindex >= 0) {
8066+ /*
8067+ * calls fput() instead of filp_close(),
8068+ * since no dnotify or lock for the lower file.
8069+ */
8070+ bend = fidir->fd_bbot;
8071+ for (; bindex <= bend; bindex++)
8072+ au_set_h_fptr(file, bindex, NULL);
8073+ }
8074+ kfree(fidir);
8075+ finfo->fi_hdir = NULL;
1facf9fc 8076+ }
1facf9fc 8077+ au_finfo_fin(file);
1facf9fc 8078+ return 0;
8079+}
8080+
8081+/* ---------------------------------------------------------------------- */
8082+
4a4d8108
AM
8083+static int au_do_flush_dir(struct file *file, fl_owner_t id)
8084+{
8085+ int err;
8086+ aufs_bindex_t bindex, bend;
8087+ struct file *h_file;
8088+
8089+ err = 0;
8090+ bend = au_fbend_dir(file);
8091+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8092+ h_file = au_hf_dir(file, bindex);
8093+ if (h_file)
8094+ err = vfsub_flush(h_file, id);
8095+ }
8096+ return err;
8097+}
8098+
8099+static int aufs_flush_dir(struct file *file, fl_owner_t id)
8100+{
8101+ return au_do_flush(file, id, au_do_flush_dir);
8102+}
8103+
8104+/* ---------------------------------------------------------------------- */
8105+
1facf9fc 8106+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
8107+{
8108+ int err;
8109+ aufs_bindex_t bend, bindex;
8110+ struct inode *inode;
8111+ struct super_block *sb;
8112+
8113+ err = 0;
8114+ sb = dentry->d_sb;
8115+ inode = dentry->d_inode;
8116+ IMustLock(inode);
8117+ bend = au_dbend(dentry);
8118+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
8119+ struct path h_path;
1facf9fc 8120+
8121+ if (au_test_ro(sb, bindex, inode))
8122+ continue;
8123+ h_path.dentry = au_h_dptr(dentry, bindex);
8124+ if (!h_path.dentry)
8125+ continue;
1facf9fc 8126+
1facf9fc 8127+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 8128+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 8129+ }
8130+
8131+ return err;
8132+}
8133+
8134+static int au_do_fsync_dir(struct file *file, int datasync)
8135+{
8136+ int err;
8137+ aufs_bindex_t bend, bindex;
8138+ struct file *h_file;
8139+ struct super_block *sb;
8140+ struct inode *inode;
1facf9fc 8141+
8142+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8143+ if (unlikely(err))
8144+ goto out;
8145+
8146+ sb = file->f_dentry->d_sb;
8147+ inode = file->f_dentry->d_inode;
4a4d8108 8148+ bend = au_fbend_dir(file);
1facf9fc 8149+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 8150+ h_file = au_hf_dir(file, bindex);
1facf9fc 8151+ if (!h_file || au_test_ro(sb, bindex, inode))
8152+ continue;
8153+
53392da6 8154+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 8155+ }
8156+
4f0767ce 8157+out:
1facf9fc 8158+ return err;
8159+}
8160+
8161+/*
8162+ * @file may be NULL
8163+ */
1e00d052
AM
8164+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
8165+ int datasync)
1facf9fc 8166+{
8167+ int err;
b752ccd1 8168+ struct dentry *dentry;
1facf9fc 8169+ struct super_block *sb;
1e00d052 8170+ struct mutex *mtx;
1facf9fc 8171+
8172+ err = 0;
1e00d052
AM
8173+ dentry = file->f_dentry;
8174+ mtx = &dentry->d_inode->i_mutex;
8175+ mutex_lock(mtx);
1facf9fc 8176+ sb = dentry->d_sb;
8177+ si_noflush_read_lock(sb);
8178+ if (file)
8179+ err = au_do_fsync_dir(file, datasync);
8180+ else {
8181+ di_write_lock_child(dentry);
8182+ err = au_do_fsync_dir_no_file(dentry, datasync);
8183+ }
8184+ au_cpup_attr_timesizes(dentry->d_inode);
8185+ di_write_unlock(dentry);
8186+ if (file)
8187+ fi_write_unlock(file);
8188+
8189+ si_read_unlock(sb);
1e00d052 8190+ mutex_unlock(mtx);
1facf9fc 8191+ return err;
8192+}
8193+
8194+/* ---------------------------------------------------------------------- */
8195+
8196+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
8197+{
8198+ int err;
8199+ struct dentry *dentry;
9dbd164d 8200+ struct inode *inode, *h_inode;
1facf9fc 8201+ struct super_block *sb;
8202+
8203+ dentry = file->f_dentry;
8204+ inode = dentry->d_inode;
8205+ IMustLock(inode);
8206+
8207+ sb = dentry->d_sb;
8208+ si_read_lock(sb, AuLock_FLUSH);
8209+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8210+ if (unlikely(err))
8211+ goto out;
027c5e7a
AM
8212+ err = au_alive_dir(dentry);
8213+ if (!err)
8214+ err = au_vdir_init(file);
1facf9fc 8215+ di_downgrade_lock(dentry, AuLock_IR);
8216+ if (unlikely(err))
8217+ goto out_unlock;
8218+
9dbd164d 8219+ h_inode = au_h_iptr(inode, au_ibstart(inode));
b752ccd1 8220+ if (!au_test_nfsd()) {
1facf9fc 8221+ err = au_vdir_fill_de(file, dirent, filldir);
9dbd164d 8222+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 8223+ } else {
8224+ /*
8225+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
8226+ * encode_fh() and others.
8227+ */
9dbd164d 8228+ atomic_inc(&h_inode->i_count);
1facf9fc 8229+ di_read_unlock(dentry, AuLock_IR);
8230+ si_read_unlock(sb);
1facf9fc 8231+ err = au_vdir_fill_de(file, dirent, filldir);
1facf9fc 8232+ fsstack_copy_attr_atime(inode, h_inode);
8233+ fi_write_unlock(file);
9dbd164d 8234+ iput(h_inode);
1facf9fc 8235+
8236+ AuTraceErr(err);
8237+ return err;
8238+ }
8239+
4f0767ce 8240+out_unlock:
1facf9fc 8241+ di_read_unlock(dentry, AuLock_IR);
8242+ fi_write_unlock(file);
4f0767ce 8243+out:
1facf9fc 8244+ si_read_unlock(sb);
8245+ return err;
8246+}
8247+
8248+/* ---------------------------------------------------------------------- */
8249+
8250+#define AuTestEmpty_WHONLY 1
dece6358
AM
8251+#define AuTestEmpty_CALLED (1 << 1)
8252+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 8253+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
8254+#define au_fset_testempty(flags, name) \
8255+ do { (flags) |= AuTestEmpty_##name; } while (0)
8256+#define au_fclr_testempty(flags, name) \
8257+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 8258+
dece6358
AM
8259+#ifndef CONFIG_AUFS_SHWH
8260+#undef AuTestEmpty_SHWH
8261+#define AuTestEmpty_SHWH 0
8262+#endif
8263+
1facf9fc 8264+struct test_empty_arg {
1308ab2a 8265+ struct au_nhash *whlist;
1facf9fc 8266+ unsigned int flags;
8267+ int err;
8268+ aufs_bindex_t bindex;
8269+};
8270+
8271+static int test_empty_cb(void *__arg, const char *__name, int namelen,
dece6358
AM
8272+ loff_t offset __maybe_unused, u64 ino,
8273+ unsigned int d_type)
1facf9fc 8274+{
8275+ struct test_empty_arg *arg = __arg;
8276+ char *name = (void *)__name;
8277+
8278+ arg->err = 0;
8279+ au_fset_testempty(arg->flags, CALLED);
8280+ /* smp_mb(); */
8281+ if (name[0] == '.'
8282+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
8283+ goto out; /* success */
8284+
8285+ if (namelen <= AUFS_WH_PFX_LEN
8286+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
8287+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 8288+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8289+ arg->err = -ENOTEMPTY;
8290+ goto out;
8291+ }
8292+
8293+ name += AUFS_WH_PFX_LEN;
8294+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 8295+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8296+ arg->err = au_nhash_append_wh
1308ab2a 8297+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 8298+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 8299+
4f0767ce 8300+out:
1facf9fc 8301+ /* smp_mb(); */
8302+ AuTraceErr(arg->err);
8303+ return arg->err;
8304+}
8305+
8306+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8307+{
8308+ int err;
8309+ struct file *h_file;
8310+
8311+ h_file = au_h_open(dentry, arg->bindex,
8312+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
8313+ /*file*/NULL);
8314+ err = PTR_ERR(h_file);
8315+ if (IS_ERR(h_file))
8316+ goto out;
8317+
8318+ err = 0;
8319+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
8320+ && !h_file->f_dentry->d_inode->i_nlink)
8321+ goto out_put;
8322+
8323+ do {
8324+ arg->err = 0;
8325+ au_fclr_testempty(arg->flags, CALLED);
8326+ /* smp_mb(); */
8327+ err = vfsub_readdir(h_file, test_empty_cb, arg);
8328+ if (err >= 0)
8329+ err = arg->err;
8330+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
8331+
4f0767ce 8332+out_put:
1facf9fc 8333+ fput(h_file);
8334+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 8335+out:
1facf9fc 8336+ return err;
8337+}
8338+
8339+struct do_test_empty_args {
8340+ int *errp;
8341+ struct dentry *dentry;
8342+ struct test_empty_arg *arg;
8343+};
8344+
8345+static void call_do_test_empty(void *args)
8346+{
8347+ struct do_test_empty_args *a = args;
8348+ *a->errp = do_test_empty(a->dentry, a->arg);
8349+}
8350+
8351+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8352+{
8353+ int err, wkq_err;
8354+ struct dentry *h_dentry;
8355+ struct inode *h_inode;
8356+
8357+ h_dentry = au_h_dptr(dentry, arg->bindex);
8358+ h_inode = h_dentry->d_inode;
53392da6 8359+ /* todo: i_mode changes anytime? */
1facf9fc 8360+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
8361+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
8362+ mutex_unlock(&h_inode->i_mutex);
8363+ if (!err)
8364+ err = do_test_empty(dentry, arg);
8365+ else {
8366+ struct do_test_empty_args args = {
8367+ .errp = &err,
8368+ .dentry = dentry,
8369+ .arg = arg
8370+ };
8371+ unsigned int flags = arg->flags;
8372+
8373+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
8374+ if (unlikely(wkq_err))
8375+ err = wkq_err;
8376+ arg->flags = flags;
8377+ }
8378+
8379+ return err;
8380+}
8381+
8382+int au_test_empty_lower(struct dentry *dentry)
8383+{
8384+ int err;
1308ab2a 8385+ unsigned int rdhash;
1facf9fc 8386+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 8387+ struct au_nhash whlist;
1facf9fc 8388+ struct test_empty_arg arg;
1facf9fc 8389+
dece6358
AM
8390+ SiMustAnyLock(dentry->d_sb);
8391+
1308ab2a 8392+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
8393+ if (!rdhash)
8394+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
8395+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 8396+ if (unlikely(err))
1facf9fc 8397+ goto out;
8398+
1facf9fc 8399+ arg.flags = 0;
1308ab2a 8400+ arg.whlist = &whlist;
8401+ bstart = au_dbstart(dentry);
dece6358
AM
8402+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8403+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8404+ arg.bindex = bstart;
8405+ err = do_test_empty(dentry, &arg);
8406+ if (unlikely(err))
8407+ goto out_whlist;
8408+
8409+ au_fset_testempty(arg.flags, WHONLY);
8410+ btail = au_dbtaildir(dentry);
8411+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
8412+ struct dentry *h_dentry;
8413+
8414+ h_dentry = au_h_dptr(dentry, bindex);
8415+ if (h_dentry && h_dentry->d_inode) {
8416+ arg.bindex = bindex;
8417+ err = do_test_empty(dentry, &arg);
8418+ }
8419+ }
8420+
4f0767ce 8421+out_whlist:
1308ab2a 8422+ au_nhash_wh_free(&whlist);
4f0767ce 8423+out:
1facf9fc 8424+ return err;
8425+}
8426+
8427+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
8428+{
8429+ int err;
8430+ struct test_empty_arg arg;
8431+ aufs_bindex_t bindex, btail;
8432+
8433+ err = 0;
1308ab2a 8434+ arg.whlist = whlist;
1facf9fc 8435+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
8436+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8437+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8438+ btail = au_dbtaildir(dentry);
8439+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
8440+ struct dentry *h_dentry;
8441+
8442+ h_dentry = au_h_dptr(dentry, bindex);
8443+ if (h_dentry && h_dentry->d_inode) {
8444+ arg.bindex = bindex;
8445+ err = sio_test_empty(dentry, &arg);
8446+ }
8447+ }
8448+
8449+ return err;
8450+}
8451+
8452+/* ---------------------------------------------------------------------- */
8453+
8454+const struct file_operations aufs_dir_fop = {
4a4d8108 8455+ .owner = THIS_MODULE,
027c5e7a 8456+ .llseek = default_llseek,
1facf9fc 8457+ .read = generic_read_dir,
8458+ .readdir = aufs_readdir,
8459+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
8460+#ifdef CONFIG_COMPAT
8461+ .compat_ioctl = aufs_compat_ioctl_dir,
8462+#endif
1facf9fc 8463+ .open = aufs_open_dir,
8464+ .release = aufs_release_dir,
4a4d8108 8465+ .flush = aufs_flush_dir,
1facf9fc 8466+ .fsync = aufs_fsync_dir
8467+};
7f207e10
AM
8468diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
8469--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
8470+++ linux/fs/aufs/dir.h 2012-02-13 21:54:56.969771692 +0100
8471@@ -0,0 +1,137 @@
1facf9fc 8472+/*
f6c5ef8b 8473+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 8474+ *
8475+ * This program, aufs is free software; you can redistribute it and/or modify
8476+ * it under the terms of the GNU General Public License as published by
8477+ * the Free Software Foundation; either version 2 of the License, or
8478+ * (at your option) any later version.
dece6358
AM
8479+ *
8480+ * This program is distributed in the hope that it will be useful,
8481+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8482+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8483+ * GNU General Public License for more details.
8484+ *
8485+ * You should have received a copy of the GNU General Public License
8486+ * along with this program; if not, write to the Free Software
8487+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8488+ */
8489+
8490+/*
8491+ * directory operations
8492+ */
8493+
8494+#ifndef __AUFS_DIR_H__
8495+#define __AUFS_DIR_H__
8496+
8497+#ifdef __KERNEL__
8498+
8499+#include <linux/fs.h>
1facf9fc 8500+
8501+/* ---------------------------------------------------------------------- */
8502+
8503+/* need to be faster and smaller */
8504+
8505+struct au_nhash {
dece6358
AM
8506+ unsigned int nh_num;
8507+ struct hlist_head *nh_head;
1facf9fc 8508+};
8509+
8510+struct au_vdir_destr {
8511+ unsigned char len;
8512+ unsigned char name[0];
8513+} __packed;
8514+
8515+struct au_vdir_dehstr {
8516+ struct hlist_node hash;
8517+ struct au_vdir_destr *str;
4a4d8108 8518+} ____cacheline_aligned_in_smp;
1facf9fc 8519+
8520+struct au_vdir_de {
8521+ ino_t de_ino;
8522+ unsigned char de_type;
8523+ /* caution: packed */
8524+ struct au_vdir_destr de_str;
8525+} __packed;
8526+
8527+struct au_vdir_wh {
8528+ struct hlist_node wh_hash;
dece6358
AM
8529+#ifdef CONFIG_AUFS_SHWH
8530+ ino_t wh_ino;
1facf9fc 8531+ aufs_bindex_t wh_bindex;
dece6358
AM
8532+ unsigned char wh_type;
8533+#else
8534+ aufs_bindex_t wh_bindex;
8535+#endif
8536+ /* caution: packed */
1facf9fc 8537+ struct au_vdir_destr wh_str;
8538+} __packed;
8539+
8540+union au_vdir_deblk_p {
8541+ unsigned char *deblk;
8542+ struct au_vdir_de *de;
8543+};
8544+
8545+struct au_vdir {
8546+ unsigned char **vd_deblk;
8547+ unsigned long vd_nblk;
1facf9fc 8548+ struct {
8549+ unsigned long ul;
8550+ union au_vdir_deblk_p p;
8551+ } vd_last;
8552+
8553+ unsigned long vd_version;
dece6358 8554+ unsigned int vd_deblk_sz;
1facf9fc 8555+ unsigned long vd_jiffy;
4a4d8108 8556+} ____cacheline_aligned_in_smp;
1facf9fc 8557+
8558+/* ---------------------------------------------------------------------- */
8559+
8560+/* dir.c */
8561+extern const struct file_operations aufs_dir_fop;
8562+void au_add_nlink(struct inode *dir, struct inode *h_dir);
8563+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 8564+loff_t au_dir_size(struct file *file, struct dentry *dentry);
1facf9fc 8565+int au_test_empty_lower(struct dentry *dentry);
8566+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
8567+
8568+/* vdir.c */
1308ab2a 8569+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
8570+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
8571+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 8572+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
8573+ int limit);
dece6358
AM
8574+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
8575+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
8576+ unsigned int d_type, aufs_bindex_t bindex,
8577+ unsigned char shwh);
1facf9fc 8578+void au_vdir_free(struct au_vdir *vdir);
8579+int au_vdir_init(struct file *file);
8580+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
8581+
8582+/* ioctl.c */
8583+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
8584+
1308ab2a 8585+#ifdef CONFIG_AUFS_RDU
8586+/* rdu.c */
8587+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
8588+#ifdef CONFIG_COMPAT
8589+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8590+ unsigned long arg);
8591+#endif
1308ab2a 8592+#else
8593+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
8594+ unsigned long arg)
8595+{
8596+ return -EINVAL;
8597+}
b752ccd1
AM
8598+#ifdef CONFIG_COMPAT
8599+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8600+ unsigned long arg)
8601+{
8602+ return -EINVAL;
8603+}
8604+#endif
1308ab2a 8605+#endif
8606+
1facf9fc 8607+#endif /* __KERNEL__ */
8608+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
8609diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
8610--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 8611+++ linux/fs/aufs/dynop.c 2012-02-13 21:54:56.969771692 +0100
2cbb1c4b 8612@@ -0,0 +1,377 @@
1facf9fc 8613+/*
f6c5ef8b 8614+ * Copyright (C) 2010-2012 Junjiro R. Okajima
1facf9fc 8615+ *
8616+ * This program, aufs is free software; you can redistribute it and/or modify
8617+ * it under the terms of the GNU General Public License as published by
8618+ * the Free Software Foundation; either version 2 of the License, or
8619+ * (at your option) any later version.
dece6358
AM
8620+ *
8621+ * This program is distributed in the hope that it will be useful,
8622+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8623+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8624+ * GNU General Public License for more details.
8625+ *
8626+ * You should have received a copy of the GNU General Public License
8627+ * along with this program; if not, write to the Free Software
8628+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8629+ */
8630+
8631+/*
4a4d8108 8632+ * dynamically customizable operations for regular files
1facf9fc 8633+ */
8634+
1facf9fc 8635+#include "aufs.h"
8636+
4a4d8108 8637+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 8638+
4a4d8108
AM
8639+/*
8640+ * How large will these lists be?
8641+ * Usually just a few elements, 20-30 at most for each, I guess.
8642+ */
8643+static struct au_splhead dynop[AuDyLast];
8644+
8645+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 8646+{
4a4d8108
AM
8647+ struct au_dykey *key, *tmp;
8648+ struct list_head *head;
1facf9fc 8649+
4a4d8108
AM
8650+ key = NULL;
8651+ head = &spl->head;
8652+ rcu_read_lock();
8653+ list_for_each_entry_rcu(tmp, head, dk_list)
8654+ if (tmp->dk_op.dy_hop == h_op) {
8655+ key = tmp;
8656+ kref_get(&key->dk_kref);
8657+ break;
8658+ }
8659+ rcu_read_unlock();
8660+
8661+ return key;
1facf9fc 8662+}
8663+
4a4d8108 8664+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 8665+{
4a4d8108
AM
8666+ struct au_dykey **k, *found;
8667+ const void *h_op = key->dk_op.dy_hop;
8668+ int i;
1facf9fc 8669+
4a4d8108
AM
8670+ found = NULL;
8671+ k = br->br_dykey;
8672+ for (i = 0; i < AuBrDynOp; i++)
8673+ if (k[i]) {
8674+ if (k[i]->dk_op.dy_hop == h_op) {
8675+ found = k[i];
8676+ break;
8677+ }
8678+ } else
8679+ break;
8680+ if (!found) {
8681+ spin_lock(&br->br_dykey_lock);
8682+ for (; i < AuBrDynOp; i++)
8683+ if (k[i]) {
8684+ if (k[i]->dk_op.dy_hop == h_op) {
8685+ found = k[i];
8686+ break;
8687+ }
8688+ } else {
8689+ k[i] = key;
8690+ break;
8691+ }
8692+ spin_unlock(&br->br_dykey_lock);
8693+ BUG_ON(i == AuBrDynOp); /* expand the array */
8694+ }
8695+
8696+ return found;
1facf9fc 8697+}
8698+
4a4d8108
AM
8699+/* kref_get() if @key is already added */
8700+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
8701+{
8702+ struct au_dykey *tmp, *found;
8703+ struct list_head *head;
8704+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 8705+
4a4d8108
AM
8706+ found = NULL;
8707+ head = &spl->head;
8708+ spin_lock(&spl->spin);
8709+ list_for_each_entry(tmp, head, dk_list)
8710+ if (tmp->dk_op.dy_hop == h_op) {
8711+ kref_get(&tmp->dk_kref);
8712+ found = tmp;
8713+ break;
8714+ }
8715+ if (!found)
8716+ list_add_rcu(&key->dk_list, head);
8717+ spin_unlock(&spl->spin);
1facf9fc 8718+
4a4d8108
AM
8719+ if (!found)
8720+ DyPrSym(key);
8721+ return found;
8722+}
8723+
8724+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 8725+{
4a4d8108
AM
8726+ struct au_dykey *key;
8727+
8728+ key = container_of(rcu, struct au_dykey, dk_rcu);
8729+ DyPrSym(key);
8730+ kfree(key);
1facf9fc 8731+}
8732+
4a4d8108
AM
8733+static void dy_free(struct kref *kref)
8734+{
8735+ struct au_dykey *key;
8736+ struct au_splhead *spl;
1facf9fc 8737+
4a4d8108
AM
8738+ key = container_of(kref, struct au_dykey, dk_kref);
8739+ spl = dynop + key->dk_op.dy_type;
8740+ au_spl_del_rcu(&key->dk_list, spl);
8741+ call_rcu(&key->dk_rcu, dy_free_rcu);
8742+}
8743+
8744+void au_dy_put(struct au_dykey *key)
1facf9fc 8745+{
4a4d8108
AM
8746+ kref_put(&key->dk_kref, dy_free);
8747+}
1facf9fc 8748+
4a4d8108
AM
8749+/* ---------------------------------------------------------------------- */
8750+
8751+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
8752+
8753+#ifdef CONFIG_AUFS_DEBUG
8754+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 8755+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
8756+#else
8757+#define DyDbgDeclare(cnt) do {} while (0)
8758+#define DyDbgInc(cnt) do {} while (0)
8759+#endif
8760+
8761+#define DySet(func, dst, src, h_op, h_sb) do { \
8762+ DyDbgInc(cnt); \
8763+ if (h_op->func) { \
8764+ if (src.func) \
8765+ dst.func = src.func; \
8766+ else \
8767+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
8768+ } \
8769+} while (0)
8770+
8771+#define DySetForce(func, dst, src) do { \
8772+ AuDebugOn(!src.func); \
8773+ DyDbgInc(cnt); \
8774+ dst.func = src.func; \
8775+} while (0)
8776+
8777+#define DySetAop(func) \
8778+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
8779+#define DySetAopForce(func) \
8780+ DySetForce(func, dyaop->da_op, aufs_aop)
8781+
8782+static void dy_aop(struct au_dykey *key, const void *h_op,
8783+ struct super_block *h_sb __maybe_unused)
8784+{
8785+ struct au_dyaop *dyaop = (void *)key;
8786+ const struct address_space_operations *h_aop = h_op;
8787+ DyDbgDeclare(cnt);
8788+
8789+ AuDbg("%s\n", au_sbtype(h_sb));
8790+
8791+ DySetAop(writepage);
8792+ DySetAopForce(readpage); /* force */
4a4d8108
AM
8793+ DySetAop(writepages);
8794+ DySetAop(set_page_dirty);
8795+ DySetAop(readpages);
8796+ DySetAop(write_begin);
8797+ DySetAop(write_end);
8798+ DySetAop(bmap);
8799+ DySetAop(invalidatepage);
8800+ DySetAop(releasepage);
027c5e7a 8801+ DySetAop(freepage);
4a4d8108
AM
8802+ /* these two will be changed according to an aufs mount option */
8803+ DySetAop(direct_IO);
8804+ DySetAop(get_xip_mem);
8805+ DySetAop(migratepage);
8806+ DySetAop(launder_page);
8807+ DySetAop(is_partially_uptodate);
8808+ DySetAop(error_remove_page);
8809+
8810+ DyDbgSize(cnt, *h_aop);
8811+ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
8812+}
8813+
4a4d8108
AM
8814+/* ---------------------------------------------------------------------- */
8815+
8816+static void dy_bug(struct kref *kref)
8817+{
8818+ BUG();
8819+}
8820+
8821+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
8822+{
8823+ struct au_dykey *key, *old;
8824+ struct au_splhead *spl;
b752ccd1 8825+ struct op {
4a4d8108 8826+ unsigned int sz;
b752ccd1
AM
8827+ void (*set)(struct au_dykey *key, const void *h_op,
8828+ struct super_block *h_sb __maybe_unused);
8829+ };
8830+ static const struct op a[] = {
4a4d8108
AM
8831+ [AuDy_AOP] = {
8832+ .sz = sizeof(struct au_dyaop),
b752ccd1 8833+ .set = dy_aop
4a4d8108 8834+ }
b752ccd1
AM
8835+ };
8836+ const struct op *p;
4a4d8108
AM
8837+
8838+ spl = dynop + op->dy_type;
8839+ key = dy_gfind_get(spl, op->dy_hop);
8840+ if (key)
8841+ goto out_add; /* success */
8842+
8843+ p = a + op->dy_type;
8844+ key = kzalloc(p->sz, GFP_NOFS);
8845+ if (unlikely(!key)) {
8846+ key = ERR_PTR(-ENOMEM);
8847+ goto out;
8848+ }
8849+
8850+ key->dk_op.dy_hop = op->dy_hop;
8851+ kref_init(&key->dk_kref);
b752ccd1 8852+ p->set(key, op->dy_hop, br->br_mnt->mnt_sb);
4a4d8108
AM
8853+ old = dy_gadd(spl, key);
8854+ if (old) {
8855+ kfree(key);
8856+ key = old;
8857+ }
8858+
8859+out_add:
8860+ old = dy_bradd(br, key);
8861+ if (old)
8862+ /* its ref-count should never be zero here */
8863+ kref_put(&key->dk_kref, dy_bug);
8864+out:
8865+ return key;
8866+}
8867+
8868+/* ---------------------------------------------------------------------- */
8869+/*
8870+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
8871+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
8872+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
8873+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
8874+ * See the aufs manual for details.
8875+ *
8876+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
8877+ * performance of fadvise() and madvise() may be affected.
8878+ */
8879+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
8880+{
8881+ if (!do_dx) {
8882+ dyaop->da_op.direct_IO = NULL;
8883+ dyaop->da_op.get_xip_mem = NULL;
8884+ } else {
8885+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
8886+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
8887+ if (!dyaop->da_get_xip_mem)
8888+ dyaop->da_op.get_xip_mem = NULL;
8889+ }
8890+}
8891+
8892+static struct au_dyaop *dy_aget(struct au_branch *br,
8893+ const struct address_space_operations *h_aop,
8894+ int do_dx)
8895+{
8896+ struct au_dyaop *dyaop;
8897+ struct au_dynop op;
8898+
8899+ op.dy_type = AuDy_AOP;
8900+ op.dy_haop = h_aop;
8901+ dyaop = (void *)dy_get(&op, br);
8902+ if (IS_ERR(dyaop))
8903+ goto out;
8904+ dy_adx(dyaop, do_dx);
8905+
8906+out:
8907+ return dyaop;
8908+}
8909+
8910+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
8911+ struct inode *h_inode)
8912+{
8913+ int err, do_dx;
8914+ struct super_block *sb;
8915+ struct au_branch *br;
8916+ struct au_dyaop *dyaop;
8917+
8918+ AuDebugOn(!S_ISREG(h_inode->i_mode));
8919+ IiMustWriteLock(inode);
8920+
8921+ sb = inode->i_sb;
8922+ br = au_sbr(sb, bindex);
8923+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
8924+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
8925+ err = PTR_ERR(dyaop);
8926+ if (IS_ERR(dyaop))
8927+ /* unnecessary to call dy_fput() */
8928+ goto out;
8929+
8930+ err = 0;
8931+ inode->i_mapping->a_ops = &dyaop->da_op;
8932+
8933+out:
8934+ return err;
8935+}
8936+
b752ccd1
AM
8937+/*
8938+ * Is it safe to replace a_ops while the inode/file is in operation?
8939+ * Yes, I hope so.
8940+ */
8941+int au_dy_irefresh(struct inode *inode)
8942+{
8943+ int err;
8944+ aufs_bindex_t bstart;
8945+ struct inode *h_inode;
8946+
8947+ err = 0;
8948+ if (S_ISREG(inode->i_mode)) {
8949+ bstart = au_ibstart(inode);
8950+ h_inode = au_h_iptr(inode, bstart);
8951+ err = au_dy_iaop(inode, bstart, h_inode);
8952+ }
8953+ return err;
8954+}
8955+
4a4d8108
AM
8956+void au_dy_arefresh(int do_dx)
8957+{
8958+ struct au_splhead *spl;
8959+ struct list_head *head;
8960+ struct au_dykey *key;
8961+
8962+ spl = dynop + AuDy_AOP;
8963+ head = &spl->head;
8964+ spin_lock(&spl->spin);
8965+ list_for_each_entry(key, head, dk_list)
8966+ dy_adx((void *)key, do_dx);
8967+ spin_unlock(&spl->spin);
8968+}
8969+
4a4d8108
AM
8970+/* ---------------------------------------------------------------------- */
8971+
8972+void __init au_dy_init(void)
8973+{
8974+ int i;
8975+
8976+ /* make sure that 'struct au_dykey *' can be any type */
8977+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
8978+
8979+ for (i = 0; i < AuDyLast; i++)
8980+ au_spl_init(dynop + i);
8981+}
8982+
8983+void au_dy_fin(void)
8984+{
8985+ int i;
8986+
8987+ for (i = 0; i < AuDyLast; i++)
8988+ WARN_ON(!list_empty(&dynop[i].head));
8989+}
7f207e10
AM
8990diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
8991--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
8992+++ linux/fs/aufs/dynop.h 2012-02-13 21:54:56.969771692 +0100
8993@@ -0,0 +1,76 @@
4a4d8108 8994+/*
f6c5ef8b 8995+ * Copyright (C) 2010-2012 Junjiro R. Okajima
4a4d8108
AM
8996+ *
8997+ * This program, aufs is free software; you can redistribute it and/or modify
8998+ * it under the terms of the GNU General Public License as published by
8999+ * the Free Software Foundation; either version 2 of the License, or
9000+ * (at your option) any later version.
9001+ *
9002+ * This program is distributed in the hope that it will be useful,
9003+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9004+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9005+ * GNU General Public License for more details.
9006+ *
9007+ * You should have received a copy of the GNU General Public License
9008+ * along with this program; if not, write to the Free Software
9009+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9010+ */
9011+
9012+/*
9013+ * dynamically customizable operations (for regular files only)
9014+ */
9015+
9016+#ifndef __AUFS_DYNOP_H__
9017+#define __AUFS_DYNOP_H__
9018+
9019+#ifdef __KERNEL__
9020+
4a4d8108
AM
9021+#include "inode.h"
9022+
2cbb1c4b 9023+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
9024+
9025+struct au_dynop {
9026+ int dy_type;
9027+ union {
9028+ const void *dy_hop;
9029+ const struct address_space_operations *dy_haop;
4a4d8108
AM
9030+ };
9031+};
9032+
9033+struct au_dykey {
9034+ union {
9035+ struct list_head dk_list;
9036+ struct rcu_head dk_rcu;
9037+ };
9038+ struct au_dynop dk_op;
9039+
9040+ /*
9041+ * while this key is in the branch local array, a kref is held.
9042+ * when the branch is removed, the kref is put.
9043+ */
9044+ struct kref dk_kref;
9045+};
9046+
9047+/* stop unioning since their sizes are very different from each other */
9048+struct au_dyaop {
9049+ struct au_dykey da_key;
9050+ struct address_space_operations da_op; /* not const */
9051+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
9052+ void **, unsigned long *);
9053+};
9054+
4a4d8108
AM
9055+/* ---------------------------------------------------------------------- */
9056+
9057+/* dynop.c */
9058+struct au_branch;
9059+void au_dy_put(struct au_dykey *key);
9060+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
9061+ struct inode *h_inode);
b752ccd1 9062+int au_dy_irefresh(struct inode *inode);
4a4d8108 9063+void au_dy_arefresh(int do_dio);
4a4d8108
AM
9064+
9065+void __init au_dy_init(void);
9066+void au_dy_fin(void);
9067+
4a4d8108
AM
9068+#endif /* __KERNEL__ */
9069+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
9070diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
9071--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
9072+++ linux/fs/aufs/export.c 2012-02-13 21:54:56.969771692 +0100
9073@@ -0,0 +1,804 @@
4a4d8108 9074+/*
f6c5ef8b 9075+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
9076+ *
9077+ * This program, aufs is free software; you can redistribute it and/or modify
9078+ * it under the terms of the GNU General Public License as published by
9079+ * the Free Software Foundation; either version 2 of the License, or
9080+ * (at your option) any later version.
9081+ *
9082+ * This program is distributed in the hope that it will be useful,
9083+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9084+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9085+ * GNU General Public License for more details.
9086+ *
9087+ * You should have received a copy of the GNU General Public License
9088+ * along with this program; if not, write to the Free Software
9089+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9090+ */
9091+
9092+/*
9093+ * export via nfs
9094+ */
9095+
9096+#include <linux/exportfs.h>
4a4d8108
AM
9097+#include <linux/mnt_namespace.h>
9098+#include <linux/namei.h>
9099+#include <linux/nsproxy.h>
9100+#include <linux/random.h>
9101+#include <linux/writeback.h>
9102+#include "aufs.h"
9103+
9104+union conv {
9105+#ifdef CONFIG_AUFS_INO_T_64
9106+ __u32 a[2];
9107+#else
9108+ __u32 a[1];
9109+#endif
9110+ ino_t ino;
9111+};
9112+
9113+static ino_t decode_ino(__u32 *a)
9114+{
9115+ union conv u;
9116+
9117+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
9118+ u.a[0] = a[0];
9119+#ifdef CONFIG_AUFS_INO_T_64
9120+ u.a[1] = a[1];
9121+#endif
9122+ return u.ino;
9123+}
9124+
9125+static void encode_ino(__u32 *a, ino_t ino)
9126+{
9127+ union conv u;
9128+
9129+ u.ino = ino;
9130+ a[0] = u.a[0];
9131+#ifdef CONFIG_AUFS_INO_T_64
9132+ a[1] = u.a[1];
9133+#endif
9134+}
9135+
9136+/* NFS file handle */
9137+enum {
9138+ Fh_br_id,
9139+ Fh_sigen,
9140+#ifdef CONFIG_AUFS_INO_T_64
9141+ /* support 64bit inode number */
9142+ Fh_ino1,
9143+ Fh_ino2,
9144+ Fh_dir_ino1,
9145+ Fh_dir_ino2,
9146+#else
9147+ Fh_ino1,
9148+ Fh_dir_ino1,
9149+#endif
9150+ Fh_igen,
9151+ Fh_h_type,
9152+ Fh_tail,
9153+
9154+ Fh_ino = Fh_ino1,
9155+ Fh_dir_ino = Fh_dir_ino1
9156+};
9157+
9158+static int au_test_anon(struct dentry *dentry)
9159+{
027c5e7a 9160+ /* note: read d_flags without d_lock */
4a4d8108
AM
9161+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
9162+}
9163+
9164+/* ---------------------------------------------------------------------- */
9165+/* inode generation external table */
9166+
b752ccd1 9167+void au_xigen_inc(struct inode *inode)
4a4d8108 9168+{
4a4d8108
AM
9169+ loff_t pos;
9170+ ssize_t sz;
9171+ __u32 igen;
9172+ struct super_block *sb;
9173+ struct au_sbinfo *sbinfo;
9174+
4a4d8108 9175+ sb = inode->i_sb;
b752ccd1 9176+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 9177+
b752ccd1 9178+ sbinfo = au_sbi(sb);
1facf9fc 9179+ pos = inode->i_ino;
9180+ pos *= sizeof(igen);
9181+ igen = inode->i_generation + 1;
1facf9fc 9182+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
9183+ sizeof(igen), &pos);
9184+ if (sz == sizeof(igen))
b752ccd1 9185+ return; /* success */
1facf9fc 9186+
b752ccd1 9187+ if (unlikely(sz >= 0))
1facf9fc 9188+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 9189+}
9190+
9191+int au_xigen_new(struct inode *inode)
9192+{
9193+ int err;
9194+ loff_t pos;
9195+ ssize_t sz;
9196+ struct super_block *sb;
9197+ struct au_sbinfo *sbinfo;
9198+ struct file *file;
9199+
9200+ err = 0;
9201+ /* todo: dirty, at mount time */
9202+ if (inode->i_ino == AUFS_ROOT_INO)
9203+ goto out;
9204+ sb = inode->i_sb;
dece6358 9205+ SiMustAnyLock(sb);
1facf9fc 9206+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9207+ goto out;
9208+
9209+ err = -EFBIG;
9210+ pos = inode->i_ino;
9211+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
9212+ AuIOErr1("too large i%lld\n", pos);
9213+ goto out;
9214+ }
9215+ pos *= sizeof(inode->i_generation);
9216+
9217+ err = 0;
9218+ sbinfo = au_sbi(sb);
9219+ file = sbinfo->si_xigen;
9220+ BUG_ON(!file);
9221+
9222+ if (i_size_read(file->f_dentry->d_inode)
9223+ < pos + sizeof(inode->i_generation)) {
9224+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
9225+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
9226+ sizeof(inode->i_generation), &pos);
9227+ } else
9228+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
9229+ sizeof(inode->i_generation), &pos);
9230+ if (sz == sizeof(inode->i_generation))
9231+ goto out; /* success */
9232+
9233+ err = sz;
9234+ if (unlikely(sz >= 0)) {
9235+ err = -EIO;
9236+ AuIOErr("xigen error (%zd)\n", sz);
9237+ }
9238+
4f0767ce 9239+out:
1facf9fc 9240+ return err;
9241+}
9242+
9243+int au_xigen_set(struct super_block *sb, struct file *base)
9244+{
9245+ int err;
9246+ struct au_sbinfo *sbinfo;
9247+ struct file *file;
9248+
dece6358
AM
9249+ SiMustWriteLock(sb);
9250+
1facf9fc 9251+ sbinfo = au_sbi(sb);
9252+ file = au_xino_create2(base, sbinfo->si_xigen);
9253+ err = PTR_ERR(file);
9254+ if (IS_ERR(file))
9255+ goto out;
9256+ err = 0;
9257+ if (sbinfo->si_xigen)
9258+ fput(sbinfo->si_xigen);
9259+ sbinfo->si_xigen = file;
9260+
4f0767ce 9261+out:
1facf9fc 9262+ return err;
9263+}
9264+
9265+void au_xigen_clr(struct super_block *sb)
9266+{
9267+ struct au_sbinfo *sbinfo;
9268+
dece6358
AM
9269+ SiMustWriteLock(sb);
9270+
1facf9fc 9271+ sbinfo = au_sbi(sb);
9272+ if (sbinfo->si_xigen) {
9273+ fput(sbinfo->si_xigen);
9274+ sbinfo->si_xigen = NULL;
9275+ }
9276+}
9277+
9278+/* ---------------------------------------------------------------------- */
9279+
9280+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
9281+ ino_t dir_ino)
9282+{
9283+ struct dentry *dentry, *d;
9284+ struct inode *inode;
9285+ unsigned int sigen;
9286+
9287+ dentry = NULL;
9288+ inode = ilookup(sb, ino);
9289+ if (!inode)
9290+ goto out;
9291+
9292+ dentry = ERR_PTR(-ESTALE);
9293+ sigen = au_sigen(sb);
9294+ if (unlikely(is_bad_inode(inode)
9295+ || IS_DEADDIR(inode)
9296+ || sigen != au_iigen(inode)))
9297+ goto out_iput;
9298+
9299+ dentry = NULL;
9300+ if (!dir_ino || S_ISDIR(inode->i_mode))
9301+ dentry = d_find_alias(inode);
9302+ else {
027c5e7a
AM
9303+ spin_lock(&inode->i_lock);
9304+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
9305+ spin_lock(&d->d_lock);
1facf9fc 9306+ if (!au_test_anon(d)
9307+ && d->d_parent->d_inode->i_ino == dir_ino) {
027c5e7a
AM
9308+ dentry = dget_dlock(d);
9309+ spin_unlock(&d->d_lock);
1facf9fc 9310+ break;
9311+ }
027c5e7a
AM
9312+ spin_unlock(&d->d_lock);
9313+ }
9314+ spin_unlock(&inode->i_lock);
1facf9fc 9315+ }
027c5e7a 9316+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 9317+ /* need to refresh */
1facf9fc 9318+ dput(dentry);
2cbb1c4b 9319+ dentry = NULL;
1facf9fc 9320+ }
9321+
4f0767ce 9322+out_iput:
1facf9fc 9323+ iput(inode);
4f0767ce 9324+out:
2cbb1c4b 9325+ AuTraceErrPtr(dentry);
1facf9fc 9326+ return dentry;
9327+}
9328+
9329+/* ---------------------------------------------------------------------- */
9330+
9331+/* todo: dirty? */
9332+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
9333+
9334+struct au_compare_mnt_args {
9335+ /* input */
9336+ struct super_block *sb;
9337+
9338+ /* output */
9339+ struct vfsmount *mnt;
9340+};
9341+
9342+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
9343+{
9344+ struct au_compare_mnt_args *a = arg;
9345+
9346+ if (mnt->mnt_sb != a->sb)
9347+ return 0;
9348+ a->mnt = mntget(mnt);
9349+ return 1;
9350+}
9351+
1facf9fc 9352+static struct vfsmount *au_mnt_get(struct super_block *sb)
9353+{
4a4d8108
AM
9354+ int err;
9355+ struct au_compare_mnt_args args = {
9356+ .sb = sb
9357+ };
1facf9fc 9358+ struct mnt_namespace *ns;
1facf9fc 9359+
0c5527e5 9360+ br_read_lock(vfsmount_lock);
1facf9fc 9361+ /* no get/put ?? */
9362+ AuDebugOn(!current->nsproxy);
9363+ ns = current->nsproxy->mnt_ns;
9364+ AuDebugOn(!ns);
4a4d8108 9365+ err = iterate_mounts(au_compare_mnt, &args, ns->root);
0c5527e5 9366+ br_read_unlock(vfsmount_lock);
4a4d8108
AM
9367+ AuDebugOn(!err);
9368+ AuDebugOn(!args.mnt);
9369+ return args.mnt;
1facf9fc 9370+}
9371+
9372+struct au_nfsd_si_lock {
4a4d8108 9373+ unsigned int sigen;
027c5e7a 9374+ aufs_bindex_t bindex, br_id;
1facf9fc 9375+ unsigned char force_lock;
9376+};
9377+
027c5e7a
AM
9378+static int si_nfsd_read_lock(struct super_block *sb,
9379+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9380+{
027c5e7a 9381+ int err;
1facf9fc 9382+ aufs_bindex_t bindex;
9383+
9384+ si_read_lock(sb, AuLock_FLUSH);
9385+
9386+ /* branch id may be wrapped around */
027c5e7a 9387+ err = 0;
1facf9fc 9388+ bindex = au_br_index(sb, nsi_lock->br_id);
9389+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
9390+ goto out; /* success */
9391+
027c5e7a
AM
9392+ err = -ESTALE;
9393+ bindex = -1;
1facf9fc 9394+ if (!nsi_lock->force_lock)
9395+ si_read_unlock(sb);
1facf9fc 9396+
4f0767ce 9397+out:
027c5e7a
AM
9398+ nsi_lock->bindex = bindex;
9399+ return err;
1facf9fc 9400+}
9401+
9402+struct find_name_by_ino {
9403+ int called, found;
9404+ ino_t ino;
9405+ char *name;
9406+ int namelen;
9407+};
9408+
9409+static int
9410+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
9411+ u64 ino, unsigned int d_type)
9412+{
9413+ struct find_name_by_ino *a = arg;
9414+
9415+ a->called++;
9416+ if (a->ino != ino)
9417+ return 0;
9418+
9419+ memcpy(a->name, name, namelen);
9420+ a->namelen = namelen;
9421+ a->found = 1;
9422+ return 1;
9423+}
9424+
9425+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
9426+ struct au_nfsd_si_lock *nsi_lock)
9427+{
9428+ struct dentry *dentry, *parent;
9429+ struct file *file;
9430+ struct inode *dir;
9431+ struct find_name_by_ino arg;
9432+ int err;
9433+
9434+ parent = path->dentry;
9435+ if (nsi_lock)
9436+ si_read_unlock(parent->d_sb);
4a4d8108 9437+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 9438+ dentry = (void *)file;
9439+ if (IS_ERR(file))
9440+ goto out;
9441+
9442+ dentry = ERR_PTR(-ENOMEM);
4a4d8108 9443+ arg.name = __getname_gfp(GFP_NOFS);
1facf9fc 9444+ if (unlikely(!arg.name))
9445+ goto out_file;
9446+ arg.ino = ino;
9447+ arg.found = 0;
9448+ do {
9449+ arg.called = 0;
9450+ /* smp_mb(); */
9451+ err = vfsub_readdir(file, find_name_by_ino, &arg);
9452+ } while (!err && !arg.found && arg.called);
9453+ dentry = ERR_PTR(err);
9454+ if (unlikely(err))
9455+ goto out_name;
9456+ dentry = ERR_PTR(-ENOENT);
9457+ if (!arg.found)
9458+ goto out_name;
9459+
9460+ /* do not call au_lkup_one() */
9461+ dir = parent->d_inode;
9462+ mutex_lock(&dir->i_mutex);
9463+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
9464+ mutex_unlock(&dir->i_mutex);
9465+ AuTraceErrPtr(dentry);
9466+ if (IS_ERR(dentry))
9467+ goto out_name;
9468+ AuDebugOn(au_test_anon(dentry));
9469+ if (unlikely(!dentry->d_inode)) {
9470+ dput(dentry);
9471+ dentry = ERR_PTR(-ENOENT);
9472+ }
9473+
4f0767ce 9474+out_name:
1facf9fc 9475+ __putname(arg.name);
4f0767ce 9476+out_file:
1facf9fc 9477+ fput(file);
4f0767ce 9478+out:
1facf9fc 9479+ if (unlikely(nsi_lock
9480+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
9481+ if (!IS_ERR(dentry)) {
9482+ dput(dentry);
9483+ dentry = ERR_PTR(-ESTALE);
9484+ }
9485+ AuTraceErrPtr(dentry);
9486+ return dentry;
9487+}
9488+
9489+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
9490+ ino_t dir_ino,
9491+ struct au_nfsd_si_lock *nsi_lock)
9492+{
9493+ struct dentry *dentry;
9494+ struct path path;
9495+
9496+ if (dir_ino != AUFS_ROOT_INO) {
9497+ path.dentry = decode_by_ino(sb, dir_ino, 0);
9498+ dentry = path.dentry;
9499+ if (!path.dentry || IS_ERR(path.dentry))
9500+ goto out;
9501+ AuDebugOn(au_test_anon(path.dentry));
9502+ } else
9503+ path.dentry = dget(sb->s_root);
9504+
9505+ path.mnt = au_mnt_get(sb);
9506+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
9507+ path_put(&path);
9508+
4f0767ce 9509+out:
1facf9fc 9510+ AuTraceErrPtr(dentry);
9511+ return dentry;
9512+}
9513+
9514+/* ---------------------------------------------------------------------- */
9515+
/*
 * "acceptable" callback handed to exportfs_decode_fh() by decode_by_path():
 * it approves every dentry unconditionally, ignoring both arguments.
 */
static int h_acceptable(void *expv, struct dentry *dentry)
{
	return 1;
}
9520+
9521+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
9522+ char *buf, int len, struct super_block *sb)
9523+{
9524+ char *p;
9525+ int n;
9526+ struct path path;
9527+
9528+ p = d_path(h_rootpath, buf, len);
9529+ if (IS_ERR(p))
9530+ goto out;
9531+ n = strlen(p);
9532+
9533+ path.mnt = h_rootpath->mnt;
9534+ path.dentry = h_parent;
9535+ p = d_path(&path, buf, len);
9536+ if (IS_ERR(p))
9537+ goto out;
9538+ if (n != 1)
9539+ p += n;
9540+
9541+ path.mnt = au_mnt_get(sb);
9542+ path.dentry = sb->s_root;
9543+ p = d_path(&path, buf, len - strlen(p));
9544+ mntput(path.mnt);
9545+ if (IS_ERR(p))
9546+ goto out;
9547+ if (n != 1)
9548+ p[strlen(p)] = '/';
9549+
4f0767ce 9550+out:
1facf9fc 9551+ AuTraceErrPtr(p);
9552+ return p;
9553+}
9554+
9555+static
027c5e7a
AM
9556+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
9557+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9558+{
9559+ struct dentry *dentry, *h_parent, *root;
9560+ struct super_block *h_sb;
9561+ char *pathname, *p;
9562+ struct vfsmount *h_mnt;
9563+ struct au_branch *br;
9564+ int err;
9565+ struct path path;
9566+
027c5e7a 9567+ br = au_sbr(sb, nsi_lock->bindex);
1facf9fc 9568+ h_mnt = br->br_mnt;
9569+ h_sb = h_mnt->mnt_sb;
9570+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
9571+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
9572+ fh_len - Fh_tail, fh[Fh_h_type],
9573+ h_acceptable, /*context*/NULL);
9574+ dentry = h_parent;
9575+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
9576+ AuWarn1("%s decode_fh failed, %ld\n",
9577+ au_sbtype(h_sb), PTR_ERR(h_parent));
9578+ goto out;
9579+ }
9580+ dentry = NULL;
9581+ if (unlikely(au_test_anon(h_parent))) {
9582+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
9583+ au_sbtype(h_sb));
9584+ goto out_h_parent;
9585+ }
9586+
9587+ dentry = ERR_PTR(-ENOMEM);
9588+ pathname = (void *)__get_free_page(GFP_NOFS);
9589+ if (unlikely(!pathname))
9590+ goto out_h_parent;
9591+
9592+ root = sb->s_root;
9593+ path.mnt = h_mnt;
9594+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 9595+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 9596+ di_read_unlock(root, !AuLock_IR);
9597+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
9598+ dentry = (void *)p;
9599+ if (IS_ERR(p))
9600+ goto out_pathname;
9601+
9602+ si_read_unlock(sb);
9603+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
9604+ dentry = ERR_PTR(err);
9605+ if (unlikely(err))
9606+ goto out_relock;
9607+
9608+ dentry = ERR_PTR(-ENOENT);
9609+ AuDebugOn(au_test_anon(path.dentry));
9610+ if (unlikely(!path.dentry->d_inode))
9611+ goto out_path;
9612+
9613+ if (ino != path.dentry->d_inode->i_ino)
9614+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
9615+ else
9616+ dentry = dget(path.dentry);
9617+
4f0767ce 9618+out_path:
1facf9fc 9619+ path_put(&path);
4f0767ce 9620+out_relock:
1facf9fc 9621+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
9622+ if (!IS_ERR(dentry)) {
9623+ dput(dentry);
9624+ dentry = ERR_PTR(-ESTALE);
9625+ }
4f0767ce 9626+out_pathname:
1facf9fc 9627+ free_page((unsigned long)pathname);
4f0767ce 9628+out_h_parent:
1facf9fc 9629+ dput(h_parent);
4f0767ce 9630+out:
1facf9fc 9631+ AuTraceErrPtr(dentry);
9632+ return dentry;
9633+}
9634+
9635+/* ---------------------------------------------------------------------- */
9636+
9637+static struct dentry *
9638+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
9639+ int fh_type)
9640+{
9641+ struct dentry *dentry;
9642+ __u32 *fh = fid->raw;
027c5e7a 9643+ struct au_branch *br;
1facf9fc 9644+ ino_t ino, dir_ino;
1facf9fc 9645+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 9646+ .force_lock = 0
9647+ };
9648+
1facf9fc 9649+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
9650+ /* it should never happen, but the file handle is unreliable */
9651+ if (unlikely(fh_len < Fh_tail))
9652+ goto out;
9653+ nsi_lock.sigen = fh[Fh_sigen];
9654+ nsi_lock.br_id = fh[Fh_br_id];
9655+
1facf9fc 9656+ /* branch id may be wrapped around */
027c5e7a
AM
9657+ br = NULL;
9658+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 9659+ goto out;
9660+ nsi_lock.force_lock = 1;
9661+
9662+ /* is this inode still cached? */
9663+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
9664+ /* it should never happen */
9665+ if (unlikely(ino == AUFS_ROOT_INO))
9666+ goto out;
9667+
1facf9fc 9668+ dir_ino = decode_ino(fh + Fh_dir_ino);
9669+ dentry = decode_by_ino(sb, ino, dir_ino);
9670+ if (IS_ERR(dentry))
9671+ goto out_unlock;
9672+ if (dentry)
9673+ goto accept;
9674+
9675+ /* is the parent dir cached? */
027c5e7a
AM
9676+ br = au_sbr(sb, nsi_lock.bindex);
9677+ atomic_inc(&br->br_count);
1facf9fc 9678+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
9679+ if (IS_ERR(dentry))
9680+ goto out_unlock;
9681+ if (dentry)
9682+ goto accept;
9683+
9684+ /* lookup path */
027c5e7a 9685+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 9686+ if (IS_ERR(dentry))
9687+ goto out_unlock;
9688+ if (unlikely(!dentry))
9689+ /* todo?: make it ESTALE */
9690+ goto out_unlock;
9691+
4f0767ce 9692+accept:
027c5e7a
AM
9693+ if (!au_digen_test(dentry, au_sigen(sb))
9694+ && dentry->d_inode->i_generation == fh[Fh_igen])
1facf9fc 9695+ goto out_unlock; /* success */
9696+
9697+ dput(dentry);
9698+ dentry = ERR_PTR(-ESTALE);
4f0767ce 9699+out_unlock:
027c5e7a
AM
9700+ if (br)
9701+ atomic_dec(&br->br_count);
1facf9fc 9702+ si_read_unlock(sb);
4f0767ce 9703+out:
1facf9fc 9704+ AuTraceErrPtr(dentry);
9705+ return dentry;
9706+}
9707+
#if 0 /* reserved for future use */
/*
 * support subtreecheck option
 *
 * Disabled code.  NOTE(review): the decode_by_path() call below passes
 * (sb, bindex, dir_ino, fh, fh_len), which does not match the
 * (sb, ino, fh, fh_len, nsi_lock) definition in this file; it needs
 * updating before this block can be enabled.
 */
static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	struct dentry *parent;
	__u32 *fh = fid->raw;
	ino_t dir_ino;

	dir_ino = decode_ino(fh + Fh_dir_ino);
	parent = decode_by_ino(sb, dir_ino, 0);
	/* an error pointer is non-NULL, so only a cache miss falls through */
	if (!parent)
		parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
					dir_ino, fh, fh_len);

	AuTraceErrPtr(parent);
	return parent;
}
#endif
9730+
9731+/* ---------------------------------------------------------------------- */
9732+
9733+static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
9734+ int connectable)
9735+{
9736+ int err;
9737+ aufs_bindex_t bindex, bend;
9738+ struct super_block *sb, *h_sb;
9739+ struct inode *inode;
9740+ struct dentry *parent, *h_parent;
9741+ struct au_branch *br;
9742+
9743+ AuDebugOn(au_test_anon(dentry));
9744+
9745+ parent = NULL;
9746+ err = -ENOSPC;
9747+ if (unlikely(*max_len <= Fh_tail)) {
9748+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
9749+ goto out;
9750+ }
9751+
9752+ err = FILEID_ROOT;
9753+ if (IS_ROOT(dentry)) {
9754+ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
9755+ goto out;
9756+ }
9757+
1facf9fc 9758+ h_parent = NULL;
027c5e7a
AM
9759+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN);
9760+ if (unlikely(err))
9761+ goto out;
9762+
1facf9fc 9763+ inode = dentry->d_inode;
9764+ AuDebugOn(!inode);
027c5e7a 9765+ sb = dentry->d_sb;
1facf9fc 9766+#ifdef CONFIG_AUFS_DEBUG
9767+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9768+ AuWarn1("NFS-exporting requires xino\n");
9769+#endif
027c5e7a
AM
9770+ err = -EIO;
9771+ parent = dget_parent(dentry);
9772+ di_read_lock_parent(parent, !AuLock_IR);
1facf9fc 9773+ bend = au_dbtaildir(parent);
9774+ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
9775+ h_parent = au_h_dptr(parent, bindex);
9776+ if (h_parent) {
9777+ dget(h_parent);
9778+ break;
9779+ }
9780+ }
9781+ if (unlikely(!h_parent))
9782+ goto out_unlock;
9783+
9784+ err = -EPERM;
9785+ br = au_sbr(sb, bindex);
9786+ h_sb = br->br_mnt->mnt_sb;
9787+ if (unlikely(!h_sb->s_export_op)) {
9788+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
9789+ goto out_dput;
9790+ }
9791+
9792+ fh[Fh_br_id] = br->br_id;
9793+ fh[Fh_sigen] = au_sigen(sb);
9794+ encode_ino(fh + Fh_ino, inode->i_ino);
9795+ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
9796+ fh[Fh_igen] = inode->i_generation;
9797+
9798+ *max_len -= Fh_tail;
9799+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
9800+ max_len,
9801+ /*connectable or subtreecheck*/0);
9802+ err = fh[Fh_h_type];
9803+ *max_len += Fh_tail;
9804+ /* todo: macros? */
9805+ if (err != 255)
9806+ err = 99;
9807+ else
9808+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
9809+
4f0767ce 9810+out_dput:
1facf9fc 9811+ dput(h_parent);
4f0767ce 9812+out_unlock:
1facf9fc 9813+ di_read_unlock(parent, !AuLock_IR);
9814+ dput(parent);
9815+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 9816+out:
1facf9fc 9817+ if (unlikely(err < 0))
9818+ err = 255;
9819+ return err;
9820+}
9821+
9822+/* ---------------------------------------------------------------------- */
9823+
4a4d8108
AM
9824+static int aufs_commit_metadata(struct inode *inode)
9825+{
9826+ int err;
9827+ aufs_bindex_t bindex;
9828+ struct super_block *sb;
9829+ struct inode *h_inode;
9830+ int (*f)(struct inode *inode);
9831+
9832+ sb = inode->i_sb;
e49829fe 9833+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
9834+ ii_write_lock_child(inode);
9835+ bindex = au_ibstart(inode);
9836+ AuDebugOn(bindex < 0);
9837+ h_inode = au_h_iptr(inode, bindex);
9838+
9839+ f = h_inode->i_sb->s_export_op->commit_metadata;
9840+ if (f)
9841+ err = f(h_inode);
9842+ else {
9843+ struct writeback_control wbc = {
9844+ .sync_mode = WB_SYNC_ALL,
9845+ .nr_to_write = 0 /* metadata only */
9846+ };
9847+
9848+ err = sync_inode(h_inode, &wbc);
9849+ }
9850+
9851+ au_cpup_attr_timesizes(inode);
9852+ ii_write_unlock(inode);
9853+ si_read_unlock(sb);
9854+ return err;
9855+}
9856+
9857+/* ---------------------------------------------------------------------- */
9858+
1facf9fc 9859+static struct export_operations aufs_export_op = {
4a4d8108 9860+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 9861+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
9862+ .encode_fh = aufs_encode_fh,
9863+ .commit_metadata = aufs_commit_metadata
1facf9fc 9864+};
9865+
9866+void au_export_init(struct super_block *sb)
9867+{
9868+ struct au_sbinfo *sbinfo;
9869+ __u32 u;
9870+
9871+ sb->s_export_op = &aufs_export_op;
9872+ sbinfo = au_sbi(sb);
9873+ sbinfo->si_xigen = NULL;
9874+ get_random_bytes(&u, sizeof(u));
9875+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
9876+ atomic_set(&sbinfo->si_xigen_next, u);
9877+}
7f207e10
AM
9878diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
9879--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
9880+++ linux/fs/aufs/file.c 2012-02-13 21:54:56.969771692 +0100
9881@@ -0,0 +1,673 @@
1facf9fc 9882+/*
f6c5ef8b 9883+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 9884+ *
9885+ * This program, aufs is free software; you can redistribute it and/or modify
9886+ * it under the terms of the GNU General Public License as published by
9887+ * the Free Software Foundation; either version 2 of the License, or
9888+ * (at your option) any later version.
dece6358
AM
9889+ *
9890+ * This program is distributed in the hope that it will be useful,
9891+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9892+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9893+ * GNU General Public License for more details.
9894+ *
9895+ * You should have received a copy of the GNU General Public License
9896+ * along with this program; if not, write to the Free Software
9897+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 9898+ */
9899+
9900+/*
4a4d8108 9901+ * handling file/dir, and address_space operation
1facf9fc 9902+ */
9903+
4a4d8108 9904+#include <linux/pagemap.h>
1facf9fc 9905+#include "aufs.h"
9906+
4a4d8108
AM
/*
 * drop flags for writing:
 * clears O_WRONLY, O_RDWR, O_APPEND, O_CREAT and O_TRUNC from @flags and
 * adds O_RDONLY and O_NOATIME; every other bit is passed through.
 */
unsigned int au_file_roflags(unsigned int flags)
{
	const unsigned int write_bits =
		O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC;

	return (flags & ~write_bits) | O_RDONLY | O_NOATIME;
}
9914+
9915+/* common functions to regular file and dir */
9916+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9917+ struct file *file)
1facf9fc 9918+{
1308ab2a 9919+ struct file *h_file;
4a4d8108
AM
9920+ struct dentry *h_dentry;
9921+ struct inode *h_inode;
9922+ struct super_block *sb;
9923+ struct au_branch *br;
9924+ struct path h_path;
9925+ int err, exec_flag;
1facf9fc 9926+
4a4d8108
AM
9927+ /* a race condition can happen between open and unlink/rmdir */
9928+ h_file = ERR_PTR(-ENOENT);
9929+ h_dentry = au_h_dptr(dentry, bindex);
b752ccd1 9930+ if (au_test_nfsd() && !h_dentry)
4a4d8108
AM
9931+ goto out;
9932+ h_inode = h_dentry->d_inode;
b752ccd1 9933+ if (au_test_nfsd() && !h_inode)
4a4d8108 9934+ goto out;
027c5e7a
AM
9935+ spin_lock(&h_dentry->d_lock);
9936+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
9937+ || !h_inode
9938+ /* || !dentry->d_inode->i_nlink */
9939+ ;
9940+ spin_unlock(&h_dentry->d_lock);
9941+ if (unlikely(err))
4a4d8108 9942+ goto out;
1facf9fc 9943+
4a4d8108
AM
9944+ sb = dentry->d_sb;
9945+ br = au_sbr(sb, bindex);
9946+ h_file = ERR_PTR(-EACCES);
2cbb1c4b 9947+ exec_flag = flags & __FMODE_EXEC;
4a4d8108 9948+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
027c5e7a 9949+ goto out;
1facf9fc 9950+
4a4d8108
AM
9951+ /* drop flags for writing */
9952+ if (au_test_ro(sb, bindex, dentry->d_inode))
9953+ flags = au_file_roflags(flags);
9954+ flags &= ~O_CREAT;
9955+ atomic_inc(&br->br_count);
9956+ h_path.dentry = h_dentry;
9957+ h_path.mnt = br->br_mnt;
9958+ if (!au_special_file(h_inode->i_mode))
9959+ h_file = vfsub_dentry_open(&h_path, flags);
9960+ else {
9961+ /* this block depends upon the configuration */
9962+ di_read_unlock(dentry, AuLock_IR);
9963+ fi_write_unlock(file);
9964+ si_read_unlock(sb);
9965+ h_file = vfsub_dentry_open(&h_path, flags);
9966+ si_noflush_read_lock(sb);
9967+ fi_write_lock(file);
9968+ di_read_lock_child(dentry, AuLock_IR);
dece6358 9969+ }
4a4d8108
AM
9970+ if (IS_ERR(h_file))
9971+ goto out_br;
dece6358 9972+
4a4d8108
AM
9973+ if (exec_flag) {
9974+ err = deny_write_access(h_file);
9975+ if (unlikely(err)) {
9976+ fput(h_file);
9977+ h_file = ERR_PTR(err);
9978+ goto out_br;
9979+ }
9980+ }
953406b4 9981+ fsnotify_open(h_file);
4a4d8108 9982+ goto out; /* success */
1facf9fc 9983+
4f0767ce 9984+out_br:
4a4d8108 9985+ atomic_dec(&br->br_count);
4f0767ce 9986+out:
4a4d8108
AM
9987+ return h_file;
9988+}
1308ab2a 9989+
4a4d8108
AM
9990+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
9991+ struct au_fidir *fidir)
1facf9fc 9992+{
dece6358 9993+ int err;
1facf9fc 9994+ struct dentry *dentry;
1308ab2a 9995+
4a4d8108
AM
9996+ err = au_finfo_init(file, fidir);
9997+ if (unlikely(err))
9998+ goto out;
1facf9fc 9999+
10000+ dentry = file->f_dentry;
4a4d8108
AM
10001+ di_read_lock_child(dentry, AuLock_IR);
10002+ err = open(file, vfsub_file_flags(file));
10003+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 10004+
4a4d8108
AM
10005+ fi_write_unlock(file);
10006+ if (unlikely(err)) {
10007+ au_fi(file)->fi_hdir = NULL;
10008+ au_finfo_fin(file);
1308ab2a 10009+ }
4a4d8108 10010+
4f0767ce 10011+out:
1308ab2a 10012+ return err;
10013+}
dece6358 10014+
4a4d8108 10015+int au_reopen_nondir(struct file *file)
1308ab2a 10016+{
4a4d8108
AM
10017+ int err;
10018+ aufs_bindex_t bstart;
10019+ struct dentry *dentry;
10020+ struct file *h_file, *h_file_tmp;
1308ab2a 10021+
4a4d8108
AM
10022+ dentry = file->f_dentry;
10023+ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
10024+ bstart = au_dbstart(dentry);
10025+ h_file_tmp = NULL;
10026+ if (au_fbstart(file) == bstart) {
10027+ h_file = au_hf_top(file);
10028+ if (file->f_mode == h_file->f_mode)
10029+ return 0; /* success */
10030+ h_file_tmp = h_file;
10031+ get_file(h_file_tmp);
10032+ au_set_h_fptr(file, bstart, NULL);
10033+ }
10034+ AuDebugOn(au_fi(file)->fi_hdir);
10035+ AuDebugOn(au_fbstart(file) < bstart);
1308ab2a 10036+
4a4d8108
AM
10037+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
10038+ file);
10039+ err = PTR_ERR(h_file);
10040+ if (IS_ERR(h_file))
10041+ goto out; /* todo: close all? */
10042+
10043+ err = 0;
10044+ au_set_fbstart(file, bstart);
10045+ au_set_h_fptr(file, bstart, h_file);
10046+ au_update_figen(file);
10047+ /* todo: necessary? */
10048+ /* file->f_ra = h_file->f_ra; */
10049+
4f0767ce 10050+out:
4a4d8108
AM
10051+ if (h_file_tmp)
10052+ fput(h_file_tmp);
10053+ return err;
1facf9fc 10054+}
10055+
1308ab2a 10056+/* ---------------------------------------------------------------------- */
10057+
4a4d8108
AM
10058+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
10059+ struct dentry *hi_wh)
1facf9fc 10060+{
4a4d8108
AM
10061+ int err;
10062+ aufs_bindex_t bstart;
10063+ struct au_dinfo *dinfo;
10064+ struct dentry *h_dentry;
10065+ struct au_hdentry *hdp;
1facf9fc 10066+
4a4d8108
AM
10067+ dinfo = au_di(file->f_dentry);
10068+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 10069+
4a4d8108
AM
10070+ bstart = dinfo->di_bstart;
10071+ dinfo->di_bstart = btgt;
10072+ hdp = dinfo->di_hdentry;
10073+ h_dentry = hdp[0 + btgt].hd_dentry;
10074+ hdp[0 + btgt].hd_dentry = hi_wh;
10075+ err = au_reopen_nondir(file);
10076+ hdp[0 + btgt].hd_dentry = h_dentry;
10077+ dinfo->di_bstart = bstart;
1facf9fc 10078+
1facf9fc 10079+ return err;
10080+}
10081+
4a4d8108
AM
10082+static int au_ready_to_write_wh(struct file *file, loff_t len,
10083+ aufs_bindex_t bcpup)
1facf9fc 10084+{
4a4d8108 10085+ int err;
027c5e7a
AM
10086+ struct inode *inode, *h_inode;
10087+ struct dentry *dentry, *h_dentry, *hi_wh;
1facf9fc 10088+
dece6358 10089+ dentry = file->f_dentry;
4a4d8108 10090+ au_update_dbstart(dentry);
dece6358 10091+ inode = dentry->d_inode;
027c5e7a
AM
10092+ h_inode = NULL;
10093+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
10094+ h_dentry = au_h_dptr(dentry, bcpup);
10095+ if (h_dentry)
10096+ h_inode = h_dentry->d_inode;
10097+ }
4a4d8108 10098+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 10099+ if (!hi_wh && !h_inode)
4a4d8108
AM
10100+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
10101+ else
10102+ /* already copied-up after unlink */
10103+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 10104+
4a4d8108
AM
10105+ if (!err
10106+ && inode->i_nlink > 1
10107+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
10108+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
1308ab2a 10109+
dece6358 10110+ return err;
1facf9fc 10111+}
10112+
4a4d8108
AM
10113+/*
10114+ * prepare the @file for writing.
10115+ */
10116+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 10117+{
4a4d8108 10118+ int err;
027c5e7a 10119+ aufs_bindex_t bstart, bcpup, dbstart;
4a4d8108
AM
10120+ struct dentry *dentry, *parent, *h_dentry;
10121+ struct inode *h_inode, *inode;
1facf9fc 10122+ struct super_block *sb;
4a4d8108 10123+ struct file *h_file;
1facf9fc 10124+
10125+ dentry = file->f_dentry;
1facf9fc 10126+ sb = dentry->d_sb;
4a4d8108
AM
10127+ inode = dentry->d_inode;
10128+ AuDebugOn(au_special_file(inode->i_mode));
10129+ bstart = au_fbstart(file);
10130+ err = au_test_ro(sb, bstart, inode);
10131+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
10132+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
1facf9fc 10133+ goto out;
4a4d8108 10134+ }
1facf9fc 10135+
027c5e7a 10136+ /* need to cpup or reopen */
4a4d8108
AM
10137+ parent = dget_parent(dentry);
10138+ di_write_lock_parent(parent);
10139+ err = AuWbrCopyup(au_sbi(sb), dentry);
10140+ bcpup = err;
10141+ if (unlikely(err < 0))
10142+ goto out_dgrade;
10143+ err = 0;
10144+
027c5e7a 10145+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
4a4d8108 10146+ err = au_cpup_dirs(dentry, bcpup);
1facf9fc 10147+ if (unlikely(err))
4a4d8108
AM
10148+ goto out_dgrade;
10149+ }
10150+
10151+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
10152+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10153+ if (unlikely(err))
10154+ goto out_dgrade;
10155+
10156+ h_dentry = au_hf_top(file)->f_dentry;
10157+ h_inode = h_dentry->d_inode;
027c5e7a
AM
10158+ dbstart = au_dbstart(dentry);
10159+ if (dbstart <= bcpup) {
10160+ h_dentry = au_h_dptr(dentry, bcpup);
10161+ AuDebugOn(!h_dentry);
10162+ h_inode = h_dentry->d_inode;
10163+ AuDebugOn(!h_inode);
10164+ bstart = bcpup;
10165+ }
10166+
10167+ if (dbstart <= bcpup /* just reopen */
10168+ || !d_unhashed(dentry) /* copyup and reopen */
10169+ ) {
10170+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10171+ h_file = au_h_open_pre(dentry, bstart);
10172+ if (IS_ERR(h_file)) {
10173+ err = PTR_ERR(h_file);
10174+ h_file = NULL;
10175+ } else {
10176+ di_downgrade_lock(parent, AuLock_IR);
10177+ if (dbstart > bcpup)
10178+ err = au_sio_cpup_simple(dentry, bcpup, len,
10179+ AuCpup_DTIME);
10180+ if (!err)
10181+ err = au_reopen_nondir(file);
10182+ }
10183+ mutex_unlock(&h_inode->i_mutex);
10184+ au_h_open_post(dentry, bstart, h_file);
10185+ } else { /* copyup as wh and reopen */
10186+ /*
10187+ * since writable hfsplus branch is not supported,
10188+ * h_open_pre/post() are unnecessary.
10189+ */
10190+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108
AM
10191+ err = au_ready_to_write_wh(file, len, bcpup);
10192+ di_downgrade_lock(parent, AuLock_IR);
027c5e7a 10193+ mutex_unlock(&h_inode->i_mutex);
4a4d8108 10194+ }
4a4d8108
AM
10195+
10196+ if (!err) {
10197+ au_pin_set_parent_lflag(pin, /*lflag*/0);
10198+ goto out_dput; /* success */
10199+ }
10200+ au_unpin(pin);
10201+ goto out_unlock;
1facf9fc 10202+
4f0767ce 10203+out_dgrade:
4a4d8108 10204+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 10205+out_unlock:
4a4d8108 10206+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10207+out_dput:
4a4d8108 10208+ dput(parent);
4f0767ce 10209+out:
1facf9fc 10210+ return err;
10211+}
10212+
4a4d8108
AM
10213+/* ---------------------------------------------------------------------- */
10214+
10215+int au_do_flush(struct file *file, fl_owner_t id,
10216+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 10217+{
4a4d8108 10218+ int err;
1308ab2a 10219+ struct dentry *dentry;
1facf9fc 10220+ struct super_block *sb;
4a4d8108 10221+ struct inode *inode;
1facf9fc 10222+
1facf9fc 10223+ dentry = file->f_dentry;
10224+ sb = dentry->d_sb;
dece6358 10225+ inode = dentry->d_inode;
4a4d8108
AM
10226+ si_noflush_read_lock(sb);
10227+ fi_read_lock(file);
b752ccd1 10228+ ii_read_lock_child(inode);
1facf9fc 10229+
4a4d8108
AM
10230+ err = flush(file, id);
10231+ au_cpup_attr_timesizes(inode);
1facf9fc 10232+
b752ccd1 10233+ ii_read_unlock(inode);
4a4d8108 10234+ fi_read_unlock(file);
1308ab2a 10235+ si_read_unlock(sb);
dece6358 10236+ return err;
1facf9fc 10237+}
10238+
4a4d8108
AM
10239+/* ---------------------------------------------------------------------- */
10240+
10241+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 10242+{
4a4d8108
AM
10243+ int err;
10244+ aufs_bindex_t bstart;
10245+ struct au_pin pin;
10246+ struct au_finfo *finfo;
10247+ struct dentry *dentry, *parent, *hi_wh;
10248+ struct inode *inode;
1facf9fc 10249+ struct super_block *sb;
10250+
4a4d8108
AM
10251+ FiMustWriteLock(file);
10252+
10253+ err = 0;
10254+ finfo = au_fi(file);
1308ab2a 10255+ dentry = file->f_dentry;
10256+ sb = dentry->d_sb;
4a4d8108
AM
10257+ inode = dentry->d_inode;
10258+ bstart = au_ibstart(inode);
027c5e7a 10259+ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
1308ab2a 10260+ goto out;
dece6358 10261+
4a4d8108
AM
10262+ parent = dget_parent(dentry);
10263+ if (au_test_ro(sb, bstart, inode)) {
10264+ di_read_lock_parent(parent, !AuLock_IR);
10265+ err = AuWbrCopyup(au_sbi(sb), dentry);
10266+ bstart = err;
10267+ di_read_unlock(parent, !AuLock_IR);
10268+ if (unlikely(err < 0))
10269+ goto out_parent;
10270+ err = 0;
1facf9fc 10271+ }
1facf9fc 10272+
4a4d8108
AM
10273+ di_read_lock_parent(parent, AuLock_IR);
10274+ hi_wh = au_hi_wh(inode, bstart);
7f207e10
AM
10275+ if (!S_ISDIR(inode->i_mode)
10276+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108
AM
10277+ && au_plink_test(inode)
10278+ && !d_unhashed(dentry)) {
10279+ err = au_test_and_cpup_dirs(dentry, bstart);
10280+ if (unlikely(err))
10281+ goto out_unlock;
10282+
10283+ /* always superio. */
10284+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
10285+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10286+ if (!err)
10287+ err = au_sio_cpup_simple(dentry, bstart, -1,
10288+ AuCpup_DTIME);
10289+ au_unpin(&pin);
10290+ } else if (hi_wh) {
10291+ /* already copied-up after unlink */
10292+ err = au_reopen_wh(file, bstart, hi_wh);
10293+ *need_reopen = 0;
10294+ }
1facf9fc 10295+
4f0767ce 10296+out_unlock:
4a4d8108 10297+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10298+out_parent:
4a4d8108 10299+ dput(parent);
4f0767ce 10300+out:
1308ab2a 10301+ return err;
dece6358 10302+}
1facf9fc 10303+
4a4d8108 10304+static void au_do_refresh_dir(struct file *file)
dece6358 10305+{
4a4d8108
AM
10306+ aufs_bindex_t bindex, bend, new_bindex, brid;
10307+ struct au_hfile *p, tmp, *q;
10308+ struct au_finfo *finfo;
1308ab2a 10309+ struct super_block *sb;
4a4d8108 10310+ struct au_fidir *fidir;
1facf9fc 10311+
4a4d8108 10312+ FiMustWriteLock(file);
1facf9fc 10313+
4a4d8108
AM
10314+ sb = file->f_dentry->d_sb;
10315+ finfo = au_fi(file);
10316+ fidir = finfo->fi_hdir;
10317+ AuDebugOn(!fidir);
10318+ p = fidir->fd_hfile + finfo->fi_btop;
10319+ brid = p->hf_br->br_id;
10320+ bend = fidir->fd_bbot;
10321+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
10322+ if (!p->hf_file)
10323+ continue;
1308ab2a 10324+
4a4d8108
AM
10325+ new_bindex = au_br_index(sb, p->hf_br->br_id);
10326+ if (new_bindex == bindex)
10327+ continue;
10328+ if (new_bindex < 0) {
10329+ au_set_h_fptr(file, bindex, NULL);
10330+ continue;
10331+ }
1308ab2a 10332+
4a4d8108
AM
10333+ /* swap two lower inode, and loop again */
10334+ q = fidir->fd_hfile + new_bindex;
10335+ tmp = *q;
10336+ *q = *p;
10337+ *p = tmp;
10338+ if (tmp.hf_file) {
10339+ bindex--;
10340+ p--;
10341+ }
10342+ }
1308ab2a 10343+
4a4d8108 10344+ p = fidir->fd_hfile;
027c5e7a 10345+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
4a4d8108
AM
10346+ bend = au_sbend(sb);
10347+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
10348+ finfo->fi_btop++, p++)
10349+ if (p->hf_file) {
10350+ if (p->hf_file->f_dentry
10351+ && p->hf_file->f_dentry->d_inode)
10352+ break;
10353+ else
10354+ au_hfput(p, file);
10355+ }
10356+ } else {
10357+ bend = au_br_index(sb, brid);
10358+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
10359+ finfo->fi_btop++, p++)
10360+ if (p->hf_file)
10361+ au_hfput(p, file);
10362+ bend = au_sbend(sb);
10363+ }
1308ab2a 10364+
4a4d8108
AM
10365+ p = fidir->fd_hfile + bend;
10366+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
10367+ fidir->fd_bbot--, p--)
10368+ if (p->hf_file) {
10369+ if (p->hf_file->f_dentry
10370+ && p->hf_file->f_dentry->d_inode)
10371+ break;
10372+ else
10373+ au_hfput(p, file);
10374+ }
10375+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 10376+}
10377+
4a4d8108
AM
10378+/*
10379+ * after branch manipulating, refresh the file.
10380+ */
10381+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 10382+{
4a4d8108
AM
10383+ int err, need_reopen;
10384+ aufs_bindex_t bend, bindex;
10385+ struct dentry *dentry;
1308ab2a 10386+ struct au_finfo *finfo;
4a4d8108 10387+ struct au_hfile *hfile;
1facf9fc 10388+
4a4d8108 10389+ dentry = file->f_dentry;
1308ab2a 10390+ finfo = au_fi(file);
4a4d8108
AM
10391+ if (!finfo->fi_hdir) {
10392+ hfile = &finfo->fi_htop;
10393+ AuDebugOn(!hfile->hf_file);
10394+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
10395+ AuDebugOn(bindex < 0);
10396+ if (bindex != finfo->fi_btop)
10397+ au_set_fbstart(file, bindex);
10398+ } else {
10399+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
10400+ if (unlikely(err))
10401+ goto out;
10402+ au_do_refresh_dir(file);
10403+ }
1facf9fc 10404+
4a4d8108
AM
10405+ err = 0;
10406+ need_reopen = 1;
10407+ if (!au_test_mmapped(file))
10408+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 10409+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
10410+ err = reopen(file);
10411+ if (!err) {
10412+ au_update_figen(file);
10413+ goto out; /* success */
10414+ }
10415+
10416+ /* error, close all lower files */
10417+ if (finfo->fi_hdir) {
10418+ bend = au_fbend_dir(file);
10419+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
10420+ au_set_h_fptr(file, bindex, NULL);
10421+ }
1facf9fc 10422+
4f0767ce 10423+out:
1facf9fc 10424+ return err;
10425+}
10426+
4a4d8108
AM
10427+/* common function to regular file and dir */
10428+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10429+ int wlock)
dece6358 10430+{
1308ab2a 10431+ int err;
4a4d8108
AM
10432+ unsigned int sigen, figen;
10433+ aufs_bindex_t bstart;
10434+ unsigned char pseudo_link;
10435+ struct dentry *dentry;
10436+ struct inode *inode;
1facf9fc 10437+
4a4d8108
AM
10438+ err = 0;
10439+ dentry = file->f_dentry;
10440+ inode = dentry->d_inode;
10441+ AuDebugOn(au_special_file(inode->i_mode));
10442+ sigen = au_sigen(dentry->d_sb);
10443+ fi_write_lock(file);
10444+ figen = au_figen(file);
10445+ di_write_lock_child(dentry);
10446+ bstart = au_dbstart(dentry);
10447+ pseudo_link = (bstart != au_ibstart(inode));
10448+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
10449+ if (!wlock) {
10450+ di_downgrade_lock(dentry, AuLock_IR);
10451+ fi_downgrade_lock(file);
10452+ }
10453+ goto out; /* success */
10454+ }
dece6358 10455+
4a4d8108 10456+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 10457+ if (au_digen_test(dentry, sigen)) {
4a4d8108 10458+ err = au_reval_dpath(dentry, sigen);
027c5e7a 10459+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 10460+ }
dece6358 10461+
027c5e7a
AM
10462+ if (!err)
10463+ err = refresh_file(file, reopen);
4a4d8108
AM
10464+ if (!err) {
10465+ if (!wlock) {
10466+ di_downgrade_lock(dentry, AuLock_IR);
10467+ fi_downgrade_lock(file);
10468+ }
10469+ } else {
10470+ di_write_unlock(dentry);
10471+ fi_write_unlock(file);
10472+ }
1facf9fc 10473+
4f0767ce 10474+out:
1308ab2a 10475+ return err;
10476+}
1facf9fc 10477+
4a4d8108
AM
10478+/* ---------------------------------------------------------------------- */
10479+
10480+/* cf. aufs_nopage() */
10481+/* for madvise(2) */
10482+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 10483+{
4a4d8108
AM
10484+ unlock_page(page);
10485+ return 0;
10486+}
1facf9fc 10487+
4a4d8108
AM
10488+/* it will never be called, but necessary to support O_DIRECT */
10489+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
10490+ const struct iovec *iov, loff_t offset,
10491+ unsigned long nr_segs)
10492+{ BUG(); return 0; }
1facf9fc 10493+
4a4d8108
AM
10494+/*
10495+ * it will never be called, but madvise and fadvise behave differently
10496+ * when get_xip_mem is defined
10497+ */
10498+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
10499+ int create, void **kmem, unsigned long *pfn)
10500+{ BUG(); return 0; }
1facf9fc 10501+
4a4d8108
AM
10502+/* they will never be called. */
10503+#ifdef CONFIG_AUFS_DEBUG
10504+static int aufs_write_begin(struct file *file, struct address_space *mapping,
10505+ loff_t pos, unsigned len, unsigned flags,
10506+ struct page **pagep, void **fsdata)
10507+{ AuUnsupport(); return 0; }
10508+static int aufs_write_end(struct file *file, struct address_space *mapping,
10509+ loff_t pos, unsigned len, unsigned copied,
10510+ struct page *page, void *fsdata)
10511+{ AuUnsupport(); return 0; }
10512+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
10513+{ AuUnsupport(); return 0; }
1308ab2a 10514+
4a4d8108
AM
10515+static int aufs_set_page_dirty(struct page *page)
10516+{ AuUnsupport(); return 0; }
10517+static void aufs_invalidatepage(struct page *page, unsigned long offset)
10518+{ AuUnsupport(); }
10519+static int aufs_releasepage(struct page *page, gfp_t gfp)
10520+{ AuUnsupport(); return 0; }
10521+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
10522+ struct page *page)
10523+{ AuUnsupport(); return 0; }
10524+static int aufs_launder_page(struct page *page)
10525+{ AuUnsupport(); return 0; }
10526+static int aufs_is_partially_uptodate(struct page *page,
10527+ read_descriptor_t *desc,
10528+ unsigned long from)
10529+{ AuUnsupport(); return 0; }
10530+static int aufs_error_remove_page(struct address_space *mapping,
10531+ struct page *page)
10532+{ AuUnsupport(); return 0; }
10533+#endif /* CONFIG_AUFS_DEBUG */
10534+
10535+const struct address_space_operations aufs_aop = {
10536+ .readpage = aufs_readpage,
10537+ .direct_IO = aufs_direct_IO,
10538+ .get_xip_mem = aufs_get_xip_mem,
10539+#ifdef CONFIG_AUFS_DEBUG
10540+ .writepage = aufs_writepage,
4a4d8108
AM
10541+ /* no writepages, because of writepage */
10542+ .set_page_dirty = aufs_set_page_dirty,
10543+ /* no readpages, because of readpage */
10544+ .write_begin = aufs_write_begin,
10545+ .write_end = aufs_write_end,
10546+ /* no bmap, no block device */
10547+ .invalidatepage = aufs_invalidatepage,
10548+ .releasepage = aufs_releasepage,
10549+ .migratepage = aufs_migratepage,
10550+ .launder_page = aufs_launder_page,
10551+ .is_partially_uptodate = aufs_is_partially_uptodate,
10552+ .error_remove_page = aufs_error_remove_page
10553+#endif /* CONFIG_AUFS_DEBUG */
dece6358 10554+};
7f207e10
AM
10555diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
10556--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
10557+++ linux/fs/aufs/file.h 2012-02-13 21:54:56.969771692 +0100
10558@@ -0,0 +1,298 @@
4a4d8108 10559+/*
f6c5ef8b 10560+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
10561+ *
10562+ * This program, aufs is free software; you can redistribute it and/or modify
10563+ * it under the terms of the GNU General Public License as published by
10564+ * the Free Software Foundation; either version 2 of the License, or
10565+ * (at your option) any later version.
10566+ *
10567+ * This program is distributed in the hope that it will be useful,
10568+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10569+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10570+ * GNU General Public License for more details.
10571+ *
10572+ * You should have received a copy of the GNU General Public License
10573+ * along with this program; if not, write to the Free Software
10574+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10575+ */
1facf9fc 10576+
4a4d8108
AM
10577+/*
10578+ * file operations
10579+ */
1facf9fc 10580+
4a4d8108
AM
10581+#ifndef __AUFS_FILE_H__
10582+#define __AUFS_FILE_H__
1facf9fc 10583+
4a4d8108 10584+#ifdef __KERNEL__
1facf9fc 10585+
2cbb1c4b 10586+#include <linux/file.h>
4a4d8108
AM
10587+#include <linux/fs.h>
10588+#include <linux/poll.h>
4a4d8108 10589+#include "rwsem.h"
1facf9fc 10590+
4a4d8108
AM
10591+struct au_branch;
10592+struct au_hfile {
10593+ struct file *hf_file;
10594+ struct au_branch *hf_br;
10595+};
1facf9fc 10596+
4a4d8108
AM
10597+struct au_vdir;
10598+struct au_fidir {
10599+ aufs_bindex_t fd_bbot;
10600+ aufs_bindex_t fd_nent;
10601+ struct au_vdir *fd_vdir_cache;
10602+ struct au_hfile fd_hfile[];
10603+};
1facf9fc 10604+
4a4d8108 10605+static inline int au_fidir_sz(int nent)
dece6358 10606+{
4f0767ce
JR
10607+ AuDebugOn(nent < 0);
10608+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 10609+}
1facf9fc 10610+
4a4d8108
AM
10611+struct au_finfo {
10612+ atomic_t fi_generation;
dece6358 10613+
4a4d8108
AM
10614+ struct au_rwsem fi_rwsem;
10615+ aufs_bindex_t fi_btop;
10616+
10617+ /* do not union them */
10618+ struct { /* for non-dir */
10619+ struct au_hfile fi_htop;
2cbb1c4b 10620+ atomic_t fi_mmapped;
4a4d8108
AM
10621+ };
10622+ struct au_fidir *fi_hdir; /* for dir only */
10623+} ____cacheline_aligned_in_smp;
1facf9fc 10624+
4a4d8108 10625+/* ---------------------------------------------------------------------- */
1facf9fc 10626+
4a4d8108
AM
10627+/* file.c */
10628+extern const struct address_space_operations aufs_aop;
10629+unsigned int au_file_roflags(unsigned int flags);
10630+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
10631+ struct file *file);
10632+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
10633+ struct au_fidir *fidir);
10634+int au_reopen_nondir(struct file *file);
10635+struct au_pin;
10636+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
10637+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10638+ int wlock);
10639+int au_do_flush(struct file *file, fl_owner_t id,
10640+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 10641+
4a4d8108
AM
10642+/* poll.c */
10643+#ifdef CONFIG_AUFS_POLL
10644+unsigned int aufs_poll(struct file *file, poll_table *wait);
10645+#endif
1facf9fc 10646+
4a4d8108
AM
10647+#ifdef CONFIG_AUFS_BR_HFSPLUS
10648+/* hfsplus.c */
10649+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
10650+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
10651+ struct file *h_file);
10652+#else
10653+static inline
10654+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
dece6358 10655+{
4a4d8108
AM
10656+ return NULL;
10657+}
1facf9fc 10658+
4a4d8108
AM
10659+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
10660+ struct file *h_file);
10661+#endif
1facf9fc 10662+
4a4d8108
AM
10663+/* f_op.c */
10664+extern const struct file_operations aufs_file_fop;
4a4d8108
AM
10665+int au_do_open_nondir(struct file *file, int flags);
10666+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
10667+
10668+#ifdef CONFIG_AUFS_SP_IATTR
10669+/* f_op_sp.c */
10670+int au_special_file(umode_t mode);
10671+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
10672+#else
10673+AuStubInt0(au_special_file, umode_t mode)
10674+static inline void au_init_special_fop(struct inode *inode, umode_t mode,
10675+ dev_t rdev)
10676+{
10677+ init_special_inode(inode, mode, rdev);
10678+}
10679+#endif
1facf9fc 10680+
4a4d8108
AM
10681+/* finfo.c */
10682+void au_hfput(struct au_hfile *hf, struct file *file);
10683+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
10684+ struct file *h_file);
1facf9fc 10685+
4a4d8108 10686+void au_update_figen(struct file *file);
4a4d8108
AM
10687+struct au_fidir *au_fidir_alloc(struct super_block *sb);
10688+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 10689+
4a4d8108
AM
10690+void au_fi_init_once(void *_fi);
10691+void au_finfo_fin(struct file *file);
10692+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 10693+
4a4d8108
AM
10694+/* ioctl.c */
10695+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10696+#ifdef CONFIG_COMPAT
10697+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
10698+ unsigned long arg);
10699+#endif
1facf9fc 10700+
4a4d8108 10701+/* ---------------------------------------------------------------------- */
1facf9fc 10702+
4a4d8108
AM
10703+static inline struct au_finfo *au_fi(struct file *file)
10704+{
10705+ return file->private_data;
10706+}
1facf9fc 10707+
4a4d8108 10708+/* ---------------------------------------------------------------------- */
1facf9fc 10709+
4a4d8108
AM
10710+/*
10711+ * fi_read_lock, fi_write_lock,
10712+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
10713+ */
10714+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 10715+
4a4d8108
AM
10716+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
10717+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
10718+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 10719+
1308ab2a 10720+/* ---------------------------------------------------------------------- */
10721+
4a4d8108
AM
10722+/* todo: hard/soft set? */
10723+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 10724+{
4a4d8108
AM
10725+ FiMustAnyLock(file);
10726+ return au_fi(file)->fi_btop;
10727+}
dece6358 10728+
4a4d8108
AM
10729+static inline aufs_bindex_t au_fbend_dir(struct file *file)
10730+{
10731+ FiMustAnyLock(file);
10732+ AuDebugOn(!au_fi(file)->fi_hdir);
10733+ return au_fi(file)->fi_hdir->fd_bbot;
10734+}
1facf9fc 10735+
4a4d8108
AM
10736+static inline struct au_vdir *au_fvdir_cache(struct file *file)
10737+{
10738+ FiMustAnyLock(file);
10739+ AuDebugOn(!au_fi(file)->fi_hdir);
10740+ return au_fi(file)->fi_hdir->fd_vdir_cache;
10741+}
1facf9fc 10742+
4a4d8108
AM
10743+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
10744+{
10745+ FiMustWriteLock(file);
10746+ au_fi(file)->fi_btop = bindex;
10747+}
1facf9fc 10748+
4a4d8108
AM
10749+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
10750+{
10751+ FiMustWriteLock(file);
10752+ AuDebugOn(!au_fi(file)->fi_hdir);
10753+ au_fi(file)->fi_hdir->fd_bbot = bindex;
10754+}
1308ab2a 10755+
4a4d8108
AM
10756+static inline void au_set_fvdir_cache(struct file *file,
10757+ struct au_vdir *vdir_cache)
10758+{
10759+ FiMustWriteLock(file);
10760+ AuDebugOn(!au_fi(file)->fi_hdir);
10761+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
10762+}
dece6358 10763+
4a4d8108
AM
10764+static inline struct file *au_hf_top(struct file *file)
10765+{
10766+ FiMustAnyLock(file);
10767+ AuDebugOn(au_fi(file)->fi_hdir);
10768+ return au_fi(file)->fi_htop.hf_file;
10769+}
1facf9fc 10770+
4a4d8108
AM
10771+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
10772+{
10773+ FiMustAnyLock(file);
10774+ AuDebugOn(!au_fi(file)->fi_hdir);
10775+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
10776+}
10777+
4a4d8108
AM
10778+/* todo: memory barrier? */
10779+static inline unsigned int au_figen(struct file *f)
dece6358 10780+{
4a4d8108
AM
10781+ return atomic_read(&au_fi(f)->fi_generation);
10782+}
dece6358 10783+
2cbb1c4b
JR
10784+static inline void au_set_mmapped(struct file *f)
10785+{
10786+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
10787+ return;
10788+ pr_warning("fi_mmapped wrapped around\n");
10789+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
10790+ ;
10791+}
10792+
10793+static inline void au_unset_mmapped(struct file *f)
10794+{
10795+ atomic_dec(&au_fi(f)->fi_mmapped);
10796+}
10797+
4a4d8108
AM
10798+static inline int au_test_mmapped(struct file *f)
10799+{
2cbb1c4b
JR
10800+ return atomic_read(&au_fi(f)->fi_mmapped);
10801+}
10802+
10803+/* customize vma->vm_file */
10804+
10805+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
10806+ struct file *file)
10807+{
53392da6
AM
10808+ struct file *f;
10809+
10810+ f = vma->vm_file;
2cbb1c4b
JR
10811+ get_file(file);
10812+ vma->vm_file = file;
53392da6 10813+ fput(f);
2cbb1c4b
JR
10814+}
10815+
10816+#ifdef CONFIG_MMU
10817+#define AuDbgVmRegion(file, vma) do {} while (0)
10818+
10819+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10820+ struct file *file)
10821+{
10822+ au_do_vm_file_reset(vma, file);
10823+}
10824+#else
10825+#define AuDbgVmRegion(file, vma) \
10826+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
10827+
10828+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10829+ struct file *file)
10830+{
53392da6
AM
10831+ struct file *f;
10832+
2cbb1c4b 10833+ au_do_vm_file_reset(vma, file);
53392da6 10834+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
10835+ get_file(file);
10836+ vma->vm_region->vm_file = file;
53392da6 10837+ fput(f);
2cbb1c4b
JR
10838+}
10839+#endif /* CONFIG_MMU */
10840+
10841+/* handle vma->vm_prfile */
10842+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
10843+ struct file *file)
10844+{
10845+#ifdef CONFIG_AUFS_PROC_MAP
10846+ get_file(file);
10847+ vma->vm_prfile = file;
10848+#ifndef CONFIG_MMU
10849+ get_file(file);
10850+ vma->vm_region->vm_prfile = file;
10851+#endif
10852+#endif
4a4d8108 10853+}
1308ab2a 10854+
4a4d8108
AM
10855+#endif /* __KERNEL__ */
10856+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
10857diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
10858--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
10859+++ linux/fs/aufs/finfo.c 2012-02-13 21:54:56.969771692 +0100
10860@@ -0,0 +1,156 @@
4a4d8108 10861+/*
f6c5ef8b 10862+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
10863+ *
10864+ * This program, aufs is free software; you can redistribute it and/or modify
10865+ * it under the terms of the GNU General Public License as published by
10866+ * the Free Software Foundation; either version 2 of the License, or
10867+ * (at your option) any later version.
10868+ *
10869+ * This program is distributed in the hope that it will be useful,
10870+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10871+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10872+ * GNU General Public License for more details.
10873+ *
10874+ * You should have received a copy of the GNU General Public License
10875+ * along with this program; if not, write to the Free Software
10876+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10877+ */
1308ab2a 10878+
4a4d8108
AM
10879+/*
10880+ * file private data
10881+ */
1facf9fc 10882+
4a4d8108 10883+#include "aufs.h"
1facf9fc 10884+
4a4d8108
AM
10885+void au_hfput(struct au_hfile *hf, struct file *file)
10886+{
10887+ /* todo: direct access f_flags */
2cbb1c4b 10888+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
10889+ allow_write_access(hf->hf_file);
10890+ fput(hf->hf_file);
10891+ hf->hf_file = NULL;
e49829fe 10892+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
10893+ hf->hf_br = NULL;
10894+}
1facf9fc 10895+
4a4d8108
AM
10896+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
10897+{
10898+ struct au_finfo *finfo = au_fi(file);
10899+ struct au_hfile *hf;
10900+ struct au_fidir *fidir;
10901+
10902+ fidir = finfo->fi_hdir;
10903+ if (!fidir) {
10904+ AuDebugOn(finfo->fi_btop != bindex);
10905+ hf = &finfo->fi_htop;
10906+ } else
10907+ hf = fidir->fd_hfile + bindex;
10908+
10909+ if (hf && hf->hf_file)
10910+ au_hfput(hf, file);
10911+ if (val) {
10912+ FiMustWriteLock(file);
10913+ hf->hf_file = val;
10914+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
1308ab2a 10915+ }
4a4d8108 10916+}
1facf9fc 10917+
4a4d8108
AM
10918+void au_update_figen(struct file *file)
10919+{
10920+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
10921+ /* smp_mb(); */ /* atomic_set */
1facf9fc 10922+}
10923+
4a4d8108
AM
10924+/* ---------------------------------------------------------------------- */
10925+
4a4d8108
AM
10926+struct au_fidir *au_fidir_alloc(struct super_block *sb)
10927+{
10928+ struct au_fidir *fidir;
10929+ int nbr;
10930+
10931+ nbr = au_sbend(sb) + 1;
10932+ if (nbr < 2)
10933+ nbr = 2; /* initial allocate for 2 branches */
10934+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
10935+ if (fidir) {
10936+ fidir->fd_bbot = -1;
10937+ fidir->fd_nent = nbr;
10938+ fidir->fd_vdir_cache = NULL;
10939+ }
10940+
10941+ return fidir;
10942+}
10943+
10944+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
10945+{
10946+ int err;
10947+ struct au_fidir *fidir, *p;
10948+
10949+ AuRwMustWriteLock(&finfo->fi_rwsem);
10950+ fidir = finfo->fi_hdir;
10951+ AuDebugOn(!fidir);
10952+
10953+ err = -ENOMEM;
10954+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
10955+ GFP_NOFS);
10956+ if (p) {
10957+ p->fd_nent = nbr;
10958+ finfo->fi_hdir = p;
10959+ err = 0;
10960+ }
1facf9fc 10961+
dece6358 10962+ return err;
1facf9fc 10963+}
1308ab2a 10964+
10965+/* ---------------------------------------------------------------------- */
10966+
4a4d8108 10967+void au_finfo_fin(struct file *file)
1308ab2a 10968+{
4a4d8108
AM
10969+ struct au_finfo *finfo;
10970+
7f207e10
AM
10971+ au_nfiles_dec(file->f_dentry->d_sb);
10972+
4a4d8108
AM
10973+ finfo = au_fi(file);
10974+ AuDebugOn(finfo->fi_hdir);
10975+ AuRwDestroy(&finfo->fi_rwsem);
10976+ au_cache_free_finfo(finfo);
1308ab2a 10977+}
1308ab2a 10978+
e49829fe 10979+void au_fi_init_once(void *_finfo)
4a4d8108 10980+{
e49829fe 10981+ struct au_finfo *finfo = _finfo;
2cbb1c4b 10982+ static struct lock_class_key aufs_fi;
1308ab2a 10983+
e49829fe
JR
10984+ au_rw_init(&finfo->fi_rwsem);
10985+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 10986+}
1308ab2a 10987+
4a4d8108
AM
10988+int au_finfo_init(struct file *file, struct au_fidir *fidir)
10989+{
9dbd164d 10990+ int err, lc_idx;
4a4d8108
AM
10991+ struct au_finfo *finfo;
10992+ struct dentry *dentry;
10993+
10994+ err = -ENOMEM;
10995+ dentry = file->f_dentry;
10996+ finfo = au_cache_alloc_finfo();
10997+ if (unlikely(!finfo))
10998+ goto out;
10999+
11000+ err = 0;
7f207e10 11001+ au_nfiles_inc(dentry->d_sb);
9dbd164d
AM
11002+ lc_idx = AuLcNonDir_FIINFO;
11003+ if (fidir)
11004+ lc_idx = AuLcDir_FIINFO;
11005+ au_rw_class(&finfo->fi_rwsem, au_lc_key + lc_idx);
4a4d8108
AM
11006+ au_rw_write_lock(&finfo->fi_rwsem);
11007+ finfo->fi_btop = -1;
11008+ finfo->fi_hdir = fidir;
11009+ atomic_set(&finfo->fi_generation, au_digen(dentry));
11010+ /* smp_mb(); */ /* atomic_set */
11011+
11012+ file->private_data = finfo;
11013+
11014+out:
11015+ return err;
11016+}
7f207e10
AM
11017diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
11018--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
11019+++ linux/fs/aufs/f_op.c 2012-02-13 21:54:56.969771692 +0100
11020@@ -0,0 +1,729 @@
dece6358 11021+/*
f6c5ef8b 11022+ * Copyright (C) 2005-2012 Junjiro R. Okajima
dece6358
AM
11023+ *
11024+ * This program, aufs is free software; you can redistribute it and/or modify
11025+ * it under the terms of the GNU General Public License as published by
11026+ * the Free Software Foundation; either version 2 of the License, or
11027+ * (at your option) any later version.
11028+ *
11029+ * This program is distributed in the hope that it will be useful,
11030+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11031+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11032+ * GNU General Public License for more details.
11033+ *
11034+ * You should have received a copy of the GNU General Public License
11035+ * along with this program; if not, write to the Free Software
11036+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11037+ */
1facf9fc 11038+
11039+/*
4a4d8108 11040+ * file and vm operations
1facf9fc 11041+ */
dece6358 11042+
4a4d8108
AM
11043+#include <linux/fs_stack.h>
11044+#include <linux/mman.h>
4a4d8108 11045+#include <linux/security.h>
dece6358
AM
11046+#include "aufs.h"
11047+
4a4d8108 11048+int au_do_open_nondir(struct file *file, int flags)
1facf9fc 11049+{
4a4d8108
AM
11050+ int err;
11051+ aufs_bindex_t bindex;
11052+ struct file *h_file;
11053+ struct dentry *dentry;
11054+ struct au_finfo *finfo;
11055+
11056+ FiMustWriteLock(file);
11057+
4a4d8108 11058+ dentry = file->f_dentry;
027c5e7a
AM
11059+ err = au_d_alive(dentry);
11060+ if (unlikely(err))
11061+ goto out;
11062+
4a4d8108
AM
11063+ finfo = au_fi(file);
11064+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 11065+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108
AM
11066+ bindex = au_dbstart(dentry);
11067+ h_file = au_h_open(dentry, bindex, flags, file);
11068+ if (IS_ERR(h_file))
11069+ err = PTR_ERR(h_file);
11070+ else {
11071+ au_set_fbstart(file, bindex);
11072+ au_set_h_fptr(file, bindex, h_file);
11073+ au_update_figen(file);
11074+ /* todo: necessary? */
11075+ /* file->f_ra = h_file->f_ra; */
11076+ }
027c5e7a
AM
11077+
11078+out:
4a4d8108 11079+ return err;
1facf9fc 11080+}
11081+
4a4d8108
AM
11082+static int aufs_open_nondir(struct inode *inode __maybe_unused,
11083+ struct file *file)
1facf9fc 11084+{
4a4d8108 11085+ int err;
1308ab2a 11086+ struct super_block *sb;
1facf9fc 11087+
2cbb1c4b 11088+ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
4a4d8108
AM
11089+ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
11090+ file->f_mode);
1facf9fc 11091+
4a4d8108
AM
11092+ sb = file->f_dentry->d_sb;
11093+ si_read_lock(sb, AuLock_FLUSH);
11094+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
11095+ si_read_unlock(sb);
11096+ return err;
11097+}
1facf9fc 11098+
4a4d8108
AM
11099+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
11100+{
11101+ struct au_finfo *finfo;
11102+ aufs_bindex_t bindex;
1facf9fc 11103+
4a4d8108
AM
11104+ finfo = au_fi(file);
11105+ bindex = finfo->fi_btop;
0c5527e5
AM
11106+ if (bindex >= 0) {
11107+ /* remove me from sb->s_files */
11108+ file_sb_list_del(file);
4a4d8108 11109+ au_set_h_fptr(file, bindex, NULL);
0c5527e5 11110+ }
7f207e10 11111+
4a4d8108
AM
11112+ au_finfo_fin(file);
11113+ return 0;
1facf9fc 11114+}
11115+
4a4d8108
AM
11116+/* ---------------------------------------------------------------------- */
11117+
11118+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 11119+{
1308ab2a 11120+ int err;
4a4d8108
AM
11121+ struct file *h_file;
11122+
11123+ err = 0;
11124+ h_file = au_hf_top(file);
11125+ if (h_file)
11126+ err = vfsub_flush(h_file, id);
11127+ return err;
11128+}
11129+
11130+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
11131+{
11132+ return au_do_flush(file, id, au_do_flush_nondir);
11133+}
11134+
11135+/* ---------------------------------------------------------------------- */
9dbd164d
AM
11136+/*
11137+ * read and write functions acquire [fdi]_rwsem once, but release before
11138+ * mmap_sem. This is because to stop a race condition between mmap(2).
11139+ * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping
11140+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
11141+ * read functions after [fdi]_rwsem are released, but it should be harmless.
11142+ */
4a4d8108
AM
11143+
11144+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
11145+ loff_t *ppos)
11146+{
11147+ ssize_t err;
dece6358 11148+ struct dentry *dentry;
4a4d8108 11149+ struct file *h_file;
dece6358 11150+ struct super_block *sb;
1facf9fc 11151+
dece6358
AM
11152+ dentry = file->f_dentry;
11153+ sb = dentry->d_sb;
e49829fe 11154+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11155+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
dece6358
AM
11156+ if (unlikely(err))
11157+ goto out;
1facf9fc 11158+
4a4d8108 11159+ h_file = au_hf_top(file);
9dbd164d
AM
11160+ get_file(h_file);
11161+ di_read_unlock(dentry, AuLock_IR);
11162+ fi_read_unlock(file);
11163+
11164+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
11165+ err = vfsub_read_u(h_file, buf, count, ppos);
11166+ /* todo: necessary? */
11167+ /* file->f_ra = h_file->f_ra; */
9dbd164d 11168+ /* update without lock, I don't think it is a problem */
4a4d8108 11169+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
9dbd164d 11170+ fput(h_file);
1308ab2a 11171+
4f0767ce 11172+out:
dece6358
AM
11173+ si_read_unlock(sb);
11174+ return err;
11175+}
1facf9fc 11176+
e49829fe
JR
11177+/*
11178+ * todo: very ugly
11179+ * it safely locks both i_mutex and si_rwsem (for read).
11180+ * if the plink maintenance mode continues forever (that is the problem),
11181+ * this may loop forever.
11182+ */
11183+static void au_mtx_and_read_lock(struct inode *inode)
11184+{
11185+ int err;
11186+ struct super_block *sb = inode->i_sb;
11187+
11188+ while (1) {
11189+ mutex_lock(&inode->i_mutex);
11190+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11191+ if (!err)
11192+ break;
11193+ mutex_unlock(&inode->i_mutex);
11194+ si_read_lock(sb, AuLock_NOPLMW);
11195+ si_read_unlock(sb);
11196+ }
11197+}
11198+
4a4d8108
AM
11199+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
11200+ size_t count, loff_t *ppos)
dece6358 11201+{
4a4d8108
AM
11202+ ssize_t err;
11203+ struct au_pin pin;
dece6358 11204+ struct dentry *dentry;
9dbd164d 11205+ struct super_block *sb;
4a4d8108 11206+ struct inode *inode;
4a4d8108
AM
11207+ struct file *h_file;
11208+ char __user *buf = (char __user *)ubuf;
1facf9fc 11209+
dece6358 11210+ dentry = file->f_dentry;
9dbd164d 11211+ sb = dentry->d_sb;
4a4d8108 11212+ inode = dentry->d_inode;
e49829fe 11213+ au_mtx_and_read_lock(inode);
1facf9fc 11214+
4a4d8108
AM
11215+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11216+ if (unlikely(err))
11217+ goto out;
1facf9fc 11218+
4a4d8108
AM
11219+ err = au_ready_to_write(file, -1, &pin);
11220+ di_downgrade_lock(dentry, AuLock_IR);
9dbd164d
AM
11221+ if (unlikely(err)) {
11222+ di_read_unlock(dentry, AuLock_IR);
11223+ fi_write_unlock(file);
11224+ goto out;
11225+ }
1facf9fc 11226+
4a4d8108 11227+ h_file = au_hf_top(file);
9dbd164d 11228+ get_file(h_file);
4a4d8108 11229+ au_unpin(&pin);
9dbd164d
AM
11230+ di_read_unlock(dentry, AuLock_IR);
11231+ fi_write_unlock(file);
11232+
4a4d8108 11233+ err = vfsub_write_u(h_file, buf, count, ppos);
9dbd164d 11234+ ii_write_lock_child(inode);
4a4d8108
AM
11235+ au_cpup_attr_timesizes(inode);
11236+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
9dbd164d
AM
11237+ ii_write_unlock(inode);
11238+ fput(h_file);
1facf9fc 11239+
4f0767ce 11240+out:
9dbd164d 11241+ si_read_unlock(sb);
4a4d8108 11242+ mutex_unlock(&inode->i_mutex);
dece6358
AM
11243+ return err;
11244+}
1facf9fc 11245+
4a4d8108
AM
11246+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
11247+ const struct iovec *iov, unsigned long nv, loff_t pos)
dece6358 11248+{
4a4d8108
AM
11249+ ssize_t err;
11250+ struct file *file;
11251+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
11252+ loff_t);
1facf9fc 11253+
4a4d8108
AM
11254+ err = security_file_permission(h_file, rw);
11255+ if (unlikely(err))
11256+ goto out;
1facf9fc 11257+
4a4d8108
AM
11258+ err = -ENOSYS;
11259+ func = NULL;
11260+ if (rw == MAY_READ)
11261+ func = h_file->f_op->aio_read;
11262+ else if (rw == MAY_WRITE)
11263+ func = h_file->f_op->aio_write;
11264+ if (func) {
11265+ file = kio->ki_filp;
11266+ kio->ki_filp = h_file;
2cbb1c4b 11267+ lockdep_off();
4a4d8108 11268+ err = func(kio, iov, nv, pos);
2cbb1c4b 11269+ lockdep_on();
4a4d8108
AM
11270+ kio->ki_filp = file;
11271+ } else
11272+ /* currently there is no such fs */
11273+ WARN_ON_ONCE(1);
1facf9fc 11274+
4f0767ce 11275+out:
dece6358
AM
11276+ return err;
11277+}
1facf9fc 11278+
4a4d8108
AM
11279+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
11280+ unsigned long nv, loff_t pos)
1facf9fc 11281+{
4a4d8108
AM
11282+ ssize_t err;
11283+ struct file *file, *h_file;
11284+ struct dentry *dentry;
dece6358 11285+ struct super_block *sb;
1facf9fc 11286+
4a4d8108 11287+ file = kio->ki_filp;
dece6358 11288+ dentry = file->f_dentry;
1308ab2a 11289+ sb = dentry->d_sb;
e49829fe 11290+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11291+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11292+ if (unlikely(err))
11293+ goto out;
11294+
11295+ h_file = au_hf_top(file);
9dbd164d
AM
11296+ get_file(h_file);
11297+ di_read_unlock(dentry, AuLock_IR);
11298+ fi_read_unlock(file);
11299+
4a4d8108
AM
11300+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
11301+ /* todo: necessary? */
11302+ /* file->f_ra = h_file->f_ra; */
9dbd164d 11303+ /* update without lock, I don't think it is a problem */
4a4d8108 11304+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
9dbd164d 11305+ fput(h_file);
1facf9fc 11306+
4f0767ce 11307+out:
4a4d8108 11308+ si_read_unlock(sb);
1308ab2a 11309+ return err;
11310+}
1facf9fc 11311+
4a4d8108
AM
11312+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
11313+ unsigned long nv, loff_t pos)
1308ab2a 11314+{
4a4d8108
AM
11315+ ssize_t err;
11316+ struct au_pin pin;
11317+ struct dentry *dentry;
11318+ struct inode *inode;
4a4d8108 11319+ struct file *file, *h_file;
9dbd164d 11320+ struct super_block *sb;
1308ab2a 11321+
4a4d8108 11322+ file = kio->ki_filp;
1308ab2a 11323+ dentry = file->f_dentry;
9dbd164d 11324+ sb = dentry->d_sb;
1308ab2a 11325+ inode = dentry->d_inode;
e49829fe
JR
11326+ au_mtx_and_read_lock(inode);
11327+
4a4d8108
AM
11328+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11329+ if (unlikely(err))
1308ab2a 11330+ goto out;
1facf9fc 11331+
4a4d8108
AM
11332+ err = au_ready_to_write(file, -1, &pin);
11333+ di_downgrade_lock(dentry, AuLock_IR);
9dbd164d
AM
11334+ if (unlikely(err)) {
11335+ di_read_unlock(dentry, AuLock_IR);
11336+ fi_write_unlock(file);
11337+ goto out;
11338+ }
1facf9fc 11339+
4a4d8108 11340+ h_file = au_hf_top(file);
9dbd164d
AM
11341+ get_file(h_file);
11342+ au_unpin(&pin);
11343+ di_read_unlock(dentry, AuLock_IR);
11344+ fi_write_unlock(file);
11345+
4a4d8108 11346+ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
9dbd164d 11347+ ii_write_lock_child(inode);
4a4d8108
AM
11348+ au_cpup_attr_timesizes(inode);
11349+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
9dbd164d
AM
11350+ ii_write_unlock(inode);
11351+ fput(h_file);
1facf9fc 11352+
4f0767ce 11353+out:
9dbd164d 11354+ si_read_unlock(sb);
4a4d8108 11355+ mutex_unlock(&inode->i_mutex);
dece6358 11356+ return err;
1facf9fc 11357+}
11358+
4a4d8108
AM
11359+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
11360+ struct pipe_inode_info *pipe, size_t len,
11361+ unsigned int flags)
1facf9fc 11362+{
4a4d8108
AM
11363+ ssize_t err;
11364+ struct file *h_file;
11365+ struct dentry *dentry;
dece6358 11366+ struct super_block *sb;
1facf9fc 11367+
dece6358 11368+ dentry = file->f_dentry;
dece6358 11369+ sb = dentry->d_sb;
e49829fe 11370+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11371+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11372+ if (unlikely(err))
dece6358 11373+ goto out;
1facf9fc 11374+
4a4d8108
AM
11375+ err = -EINVAL;
11376+ h_file = au_hf_top(file);
9dbd164d 11377+ get_file(h_file);
4a4d8108 11378+ if (au_test_loopback_kthread()) {
87a755f4
AM
11379+ au_warn_loopback(h_file->f_dentry->d_sb);
11380+ if (file->f_mapping != h_file->f_mapping) {
11381+ file->f_mapping = h_file->f_mapping;
11382+ smp_mb(); /* unnecessary? */
11383+ }
1308ab2a 11384+ }
9dbd164d
AM
11385+ di_read_unlock(dentry, AuLock_IR);
11386+ fi_read_unlock(file);
11387+
4a4d8108
AM
11388+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
11389+ /* todo: necessasry? */
11390+ /* file->f_ra = h_file->f_ra; */
9dbd164d 11391+ /* update without lock, I don't think it a problem */
4a4d8108 11392+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
9dbd164d 11393+ fput(h_file);
1facf9fc 11394+
4f0767ce 11395+out:
4a4d8108 11396+ si_read_unlock(sb);
dece6358 11397+ return err;
1facf9fc 11398+}
11399+
4a4d8108
AM
11400+static ssize_t
11401+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
11402+ size_t len, unsigned int flags)
1facf9fc 11403+{
4a4d8108
AM
11404+ ssize_t err;
11405+ struct au_pin pin;
11406+ struct dentry *dentry;
11407+ struct inode *inode;
4a4d8108 11408+ struct file *h_file;
9dbd164d 11409+ struct super_block *sb;
1facf9fc 11410+
4a4d8108 11411+ dentry = file->f_dentry;
9dbd164d 11412+ sb = dentry->d_sb;
4a4d8108 11413+ inode = dentry->d_inode;
e49829fe 11414+ au_mtx_and_read_lock(inode);
9dbd164d 11415+
4a4d8108
AM
11416+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11417+ if (unlikely(err))
11418+ goto out;
1facf9fc 11419+
4a4d8108
AM
11420+ err = au_ready_to_write(file, -1, &pin);
11421+ di_downgrade_lock(dentry, AuLock_IR);
9dbd164d
AM
11422+ if (unlikely(err)) {
11423+ di_read_unlock(dentry, AuLock_IR);
11424+ fi_write_unlock(file);
11425+ goto out;
11426+ }
1facf9fc 11427+
4a4d8108 11428+ h_file = au_hf_top(file);
9dbd164d 11429+ get_file(h_file);
4a4d8108 11430+ au_unpin(&pin);
9dbd164d
AM
11431+ di_read_unlock(dentry, AuLock_IR);
11432+ fi_write_unlock(file);
11433+
4a4d8108 11434+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
9dbd164d 11435+ ii_write_lock_child(inode);
4a4d8108
AM
11436+ au_cpup_attr_timesizes(inode);
11437+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
9dbd164d
AM
11438+ ii_write_unlock(inode);
11439+ fput(h_file);
1facf9fc 11440+
4f0767ce 11441+out:
9dbd164d 11442+ si_read_unlock(sb);
4a4d8108
AM
11443+ mutex_unlock(&inode->i_mutex);
11444+ return err;
11445+}
1facf9fc 11446+
4a4d8108
AM
11447+/* ---------------------------------------------------------------------- */
11448+
9dbd164d
AM
11449+/*
11450+ * The locking order around current->mmap_sem.
11451+ * - in most and regular cases
11452+ * file I/O syscall -- aufs_read() or something
11453+ * -- si_rwsem for read -- mmap_sem
11454+ * (Note that [fdi]i_rwsem are released before mmap_sem).
11455+ * - in mmap case
11456+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
11457+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
11458+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
11459+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
11460+ * It means that when aufs acquires si_rwsem for write, the process should never
11461+ * acquire mmap_sem.
11462+ *
11463+ * Actually aufs_readdir() holds [fdi]i_rwsem before mmap_sem, but this is not a
11464+ * problem either since any directory is not able to be mmap-ed.
11465+ * The similar scenario is applied to aufs_readlink() too.
11466+ */
11467+
4a4d8108
AM
11468+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
11469+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
1308ab2a 11470+
/*
 * Architecture specific part of the VM_* -> PROT_* conversion.
 * Only ppc64 contributes a bit (SAO); every other configuration yields 0.
 */
static unsigned long au_arch_prot_conv(unsigned long flags)
{
	unsigned long prot;

#ifdef CONFIG_PPC64
	/* cf. linux/arch/powerpc/include/asm/mman.h */
	AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
	prot = AuConv_VM_PROT(flags, SAO);
#else
	/* no arch specific bits are expected here */
	AuDebugOn(arch_calc_vm_prot_bits(-1));
	prot = 0;
#endif

	return prot;
}
11483+
4a4d8108 11484+static unsigned long au_prot_conv(unsigned long flags)
dece6358 11485+{
4a4d8108
AM
11486+ return AuConv_VM_PROT(flags, READ)
11487+ | AuConv_VM_PROT(flags, WRITE)
11488+ | AuConv_VM_PROT(flags, EXEC)
11489+ | au_arch_prot_conv(flags);
dece6358
AM
11490+}
11491+
4a4d8108
AM
11492+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
11493+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
dece6358 11494+
4a4d8108 11495+static unsigned long au_flag_conv(unsigned long flags)
dece6358 11496+{
4a4d8108
AM
11497+ return AuConv_VM_MAP(flags, GROWSDOWN)
11498+ | AuConv_VM_MAP(flags, DENYWRITE)
11499+ | AuConv_VM_MAP(flags, EXECUTABLE)
11500+ | AuConv_VM_MAP(flags, LOCKED);
dece6358 11501+}
1308ab2a 11502+
9dbd164d 11503+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 11504+{
4a4d8108 11505+ int err;
9dbd164d 11506+ unsigned long prot;
4a4d8108
AM
11507+ aufs_bindex_t bstart;
11508+ const unsigned char wlock
9dbd164d 11509+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108
AM
11510+ struct dentry *dentry;
11511+ struct super_block *sb;
9dbd164d
AM
11512+ struct file *h_file;
11513+ struct au_branch *br;
11514+ struct au_pin pin;
11515+
11516+ AuDbgVmRegion(file, vma);
1308ab2a 11517+
4a4d8108
AM
11518+ dentry = file->f_dentry;
11519+ sb = dentry->d_sb;
9dbd164d 11520+ lockdep_off();
e49829fe 11521+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108
AM
11522+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11523+ if (unlikely(err))
11524+ goto out;
11525+
4a4d8108 11526+ if (wlock) {
4a4d8108
AM
11527+ err = au_ready_to_write(file, -1, &pin);
11528+ di_write_unlock(dentry);
9dbd164d
AM
11529+ if (unlikely(err)) {
11530+ fi_write_unlock(file);
11531+ goto out;
11532+ }
4a4d8108
AM
11533+ au_unpin(&pin);
11534+ } else
11535+ di_write_unlock(dentry);
9dbd164d 11536+
4a4d8108 11537+ bstart = au_fbstart(file);
9dbd164d
AM
11538+ br = au_sbr(sb, bstart);
11539+ h_file = au_hf_top(file);
11540+ get_file(h_file);
2cbb1c4b 11541+ au_set_mmapped(file);
4a4d8108 11542+ fi_write_unlock(file);
9dbd164d 11543+ lockdep_on();
1308ab2a 11544+
9dbd164d 11545+ au_vm_file_reset(vma, h_file);
2cbb1c4b 11546+ prot = au_prot_conv(vma->vm_flags);
9dbd164d 11547+ err = security_file_mmap(h_file, /*reqprot*/prot, prot,
2cbb1c4b 11548+ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
9dbd164d
AM
11549+ if (!err)
11550+ err = h_file->f_op->mmap(h_file, vma);
2cbb1c4b
JR
11551+ if (unlikely(err))
11552+ goto out_reset;
4a4d8108 11553+
2cbb1c4b 11554+ au_vm_prfile_set(vma, file);
4a4d8108 11555+ /* update without lock, I don't think it a problem */
2cbb1c4b 11556+ fsstack_copy_attr_atime(file->f_dentry->d_inode,
9dbd164d 11557+ h_file->f_dentry->d_inode);
2cbb1c4b 11558+ goto out_fput; /* success */
4a4d8108 11559+
2cbb1c4b
JR
11560+out_reset:
11561+ au_unset_mmapped(file);
11562+ au_vm_file_reset(vma, file);
11563+out_fput:
9dbd164d
AM
11564+ fput(h_file);
11565+ lockdep_off();
4f0767ce 11566+out:
9dbd164d
AM
11567+ si_read_unlock(sb);
11568+ lockdep_on();
11569+ AuTraceErr(err);
4a4d8108
AM
11570+ return err;
11571+}
11572+
11573+/* ---------------------------------------------------------------------- */
11574+
1e00d052
AM
11575+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
11576+ int datasync)
4a4d8108
AM
11577+{
11578+ int err;
11579+ struct au_pin pin;
b752ccd1 11580+ struct dentry *dentry;
4a4d8108
AM
11581+ struct inode *inode;
11582+ struct file *h_file;
11583+ struct super_block *sb;
11584+
b752ccd1 11585+ dentry = file->f_dentry;
4a4d8108 11586+ inode = dentry->d_inode;
4a4d8108 11587+ sb = dentry->d_sb;
1e00d052 11588+ mutex_lock(&inode->i_mutex);
e49829fe
JR
11589+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11590+ if (unlikely(err))
11591+ goto out;
4a4d8108
AM
11592+
11593+ err = 0; /* -EBADF; */ /* posix? */
11594+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
e49829fe 11595+ goto out_si;
4a4d8108
AM
11596+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11597+ if (unlikely(err))
e49829fe 11598+ goto out_si;
4a4d8108
AM
11599+
11600+ err = au_ready_to_write(file, -1, &pin);
11601+ di_downgrade_lock(dentry, AuLock_IR);
11602+ if (unlikely(err))
11603+ goto out_unlock;
11604+ au_unpin(&pin);
11605+
11606+ err = -EINVAL;
11607+ h_file = au_hf_top(file);
53392da6
AM
11608+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
11609+ au_cpup_attr_timesizes(inode);
4a4d8108 11610+
4f0767ce 11611+out_unlock:
4a4d8108 11612+ di_read_unlock(dentry, AuLock_IR);
1308ab2a 11613+ fi_write_unlock(file);
e49829fe 11614+out_si:
953406b4 11615+ si_read_unlock(sb);
e49829fe 11616+out:
1e00d052 11617+ mutex_unlock(&inode->i_mutex);
4a4d8108 11618+ return err;
dece6358
AM
11619+}
11620+
4a4d8108
AM
11621+/* no one supports this operation, currently */
11622+#if 0
11623+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 11624+{
4a4d8108
AM
11625+ int err;
11626+ struct au_pin pin;
1308ab2a 11627+ struct dentry *dentry;
4a4d8108
AM
11628+ struct inode *inode;
11629+ struct file *file, *h_file;
1308ab2a 11630+
4a4d8108 11631+ file = kio->ki_filp;
1308ab2a 11632+ dentry = file->f_dentry;
4a4d8108 11633+ inode = dentry->d_inode;
e49829fe 11634+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11635+
11636+ err = 0; /* -EBADF; */ /* posix? */
11637+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
11638+ goto out;
11639+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11640+ if (unlikely(err))
1308ab2a 11641+ goto out;
11642+
4a4d8108
AM
11643+ err = au_ready_to_write(file, -1, &pin);
11644+ di_downgrade_lock(dentry, AuLock_IR);
11645+ if (unlikely(err))
11646+ goto out_unlock;
11647+ au_unpin(&pin);
1308ab2a 11648+
4a4d8108
AM
11649+ err = -ENOSYS;
11650+ h_file = au_hf_top(file);
11651+ if (h_file->f_op && h_file->f_op->aio_fsync) {
11652+ struct dentry *h_d;
11653+ struct mutex *h_mtx;
1308ab2a 11654+
4a4d8108
AM
11655+ h_d = h_file->f_dentry;
11656+ h_mtx = &h_d->d_inode->i_mutex;
11657+ if (!is_sync_kiocb(kio)) {
11658+ get_file(h_file);
11659+ fput(file);
11660+ }
11661+ kio->ki_filp = h_file;
11662+ err = h_file->f_op->aio_fsync(kio, datasync);
11663+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
11664+ if (!err)
11665+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
11666+ /*ignore*/
11667+ au_cpup_attr_timesizes(inode);
11668+ mutex_unlock(h_mtx);
11669+ }
1308ab2a 11670+
4f0767ce 11671+out_unlock:
4a4d8108
AM
11672+ di_read_unlock(dentry, AuLock_IR);
11673+ fi_write_unlock(file);
4f0767ce 11674+out:
e49829fe 11675+ si_read_unlock(inode->sb);
4a4d8108
AM
11676+ mutex_unlock(&inode->i_mutex);
11677+ return err;
dece6358 11678+}
4a4d8108 11679+#endif
dece6358 11680+
4a4d8108 11681+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 11682+{
4a4d8108
AM
11683+ int err;
11684+ struct file *h_file;
11685+ struct dentry *dentry;
11686+ struct super_block *sb;
1308ab2a 11687+
4a4d8108
AM
11688+ dentry = file->f_dentry;
11689+ sb = dentry->d_sb;
e49829fe 11690+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11691+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11692+ if (unlikely(err))
11693+ goto out;
11694+
11695+ h_file = au_hf_top(file);
11696+ if (h_file->f_op && h_file->f_op->fasync)
11697+ err = h_file->f_op->fasync(fd, h_file, flag);
11698+
11699+ di_read_unlock(dentry, AuLock_IR);
11700+ fi_read_unlock(file);
1308ab2a 11701+
4f0767ce 11702+out:
4a4d8108 11703+ si_read_unlock(sb);
1308ab2a 11704+ return err;
dece6358 11705+}
4a4d8108
AM
11706+
11707+/* ---------------------------------------------------------------------- */
11708+
11709+/* no one supports this operation, currently */
11710+#if 0
11711+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
11712+ size_t len, loff_t *pos , int more)
11713+{
11714+}
11715+#endif
11716+
11717+/* ---------------------------------------------------------------------- */
11718+
11719+const struct file_operations aufs_file_fop = {
11720+ .owner = THIS_MODULE,
2cbb1c4b 11721+
027c5e7a 11722+ .llseek = default_llseek,
4a4d8108
AM
11723+
11724+ .read = aufs_read,
11725+ .write = aufs_write,
11726+ .aio_read = aufs_aio_read,
11727+ .aio_write = aufs_aio_write,
11728+#ifdef CONFIG_AUFS_POLL
11729+ .poll = aufs_poll,
11730+#endif
11731+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1
AM
11732+#ifdef CONFIG_COMPAT
11733+ .compat_ioctl = aufs_ioctl_nondir, /* same */
11734+#endif
4a4d8108
AM
11735+ .mmap = aufs_mmap,
11736+ .open = aufs_open_nondir,
11737+ .flush = aufs_flush_nondir,
11738+ .release = aufs_release_nondir,
11739+ .fsync = aufs_fsync_nondir,
11740+ /* .aio_fsync = aufs_aio_fsync_nondir, */
11741+ .fasync = aufs_fasync,
11742+ /* .sendpage = aufs_sendpage, */
11743+ .splice_write = aufs_splice_write,
11744+ .splice_read = aufs_splice_read,
11745+#if 0
11746+ .aio_splice_write = aufs_aio_splice_write,
11747+ .aio_splice_read = aufs_aio_splice_read
11748+#endif
11749+};
7f207e10
AM
11750diff -urN /usr/share/empty/fs/aufs/f_op_sp.c linux/fs/aufs/f_op_sp.c
11751--- /usr/share/empty/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
11752+++ linux/fs/aufs/f_op_sp.c 2012-02-13 21:54:56.969771692 +0100
11753@@ -0,0 +1,298 @@
1308ab2a 11754+/*
f6c5ef8b 11755+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1308ab2a 11756+ *
11757+ * This program, aufs is free software; you can redistribute it and/or modify
11758+ * it under the terms of the GNU General Public License as published by
11759+ * the Free Software Foundation; either version 2 of the License, or
11760+ * (at your option) any later version.
11761+ *
11762+ * This program is distributed in the hope that it will be useful,
11763+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11764+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11765+ * GNU General Public License for more details.
11766+ *
11767+ * You should have received a copy of the GNU General Public License
11768+ * along with this program; if not, write to the Free Software
11769+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11770+ */
dece6358 11771+
1308ab2a 11772+/*
4a4d8108
AM
11773+ * file operations for special files.
11774+ * while they exist in aufs virtually,
11775+ * their file I/O is handled out of aufs.
1308ab2a 11776+ */
11777+
4a4d8108 11778+#include "aufs.h"
1308ab2a 11779+
4a4d8108
AM
11780+static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
11781+ unsigned long nv, loff_t pos)
dece6358 11782+{
4a4d8108
AM
11783+ ssize_t err;
11784+ aufs_bindex_t bstart;
11785+ unsigned char wbr;
11786+ struct file *file, *h_file;
11787+ struct super_block *sb;
1308ab2a 11788+
4a4d8108
AM
11789+ file = kio->ki_filp;
11790+ sb = file->f_dentry->d_sb;
11791+ si_read_lock(sb, AuLock_FLUSH);
11792+ fi_read_lock(file);
11793+ bstart = au_fbstart(file);
11794+ h_file = au_hf_top(file);
11795+ fi_read_unlock(file);
11796+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11797+ si_read_unlock(sb);
11798+
11799+ /* do not change the file in kio */
11800+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
11801+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
11802+ if (err > 0 && wbr)
11803+ file_accessed(h_file);
11804+
11805+ return err;
11806+}
11807+
11808+static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
11809+ unsigned long nv, loff_t pos)
11810+{
11811+ ssize_t err;
11812+ aufs_bindex_t bstart;
11813+ unsigned char wbr;
11814+ struct super_block *sb;
11815+ struct file *file, *h_file;
11816+
11817+ file = kio->ki_filp;
11818+ sb = file->f_dentry->d_sb;
11819+ si_read_lock(sb, AuLock_FLUSH);
11820+ fi_read_lock(file);
11821+ bstart = au_fbstart(file);
11822+ h_file = au_hf_top(file);
11823+ fi_read_unlock(file);
11824+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11825+ si_read_unlock(sb);
11826+
11827+ /* do not change the file in kio */
11828+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
11829+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
11830+ if (err > 0 && wbr)
11831+ file_update_time(h_file);
11832+
11833+ return err;
11834+}
11835+
11836+/* ---------------------------------------------------------------------- */
11837+
11838+static int aufs_release_sp(struct inode *inode, struct file *file)
11839+{
11840+ int err;
11841+ struct file *h_file;
11842+
11843+ fi_read_lock(file);
11844+ h_file = au_hf_top(file);
11845+ fi_read_unlock(file);
11846+ /* close this fifo in aufs */
11847+ err = h_file->f_op->release(inode, file); /* ignore */
11848+ aufs_release_nondir(inode, file); /* ignore */
11849+ return err;
11850+}
11851+
11852+/* ---------------------------------------------------------------------- */
11853+
11854+/* currently, support only FIFO */
4f0767ce
JR
/* indices into au_sp_fop[]; AuSp_Last is the number of entries */
enum {
	AuSp_FIFO,
	AuSp_FIFO_R,
	AuSp_FIFO_W,
	AuSp_FIFO_RW,
	/* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
	AuSp_Last
};
4a4d8108
AM
11860+static int aufs_open_sp(struct inode *inode, struct file *file);
11861+static struct au_sp_fop {
11862+ int done;
11863+ struct file_operations fop; /* not 'const' */
11864+ spinlock_t spin;
11865+} au_sp_fop[AuSp_Last] = {
11866+ [AuSp_FIFO] = {
11867+ .fop = {
11868+ .owner = THIS_MODULE,
11869+ .open = aufs_open_sp
11870+ }
11871+ }
11872+};
11873+
11874+static void au_init_fop_sp(struct file *file)
11875+{
11876+ struct au_sp_fop *p;
11877+ int i;
11878+ struct file *h_file;
11879+
11880+ p = au_sp_fop;
11881+ if (unlikely(!p->done)) {
11882+ /* initialize first time only */
11883+ static DEFINE_SPINLOCK(spin);
11884+
11885+ spin_lock(&spin);
11886+ if (!p->done) {
11887+ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
11888+ != AuSp_Last);
11889+ for (i = 0; i < AuSp_Last; i++)
11890+ spin_lock_init(&p[i].spin);
11891+ p->done = 1;
11892+ }
11893+ spin_unlock(&spin);
11894+ }
11895+
11896+ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
11897+ case FMODE_READ:
11898+ i = AuSp_FIFO_R;
11899+ break;
11900+ case FMODE_WRITE:
11901+ i = AuSp_FIFO_W;
11902+ break;
11903+ case FMODE_READ | FMODE_WRITE:
11904+ i = AuSp_FIFO_RW;
11905+ break;
11906+ default:
11907+ BUG();
11908+ }
11909+
11910+ p += i;
11911+ if (unlikely(!p->done)) {
11912+ /* initialize first time only */
11913+ h_file = au_hf_top(file);
11914+ spin_lock(&p->spin);
11915+ if (!p->done) {
11916+ p->fop = *h_file->f_op;
11917+ p->fop.owner = THIS_MODULE;
11918+ if (p->fop.aio_read)
11919+ p->fop.aio_read = aufs_aio_read_sp;
11920+ if (p->fop.aio_write)
11921+ p->fop.aio_write = aufs_aio_write_sp;
11922+ p->fop.release = aufs_release_sp;
11923+ p->done = 1;
11924+ }
11925+ spin_unlock(&p->spin);
11926+ }
11927+ file->f_op = &p->fop;
11928+}
11929+
11930+static int au_cpup_sp(struct dentry *dentry)
11931+{
11932+ int err;
11933+ aufs_bindex_t bcpup;
11934+ struct au_pin pin;
11935+ struct au_wr_dir_args wr_dir_args = {
11936+ .force_btgt = -1,
11937+ .flags = 0
11938+ };
11939+
11940+ AuDbg("%.*s\n", AuDLNPair(dentry));
11941+
11942+ di_read_unlock(dentry, AuLock_IR);
11943+ di_write_lock_child(dentry);
11944+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11945+ if (unlikely(err < 0))
11946+ goto out;
11947+ bcpup = err;
11948+ err = 0;
11949+ if (bcpup == au_dbstart(dentry))
11950+ goto out; /* success */
11951+
11952+ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
11953+ AuPin_MNT_WRITE);
11954+ if (!err) {
11955+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
11956+ au_unpin(&pin);
11957+ }
11958+
4f0767ce 11959+out:
4a4d8108
AM
11960+ di_downgrade_lock(dentry, AuLock_IR);
11961+ return err;
11962+}
11963+
11964+static int au_do_open_sp(struct file *file, int flags)
11965+{
11966+ int err;
11967+ struct dentry *dentry;
11968+ struct super_block *sb;
11969+ struct file *h_file;
11970+ struct inode *h_inode;
11971+
11972+ dentry = file->f_dentry;
11973+ AuDbg("%.*s\n", AuDLNPair(dentry));
11974+
11975+ /*
11976+ * try copying-up.
11977+ * operate on the ro branch is not an error.
11978+ */
11979+ au_cpup_sp(dentry); /* ignore */
11980+
11981+ /* prepare h_file */
11982+ err = au_do_open_nondir(file, vfsub_file_flags(file));
11983+ if (unlikely(err))
11984+ goto out;
11985+
11986+ sb = dentry->d_sb;
11987+ h_file = au_hf_top(file);
11988+ h_inode = h_file->f_dentry->d_inode;
11989+ di_read_unlock(dentry, AuLock_IR);
11990+ fi_write_unlock(file);
11991+ si_read_unlock(sb);
11992+ /* open this fifo in aufs */
11993+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
11994+ si_noflush_read_lock(sb);
11995+ fi_write_lock(file);
11996+ di_read_lock_child(dentry, AuLock_IR);
11997+ if (!err)
11998+ au_init_fop_sp(file);
4a4d8108 11999+
4f0767ce 12000+out:
4a4d8108
AM
12001+ return err;
12002+}
12003+
12004+static int aufs_open_sp(struct inode *inode, struct file *file)
12005+{
12006+ int err;
12007+ struct super_block *sb;
12008+
12009+ sb = file->f_dentry->d_sb;
12010+ si_read_lock(sb, AuLock_FLUSH);
12011+ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
12012+ si_read_unlock(sb);
12013+ return err;
12014+}
12015+
12016+/* ---------------------------------------------------------------------- */
12017+
12018+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
12019+{
12020+ init_special_inode(inode, mode, rdev);
12021+
12022+ switch (mode & S_IFMT) {
12023+ case S_IFIFO:
12024+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
12025+ /*FALLTHROUGH*/
12026+ case S_IFCHR:
12027+ case S_IFBLK:
12028+ case S_IFSOCK:
12029+ break;
12030+ default:
12031+ AuDebugOn(1);
12032+ }
12033+}
12034+
12035+int au_special_file(umode_t mode)
12036+{
12037+ int ret;
12038+
12039+ ret = 0;
12040+ switch (mode & S_IFMT) {
12041+ case S_IFIFO:
12042+#if 0
12043+ case S_IFCHR:
12044+ case S_IFBLK:
12045+ case S_IFSOCK:
12046+#endif
12047+ ret = 1;
12048+ }
12049+
12050+ return ret;
12051+}
7f207e10
AM
12052diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
12053--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
12054+++ linux/fs/aufs/fstype.h 2012-02-13 21:54:56.969771692 +0100
12055@@ -0,0 +1,496 @@
4a4d8108 12056+/*
f6c5ef8b 12057+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
12058+ *
12059+ * This program, aufs is free software; you can redistribute it and/or modify
12060+ * it under the terms of the GNU General Public License as published by
12061+ * the Free Software Foundation; either version 2 of the License, or
12062+ * (at your option) any later version.
12063+ *
12064+ * This program is distributed in the hope that it will be useful,
12065+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12066+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12067+ * GNU General Public License for more details.
12068+ *
12069+ * You should have received a copy of the GNU General Public License
12070+ * along with this program; if not, write to the Free Software
12071+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12072+ */
12073+
12074+/*
12075+ * judging filesystem type
12076+ */
12077+
12078+#ifndef __AUFS_FSTYPE_H__
12079+#define __AUFS_FSTYPE_H__
12080+
12081+#ifdef __KERNEL__
12082+
12083+#include <linux/fs.h>
12084+#include <linux/magic.h>
12085+#include <linux/romfs_fs.h>
4a4d8108
AM
12086+
12087+static inline int au_test_aufs(struct super_block *sb)
12088+{
12089+ return sb->s_magic == AUFS_SUPER_MAGIC;
12090+}
12091+
12092+static inline const char *au_sbtype(struct super_block *sb)
12093+{
12094+ return sb->s_type->name;
12095+}
1308ab2a 12096+
12097+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
12098+{
12099+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
12100+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
12101+#else
12102+ return 0;
12103+#endif
12104+}
12105+
1308ab2a 12106+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 12107+{
1308ab2a 12108+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
12109+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
12110+#else
12111+ return 0;
12112+#endif
12113+}
12114+
1308ab2a 12115+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 12116+{
1308ab2a 12117+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
12118+ return sb->s_magic == CRAMFS_MAGIC;
12119+#endif
12120+ return 0;
12121+}
12122+
12123+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
12124+{
12125+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
12126+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
12127+#else
12128+ return 0;
12129+#endif
12130+}
12131+
1308ab2a 12132+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 12133+{
1308ab2a 12134+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
12135+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
12136+#else
12137+ return 0;
12138+#endif
12139+}
12140+
1308ab2a 12141+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 12142+{
1308ab2a 12143+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
12144+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
12145+#else
12146+ return 0;
12147+#endif
12148+}
12149+
1308ab2a 12150+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 12151+{
1308ab2a 12152+#ifdef CONFIG_TMPFS
12153+ return sb->s_magic == TMPFS_MAGIC;
12154+#else
12155+ return 0;
dece6358 12156+#endif
dece6358
AM
12157+}
12158+
1308ab2a 12159+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 12160+{
1308ab2a 12161+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
12162+ return !strcmp(au_sbtype(sb), "ecryptfs");
12163+#else
12164+ return 0;
12165+#endif
1facf9fc 12166+}
12167+
1308ab2a 12168+static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
1facf9fc 12169+{
1308ab2a 12170+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
12171+ return sb->s_magic == SMB_SUPER_MAGIC;
12172+#else
12173+ return 0;
1facf9fc 12174+#endif
1facf9fc 12175+}
12176+
1308ab2a 12177+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
1facf9fc 12178+{
1308ab2a 12179+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
12180+ return sb->s_magic == OCFS2_SUPER_MAGIC;
12181+#else
12182+ return 0;
12183+#endif
1facf9fc 12184+}
12185+
1308ab2a 12186+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
1facf9fc 12187+{
1308ab2a 12188+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
12189+ return sb->s_magic == DLMFS_MAGIC;
12190+#else
12191+ return 0;
12192+#endif
1facf9fc 12193+}
12194+
1308ab2a 12195+static inline int au_test_coda(struct super_block *sb __maybe_unused)
1facf9fc 12196+{
1308ab2a 12197+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
12198+ return sb->s_magic == CODA_SUPER_MAGIC;
12199+#else
12200+ return 0;
12201+#endif
12202+}
12203+
12204+static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
12205+{
12206+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
12207+ return sb->s_magic == V9FS_MAGIC;
12208+#else
12209+ return 0;
12210+#endif
12211+}
12212+
12213+static inline int au_test_ext4(struct super_block *sb __maybe_unused)
12214+{
12215+#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
12216+ return sb->s_magic == EXT4_SUPER_MAGIC;
12217+#else
12218+ return 0;
12219+#endif
12220+}
12221+
12222+static inline int au_test_sysv(struct super_block *sb __maybe_unused)
12223+{
12224+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
12225+ return !strcmp(au_sbtype(sb), "sysv");
12226+#else
12227+ return 0;
12228+#endif
12229+}
12230+
12231+static inline int au_test_ramfs(struct super_block *sb)
12232+{
12233+ return sb->s_magic == RAMFS_MAGIC;
12234+}
12235+
12236+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
12237+{
12238+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
12239+ return sb->s_magic == UBIFS_SUPER_MAGIC;
12240+#else
12241+ return 0;
12242+#endif
12243+}
12244+
12245+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
12246+{
12247+#ifdef CONFIG_PROC_FS
12248+ return sb->s_magic == PROC_SUPER_MAGIC;
12249+#else
12250+ return 0;
12251+#endif
12252+}
12253+
12254+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
12255+{
12256+#ifdef CONFIG_SYSFS
12257+ return sb->s_magic == SYSFS_MAGIC;
12258+#else
12259+ return 0;
12260+#endif
12261+}
12262+
12263+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
12264+{
12265+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
12266+ return sb->s_magic == CONFIGFS_MAGIC;
12267+#else
12268+ return 0;
12269+#endif
12270+}
12271+
12272+static inline int au_test_minix(struct super_block *sb __maybe_unused)
12273+{
12274+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
12275+ return sb->s_magic == MINIX3_SUPER_MAGIC
12276+ || sb->s_magic == MINIX2_SUPER_MAGIC
12277+ || sb->s_magic == MINIX2_SUPER_MAGIC2
12278+ || sb->s_magic == MINIX_SUPER_MAGIC
12279+ || sb->s_magic == MINIX_SUPER_MAGIC2;
12280+#else
12281+ return 0;
12282+#endif
12283+}
12284+
12285+static inline int au_test_cifs(struct super_block *sb __maybe_unused)
12286+{
12287+#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE)
12288+ return sb->s_magic == CIFS_MAGIC_NUMBER;
12289+#else
12290+ return 0;
12291+#endif
12292+}
12293+
12294+static inline int au_test_fat(struct super_block *sb __maybe_unused)
12295+{
12296+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
12297+ return sb->s_magic == MSDOS_SUPER_MAGIC;
12298+#else
12299+ return 0;
12300+#endif
12301+}
12302+
/* msdos is detected by the same test as FAT */
static inline int au_test_msdos(struct super_block *sb)
{
	int is_fat = au_test_fat(sb);

	return is_fat;
}
12307+
/* vfat is detected by the same test as FAT */
static inline int au_test_vfat(struct super_block *sb)
{
	int is_fat = au_test_fat(sb);

	return is_fat;
}
12312+
12313+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
12314+{
12315+#ifdef CONFIG_SECURITYFS
12316+ return sb->s_magic == SECURITYFS_MAGIC;
12317+#else
12318+ return 0;
12319+#endif
12320+}
12321+
12322+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
12323+{
12324+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
12325+ return sb->s_magic == SQUASHFS_MAGIC;
12326+#else
12327+ return 0;
12328+#endif
12329+}
12330+
12331+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
12332+{
12333+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
12334+ return sb->s_magic == BTRFS_SUPER_MAGIC;
12335+#else
12336+ return 0;
12337+#endif
12338+}
12339+
12340+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
12341+{
12342+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
12343+ return sb->s_magic == XENFS_SUPER_MAGIC;
12344+#else
12345+ return 0;
12346+#endif
12347+}
12348+
12349+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
12350+{
12351+#ifdef CONFIG_DEBUG_FS
12352+ return sb->s_magic == DEBUGFS_MAGIC;
12353+#else
12354+ return 0;
12355+#endif
12356+}
12357+
12358+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
12359+{
12360+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
12361+ return sb->s_magic == NILFS_SUPER_MAGIC;
12362+#else
12363+ return 0;
12364+#endif
12365+}
12366+
4a4d8108
AM
12367+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
12368+{
12369+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
12370+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
12371+#else
12372+ return 0;
12373+#endif
12374+}
12375+
1308ab2a 12376+/* ---------------------------------------------------------------------- */
12377+/*
12378+ * they can't be an aufs branch.
12379+ */
12380+static inline int au_test_fs_unsuppoted(struct super_block *sb)
12381+{
12382+ return
12383+#ifndef CONFIG_AUFS_BR_RAMFS
12384+ au_test_ramfs(sb) ||
12385+#endif
12386+ au_test_procfs(sb)
12387+ || au_test_sysfs(sb)
12388+ || au_test_configfs(sb)
12389+ || au_test_debugfs(sb)
12390+ || au_test_securityfs(sb)
12391+ || au_test_xenfs(sb)
12392+ || au_test_ecryptfs(sb)
12393+ /* || !strcmp(au_sbtype(sb), "unionfs") */
12394+ || au_test_aufs(sb); /* will be supported in next version */
12395+}
12396+
12397+/*
12398+ * If the filesystem supports NFS-export, then it has to support NULL as
12399+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
12400+ * We can apply this principle when we handle a lower filesystem.
12401+ */
12402+static inline int au_test_fs_null_nd(struct super_block *sb)
12403+{
12404+ return !!sb->s_export_op;
12405+}
12406+
12407+static inline int au_test_fs_remote(struct super_block *sb)
12408+{
12409+ return !au_test_tmpfs(sb)
12410+#ifdef CONFIG_AUFS_BR_RAMFS
12411+ && !au_test_ramfs(sb)
12412+#endif
12413+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
12414+}
12415+
12416+/* ---------------------------------------------------------------------- */
12417+
12418+/*
12419+ * Note: these functions (below) are created after reading ->getattr() in all
12420+ * filesystems under linux/fs. it means we have to do so in every update...
12421+ */
12422+
12423+/*
12424+ * some filesystems require getattr to refresh the inode attributes before
12425+ * referencing.
12426+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
12427+ * and leave the work for d_revalidate()
12428+ */
12429+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
12430+{
12431+ return au_test_nfs(sb)
12432+ || au_test_fuse(sb)
12433+ /* || au_test_smbfs(sb) */ /* untested */
12434+ /* || au_test_ocfs2(sb) */ /* untested */
12435+ /* || au_test_btrfs(sb) */ /* untested */
12436+ /* || au_test_coda(sb) */ /* untested */
12437+ /* || au_test_v9fs(sb) */ /* untested */
12438+ ;
12439+}
12440+
12441+/*
12442+ * filesystems which don't maintain i_size or i_blocks.
12443+ */
12444+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
12445+{
12446+ return au_test_xfs(sb)
4a4d8108
AM
12447+ || au_test_btrfs(sb)
12448+ || au_test_ubifs(sb)
12449+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 12450+ /* || au_test_ext4(sb) */ /* untested */
12451+ /* || au_test_ocfs2(sb) */ /* untested */
12452+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
12453+ /* || au_test_sysv(sb) */ /* untested */
1308ab2a 12454+ /* || au_test_minix(sb) */ /* untested */
12455+ ;
12456+}
12457+
12458+/*
12459+ * filesystems which don't store the correct value in some of their inode
12460+ * attributes.
12461+ */
12462+static inline int au_test_fs_bad_iattr(struct super_block *sb)
12463+{
12464+ return au_test_fs_bad_iattr_size(sb)
12465+ /* || au_test_cifs(sb) */ /* untested */
12466+ || au_test_fat(sb)
12467+ || au_test_msdos(sb)
12468+ || au_test_vfat(sb);
1facf9fc 12469+}
12470+
12471+/* they don't check i_nlink in link(2) */
12472+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
12473+{
12474+ return au_test_tmpfs(sb)
12475+#ifdef CONFIG_AUFS_BR_RAMFS
12476+ || au_test_ramfs(sb)
12477+#endif
4a4d8108
AM
12478+ || au_test_ubifs(sb)
12479+ || au_test_btrfs(sb)
12480+ || au_test_hfsplus(sb);
1facf9fc 12481+}
12482+
12483+/*
12484+ * filesystems which set S_NOATIME and S_NOCMTIME.
12485+ */
12486+static inline int au_test_fs_notime(struct super_block *sb)
12487+{
12488+ return au_test_nfs(sb)
12489+ || au_test_fuse(sb)
dece6358 12490+ || au_test_ubifs(sb)
1facf9fc 12491+ /* || au_test_cifs(sb) */ /* untested */
1facf9fc 12492+ ;
12493+}
12494+
12495+/*
12496+ * filesystems which require replacing i_mapping.
12497+ */
12498+static inline int au_test_fs_bad_mapping(struct super_block *sb)
12499+{
dece6358
AM
12500+ return au_test_fuse(sb)
12501+ || au_test_ubifs(sb);
1facf9fc 12502+}
12503+
12504+/* temporary support for i#1 in cramfs */
12505+static inline int au_test_fs_unique_ino(struct inode *inode)
12506+{
12507+ if (au_test_cramfs(inode->i_sb))
12508+ return inode->i_ino != 1;
12509+ return 1;
12510+}
12511+
12512+/* ---------------------------------------------------------------------- */
12513+
12514+/*
12515+ * the filesystem where the xino files are placed must support i/o after unlink and
12516+ * maintain i_size and i_blocks.
12517+ */
12518+static inline int au_test_fs_bad_xino(struct super_block *sb)
12519+{
12520+ return au_test_fs_remote(sb)
12521+ || au_test_fs_bad_iattr_size(sb)
12522+#ifdef CONFIG_AUFS_BR_RAMFS
12523+ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
12524+#else
12525+ || !au_test_fs_null_nd(sb) /* to keep xino code simple */
12526+#endif
12527+ /* don't want unnecessary work for xino */
12528+ || au_test_aufs(sb)
1308ab2a 12529+ || au_test_ecryptfs(sb)
12530+ || au_test_nilfs(sb);
1facf9fc 12531+}
12532+
12533+static inline int au_test_fs_trunc_xino(struct super_block *sb)
12534+{
12535+ return au_test_tmpfs(sb)
12536+ || au_test_ramfs(sb);
12537+}
12538+
12539+/*
12540+ * test if the @sb is real-readonly.
12541+ */
12542+static inline int au_test_fs_rr(struct super_block *sb)
12543+{
12544+ return au_test_squashfs(sb)
12545+ || au_test_iso9660(sb)
12546+ || au_test_cramfs(sb)
12547+ || au_test_romfs(sb);
12548+}
12549+
12550+#endif /* __KERNEL__ */
12551+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
12552diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
12553--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 12554+++ linux/fs/aufs/hfsnotify.c 2012-02-13 21:54:56.969771692 +0100
027c5e7a 12555@@ -0,0 +1,247 @@
1facf9fc 12556+/*
f6c5ef8b 12557+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 12558+ *
12559+ * This program, aufs is free software; you can redistribute it and/or modify
12560+ * it under the terms of the GNU General Public License as published by
12561+ * the Free Software Foundation; either version 2 of the License, or
12562+ * (at your option) any later version.
dece6358
AM
12563+ *
12564+ * This program is distributed in the hope that it will be useful,
12565+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12566+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12567+ * GNU General Public License for more details.
12568+ *
12569+ * You should have received a copy of the GNU General Public License
12570+ * along with this program; if not, write to the Free Software
12571+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 12572+ */
12573+
12574+/*
4a4d8108 12575+ * fsnotify for the lower directories
1facf9fc 12576+ */
12577+
12578+#include "aufs.h"
12579+
4a4d8108
AM
12580+/* FS_IN_IGNORED is unnecessary */
12581+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
12582+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 12583+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
1facf9fc 12584+
0c5527e5 12585+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 12586+{
0c5527e5
AM
12587+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
12588+ hn_mark);
4a4d8108 12589+ AuDbg("here\n");
7f207e10
AM
12590+ hn->hn_mark_dead = 1;
12591+ smp_mb();
12592+ wake_up_all(&au_hfsn_wq);
4a4d8108 12593+}
1facf9fc 12594+
027c5e7a 12595+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 12596+{
027c5e7a
AM
12597+ struct au_hnotify *hn;
12598+ struct super_block *sb;
12599+ struct au_branch *br;
0c5527e5 12600+ struct fsnotify_mark *mark;
027c5e7a 12601+ aufs_bindex_t bindex;
1facf9fc 12602+
027c5e7a
AM
12603+ hn = hinode->hi_notify;
12604+ sb = hn->hn_aufs_inode->i_sb;
12605+ bindex = au_br_index(sb, hinode->hi_id);
12606+ br = au_sbr(sb, bindex);
7f207e10 12607+ hn->hn_mark_dead = 0;
0c5527e5
AM
12608+ mark = &hn->hn_mark;
12609+ fsnotify_init_mark(mark, au_hfsn_free_mark);
12610+ mark->mask = AuHfsnMask;
7f207e10
AM
12611+ /*
12612+ * by udba rename or rmdir, aufs assigns a new inode to the known
12613+ * h_inode, so specify 1 to allow dups.
12614+ */
027c5e7a
AM
12615+ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode,
12616+ /*mnt*/NULL, /*allow_dups*/1);
1facf9fc 12617+}
12618+
027c5e7a 12619+static void au_hfsn_free(struct au_hinode *hinode)
1facf9fc 12620+{
027c5e7a 12621+ struct au_hnotify *hn;
0c5527e5 12622+ struct fsnotify_mark *mark;
953406b4 12623+
027c5e7a 12624+ hn = hinode->hi_notify;
0c5527e5
AM
12625+ mark = &hn->hn_mark;
12626+ fsnotify_destroy_mark(mark);
12627+ fsnotify_put_mark(mark);
7f207e10
AM
12628+
12629+ /* TODO: bad approach */
12630+ wait_event(au_hfsn_wq, hn->hn_mark_dead);
1facf9fc 12631+}
12632+
12633+/* ---------------------------------------------------------------------- */
12634+
4a4d8108 12635+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 12636+{
0c5527e5 12637+ struct fsnotify_mark *mark;
1facf9fc 12638+
0c5527e5
AM
12639+ mark = &hinode->hi_notify->hn_mark;
12640+ spin_lock(&mark->lock);
1facf9fc 12641+ if (do_set) {
0c5527e5
AM
12642+ AuDebugOn(mark->mask & AuHfsnMask);
12643+ mark->mask |= AuHfsnMask;
1facf9fc 12644+ } else {
0c5527e5
AM
12645+ AuDebugOn(!(mark->mask & AuHfsnMask));
12646+ mark->mask &= ~AuHfsnMask;
1facf9fc 12647+ }
0c5527e5 12648+ spin_unlock(&mark->lock);
4a4d8108 12649+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 12650+}
12651+
4a4d8108 12652+/* ---------------------------------------------------------------------- */
1facf9fc 12653+
4a4d8108
AM
12654+/* #define AuDbgHnotify */
12655+#ifdef AuDbgHnotify
12656+static char *au_hfsn_name(u32 mask)
12657+{
12658+#ifdef CONFIG_AUFS_DEBUG
12659+#define test_ret(flag) if (mask & flag) \
12660+ return #flag;
12661+ test_ret(FS_ACCESS);
12662+ test_ret(FS_MODIFY);
12663+ test_ret(FS_ATTRIB);
12664+ test_ret(FS_CLOSE_WRITE);
12665+ test_ret(FS_CLOSE_NOWRITE);
12666+ test_ret(FS_OPEN);
12667+ test_ret(FS_MOVED_FROM);
12668+ test_ret(FS_MOVED_TO);
12669+ test_ret(FS_CREATE);
12670+ test_ret(FS_DELETE);
12671+ test_ret(FS_DELETE_SELF);
12672+ test_ret(FS_MOVE_SELF);
12673+ test_ret(FS_UNMOUNT);
12674+ test_ret(FS_Q_OVERFLOW);
12675+ test_ret(FS_IN_IGNORED);
12676+ test_ret(FS_IN_ISDIR);
12677+ test_ret(FS_IN_ONESHOT);
12678+ test_ret(FS_EVENT_ON_CHILD);
12679+ return "";
12680+#undef test_ret
12681+#else
12682+ return "??";
12683+#endif
1facf9fc 12684+}
4a4d8108 12685+#endif
1facf9fc 12686+
12687+/* ---------------------------------------------------------------------- */
12688+
4a4d8108 12689+static int au_hfsn_handle_event(struct fsnotify_group *group,
0c5527e5
AM
12690+ struct fsnotify_mark *inode_mark,
12691+ struct fsnotify_mark *vfsmount_mark,
4a4d8108 12692+ struct fsnotify_event *event)
1facf9fc 12693+{
12694+ int err;
4a4d8108
AM
12695+ struct au_hnotify *hnotify;
12696+ struct inode *h_dir, *h_inode;
12697+ __u32 mask;
4a4d8108
AM
12698+ struct qstr h_child_qstr = {
12699+ .name = event->file_name,
12700+ .len = event->name_len
12701+ };
12702+
12703+ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 12704+
12705+ err = 0;
0c5527e5 12706+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108
AM
12707+ mask = event->mask;
12708+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 12709+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 12710+ goto out;
1facf9fc 12711+
4a4d8108
AM
12712+ h_dir = event->to_tell;
12713+ h_inode = event->inode;
12714+#ifdef AuDbgHnotify
12715+ au_debug(1);
12716+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
12717+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
12718+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
12719+ h_dir->i_ino, mask, au_hfsn_name(mask),
12720+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
12721+ /* WARN_ON(1); */
1facf9fc 12722+ }
4a4d8108 12723+ au_debug(0);
1facf9fc 12724+#endif
4a4d8108 12725+
0c5527e5
AM
12726+ AuDebugOn(!inode_mark);
12727+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
12728+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 12729+
4a4d8108
AM
12730+out:
12731+ return err;
12732+}
1facf9fc 12733+
027c5e7a 12734+/* isn't it a waste to ask every registered 'group'? */
7f207e10 12735+/* copied from linux/fs/notify/inotify/inotify_fsnotify.c */
4a4d8108 12736+/* it should be exported to modules */
7f207e10
AM
12737+static bool au_hfsn_should_send_event(struct fsnotify_group *group,
12738+ struct inode *h_inode,
0c5527e5
AM
12739+ struct fsnotify_mark *inode_mark,
12740+ struct fsnotify_mark *vfsmount_mark,
12741+ __u32 mask, void *data, int data_type)
4a4d8108 12742+{
4a4d8108 12743+ mask = (mask & ~FS_EVENT_ON_CHILD);
7f207e10 12744+ return inode_mark->mask & mask;
4a4d8108
AM
12745+}
12746+
12747+static struct fsnotify_ops au_hfsn_ops = {
12748+ .should_send_event = au_hfsn_should_send_event,
12749+ .handle_event = au_hfsn_handle_event
12750+};
12751+
12752+/* ---------------------------------------------------------------------- */
12753+
027c5e7a
AM
12754+static void au_hfsn_fin_br(struct au_branch *br)
12755+{
12756+ if (br->br_hfsn_group)
12757+ fsnotify_put_group(br->br_hfsn_group);
12758+}
12759+
12760+static int au_hfsn_init_br(struct au_branch *br, int perm)
12761+{
12762+ br->br_hfsn_group = NULL;
12763+ br->br_hfsn_ops = au_hfsn_ops;
12764+ return 0;
12765+}
12766+
12767+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
4a4d8108
AM
12768+{
12769+ int err;
1facf9fc 12770+
4a4d8108 12771+ err = 0;
027c5e7a
AM
12772+ if (udba != AuOpt_UDBA_HNOTIFY
12773+ || !au_br_hnotifyable(perm)) {
12774+ au_hfsn_fin_br(br);
12775+ br->br_hfsn_group = NULL;
12776+ goto out;
12777+ }
12778+
12779+ if (br->br_hfsn_group)
12780+ goto out;
12781+
12782+ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops);
12783+ if (IS_ERR(br->br_hfsn_group)) {
12784+ err = PTR_ERR(br->br_hfsn_group);
0c5527e5 12785+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
027c5e7a 12786+ br->br_hfsn_group = NULL;
4a4d8108 12787+ }
1facf9fc 12788+
027c5e7a 12789+out:
1facf9fc 12790+ AuTraceErr(err);
12791+ return err;
12792+}
12793+
4a4d8108
AM
12794+const struct au_hnotify_op au_hnotify_op = {
12795+ .ctl = au_hfsn_ctl,
12796+ .alloc = au_hfsn_alloc,
12797+ .free = au_hfsn_free,
1facf9fc 12798+
027c5e7a
AM
12799+ .reset_br = au_hfsn_reset_br,
12800+ .fin_br = au_hfsn_fin_br,
12801+ .init_br = au_hfsn_init_br
4a4d8108 12802+};
7f207e10
AM
12803diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
12804--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
12805+++ linux/fs/aufs/hfsplus.c 2012-02-13 21:54:56.969771692 +0100
12806@@ -0,0 +1,57 @@
4a4d8108 12807+/*
f6c5ef8b 12808+ * Copyright (C) 2010-2012 Junjiro R. Okajima
4a4d8108
AM
12809+ *
12810+ * This program, aufs is free software; you can redistribute it and/or modify
12811+ * it under the terms of the GNU General Public License as published by
12812+ * the Free Software Foundation; either version 2 of the License, or
12813+ * (at your option) any later version.
12814+ *
12815+ * This program is distributed in the hope that it will be useful,
12816+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12817+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12818+ * GNU General Public License for more details.
12819+ *
12820+ * You should have received a copy of the GNU General Public License
12821+ * along with this program; if not, write to the Free Software
12822+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12823+ */
1facf9fc 12824+
4a4d8108
AM
12825+/*
12826+ * special support for filesystems which acquire an inode mutex
12827+ * at the final close of a file, e.g. hfsplus.
12828+ *
12829+ * This trick is very simple and stupid, just to open the file before the really
12830+ * necessary open to tell hfsplus that this is not the final closing.
12831+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
12832+ * and au_h_open_post() after releasing it.
12833+ */
1facf9fc 12834+
4a4d8108 12835+#include "aufs.h"
1facf9fc 12836+
4a4d8108
AM
12837+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
12838+{
12839+ struct file *h_file;
12840+ struct dentry *h_dentry;
1facf9fc 12841+
4a4d8108
AM
12842+ h_dentry = au_h_dptr(dentry, bindex);
12843+ AuDebugOn(!h_dentry);
12844+ AuDebugOn(!h_dentry->d_inode);
12845+ IMustLock(h_dentry->d_inode);
12846+
12847+ h_file = NULL;
12848+ if (au_test_hfsplus(h_dentry->d_sb)
12849+ && S_ISREG(h_dentry->d_inode->i_mode))
12850+ h_file = au_h_open(dentry, bindex,
12851+ O_RDONLY | O_NOATIME | O_LARGEFILE,
12852+ /*file*/NULL);
12853+ return h_file;
1facf9fc 12854+}
12855+
4a4d8108
AM
12856+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12857+ struct file *h_file)
12858+{
12859+ if (h_file) {
12860+ fput(h_file);
12861+ au_sbr_put(dentry->d_sb, bindex);
12862+ }
12863+}
7f207e10
AM
12864diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
12865--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 12866+++ linux/fs/aufs/hnotify.c 2012-02-13 21:54:56.969771692 +0100
53392da6 12867@@ -0,0 +1,712 @@
e49829fe 12868+/*
f6c5ef8b 12869+ * Copyright (C) 2005-2012 Junjiro R. Okajima
e49829fe
JR
12870+ *
12871+ * This program, aufs is free software; you can redistribute it and/or modify
12872+ * it under the terms of the GNU General Public License as published by
12873+ * the Free Software Foundation; either version 2 of the License, or
12874+ * (at your option) any later version.
12875+ *
12876+ * This program is distributed in the hope that it will be useful,
12877+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12878+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12879+ * GNU General Public License for more details.
12880+ *
12881+ * You should have received a copy of the GNU General Public License
12882+ * along with this program; if not, write to the Free Software
12883+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12884+ */
12885+
12886+/*
7f207e10 12887+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
12888+ */
12889+
12890+#include "aufs.h"
12891+
027c5e7a 12892+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
12893+{
12894+ int err;
7f207e10 12895+ struct au_hnotify *hn;
1facf9fc 12896+
4a4d8108
AM
12897+ err = -ENOMEM;
12898+ hn = au_cache_alloc_hnotify();
12899+ if (hn) {
12900+ hn->hn_aufs_inode = inode;
027c5e7a
AM
12901+ hinode->hi_notify = hn;
12902+ err = au_hnotify_op.alloc(hinode);
12903+ AuTraceErr(err);
12904+ if (unlikely(err)) {
12905+ hinode->hi_notify = NULL;
4a4d8108
AM
12906+ au_cache_free_hnotify(hn);
12907+ /*
12908+ * The upper dir was removed by udba, but the same named
12909+ * dir left. In this case, aufs assigns a new inode
12910+ * number and sets the monitor again.
12911+ * For the lower dir, the old monitor is still left.
12912+ */
12913+ if (err == -EEXIST)
12914+ err = 0;
12915+ }
1308ab2a 12916+ }
1308ab2a 12917+
027c5e7a 12918+ AuTraceErr(err);
1308ab2a 12919+ return err;
dece6358 12920+}
1facf9fc 12921+
4a4d8108 12922+void au_hn_free(struct au_hinode *hinode)
dece6358 12923+{
4a4d8108 12924+ struct au_hnotify *hn;
1facf9fc 12925+
4a4d8108
AM
12926+ hn = hinode->hi_notify;
12927+ if (hn) {
027c5e7a 12928+ au_hnotify_op.free(hinode);
4a4d8108
AM
12929+ au_cache_free_hnotify(hn);
12930+ hinode->hi_notify = NULL;
12931+ }
12932+}
dece6358 12933+
4a4d8108 12934+/* ---------------------------------------------------------------------- */
dece6358 12935+
4a4d8108
AM
12936+void au_hn_ctl(struct au_hinode *hinode, int do_set)
12937+{
12938+ if (hinode->hi_notify)
12939+ au_hnotify_op.ctl(hinode, do_set);
12940+}
12941+
12942+void au_hn_reset(struct inode *inode, unsigned int flags)
12943+{
12944+ aufs_bindex_t bindex, bend;
12945+ struct inode *hi;
12946+ struct dentry *iwhdentry;
1facf9fc 12947+
1308ab2a 12948+ bend = au_ibend(inode);
4a4d8108
AM
12949+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
12950+ hi = au_h_iptr(inode, bindex);
12951+ if (!hi)
12952+ continue;
1308ab2a 12953+
4a4d8108
AM
12954+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
12955+ iwhdentry = au_hi_wh(inode, bindex);
12956+ if (iwhdentry)
12957+ dget(iwhdentry);
12958+ au_igrab(hi);
12959+ au_set_h_iptr(inode, bindex, NULL, 0);
12960+ au_set_h_iptr(inode, bindex, au_igrab(hi),
12961+ flags & ~AuHi_XINO);
12962+ iput(hi);
12963+ dput(iwhdentry);
12964+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 12965+ }
1facf9fc 12966+}
12967+
1308ab2a 12968+/* ---------------------------------------------------------------------- */
1facf9fc 12969+
4a4d8108 12970+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 12971+{
4a4d8108
AM
12972+ int err;
12973+ aufs_bindex_t bindex, bend, bfound, bstart;
12974+ struct inode *h_i;
1facf9fc 12975+
4a4d8108
AM
12976+ err = 0;
12977+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
12978+ pr_warning("branch root dir was changed\n");
12979+ goto out;
12980+ }
1facf9fc 12981+
4a4d8108
AM
12982+ bfound = -1;
12983+ bend = au_ibend(inode);
12984+ bstart = au_ibstart(inode);
12985+#if 0 /* reserved for future use */
12986+ if (bindex == bend) {
12987+ /* keep this ino in rename case */
12988+ goto out;
12989+ }
12990+#endif
12991+ for (bindex = bstart; bindex <= bend; bindex++)
12992+ if (au_h_iptr(inode, bindex) == h_inode) {
12993+ bfound = bindex;
12994+ break;
12995+ }
12996+ if (bfound < 0)
1308ab2a 12997+ goto out;
1facf9fc 12998+
4a4d8108
AM
12999+ for (bindex = bstart; bindex <= bend; bindex++) {
13000+ h_i = au_h_iptr(inode, bindex);
13001+ if (!h_i)
13002+ continue;
1facf9fc 13003+
4a4d8108
AM
13004+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
13005+ /* ignore this error */
13006+ /* bad action? */
1facf9fc 13007+ }
1facf9fc 13008+
4a4d8108 13009+ /* children inode number will be broken */
1facf9fc 13010+
4f0767ce 13011+out:
4a4d8108
AM
13012+ AuTraceErr(err);
13013+ return err;
1facf9fc 13014+}
13015+
4a4d8108 13016+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 13017+{
4a4d8108
AM
13018+ int err, i, j, ndentry;
13019+ struct au_dcsub_pages dpages;
13020+ struct au_dpage *dpage;
13021+ struct dentry **dentries;
1facf9fc 13022+
4a4d8108
AM
13023+ err = au_dpages_init(&dpages, GFP_NOFS);
13024+ if (unlikely(err))
13025+ goto out;
13026+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
13027+ if (unlikely(err))
13028+ goto out_dpages;
1facf9fc 13029+
4a4d8108
AM
13030+ for (i = 0; i < dpages.ndpage; i++) {
13031+ dpage = dpages.dpages + i;
13032+ dentries = dpage->dentries;
13033+ ndentry = dpage->ndentry;
13034+ for (j = 0; j < ndentry; j++) {
13035+ struct dentry *d;
13036+
13037+ d = dentries[j];
13038+ if (IS_ROOT(d))
13039+ continue;
13040+
4a4d8108
AM
13041+ au_digen_dec(d);
13042+ if (d->d_inode)
13043+ /* todo: reset children xino?
13044+ cached children only? */
13045+ au_iigen_dec(d->d_inode);
1308ab2a 13046+ }
dece6358 13047+ }
1facf9fc 13048+
4f0767ce 13049+out_dpages:
4a4d8108 13050+ au_dpages_free(&dpages);
dece6358 13051+
027c5e7a 13052+#if 0
4a4d8108
AM
13053+ /* discard children */
13054+ dentry_unhash(dentry);
13055+ dput(dentry);
027c5e7a 13056+#endif
4f0767ce 13057+out:
dece6358
AM
13058+ return err;
13059+}
13060+
1308ab2a 13061+/*
4a4d8108 13062+ * return 0 if processed.
1308ab2a 13063+ */
4a4d8108
AM
13064+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
13065+ const unsigned int isdir)
dece6358 13066+{
1308ab2a 13067+ int err;
4a4d8108
AM
13068+ struct dentry *d;
13069+ struct qstr *dname;
1facf9fc 13070+
4a4d8108
AM
13071+ err = 1;
13072+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13073+ pr_warning("branch root dir was changed\n");
13074+ err = 0;
13075+ goto out;
13076+ }
dece6358 13077+
4a4d8108
AM
13078+ if (!isdir) {
13079+ AuDebugOn(!name);
13080+ au_iigen_dec(inode);
027c5e7a 13081+ spin_lock(&inode->i_lock);
4a4d8108 13082+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
027c5e7a 13083+ spin_lock(&d->d_lock);
4a4d8108
AM
13084+ dname = &d->d_name;
13085+ if (dname->len != nlen
027c5e7a
AM
13086+ && memcmp(dname->name, name, nlen)) {
13087+ spin_unlock(&d->d_lock);
4a4d8108 13088+ continue;
027c5e7a 13089+ }
4a4d8108 13090+ err = 0;
4a4d8108
AM
13091+ au_digen_dec(d);
13092+ spin_unlock(&d->d_lock);
13093+ break;
1facf9fc 13094+ }
027c5e7a 13095+ spin_unlock(&inode->i_lock);
1308ab2a 13096+ } else {
027c5e7a 13097+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13098+ d = d_find_alias(inode);
13099+ if (!d) {
13100+ au_iigen_dec(inode);
13101+ goto out;
13102+ }
1facf9fc 13103+
027c5e7a 13104+ spin_lock(&d->d_lock);
4a4d8108 13105+ dname = &d->d_name;
027c5e7a
AM
13106+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
13107+ spin_unlock(&d->d_lock);
4a4d8108 13108+ err = hn_gen_tree(d);
027c5e7a
AM
13109+ spin_lock(&d->d_lock);
13110+ }
13111+ spin_unlock(&d->d_lock);
4a4d8108
AM
13112+ dput(d);
13113+ }
1facf9fc 13114+
4f0767ce 13115+out:
4a4d8108 13116+ AuTraceErr(err);
1308ab2a 13117+ return err;
13118+}
dece6358 13119+
4a4d8108 13120+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 13121+{
4a4d8108
AM
13122+ int err;
13123+ struct inode *inode;
1facf9fc 13124+
4a4d8108
AM
13125+ inode = dentry->d_inode;
13126+ if (IS_ROOT(dentry)
13127+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
13128+ ) {
13129+ pr_warning("branch root dir was changed\n");
13130+ return 0;
13131+ }
1308ab2a 13132+
4a4d8108
AM
13133+ err = 0;
13134+ if (!isdir) {
4a4d8108
AM
13135+ au_digen_dec(dentry);
13136+ if (inode)
13137+ au_iigen_dec(inode);
13138+ } else {
027c5e7a 13139+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13140+ if (inode)
13141+ err = hn_gen_tree(dentry);
13142+ }
13143+
13144+ AuTraceErr(err);
13145+ return err;
1facf9fc 13146+}
13147+
4a4d8108 13148+/* ---------------------------------------------------------------------- */
1facf9fc 13149+
4a4d8108
AM
13150+/* hnotify job flags */
13151+#define AuHnJob_XINO0 1
13152+#define AuHnJob_GEN (1 << 1)
13153+#define AuHnJob_DIRENT (1 << 2)
13154+#define AuHnJob_ISDIR (1 << 3)
13155+#define AuHnJob_TRYXINO0 (1 << 4)
13156+#define AuHnJob_MNTPNT (1 << 5)
13157+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
13158+#define au_fset_hnjob(flags, name) \
13159+ do { (flags) |= AuHnJob_##name; } while (0)
13160+#define au_fclr_hnjob(flags, name) \
13161+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 13162+
4a4d8108
AM
13163+enum {
13164+ AuHn_CHILD,
13165+ AuHn_PARENT,
13166+ AuHnLast
13167+};
1facf9fc 13168+
4a4d8108
AM
13169+struct au_hnotify_args {
13170+ struct inode *h_dir, *dir, *h_child_inode;
13171+ u32 mask;
13172+ unsigned int flags[AuHnLast];
13173+ unsigned int h_child_nlen;
13174+ char h_child_name[];
13175+};
1facf9fc 13176+
4a4d8108
AM
13177+struct hn_job_args {
13178+ unsigned int flags;
13179+ struct inode *inode, *h_inode, *dir, *h_dir;
13180+ struct dentry *dentry;
13181+ char *h_name;
13182+ int h_nlen;
13183+};
1308ab2a 13184+
4a4d8108
AM
13185+static int hn_job(struct hn_job_args *a)
13186+{
13187+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
1308ab2a 13188+
4a4d8108
AM
13189+ /* reset xino */
13190+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
13191+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 13192+
4a4d8108
AM
13193+ if (au_ftest_hnjob(a->flags, TRYXINO0)
13194+ && a->inode
13195+ && a->h_inode) {
13196+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
13197+ if (!a->h_inode->i_nlink)
13198+ hn_xino(a->inode, a->h_inode); /* ignore this error */
13199+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 13200+ }
1facf9fc 13201+
4a4d8108
AM
13202+ /* make the generation obsolete */
13203+ if (au_ftest_hnjob(a->flags, GEN)) {
13204+ int err = -1;
13205+ if (a->inode)
13206+ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
13207+ isdir);
13208+ if (err && a->dentry)
13209+ hn_gen_by_name(a->dentry, isdir);
13210+ /* ignore this error */
1facf9fc 13211+ }
1facf9fc 13212+
4a4d8108
AM
13213+ /* make dir entries obsolete */
13214+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
13215+ struct au_vdir *vdir;
1facf9fc 13216+
4a4d8108
AM
13217+ vdir = au_ivdir(a->inode);
13218+ if (vdir)
13219+ vdir->vd_jiffy = 0;
13220+ /* IMustLock(a->inode); */
13221+ /* a->inode->i_version++; */
13222+ }
1facf9fc 13223+
4a4d8108
AM
13224+ /* can do nothing but warn */
13225+ if (au_ftest_hnjob(a->flags, MNTPNT)
13226+ && a->dentry
13227+ && d_mountpoint(a->dentry))
13228+ pr_warning("mount-point %.*s is removed or renamed\n",
13229+ AuDLNPair(a->dentry));
1facf9fc 13230+
4a4d8108 13231+ return 0;
1308ab2a 13232+}
1facf9fc 13233+
1308ab2a 13234+/* ---------------------------------------------------------------------- */
1facf9fc 13235+
4a4d8108
AM
13236+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
13237+ struct inode *dir)
1308ab2a 13238+{
4a4d8108
AM
13239+ struct dentry *dentry, *d, *parent;
13240+ struct qstr *dname;
1308ab2a 13241+
4a4d8108
AM
13242+ parent = d_find_alias(dir);
13243+ if (!parent)
13244+ return NULL;
1308ab2a 13245+
4a4d8108 13246+ dentry = NULL;
027c5e7a 13247+ spin_lock(&parent->d_lock);
4a4d8108
AM
13248+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
13249+ /* AuDbg("%.*s\n", AuDLNPair(d)); */
027c5e7a 13250+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
13251+ dname = &d->d_name;
13252+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
13253+ goto cont_unlock;
13254+ if (au_di(d))
13255+ au_digen_dec(d);
13256+ else
13257+ goto cont_unlock;
13258+ if (d->d_count) {
13259+ dentry = dget_dlock(d);
4a4d8108 13260+ spin_unlock(&d->d_lock);
027c5e7a 13261+ break;
dece6358 13262+ }
1facf9fc 13263+
027c5e7a
AM
13264+ cont_unlock:
13265+ spin_unlock(&d->d_lock);
1308ab2a 13266+ }
027c5e7a 13267+ spin_unlock(&parent->d_lock);
4a4d8108 13268+ dput(parent);
1facf9fc 13269+
4a4d8108
AM
13270+ if (dentry)
13271+ di_write_lock_child(dentry);
1308ab2a 13272+
4a4d8108
AM
13273+ return dentry;
13274+}
dece6358 13275+
4a4d8108
AM
13276+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
13277+ aufs_bindex_t bindex, ino_t h_ino)
13278+{
13279+ struct inode *inode;
13280+ ino_t ino;
13281+ int err;
13282+
13283+ inode = NULL;
13284+ err = au_xino_read(sb, bindex, h_ino, &ino);
13285+ if (!err && ino)
13286+ inode = ilookup(sb, ino);
13287+ if (!inode)
13288+ goto out;
13289+
13290+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13291+ pr_warning("wrong root branch\n");
13292+ iput(inode);
13293+ inode = NULL;
13294+ goto out;
1308ab2a 13295+ }
13296+
4a4d8108 13297+ ii_write_lock_child(inode);
1308ab2a 13298+
4f0767ce 13299+out:
4a4d8108 13300+ return inode;
dece6358
AM
13301+}
13302+
4a4d8108 13303+static void au_hn_bh(void *_args)
1facf9fc 13304+{
4a4d8108
AM
13305+ struct au_hnotify_args *a = _args;
13306+ struct super_block *sb;
13307+ aufs_bindex_t bindex, bend, bfound;
13308+ unsigned char xino, try_iput;
1facf9fc 13309+ int err;
1308ab2a 13310+ struct inode *inode;
4a4d8108
AM
13311+ ino_t h_ino;
13312+ struct hn_job_args args;
13313+ struct dentry *dentry;
13314+ struct au_sbinfo *sbinfo;
1facf9fc 13315+
4a4d8108
AM
13316+ AuDebugOn(!_args);
13317+ AuDebugOn(!a->h_dir);
13318+ AuDebugOn(!a->dir);
13319+ AuDebugOn(!a->mask);
13320+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
13321+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
13322+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 13323+
4a4d8108
AM
13324+ inode = NULL;
13325+ dentry = NULL;
13326+ /*
13327+ * do not lock a->dir->i_mutex here
13328+ * because d_revalidate() may cause a deadlock.
13329+ */
13330+ sb = a->dir->i_sb;
13331+ AuDebugOn(!sb);
13332+ sbinfo = au_sbi(sb);
13333+ AuDebugOn(!sbinfo);
7f207e10 13334+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 13335+
4a4d8108
AM
13336+ ii_read_lock_parent(a->dir);
13337+ bfound = -1;
13338+ bend = au_ibend(a->dir);
13339+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
13340+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
13341+ bfound = bindex;
13342+ break;
13343+ }
13344+ ii_read_unlock(a->dir);
13345+ if (unlikely(bfound < 0))
13346+ goto out;
1facf9fc 13347+
4a4d8108
AM
13348+ xino = !!au_opt_test(au_mntflags(sb), XINO);
13349+ h_ino = 0;
13350+ if (a->h_child_inode)
13351+ h_ino = a->h_child_inode->i_ino;
1facf9fc 13352+
4a4d8108
AM
13353+ if (a->h_child_nlen
13354+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
13355+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
13356+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
13357+ a->dir);
13358+ try_iput = 0;
13359+ if (dentry)
13360+ inode = dentry->d_inode;
13361+ if (xino && !inode && h_ino
13362+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
13363+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
13364+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
13365+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
13366+ try_iput = 1;
13367+ }
1facf9fc 13368+
4a4d8108
AM
13369+ args.flags = a->flags[AuHn_CHILD];
13370+ args.dentry = dentry;
13371+ args.inode = inode;
13372+ args.h_inode = a->h_child_inode;
13373+ args.dir = a->dir;
13374+ args.h_dir = a->h_dir;
13375+ args.h_name = a->h_child_name;
13376+ args.h_nlen = a->h_child_nlen;
13377+ err = hn_job(&args);
13378+ if (dentry) {
027c5e7a 13379+ if (au_di(dentry))
4a4d8108
AM
13380+ di_write_unlock(dentry);
13381+ dput(dentry);
13382+ }
13383+ if (inode && try_iput) {
13384+ ii_write_unlock(inode);
13385+ iput(inode);
13386+ }
1facf9fc 13387+
4a4d8108
AM
13388+ ii_write_lock_parent(a->dir);
13389+ args.flags = a->flags[AuHn_PARENT];
13390+ args.dentry = NULL;
13391+ args.inode = a->dir;
13392+ args.h_inode = a->h_dir;
13393+ args.dir = NULL;
13394+ args.h_dir = NULL;
13395+ args.h_name = NULL;
13396+ args.h_nlen = 0;
13397+ err = hn_job(&args);
13398+ ii_write_unlock(a->dir);
1facf9fc 13399+
4f0767ce 13400+out:
4a4d8108
AM
13401+ iput(a->h_child_inode);
13402+ iput(a->h_dir);
13403+ iput(a->dir);
027c5e7a
AM
13404+ si_write_unlock(sb);
13405+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 13406+ kfree(a);
dece6358 13407+}
1facf9fc 13408+
4a4d8108
AM
13409+/* ---------------------------------------------------------------------- */
13410+
13411+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
13412+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 13413+{
4a4d8108 13414+ int err, len;
53392da6 13415+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
13416+ unsigned char isdir, isroot, wh;
13417+ struct inode *dir;
13418+ struct au_hnotify_args *args;
13419+ char *p, *h_child_name;
dece6358 13420+
1308ab2a 13421+ err = 0;
4a4d8108
AM
13422+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
13423+ dir = igrab(hnotify->hn_aufs_inode);
13424+ if (!dir)
13425+ goto out;
1facf9fc 13426+
4a4d8108
AM
13427+ isroot = (dir->i_ino == AUFS_ROOT_INO);
13428+ wh = 0;
13429+ h_child_name = (void *)h_child_qstr->name;
13430+ len = h_child_qstr->len;
13431+ if (h_child_name) {
13432+ if (len > AUFS_WH_PFX_LEN
13433+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
13434+ h_child_name += AUFS_WH_PFX_LEN;
13435+ len -= AUFS_WH_PFX_LEN;
13436+ wh = 1;
13437+ }
1facf9fc 13438+ }
dece6358 13439+
4a4d8108
AM
13440+ isdir = 0;
13441+ if (h_child_inode)
13442+ isdir = !!S_ISDIR(h_child_inode->i_mode);
13443+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
13444+ flags[AuHn_CHILD] = 0;
13445+ if (isdir)
13446+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
13447+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
13448+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
13449+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
13450+ case FS_MOVED_FROM:
13451+ case FS_MOVED_TO:
13452+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
13453+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13454+ /*FALLTHROUGH*/
13455+ case FS_CREATE:
13456+ AuDebugOn(!h_child_name || !h_child_inode);
13457+ break;
1facf9fc 13458+
4a4d8108
AM
13459+ case FS_DELETE:
13460+ /*
13461+ * aufs will never be able to get this child inode.
13462+ * revalidation should be done in d_revalidate()
13463+ * by checking i_nlink, i_generation or d_unhashed().
13464+ */
13465+ AuDebugOn(!h_child_name);
13466+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
13467+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13468+ break;
dece6358 13469+
4a4d8108
AM
13470+ default:
13471+ AuDebugOn(1);
13472+ }
1308ab2a 13473+
4a4d8108
AM
13474+ if (wh)
13475+ h_child_inode = NULL;
1308ab2a 13476+
4a4d8108
AM
13477+ err = -ENOMEM;
13478+ /* on success, iput() and kfree() will be called in au_hn_bh() */
4a4d8108 13479+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
13480+ if (unlikely(!args)) {
13481+ AuErr1("no memory\n");
13482+ iput(dir);
13483+ goto out;
13484+ }
13485+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
13486+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
13487+ args->mask = mask;
13488+ args->dir = dir;
13489+ args->h_dir = igrab(h_dir);
13490+ if (h_child_inode)
13491+ h_child_inode = igrab(h_child_inode); /* can be NULL */
13492+ args->h_child_inode = h_child_inode;
13493+ args->h_child_nlen = len;
13494+ if (len) {
13495+ p = (void *)args;
13496+ p += sizeof(*args);
13497+ memcpy(p, h_child_name, len);
13498+ p[len] = 0;
1308ab2a 13499+ }
1308ab2a 13500+
53392da6
AM
13501+ f = 0;
13502+ if (!dir->i_nlink)
13503+ f = AuWkq_NEST;
13504+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
13505+ if (unlikely(err)) {
13506+ pr_err("wkq %d\n", err);
13507+ iput(args->h_child_inode);
13508+ iput(args->h_dir);
13509+ iput(args->dir);
13510+ kfree(args);
1facf9fc 13511+ }
1facf9fc 13512+
4a4d8108 13513+out:
1facf9fc 13514+ return err;
13515+}
13516+
027c5e7a
AM
13517+/* ---------------------------------------------------------------------- */
13518+
13519+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
13520+{
13521+ int err;
13522+
13523+ AuDebugOn(!(udba & AuOptMask_UDBA));
13524+
13525+ err = 0;
13526+ if (au_hnotify_op.reset_br)
13527+ err = au_hnotify_op.reset_br(udba, br, perm);
13528+
13529+ return err;
13530+}
13531+
13532+int au_hnotify_init_br(struct au_branch *br, int perm)
13533+{
13534+ int err;
13535+
13536+ err = 0;
13537+ if (au_hnotify_op.init_br)
13538+ err = au_hnotify_op.init_br(br, perm);
13539+
13540+ return err;
13541+}
13542+
13543+void au_hnotify_fin_br(struct au_branch *br)
13544+{
13545+ if (au_hnotify_op.fin_br)
13546+ au_hnotify_op.fin_br(br);
13547+}
13548+
4a4d8108
AM
13549+static void au_hn_destroy_cache(void)
13550+{
13551+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
13552+ au_cachep[AuCache_HNOTIFY] = NULL;
13553+}
1308ab2a 13554+
4a4d8108 13555+int __init au_hnotify_init(void)
1facf9fc 13556+{
1308ab2a 13557+ int err;
1308ab2a 13558+
4a4d8108
AM
13559+ err = -ENOMEM;
13560+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
13561+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
13562+ err = 0;
13563+ if (au_hnotify_op.init)
13564+ err = au_hnotify_op.init();
4a4d8108
AM
13565+ if (unlikely(err))
13566+ au_hn_destroy_cache();
1308ab2a 13567+ }
1308ab2a 13568+ AuTraceErr(err);
4a4d8108 13569+ return err;
1308ab2a 13570+}
13571+
4a4d8108 13572+void au_hnotify_fin(void)
1308ab2a 13573+{
027c5e7a
AM
13574+ if (au_hnotify_op.fin)
13575+ au_hnotify_op.fin();
4a4d8108
AM
13576+ /* cf. au_cache_fin() */
13577+ if (au_cachep[AuCache_HNOTIFY])
13578+ au_hn_destroy_cache();
dece6358 13579+}
7f207e10
AM
13580diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
13581--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 13582+++ linux/fs/aufs/iinfo.c 2012-02-13 21:54:56.969771692 +0100
027c5e7a 13583@@ -0,0 +1,264 @@
dece6358 13584+/*
f6c5ef8b 13585+ * Copyright (C) 2005-2012 Junjiro R. Okajima
dece6358
AM
13586+ *
13587+ * This program, aufs is free software; you can redistribute it and/or modify
13588+ * it under the terms of the GNU General Public License as published by
13589+ * the Free Software Foundation; either version 2 of the License, or
13590+ * (at your option) any later version.
13591+ *
13592+ * This program is distributed in the hope that it will be useful,
13593+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13594+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13595+ * GNU General Public License for more details.
13596+ *
13597+ * You should have received a copy of the GNU General Public License
13598+ * along with this program; if not, write to the Free Software
13599+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13600+ */
1facf9fc 13601+
dece6358 13602+/*
4a4d8108 13603+ * inode private data
dece6358 13604+ */
1facf9fc 13605+
1308ab2a 13606+#include "aufs.h"
1facf9fc 13607+
4a4d8108 13608+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 13609+{
4a4d8108 13610+ struct inode *h_inode;
1facf9fc 13611+
4a4d8108 13612+ IiMustAnyLock(inode);
1facf9fc 13613+
4a4d8108
AM
13614+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
13615+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13616+ return h_inode;
13617+}
1facf9fc 13618+
4a4d8108
AM
13619+/* todo: hard/soft set? */
13620+void au_hiput(struct au_hinode *hinode)
13621+{
13622+ au_hn_free(hinode);
13623+ dput(hinode->hi_whdentry);
13624+ iput(hinode->hi_inode);
13625+}
1facf9fc 13626+
4a4d8108
AM
13627+unsigned int au_hi_flags(struct inode *inode, int isdir)
13628+{
13629+ unsigned int flags;
13630+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 13631+
4a4d8108
AM
13632+ flags = 0;
13633+ if (au_opt_test(mnt_flags, XINO))
13634+ au_fset_hi(flags, XINO);
13635+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
13636+ au_fset_hi(flags, HNOTIFY);
13637+ return flags;
1facf9fc 13638+}
13639+
4a4d8108
AM
13640+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
13641+ struct inode *h_inode, unsigned int flags)
1308ab2a 13642+{
4a4d8108
AM
13643+ struct au_hinode *hinode;
13644+ struct inode *hi;
13645+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 13646+
4a4d8108 13647+ IiMustWriteLock(inode);
dece6358 13648+
4a4d8108
AM
13649+ hinode = iinfo->ii_hinode + bindex;
13650+ hi = hinode->hi_inode;
13651+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13652+
13653+ if (hi)
13654+ au_hiput(hinode);
13655+ hinode->hi_inode = h_inode;
13656+ if (h_inode) {
13657+ int err;
13658+ struct super_block *sb = inode->i_sb;
13659+ struct au_branch *br;
13660+
027c5e7a
AM
13661+ AuDebugOn(inode->i_mode
13662+ && (h_inode->i_mode & S_IFMT)
13663+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
13664+ if (bindex == iinfo->ii_bstart)
13665+ au_cpup_igen(inode, h_inode);
13666+ br = au_sbr(sb, bindex);
13667+ hinode->hi_id = br->br_id;
13668+ if (au_ftest_hi(flags, XINO)) {
13669+ err = au_xino_write(sb, bindex, h_inode->i_ino,
13670+ inode->i_ino);
13671+ if (unlikely(err))
13672+ AuIOErr1("failed au_xino_write() %d\n", err);
13673+ }
13674+
13675+ if (au_ftest_hi(flags, HNOTIFY)
13676+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 13677+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
13678+ if (unlikely(err))
13679+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 13680+ }
13681+ }
4a4d8108 13682+}
dece6358 13683+
4a4d8108
AM
13684+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
13685+ struct dentry *h_wh)
13686+{
13687+ struct au_hinode *hinode;
dece6358 13688+
4a4d8108
AM
13689+ IiMustWriteLock(inode);
13690+
13691+ hinode = au_ii(inode)->ii_hinode + bindex;
13692+ AuDebugOn(hinode->hi_whdentry);
13693+ hinode->hi_whdentry = h_wh;
1facf9fc 13694+}
13695+
4a4d8108 13696+void au_update_iigen(struct inode *inode)
1308ab2a 13697+{
4a4d8108
AM
13698+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
13699+ /* smp_mb(); */ /* atomic_set */
13700+}
1facf9fc 13701+
4a4d8108
AM
13702+/* it may be called at remount time, too */
13703+void au_update_ibrange(struct inode *inode, int do_put_zero)
13704+{
13705+ struct au_iinfo *iinfo;
027c5e7a 13706+ aufs_bindex_t bindex, bend;
1facf9fc 13707+
4a4d8108 13708+ iinfo = au_ii(inode);
027c5e7a 13709+ if (!iinfo)
4a4d8108 13710+ return;
1facf9fc 13711+
4a4d8108 13712+ IiMustWriteLock(inode);
1facf9fc 13713+
027c5e7a 13714+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
13715+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13716+ bindex++) {
13717+ struct inode *h_i;
1facf9fc 13718+
4a4d8108 13719+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
027c5e7a
AM
13720+ if (h_i && !h_i->i_nlink)
13721+ au_set_h_iptr(inode, bindex, NULL, 0);
13722+ }
4a4d8108
AM
13723+ }
13724+
027c5e7a
AM
13725+ iinfo->ii_bstart = -1;
13726+ iinfo->ii_bend = -1;
13727+ bend = au_sbend(inode->i_sb);
13728+ for (bindex = 0; bindex <= bend; bindex++)
13729+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13730+ iinfo->ii_bstart = bindex;
4a4d8108 13731+ break;
027c5e7a
AM
13732+ }
13733+ if (iinfo->ii_bstart >= 0)
13734+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
13735+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13736+ iinfo->ii_bend = bindex;
13737+ break;
13738+ }
13739+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 13740+}
1facf9fc 13741+
dece6358 13742+/* ---------------------------------------------------------------------- */
1facf9fc 13743+
4a4d8108 13744+void au_icntnr_init_once(void *_c)
dece6358 13745+{
4a4d8108
AM
13746+ struct au_icntnr *c = _c;
13747+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 13748+ static struct lock_class_key aufs_ii;
1facf9fc 13749+
4a4d8108 13750+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 13751+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
13752+ inode_init_once(&c->vfs_inode);
13753+}
1facf9fc 13754+
4a4d8108
AM
13755+int au_iinfo_init(struct inode *inode)
13756+{
13757+ struct au_iinfo *iinfo;
13758+ struct super_block *sb;
13759+ int nbr, i;
1facf9fc 13760+
4a4d8108
AM
13761+ sb = inode->i_sb;
13762+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
13763+ nbr = au_sbend(sb) + 1;
13764+ if (unlikely(nbr <= 0))
13765+ nbr = 1;
13766+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
13767+ if (iinfo->ii_hinode) {
7f207e10 13768+ au_ninodes_inc(sb);
4a4d8108
AM
13769+ for (i = 0; i < nbr; i++)
13770+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 13771+
4a4d8108
AM
13772+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
13773+ /* smp_mb(); */ /* atomic_set */
13774+ iinfo->ii_bstart = -1;
13775+ iinfo->ii_bend = -1;
13776+ iinfo->ii_vdir = NULL;
13777+ return 0;
1308ab2a 13778+ }
4a4d8108
AM
13779+ return -ENOMEM;
13780+}
1facf9fc 13781+
4a4d8108
AM
13782+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
13783+{
13784+ int err, sz;
13785+ struct au_hinode *hip;
1facf9fc 13786+
4a4d8108
AM
13787+ AuRwMustWriteLock(&iinfo->ii_rwsem);
13788+
13789+ err = -ENOMEM;
13790+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
13791+ if (!sz)
13792+ sz = sizeof(*hip);
13793+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
13794+ if (hip) {
13795+ iinfo->ii_hinode = hip;
13796+ err = 0;
1308ab2a 13797+ }
4a4d8108 13798+
1308ab2a 13799+ return err;
1facf9fc 13800+}
13801+
4a4d8108 13802+void au_iinfo_fin(struct inode *inode)
1facf9fc 13803+{
4a4d8108
AM
13804+ struct au_iinfo *iinfo;
13805+ struct au_hinode *hi;
13806+ struct super_block *sb;
b752ccd1
AM
13807+ aufs_bindex_t bindex, bend;
13808+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 13809+
4a4d8108
AM
13810+ iinfo = au_ii(inode);
13811+ /* bad_inode case */
13812+ if (!iinfo)
13813+ return;
1308ab2a 13814+
b752ccd1 13815+ sb = inode->i_sb;
7f207e10 13816+ au_ninodes_dec(sb);
b752ccd1
AM
13817+ if (si_pid_test(sb))
13818+ au_xino_delete_inode(inode, unlinked);
13819+ else {
13820+ /*
13821+ * it is safe to hide the dependency between sbinfo and
13822+ * sb->s_umount.
13823+ */
13824+ lockdep_off();
13825+ si_noflush_read_lock(sb);
13826+ au_xino_delete_inode(inode, unlinked);
13827+ si_read_unlock(sb);
13828+ lockdep_on();
13829+ }
13830+
4a4d8108
AM
13831+ if (iinfo->ii_vdir)
13832+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 13833+
b752ccd1
AM
13834+ bindex = iinfo->ii_bstart;
13835+ if (bindex >= 0) {
13836+ hi = iinfo->ii_hinode + bindex;
4a4d8108 13837+ bend = iinfo->ii_bend;
b752ccd1
AM
13838+ while (bindex++ <= bend) {
13839+ if (hi->hi_inode)
4a4d8108 13840+ au_hiput(hi);
4a4d8108
AM
13841+ hi++;
13842+ }
13843+ }
4a4d8108 13844+ kfree(iinfo->ii_hinode);
027c5e7a 13845+ iinfo->ii_hinode = NULL;
4a4d8108 13846+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 13847+}
7f207e10
AM
13848diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
13849--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 13850+++ linux/fs/aufs/inode.c 2012-02-13 21:54:56.969771692 +0100
027c5e7a 13851@@ -0,0 +1,471 @@
4a4d8108 13852+/*
f6c5ef8b 13853+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
13854+ *
13855+ * This program, aufs is free software; you can redistribute it and/or modify
13856+ * it under the terms of the GNU General Public License as published by
13857+ * the Free Software Foundation; either version 2 of the License, or
13858+ * (at your option) any later version.
13859+ *
13860+ * This program is distributed in the hope that it will be useful,
13861+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13862+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13863+ * GNU General Public License for more details.
13864+ *
13865+ * You should have received a copy of the GNU General Public License
13866+ * along with this program; if not, write to the Free Software
13867+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13868+ */
1facf9fc 13869+
4a4d8108
AM
13870+/*
13871+ * inode functions
13872+ */
1facf9fc 13873+
4a4d8108 13874+#include "aufs.h"
1308ab2a 13875+
4a4d8108
AM
13876+struct inode *au_igrab(struct inode *inode)
13877+{
13878+ if (inode) {
13879+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 13880+ ihold(inode);
1facf9fc 13881+ }
4a4d8108
AM
13882+ return inode;
13883+}
1facf9fc 13884+
4a4d8108
AM
13885+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
13886+{
13887+ au_cpup_attr_all(inode, /*force*/0);
13888+ au_update_iigen(inode);
13889+ if (do_version)
13890+ inode->i_version++;
dece6358 13891+}
1facf9fc 13892+
027c5e7a 13893+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 13894+{
4a4d8108 13895+ int err, e;
027c5e7a 13896+ umode_t type;
4a4d8108 13897+ aufs_bindex_t bindex, new_bindex;
1308ab2a 13898+ struct super_block *sb;
4a4d8108 13899+ struct au_iinfo *iinfo;
027c5e7a 13900+ struct au_hinode *p, *q, tmp;
1facf9fc 13901+
4a4d8108 13902+ IiMustWriteLock(inode);
1facf9fc 13903+
027c5e7a 13904+ *update = 0;
4a4d8108 13905+ sb = inode->i_sb;
027c5e7a 13906+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
13907+ iinfo = au_ii(inode);
13908+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
13909+ if (unlikely(err))
1308ab2a 13910+ goto out;
1facf9fc 13911+
027c5e7a 13912+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 13913+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
13914+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13915+ bindex++, p++) {
13916+ if (!p->hi_inode)
13917+ continue;
1facf9fc 13918+
027c5e7a 13919+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
13920+ new_bindex = au_br_index(sb, p->hi_id);
13921+ if (new_bindex == bindex)
13922+ continue;
1facf9fc 13923+
4a4d8108 13924+ if (new_bindex < 0) {
027c5e7a 13925+ *update = 1;
4a4d8108
AM
13926+ au_hiput(p);
13927+ p->hi_inode = NULL;
13928+ continue;
1308ab2a 13929+ }
4a4d8108
AM
13930+
13931+ if (new_bindex < iinfo->ii_bstart)
13932+ iinfo->ii_bstart = new_bindex;
13933+ if (iinfo->ii_bend < new_bindex)
13934+ iinfo->ii_bend = new_bindex;
13935+ /* swap the two lower inodes, and loop again */
13936+ q = iinfo->ii_hinode + new_bindex;
13937+ tmp = *q;
13938+ *q = *p;
13939+ *p = tmp;
13940+ if (tmp.hi_inode) {
13941+ bindex--;
13942+ p--;
1308ab2a 13943+ }
13944+ }
4a4d8108
AM
13945+ au_update_ibrange(inode, /*do_put_zero*/0);
13946+ e = au_dy_irefresh(inode);
13947+ if (unlikely(e && !err))
13948+ err = e;
1facf9fc 13949+
4f0767ce 13950+out:
027c5e7a
AM
13951+ AuTraceErr(err);
13952+ return err;
13953+}
13954+
13955+int au_refresh_hinode_self(struct inode *inode)
13956+{
13957+ int err, update;
13958+
13959+ err = au_ii_refresh(inode, &update);
13960+ if (!err)
13961+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
13962+
13963+ AuTraceErr(err);
4a4d8108
AM
13964+ return err;
13965+}
1facf9fc 13966+
4a4d8108
AM
13967+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
13968+{
027c5e7a 13969+ int err, e, update;
4a4d8108 13970+ unsigned int flags;
027c5e7a 13971+ umode_t mode;
4a4d8108 13972+ aufs_bindex_t bindex, bend;
027c5e7a 13973+ unsigned char isdir;
4a4d8108
AM
13974+ struct au_hinode *p;
13975+ struct au_iinfo *iinfo;
1facf9fc 13976+
027c5e7a 13977+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
13978+ if (unlikely(err))
13979+ goto out;
13980+
13981+ update = 0;
13982+ iinfo = au_ii(inode);
13983+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
13984+ mode = (inode->i_mode & S_IFMT);
13985+ isdir = S_ISDIR(mode);
4a4d8108
AM
13986+ flags = au_hi_flags(inode, isdir);
13987+ bend = au_dbend(dentry);
13988+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
13989+ struct inode *h_i;
13990+ struct dentry *h_d;
13991+
13992+ h_d = au_h_dptr(dentry, bindex);
13993+ if (!h_d || !h_d->d_inode)
13994+ continue;
13995+
027c5e7a 13996+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
4a4d8108
AM
13997+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
13998+ h_i = au_h_iptr(inode, bindex);
13999+ if (h_i) {
14000+ if (h_i == h_d->d_inode)
14001+ continue;
14002+ err = -EIO;
14003+ break;
14004+ }
14005+ }
14006+ if (bindex < iinfo->ii_bstart)
14007+ iinfo->ii_bstart = bindex;
14008+ if (iinfo->ii_bend < bindex)
14009+ iinfo->ii_bend = bindex;
14010+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
14011+ update = 1;
1308ab2a 14012+ }
4a4d8108
AM
14013+ au_update_ibrange(inode, /*do_put_zero*/0);
14014+ e = au_dy_irefresh(inode);
14015+ if (unlikely(e && !err))
14016+ err = e;
027c5e7a
AM
14017+ if (!err)
14018+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 14019+
4f0767ce 14020+out:
4a4d8108 14021+ AuTraceErr(err);
1308ab2a 14022+ return err;
dece6358
AM
14023+}
14024+
4a4d8108 14025+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 14026+{
4a4d8108
AM
14027+ int err;
14028+ unsigned int flags;
14029+ umode_t mode;
14030+ aufs_bindex_t bindex, bstart, btail;
14031+ unsigned char isdir;
14032+ struct dentry *h_dentry;
14033+ struct inode *h_inode;
14034+ struct au_iinfo *iinfo;
dece6358 14035+
4a4d8108 14036+ IiMustWriteLock(inode);
dece6358 14037+
4a4d8108
AM
14038+ err = 0;
14039+ isdir = 0;
14040+ bstart = au_dbstart(dentry);
14041+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
14042+ mode = h_inode->i_mode;
14043+ switch (mode & S_IFMT) {
14044+ case S_IFREG:
14045+ btail = au_dbtail(dentry);
14046+ inode->i_op = &aufs_iop;
14047+ inode->i_fop = &aufs_file_fop;
14048+ err = au_dy_iaop(inode, bstart, h_inode);
14049+ if (unlikely(err))
14050+ goto out;
14051+ break;
14052+ case S_IFDIR:
14053+ isdir = 1;
14054+ btail = au_dbtaildir(dentry);
14055+ inode->i_op = &aufs_dir_iop;
14056+ inode->i_fop = &aufs_dir_fop;
14057+ break;
14058+ case S_IFLNK:
14059+ btail = au_dbtail(dentry);
14060+ inode->i_op = &aufs_symlink_iop;
14061+ break;
14062+ case S_IFBLK:
14063+ case S_IFCHR:
14064+ case S_IFIFO:
14065+ case S_IFSOCK:
14066+ btail = au_dbtail(dentry);
14067+ inode->i_op = &aufs_iop;
14068+ au_init_special_fop(inode, mode, h_inode->i_rdev);
14069+ break;
14070+ default:
14071+ AuIOErr("Unknown file type 0%o\n", mode);
14072+ err = -EIO;
1308ab2a 14073+ goto out;
4a4d8108 14074+ }
dece6358 14075+
4a4d8108
AM
14076+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
14077+ flags = au_hi_flags(inode, isdir);
14078+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14079+ && au_ftest_hi(flags, HNOTIFY)
14080+ && dentry->d_name.len > AUFS_WH_PFX_LEN
14081+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14082+ au_fclr_hi(flags, HNOTIFY);
14083+ iinfo = au_ii(inode);
14084+ iinfo->ii_bstart = bstart;
14085+ iinfo->ii_bend = btail;
14086+ for (bindex = bstart; bindex <= btail; bindex++) {
14087+ h_dentry = au_h_dptr(dentry, bindex);
14088+ if (h_dentry)
14089+ au_set_h_iptr(inode, bindex,
14090+ au_igrab(h_dentry->d_inode), flags);
14091+ }
14092+ au_cpup_attr_all(inode, /*force*/1);
dece6358 14093+
4f0767ce 14094+out:
4a4d8108
AM
14095+ return err;
14096+}
dece6358 14097+
027c5e7a
AM
14098+/*
14099+ * successful returns with iinfo write_locked
14100+ * minus: errno
14101+ * zero: success, matched
14102+ * plus: no error, but unmatched
14103+ */
14104+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
14105+{
14106+ int err;
14107+ aufs_bindex_t bindex, bend;
14108+ struct inode *h_inode, *h_dinode;
dece6358 14109+
4a4d8108
AM
14110+ /*
14111+ * before this function, if aufs got any iinfo lock, it must be only
14112+ * one, the parent dir.
14113+ * it can happen by UDBA and the obsoleted inode number.
14114+ */
14115+ err = -EIO;
14116+ if (unlikely(inode->i_ino == parent_ino(dentry)))
14117+ goto out;
14118+
027c5e7a 14119+ err = 1;
4a4d8108
AM
14120+ ii_write_lock_new_child(inode);
14121+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14122+ bend = au_ibend(inode);
14123+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14124+ h_inode = au_h_iptr(inode, bindex);
14125+ if (h_inode && h_inode == h_dinode) {
4a4d8108 14126+ err = 0;
027c5e7a 14127+ if (au_iigen_test(inode, au_digen(dentry)))
4a4d8108
AM
14128+ err = au_refresh_hinode(inode, dentry);
14129+ break;
1308ab2a 14130+ }
1facf9fc 14131+ }
dece6358 14132+
4a4d8108
AM
14133+ if (unlikely(err))
14134+ ii_write_unlock(inode);
4f0767ce 14135+out:
1facf9fc 14136+ return err;
14137+}
1facf9fc 14138+
4a4d8108
AM
14139+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14140+ unsigned int d_type, ino_t *ino)
1facf9fc 14141+{
4a4d8108
AM
14142+ int err;
14143+ struct mutex *mtx;
1facf9fc 14144+
b752ccd1 14145+ /* protect hardlinked inode numbers from a race condition */
4a4d8108 14146+ mtx = NULL;
b752ccd1 14147+ if (d_type != DT_DIR) {
4a4d8108
AM
14148+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14149+ mutex_lock(mtx);
14150+ }
14151+ err = au_xino_read(sb, bindex, h_ino, ino);
14152+ if (unlikely(err))
14153+ goto out;
1308ab2a 14154+
4a4d8108
AM
14155+ if (!*ino) {
14156+ err = -EIO;
14157+ *ino = au_xino_new_ino(sb);
14158+ if (unlikely(!*ino))
1facf9fc 14159+ goto out;
4a4d8108
AM
14160+ err = au_xino_write(sb, bindex, h_ino, *ino);
14161+ if (unlikely(err))
1308ab2a 14162+ goto out;
1308ab2a 14163+ }
1facf9fc 14164+
4f0767ce 14165+out:
b752ccd1 14166+ if (mtx)
4a4d8108 14167+ mutex_unlock(mtx);
1facf9fc 14168+ return err;
14169+}
14170+
4a4d8108
AM
14171+/* successful returns with iinfo write_locked */
14172+/* todo: return with unlocked? */
14173+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 14174+{
b752ccd1 14175+ struct inode *inode, *h_inode;
4a4d8108
AM
14176+ struct dentry *h_dentry;
14177+ struct super_block *sb;
b752ccd1 14178+ struct mutex *mtx;
4a4d8108 14179+ ino_t h_ino, ino;
027c5e7a 14180+ int err;
4a4d8108 14181+ aufs_bindex_t bstart;
1facf9fc 14182+
4a4d8108
AM
14183+ sb = dentry->d_sb;
14184+ bstart = au_dbstart(dentry);
14185+ h_dentry = au_h_dptr(dentry, bstart);
b752ccd1
AM
14186+ h_inode = h_dentry->d_inode;
14187+ h_ino = h_inode->i_ino;
14188+
14189+ /*
14190+ * stop 'race'-ing between hardlinks under different
14191+ * parents.
14192+ */
14193+ mtx = NULL;
14194+ if (!S_ISDIR(h_inode->i_mode))
14195+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
14196+
4f0767ce 14197+new_ino:
b752ccd1
AM
14198+ if (mtx)
14199+ mutex_lock(mtx);
4a4d8108
AM
14200+ err = au_xino_read(sb, bstart, h_ino, &ino);
14201+ inode = ERR_PTR(err);
14202+ if (unlikely(err))
14203+ goto out;
b752ccd1 14204+
4a4d8108
AM
14205+ if (!ino) {
14206+ ino = au_xino_new_ino(sb);
14207+ if (unlikely(!ino)) {
14208+ inode = ERR_PTR(-EIO);
dece6358
AM
14209+ goto out;
14210+ }
14211+ }
1facf9fc 14212+
4a4d8108
AM
14213+ AuDbg("i%lu\n", (unsigned long)ino);
14214+ inode = au_iget_locked(sb, ino);
14215+ err = PTR_ERR(inode);
14216+ if (IS_ERR(inode))
1facf9fc 14217+ goto out;
1facf9fc 14218+
4a4d8108
AM
14219+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14220+ if (inode->i_state & I_NEW) {
14221+ ii_write_lock_new_child(inode);
14222+ err = set_inode(inode, dentry);
14223+ if (!err) {
14224+ unlock_new_inode(inode);
14225+ goto out; /* success */
14226+ }
1308ab2a 14227+
027c5e7a
AM
14228+ /*
14229+ * iget_failed() calls iput(), but we need to call
14230+ * ii_write_unlock() after iget_failed(). so take an extra
14231+ * i_count reference as a dirty hack.
14232+ */
14233+ atomic_inc(&inode->i_count);
4a4d8108 14234+ iget_failed(inode);
027c5e7a
AM
14235+ ii_write_unlock(inode);
14236+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
14237+ /* ignore this error */
14238+ goto out_iput;
14239+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
14240+ /*
14241+ * horrible race condition between lookup, readdir and copyup
14242+ * (or something).
14243+ */
14244+ if (mtx)
14245+ mutex_unlock(mtx);
027c5e7a
AM
14246+ err = reval_inode(inode, dentry);
14247+ if (unlikely(err < 0)) {
14248+ mtx = NULL;
14249+ goto out_iput;
14250+ }
14251+
b752ccd1
AM
14252+ if (!err) {
14253+ mtx = NULL;
4a4d8108 14254+ goto out; /* success */
b752ccd1
AM
14255+ } else if (mtx)
14256+ mutex_lock(mtx);
4a4d8108
AM
14257+ }
14258+
14259+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14260+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14261+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
14262+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14263+ (unsigned long)h_ino, (unsigned long)ino);
14264+ ino = 0;
14265+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
14266+ if (!err) {
14267+ iput(inode);
b752ccd1
AM
14268+ if (mtx)
14269+ mutex_unlock(mtx);
4a4d8108
AM
14270+ goto new_ino;
14271+ }
1308ab2a 14272+
4f0767ce 14273+out_iput:
4a4d8108 14274+ iput(inode);
4a4d8108 14275+ inode = ERR_PTR(err);
4f0767ce 14276+out:
b752ccd1
AM
14277+ if (mtx)
14278+ mutex_unlock(mtx);
4a4d8108 14279+ return inode;
1facf9fc 14280+}
14281+
4a4d8108 14282+/* ---------------------------------------------------------------------- */
1facf9fc 14283+
4a4d8108
AM
14284+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14285+ struct inode *inode)
14286+{
14287+ int err;
1facf9fc 14288+
4a4d8108 14289+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 14290+
4a4d8108
AM
14291+ /* pseudo-link after flushed may happen out of bounds */
14292+ if (!err
14293+ && inode
14294+ && au_ibstart(inode) <= bindex
14295+ && bindex <= au_ibend(inode)) {
14296+ /*
14297+ * permission check is unnecessary since vfsub routine
14298+ * will be called later
14299+ */
14300+ struct inode *hi = au_h_iptr(inode, bindex);
14301+ if (hi)
14302+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 14303+ }
14304+
4a4d8108
AM
14305+ return err;
14306+}
dece6358 14307+
4a4d8108
AM
14308+int au_test_h_perm(struct inode *h_inode, int mask)
14309+{
14310+ if (!current_fsuid())
14311+ return 0;
14312+ return inode_permission(h_inode, mask);
14313+}
1facf9fc 14314+
4a4d8108
AM
14315+int au_test_h_perm_sio(struct inode *h_inode, int mask)
14316+{
14317+ if (au_test_nfs(h_inode->i_sb)
14318+ && (mask & MAY_WRITE)
14319+ && S_ISDIR(h_inode->i_mode))
14320+ mask |= MAY_READ; /* force permission check */
14321+ return au_test_h_perm(h_inode, mask);
1facf9fc 14322+}
7f207e10
AM
14323diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
14324--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
14325+++ linux/fs/aufs/inode.h 2012-02-13 21:54:56.969771692 +0100
14326@@ -0,0 +1,554 @@
4a4d8108 14327+/*
f6c5ef8b 14328+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
14329+ *
14330+ * This program, aufs is free software; you can redistribute it and/or modify
14331+ * it under the terms of the GNU General Public License as published by
14332+ * the Free Software Foundation; either version 2 of the License, or
14333+ * (at your option) any later version.
14334+ *
14335+ * This program is distributed in the hope that it will be useful,
14336+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14337+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14338+ * GNU General Public License for more details.
14339+ *
14340+ * You should have received a copy of the GNU General Public License
14341+ * along with this program; if not, write to the Free Software
14342+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14343+ */
1facf9fc 14344+
1308ab2a 14345+/*
4a4d8108 14346+ * inode operations
1308ab2a 14347+ */
dece6358 14348+
4a4d8108
AM
14349+#ifndef __AUFS_INODE_H__
14350+#define __AUFS_INODE_H__
dece6358 14351+
4a4d8108 14352+#ifdef __KERNEL__
1308ab2a 14353+
4a4d8108 14354+#include <linux/fsnotify.h>
4a4d8108 14355+#include "rwsem.h"
1308ab2a 14356+
4a4d8108 14357+struct vfsmount;
1facf9fc 14358+
4a4d8108
AM
14359+struct au_hnotify {
14360+#ifdef CONFIG_AUFS_HNOTIFY
14361+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 14362+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 14363+ struct fsnotify_mark hn_mark;
7f207e10 14364+ int hn_mark_dead;
4a4d8108 14365+#endif
7f207e10 14366+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
14367+#endif
14368+} ____cacheline_aligned_in_smp;
1facf9fc 14369+
4a4d8108
AM
14370+struct au_hinode {
14371+ struct inode *hi_inode;
14372+ aufs_bindex_t hi_id;
14373+#ifdef CONFIG_AUFS_HNOTIFY
14374+ struct au_hnotify *hi_notify;
14375+#endif
dece6358 14376+
4a4d8108
AM
14377+ /* reference to the copied-up whiteout with get/put */
14378+ struct dentry *hi_whdentry;
14379+};
dece6358 14380+
4a4d8108
AM
14381+struct au_vdir;
14382+struct au_iinfo {
14383+ atomic_t ii_generation;
14384+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 14385+
4a4d8108
AM
14386+ struct au_rwsem ii_rwsem;
14387+ aufs_bindex_t ii_bstart, ii_bend;
14388+ __u32 ii_higen;
14389+ struct au_hinode *ii_hinode;
14390+ struct au_vdir *ii_vdir;
14391+};
1facf9fc 14392+
4a4d8108
AM
14393+struct au_icntnr {
14394+ struct au_iinfo iinfo;
14395+ struct inode vfs_inode;
14396+} ____cacheline_aligned_in_smp;
1308ab2a 14397+
4a4d8108
AM
14398+/* au_pin flags */
14399+#define AuPin_DI_LOCKED 1
14400+#define AuPin_MNT_WRITE (1 << 1)
14401+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
14402+#define au_fset_pin(flags, name) \
14403+ do { (flags) |= AuPin_##name; } while (0)
14404+#define au_fclr_pin(flags, name) \
14405+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
14406+
14407+struct au_pin {
14408+ /* input */
14409+ struct dentry *dentry;
14410+ unsigned int udba;
14411+ unsigned char lsc_di, lsc_hi, flags;
14412+ aufs_bindex_t bindex;
14413+
14414+ /* output */
14415+ struct dentry *parent;
14416+ struct au_hinode *hdir;
14417+ struct vfsmount *h_mnt;
14418+};
1facf9fc 14419+
1308ab2a 14420+/* ---------------------------------------------------------------------- */
14421+
4a4d8108 14422+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 14423+{
4a4d8108 14424+ struct au_iinfo *iinfo;
1facf9fc 14425+
4a4d8108
AM
14426+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14427+ if (iinfo->ii_hinode)
14428+ return iinfo;
14429+ return NULL; /* debugging bad_inode case */
14430+}
1facf9fc 14431+
4a4d8108 14432+/* ---------------------------------------------------------------------- */
1facf9fc 14433+
4a4d8108
AM
14434+/* inode.c */
14435+struct inode *au_igrab(struct inode *inode);
027c5e7a 14436+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
14437+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
14438+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14439+ unsigned int d_type, ino_t *ino);
14440+struct inode *au_new_inode(struct dentry *dentry, int must_new);
14441+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14442+ struct inode *inode);
14443+int au_test_h_perm(struct inode *h_inode, int mask);
14444+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 14445+
4a4d8108
AM
14446+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
14447+ ino_t h_ino, unsigned int d_type, ino_t *ino)
14448+{
14449+#ifdef CONFIG_AUFS_SHWH
14450+ return au_ino(sb, bindex, h_ino, d_type, ino);
14451+#else
14452+ return 0;
14453+#endif
14454+}
1facf9fc 14455+
4a4d8108
AM
14456+/* i_op.c */
14457+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
1308ab2a 14458+
4a4d8108
AM
14459+/* au_wr_dir flags */
14460+#define AuWrDir_ADD_ENTRY 1
14461+#define AuWrDir_ISDIR (1 << 1)
14462+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
14463+#define au_fset_wrdir(flags, name) \
14464+ do { (flags) |= AuWrDir_##name; } while (0)
14465+#define au_fclr_wrdir(flags, name) \
14466+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 14467+
4a4d8108
AM
14468+struct au_wr_dir_args {
14469+ aufs_bindex_t force_btgt;
14470+ unsigned char flags;
14471+};
14472+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
14473+ struct au_wr_dir_args *args);
dece6358 14474+
4a4d8108
AM
14475+struct dentry *au_pinned_h_parent(struct au_pin *pin);
14476+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
14477+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
14478+ unsigned int udba, unsigned char flags);
14479+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
14480+ unsigned int udba, unsigned char flags) __must_check;
14481+int au_do_pin(struct au_pin *pin) __must_check;
14482+void au_unpin(struct au_pin *pin);
1facf9fc 14483+
4a4d8108
AM
14484+/* i_op_add.c */
14485+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
14486+ struct dentry *h_parent, int isdir);
14487+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
14488+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
14489+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
14490+ struct nameidata *nd);
14491+int aufs_link(struct dentry *src_dentry, struct inode *dir,
14492+ struct dentry *dentry);
14493+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
1facf9fc 14494+
4a4d8108
AM
14495+/* i_op_del.c */
14496+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
14497+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
14498+ struct dentry *h_parent, int isdir);
14499+int aufs_unlink(struct inode *dir, struct dentry *dentry);
14500+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 14501+
4a4d8108
AM
14502+/* i_op_ren.c */
14503+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
14504+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
14505+ struct inode *dir, struct dentry *dentry);
1facf9fc 14506+
4a4d8108
AM
14507+/* iinfo.c */
14508+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
14509+void au_hiput(struct au_hinode *hinode);
14510+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14511+ struct dentry *h_wh);
14512+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 14513+
4a4d8108
AM
14514+/* hinode flags */
14515+#define AuHi_XINO 1
14516+#define AuHi_HNOTIFY (1 << 1)
14517+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
14518+#define au_fset_hi(flags, name) \
14519+ do { (flags) |= AuHi_##name; } while (0)
14520+#define au_fclr_hi(flags, name) \
14521+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 14522+
4a4d8108
AM
14523+#ifndef CONFIG_AUFS_HNOTIFY
14524+#undef AuHi_HNOTIFY
14525+#define AuHi_HNOTIFY 0
14526+#endif
1facf9fc 14527+
4a4d8108
AM
14528+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14529+ struct inode *h_inode, unsigned int flags);
1facf9fc 14530+
4a4d8108
AM
14531+void au_update_iigen(struct inode *inode);
14532+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 14533+
4a4d8108
AM
14534+void au_icntnr_init_once(void *_c);
14535+int au_iinfo_init(struct inode *inode);
14536+void au_iinfo_fin(struct inode *inode);
14537+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 14538+
e49829fe 14539+#ifdef CONFIG_PROC_FS
4a4d8108 14540+/* plink.c */
e49829fe
JR
14541+int au_plink_maint(struct super_block *sb, int flags);
14542+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
14543+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
14544+#ifdef CONFIG_AUFS_DEBUG
14545+void au_plink_list(struct super_block *sb);
14546+#else
14547+AuStubVoid(au_plink_list, struct super_block *sb)
14548+#endif
14549+int au_plink_test(struct inode *inode);
14550+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
14551+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
14552+ struct dentry *h_dentry);
e49829fe
JR
14553+void au_plink_put(struct super_block *sb, int verbose);
14554+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 14555+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
14556+#else
14557+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
14558+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
14559+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
14560+AuStubVoid(au_plink_list, struct super_block *sb);
14561+AuStubInt0(au_plink_test, struct inode *inode);
14562+AuStub(struct dentry *, au_plink_lkup, return NULL,
14563+ struct inode *inode, aufs_bindex_t bindex);
14564+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
14565+ struct dentry *h_dentry);
14566+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
14567+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
14568+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
14569+#endif /* CONFIG_PROC_FS */
1facf9fc 14570+
4a4d8108 14571+/* ---------------------------------------------------------------------- */
1308ab2a 14572+
4a4d8108
AM
14573+/* lock subclass for iinfo */
14574+enum {
14575+ AuLsc_II_CHILD, /* child first */
14576+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
14577+ AuLsc_II_CHILD3, /* copyup dirs */
14578+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
14579+ AuLsc_II_PARENT2,
14580+ AuLsc_II_PARENT3, /* copyup dirs */
14581+ AuLsc_II_NEW_CHILD
14582+};
1308ab2a 14583+
1facf9fc 14584+/*
4a4d8108
AM
14585+ * ii_read_lock_child, ii_write_lock_child,
14586+ * ii_read_lock_child2, ii_write_lock_child2,
14587+ * ii_read_lock_child3, ii_write_lock_child3,
14588+ * ii_read_lock_parent, ii_write_lock_parent,
14589+ * ii_read_lock_parent2, ii_write_lock_parent2,
14590+ * ii_read_lock_parent3, ii_write_lock_parent3,
14591+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 14592+ */
4a4d8108
AM
14593+#define AuReadLockFunc(name, lsc) \
14594+static inline void ii_read_lock_##name(struct inode *i) \
14595+{ \
14596+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14597+}
14598+
14599+#define AuWriteLockFunc(name, lsc) \
14600+static inline void ii_write_lock_##name(struct inode *i) \
14601+{ \
14602+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14603+}
14604+
14605+#define AuRWLockFuncs(name, lsc) \
14606+ AuReadLockFunc(name, lsc) \
14607+ AuWriteLockFunc(name, lsc)
14608+
14609+AuRWLockFuncs(child, CHILD);
14610+AuRWLockFuncs(child2, CHILD2);
14611+AuRWLockFuncs(child3, CHILD3);
14612+AuRWLockFuncs(parent, PARENT);
14613+AuRWLockFuncs(parent2, PARENT2);
14614+AuRWLockFuncs(parent3, PARENT3);
14615+AuRWLockFuncs(new_child, NEW_CHILD);
14616+
14617+#undef AuReadLockFunc
14618+#undef AuWriteLockFunc
14619+#undef AuRWLockFuncs
1facf9fc 14620+
14621+/*
4a4d8108 14622+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 14623+ */
4a4d8108 14624+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 14625+
4a4d8108
AM
14626+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
14627+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
14628+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 14629+
4a4d8108 14630+/* ---------------------------------------------------------------------- */
1308ab2a 14631+
027c5e7a
AM
14632+static inline void au_icntnr_init(struct au_icntnr *c)
14633+{
14634+#ifdef CONFIG_AUFS_DEBUG
14635+ c->vfs_inode.i_mode = 0;
14636+#endif
14637+}
14638+
4a4d8108
AM
14639+static inline unsigned int au_iigen(struct inode *inode)
14640+{
14641+ return atomic_read(&au_ii(inode)->ii_generation);
14642+}
1308ab2a 14643+
4a4d8108
AM
14644+/* tiny test for inode number */
14645+/* tmpfs generation is too rough */
14646+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
14647+{
14648+ struct au_iinfo *iinfo;
1308ab2a 14649+
4a4d8108
AM
14650+ iinfo = au_ii(inode);
14651+ AuRwMustAnyLock(&iinfo->ii_rwsem);
14652+ return !(iinfo->ii_hsb1 == h_inode->i_sb
14653+ && iinfo->ii_higen == h_inode->i_generation);
14654+}
1308ab2a 14655+
4a4d8108
AM
14656+static inline void au_iigen_dec(struct inode *inode)
14657+{
e49829fe 14658+ atomic_dec(&au_ii(inode)->ii_generation);
027c5e7a
AM
14659+}
14660+
14661+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
14662+{
14663+ int err;
14664+
14665+ err = 0;
14666+ if (unlikely(inode && au_iigen(inode) != sigen))
14667+ err = -EIO;
14668+
14669+ return err;
4a4d8108 14670+}
1308ab2a 14671+
4a4d8108 14672+/* ---------------------------------------------------------------------- */
1308ab2a 14673+
4a4d8108
AM
14674+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
14675+ aufs_bindex_t bindex)
14676+{
14677+ IiMustAnyLock(inode);
14678+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
14679+}
1308ab2a 14680+
4a4d8108
AM
14681+static inline aufs_bindex_t au_ibstart(struct inode *inode)
14682+{
14683+ IiMustAnyLock(inode);
14684+ return au_ii(inode)->ii_bstart;
14685+}
1308ab2a 14686+
4a4d8108
AM
14687+static inline aufs_bindex_t au_ibend(struct inode *inode)
14688+{
14689+ IiMustAnyLock(inode);
14690+ return au_ii(inode)->ii_bend;
14691+}
1308ab2a 14692+
4a4d8108
AM
14693+static inline struct au_vdir *au_ivdir(struct inode *inode)
14694+{
14695+ IiMustAnyLock(inode);
14696+ return au_ii(inode)->ii_vdir;
14697+}
1308ab2a 14698+
4a4d8108
AM
14699+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
14700+{
14701+ IiMustAnyLock(inode);
14702+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
14703+}
1308ab2a 14704+
4a4d8108 14705+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14706+{
4a4d8108
AM
14707+ IiMustWriteLock(inode);
14708+ au_ii(inode)->ii_bstart = bindex;
14709+}
1308ab2a 14710+
4a4d8108
AM
14711+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
14712+{
14713+ IiMustWriteLock(inode);
14714+ au_ii(inode)->ii_bend = bindex;
1308ab2a 14715+}
14716+
4a4d8108
AM
14717+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
14718+{
14719+ IiMustWriteLock(inode);
14720+ au_ii(inode)->ii_vdir = vdir;
14721+}
1facf9fc 14722+
4a4d8108 14723+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14724+{
4a4d8108
AM
14725+ IiMustAnyLock(inode);
14726+ return au_ii(inode)->ii_hinode + bindex;
14727+}
dece6358 14728+
4a4d8108 14729+/* ---------------------------------------------------------------------- */
1facf9fc 14730+
4a4d8108
AM
14731+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
14732+{
14733+ if (pin)
14734+ return pin->parent;
14735+ return NULL;
1facf9fc 14736+}
14737+
4a4d8108 14738+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 14739+{
4a4d8108
AM
14740+ if (pin && pin->hdir)
14741+ return pin->hdir->hi_inode;
14742+ return NULL;
1308ab2a 14743+}
1facf9fc 14744+
4a4d8108
AM
14745+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
14746+{
14747+ if (pin)
14748+ return pin->hdir;
14749+ return NULL;
14750+}
1facf9fc 14751+
4a4d8108 14752+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 14753+{
4a4d8108
AM
14754+ if (pin)
14755+ pin->dentry = dentry;
14756+}
1308ab2a 14757+
4a4d8108
AM
14758+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
14759+ unsigned char lflag)
14760+{
14761+ if (pin) {
7f207e10 14762+ if (lflag)
4a4d8108 14763+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 14764+ else
4a4d8108 14765+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 14766+ }
4a4d8108
AM
14767+}
14768+
14769+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
14770+{
14771+ if (pin) {
14772+ dput(pin->parent);
14773+ pin->parent = dget(parent);
1facf9fc 14774+ }
4a4d8108 14775+}
1facf9fc 14776+
4a4d8108
AM
14777+/* ---------------------------------------------------------------------- */
14778+
027c5e7a 14779+struct au_branch;
4a4d8108
AM
14780+#ifdef CONFIG_AUFS_HNOTIFY
14781+struct au_hnotify_op {
14782+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a
AM
14783+ int (*alloc)(struct au_hinode *hinode);
14784+ void (*free)(struct au_hinode *hinode);
4a4d8108
AM
14785+
14786+ void (*fin)(void);
14787+ int (*init)(void);
027c5e7a
AM
14788+
14789+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
14790+ void (*fin_br)(struct au_branch *br);
14791+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
14792+};
14793+
14794+/* hnotify.c */
027c5e7a 14795+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
14796+void au_hn_free(struct au_hinode *hinode);
14797+void au_hn_ctl(struct au_hinode *hinode, int do_set);
14798+void au_hn_reset(struct inode *inode, unsigned int flags);
14799+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
14800+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
14801+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
14802+int au_hnotify_init_br(struct au_branch *br, int perm);
14803+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
14804+int __init au_hnotify_init(void);
14805+void au_hnotify_fin(void);
14806+
7f207e10 14807+/* hfsnotify.c */
4a4d8108
AM
14808+extern const struct au_hnotify_op au_hnotify_op;
14809+
14810+static inline
14811+void au_hn_init(struct au_hinode *hinode)
14812+{
14813+ hinode->hi_notify = NULL;
1308ab2a 14814+}
14815+
53392da6
AM
14816+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14817+{
14818+ return hinode->hi_notify;
14819+}
14820+
4a4d8108
AM
14821+#else
14822+static inline
14823+int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
027c5e7a 14824+ struct inode *inode __maybe_unused)
1308ab2a 14825+{
4a4d8108
AM
14826+ return -EOPNOTSUPP;
14827+}
1308ab2a 14828+
53392da6
AM
14829+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14830+{
14831+ return NULL;
14832+}
14833+
4a4d8108
AM
14834+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
14835+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
14836+ int do_set __maybe_unused)
14837+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
14838+ unsigned int flags __maybe_unused)
027c5e7a
AM
14839+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
14840+ struct au_branch *br __maybe_unused,
14841+ int perm __maybe_unused)
14842+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
14843+ int perm __maybe_unused)
14844+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
14845+AuStubInt0(__init au_hnotify_init, void)
14846+AuStubVoid(au_hnotify_fin, void)
14847+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
14848+#endif /* CONFIG_AUFS_HNOTIFY */
14849+
14850+static inline void au_hn_suspend(struct au_hinode *hdir)
14851+{
14852+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 14853+}
14854+
4a4d8108 14855+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 14856+{
4a4d8108
AM
14857+ au_hn_ctl(hdir, /*do_set*/1);
14858+}
1308ab2a 14859+
4a4d8108
AM
14860+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
14861+{
14862+ mutex_lock(&hdir->hi_inode->i_mutex);
14863+ au_hn_suspend(hdir);
14864+}
dece6358 14865+
4a4d8108
AM
14866+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
14867+ unsigned int sc __maybe_unused)
14868+{
14869+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
14870+ au_hn_suspend(hdir);
1facf9fc 14871+}
1facf9fc 14872+
4a4d8108
AM
14873+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
14874+{
14875+ au_hn_resume(hdir);
14876+ mutex_unlock(&hdir->hi_inode->i_mutex);
14877+}
14878+
14879+#endif /* __KERNEL__ */
14880+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
14881diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
14882--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
14883+++ linux/fs/aufs/ioctl.c 2012-02-13 21:54:56.969771692 +0100
14884@@ -0,0 +1,196 @@
4a4d8108 14885+/*
f6c5ef8b 14886+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
14887+ *
14888+ * This program, aufs is free software; you can redistribute it and/or modify
14889+ * it under the terms of the GNU General Public License as published by
14890+ * the Free Software Foundation; either version 2 of the License, or
14891+ * (at your option) any later version.
14892+ *
14893+ * This program is distributed in the hope that it will be useful,
14894+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14895+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14896+ * GNU General Public License for more details.
14897+ *
14898+ * You should have received a copy of the GNU General Public License
14899+ * along with this program; if not, write to the Free Software
14900+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14901+ */
14902+
14903+/*
14904+ * ioctl
14905+ * plink-management and readdir in userspace.
14906+ * assist the pathconf(3) wrapper library.
14907+ */
14908+
4a4d8108
AM
14909+#include "aufs.h"
14910+
1e00d052 14911+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
14912+{
14913+ int err, fd;
14914+ aufs_bindex_t wbi, bindex, bend;
14915+ struct file *h_file;
14916+ struct super_block *sb;
14917+ struct dentry *root;
1e00d052
AM
14918+ struct au_branch *br;
14919+ struct aufs_wbr_fd wbrfd = {
14920+ .oflags = au_dir_roflags,
14921+ .brid = -1
14922+ };
14923+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
14924+ | O_NOATIME | O_CLOEXEC;
4a4d8108 14925+
1e00d052
AM
14926+ AuDebugOn(wbrfd.oflags & ~valid);
14927+
14928+ if (arg) {
14929+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
14930+ if (unlikely(err)) {
14931+ err = -EFAULT;
14932+ goto out;
14933+ }
14934+
14935+ err = -EINVAL;
14936+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
14937+ wbrfd.oflags |= au_dir_roflags;
14938+ AuDbg("0%o\n", wbrfd.oflags);
14939+ if (unlikely(wbrfd.oflags & ~valid))
14940+ goto out;
14941+ }
14942+
14943+ fd = get_unused_fd();
14944+ err = fd;
14945+ if (unlikely(fd < 0))
4a4d8108 14946+ goto out;
4a4d8108 14947+
1e00d052 14948+ h_file = ERR_PTR(-EINVAL);
4a4d8108 14949+ wbi = 0;
1e00d052 14950+ br = NULL;
4a4d8108
AM
14951+ sb = path->dentry->d_sb;
14952+ root = sb->s_root;
14953+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
14954+ bend = au_sbend(sb);
14955+ if (wbrfd.brid >= 0) {
14956+ wbi = au_br_index(sb, wbrfd.brid);
14957+ if (unlikely(wbi < 0 || wbi > bend))
14958+ goto out_unlock;
14959+ }
14960+
14961+ h_file = ERR_PTR(-ENOENT);
14962+ br = au_sbr(sb, wbi);
14963+ if (!au_br_writable(br->br_perm)) {
14964+ if (arg)
14965+ goto out_unlock;
14966+
14967+ bindex = wbi + 1;
14968+ wbi = -1;
14969+ for (; bindex <= bend; bindex++) {
14970+ br = au_sbr(sb, bindex);
14971+ if (au_br_writable(br->br_perm)) {
4a4d8108 14972+ wbi = bindex;
1e00d052 14973+ br = au_sbr(sb, wbi);
4a4d8108
AM
14974+ break;
14975+ }
14976+ }
4a4d8108
AM
14977+ }
14978+ AuDbg("wbi %d\n", wbi);
1e00d052
AM
14979+ if (wbi >= 0)
14980+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL);
14981+
14982+out_unlock:
4a4d8108
AM
14983+ aufs_read_unlock(root, AuLock_IR);
14984+ err = PTR_ERR(h_file);
14985+ if (IS_ERR(h_file))
14986+ goto out_fd;
14987+
1e00d052 14988+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
14989+ fd_install(fd, h_file);
14990+ err = fd;
14991+ goto out; /* success */
14992+
4f0767ce 14993+out_fd:
4a4d8108 14994+ put_unused_fd(fd);
4f0767ce 14995+out:
1e00d052 14996+ AuTraceErr(err);
4a4d8108
AM
14997+ return err;
14998+}
14999+
15000+/* ---------------------------------------------------------------------- */
15001+
15002+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
15003+{
15004+ long err;
15005+
15006+ switch (cmd) {
4a4d8108
AM
15007+ case AUFS_CTL_RDU:
15008+ case AUFS_CTL_RDU_INO:
15009+ err = au_rdu_ioctl(file, cmd, arg);
15010+ break;
15011+
15012+ case AUFS_CTL_WBR_FD:
1e00d052 15013+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
15014+ break;
15015+
027c5e7a
AM
15016+ case AUFS_CTL_IBUSY:
15017+ err = au_ibusy_ioctl(file, arg);
15018+ break;
15019+
4a4d8108
AM
15020+ default:
15021+ /* do not call the lower */
15022+ AuDbg("0x%x\n", cmd);
15023+ err = -ENOTTY;
15024+ }
15025+
15026+ AuTraceErr(err);
15027+ return err;
15028+}
15029+
15030+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
15031+{
15032+ long err;
15033+
15034+ switch (cmd) {
15035+ case AUFS_CTL_WBR_FD:
1e00d052 15036+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
15037+ break;
15038+
15039+ default:
15040+ /* do not call the lower */
15041+ AuDbg("0x%x\n", cmd);
15042+ err = -ENOTTY;
15043+ }
15044+
15045+ AuTraceErr(err);
15046+ return err;
15047+}
b752ccd1
AM
15048+
15049+#ifdef CONFIG_COMPAT
15050+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
15051+ unsigned long arg)
15052+{
15053+ long err;
15054+
15055+ switch (cmd) {
15056+ case AUFS_CTL_RDU:
15057+ case AUFS_CTL_RDU_INO:
15058+ err = au_rdu_compat_ioctl(file, cmd, arg);
15059+ break;
15060+
027c5e7a
AM
15061+ case AUFS_CTL_IBUSY:
15062+ err = au_ibusy_compat_ioctl(file, arg);
15063+ break;
15064+
b752ccd1
AM
15065+ default:
15066+ err = aufs_ioctl_dir(file, cmd, arg);
15067+ }
15068+
15069+ AuTraceErr(err);
15070+ return err;
15071+}
15072+
15073+#if 0 /* unused yet */
15074+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15075+ unsigned long arg)
15076+{
15077+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
15078+}
15079+#endif
15080+#endif
7f207e10
AM
15081diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
15082--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 15083+++ linux/fs/aufs/i_op_add.c 2012-02-13 21:54:56.969771692 +0100
2cbb1c4b 15084@@ -0,0 +1,711 @@
4a4d8108 15085+/*
f6c5ef8b 15086+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
15087+ *
15088+ * This program, aufs is free software; you can redistribute it and/or modify
15089+ * it under the terms of the GNU General Public License as published by
15090+ * the Free Software Foundation; either version 2 of the License, or
15091+ * (at your option) any later version.
15092+ *
15093+ * This program is distributed in the hope that it will be useful,
15094+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15095+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15096+ * GNU General Public License for more details.
15097+ *
15098+ * You should have received a copy of the GNU General Public License
15099+ * along with this program; if not, write to the Free Software
15100+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15101+ */
15102+
15103+/*
15104+ * inode operations (add entry)
15105+ */
15106+
15107+#include "aufs.h"
15108+
15109+/*
15110+ * final procedure of adding a new entry, except link(2).
15111+ * remove whiteout, instantiate, copyup the parent dir's times and size
15112+ * and update version.
15113+ * if it failed, re-create the removed whiteout.
15114+ */
15115+static int epilog(struct inode *dir, aufs_bindex_t bindex,
15116+ struct dentry *wh_dentry, struct dentry *dentry)
15117+{
15118+ int err, rerr;
15119+ aufs_bindex_t bwh;
15120+ struct path h_path;
15121+ struct inode *inode, *h_dir;
15122+ struct dentry *wh;
15123+
15124+ bwh = -1;
15125+ if (wh_dentry) {
15126+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
15127+ IMustLock(h_dir);
15128+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
15129+ bwh = au_dbwh(dentry);
15130+ h_path.dentry = wh_dentry;
15131+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
15132+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
15133+ dentry);
15134+ if (unlikely(err))
15135+ goto out;
15136+ }
15137+
15138+ inode = au_new_inode(dentry, /*must_new*/1);
15139+ if (!IS_ERR(inode)) {
15140+ d_instantiate(dentry, inode);
15141+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
15142+ IMustLock(dir);
15143+ if (au_ibstart(dir) == au_dbstart(dentry))
15144+ au_cpup_attr_timesizes(dir);
15145+ dir->i_version++;
15146+ return 0; /* success */
15147+ }
15148+
15149+ err = PTR_ERR(inode);
15150+ if (!wh_dentry)
15151+ goto out;
15152+
15153+ /* revert */
15154+ /* dir inode is locked */
15155+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
15156+ rerr = PTR_ERR(wh);
15157+ if (IS_ERR(wh)) {
15158+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
15159+ AuDLNPair(dentry), err, rerr);
15160+ err = -EIO;
15161+ } else
15162+ dput(wh);
15163+
4f0767ce 15164+out:
4a4d8108
AM
15165+ return err;
15166+}
15167+
027c5e7a
AM
15168+static int au_d_may_add(struct dentry *dentry)
15169+{
15170+ int err;
15171+
15172+ err = 0;
15173+ if (unlikely(d_unhashed(dentry)))
15174+ err = -ENOENT;
15175+ if (unlikely(dentry->d_inode))
15176+ err = -EEXIST;
15177+ return err;
15178+}
15179+
4a4d8108
AM
15180+/*
15181+ * simple tests for the adding inode operations.
15182+ * following the checks in vfs, plus the parent-child relationship.
15183+ */
15184+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15185+ struct dentry *h_parent, int isdir)
15186+{
15187+ int err;
15188+ umode_t h_mode;
15189+ struct dentry *h_dentry;
15190+ struct inode *h_inode;
15191+
15192+ err = -ENAMETOOLONG;
15193+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15194+ goto out;
15195+
15196+ h_dentry = au_h_dptr(dentry, bindex);
15197+ h_inode = h_dentry->d_inode;
15198+ if (!dentry->d_inode) {
15199+ err = -EEXIST;
15200+ if (unlikely(h_inode))
15201+ goto out;
15202+ } else {
15203+ /* rename(2) case */
15204+ err = -EIO;
15205+ if (unlikely(!h_inode || !h_inode->i_nlink))
15206+ goto out;
15207+
15208+ h_mode = h_inode->i_mode;
15209+ if (!isdir) {
15210+ err = -EISDIR;
15211+ if (unlikely(S_ISDIR(h_mode)))
15212+ goto out;
15213+ } else if (unlikely(!S_ISDIR(h_mode))) {
15214+ err = -ENOTDIR;
15215+ goto out;
15216+ }
15217+ }
15218+
15219+ err = 0;
15220+ /* expected parent dir is locked */
15221+ if (unlikely(h_parent != h_dentry->d_parent))
15222+ err = -EIO;
15223+
4f0767ce 15224+out:
4a4d8108
AM
15225+ AuTraceErr(err);
15226+ return err;
15227+}
15228+
15229+/*
15230+ * initial procedure of adding a new entry.
15231+ * prepare writable branch and the parent dir, lock it,
15232+ * and lookup whiteout for the new entry.
15233+ */
15234+static struct dentry*
15235+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
15236+ struct dentry *src_dentry, struct au_pin *pin,
15237+ struct au_wr_dir_args *wr_dir_args)
15238+{
15239+ struct dentry *wh_dentry, *h_parent;
15240+ struct super_block *sb;
15241+ struct au_branch *br;
15242+ int err;
15243+ unsigned int udba;
15244+ aufs_bindex_t bcpup;
15245+
15246+ AuDbg("%.*s\n", AuDLNPair(dentry));
15247+
15248+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
15249+ bcpup = err;
15250+ wh_dentry = ERR_PTR(err);
15251+ if (unlikely(err < 0))
15252+ goto out;
15253+
15254+ sb = dentry->d_sb;
15255+ udba = au_opt_udba(sb);
15256+ err = au_pin(pin, dentry, bcpup, udba,
15257+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15258+ wh_dentry = ERR_PTR(err);
15259+ if (unlikely(err))
15260+ goto out;
15261+
15262+ h_parent = au_pinned_h_parent(pin);
15263+ if (udba != AuOpt_UDBA_NONE
15264+ && au_dbstart(dentry) == bcpup)
15265+ err = au_may_add(dentry, bcpup, h_parent,
15266+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
15267+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15268+ err = -ENAMETOOLONG;
15269+ wh_dentry = ERR_PTR(err);
15270+ if (unlikely(err))
15271+ goto out_unpin;
15272+
15273+ br = au_sbr(sb, bcpup);
15274+ if (dt) {
15275+ struct path tmp = {
15276+ .dentry = h_parent,
15277+ .mnt = br->br_mnt
15278+ };
15279+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
15280+ }
15281+
15282+ wh_dentry = NULL;
15283+ if (bcpup != au_dbwh(dentry))
15284+ goto out; /* success */
15285+
15286+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
15287+
4f0767ce 15288+out_unpin:
4a4d8108
AM
15289+ if (IS_ERR(wh_dentry))
15290+ au_unpin(pin);
4f0767ce 15291+out:
4a4d8108
AM
15292+ return wh_dentry;
15293+}
15294+
15295+/* ---------------------------------------------------------------------- */
15296+
15297+enum { Mknod, Symlink, Creat };
15298+struct simple_arg {
15299+ int type;
15300+ union {
15301+ struct {
15302+ int mode;
15303+ struct nameidata *nd;
15304+ } c;
15305+ struct {
15306+ const char *symname;
15307+ } s;
15308+ struct {
15309+ int mode;
15310+ dev_t dev;
15311+ } m;
15312+ } u;
15313+};
15314+
15315+static int add_simple(struct inode *dir, struct dentry *dentry,
15316+ struct simple_arg *arg)
15317+{
15318+ int err;
15319+ aufs_bindex_t bstart;
15320+ unsigned char created;
15321+ struct au_dtime dt;
15322+ struct au_pin pin;
15323+ struct path h_path;
15324+ struct dentry *wh_dentry, *parent;
15325+ struct inode *h_dir;
15326+ struct au_wr_dir_args wr_dir_args = {
15327+ .force_btgt = -1,
15328+ .flags = AuWrDir_ADD_ENTRY
15329+ };
15330+
15331+ AuDbg("%.*s\n", AuDLNPair(dentry));
15332+ IMustLock(dir);
15333+
15334+ parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15335+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15336+ if (unlikely(err))
15337+ goto out;
15338+ err = au_d_may_add(dentry);
15339+ if (unlikely(err))
15340+ goto out_unlock;
4a4d8108
AM
15341+ di_write_lock_parent(parent);
15342+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
15343+ &wr_dir_args);
15344+ err = PTR_ERR(wh_dentry);
15345+ if (IS_ERR(wh_dentry))
027c5e7a 15346+ goto out_parent;
4a4d8108
AM
15347+
15348+ bstart = au_dbstart(dentry);
15349+ h_path.dentry = au_h_dptr(dentry, bstart);
15350+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
15351+ h_dir = au_pinned_h_dir(&pin);
15352+ switch (arg->type) {
15353+ case Creat:
15354+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
15355+ break;
15356+ case Symlink:
15357+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
15358+ break;
15359+ case Mknod:
15360+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
15361+ break;
15362+ default:
15363+ BUG();
15364+ }
15365+ created = !err;
15366+ if (!err)
15367+ err = epilog(dir, bstart, wh_dentry, dentry);
15368+
15369+ /* revert */
15370+ if (unlikely(created && err && h_path.dentry->d_inode)) {
15371+ int rerr;
15372+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
15373+ if (rerr) {
15374+ AuIOErr("%.*s revert failure(%d, %d)\n",
15375+ AuDLNPair(dentry), err, rerr);
15376+ err = -EIO;
15377+ }
15378+ au_dtime_revert(&dt);
4a4d8108
AM
15379+ }
15380+
15381+ au_unpin(&pin);
15382+ dput(wh_dentry);
15383+
027c5e7a
AM
15384+out_parent:
15385+ di_write_unlock(parent);
15386+out_unlock:
4a4d8108
AM
15387+ if (unlikely(err)) {
15388+ au_update_dbstart(dentry);
15389+ d_drop(dentry);
15390+ }
4a4d8108 15391+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15392+out:
4a4d8108
AM
15393+ return err;
15394+}
15395+
15396+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
15397+{
15398+ struct simple_arg arg = {
15399+ .type = Mknod,
15400+ .u.m = {
15401+ .mode = mode,
15402+ .dev = dev
15403+ }
15404+ };
15405+ return add_simple(dir, dentry, &arg);
15406+}
15407+
15408+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
15409+{
15410+ struct simple_arg arg = {
15411+ .type = Symlink,
15412+ .u.s.symname = symname
15413+ };
15414+ return add_simple(dir, dentry, &arg);
15415+}
15416+
15417+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15418+ struct nameidata *nd)
15419+{
15420+ struct simple_arg arg = {
15421+ .type = Creat,
15422+ .u.c = {
15423+ .mode = mode,
15424+ .nd = nd
15425+ }
15426+ };
15427+ return add_simple(dir, dentry, &arg);
15428+}
15429+
15430+/* ---------------------------------------------------------------------- */
15431+
15432+struct au_link_args {
15433+ aufs_bindex_t bdst, bsrc;
15434+ struct au_pin pin;
15435+ struct path h_path;
15436+ struct dentry *src_parent, *parent;
15437+};
15438+
15439+static int au_cpup_before_link(struct dentry *src_dentry,
15440+ struct au_link_args *a)
15441+{
15442+ int err;
15443+ struct dentry *h_src_dentry;
15444+ struct mutex *h_mtx;
15445+ struct file *h_file;
15446+
15447+ di_read_lock_parent(a->src_parent, AuLock_IR);
15448+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
15449+ if (unlikely(err))
15450+ goto out;
15451+
15452+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
15453+ h_mtx = &h_src_dentry->d_inode->i_mutex;
15454+ err = au_pin(&a->pin, src_dentry, a->bdst,
15455+ au_opt_udba(src_dentry->d_sb),
15456+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15457+ if (unlikely(err))
15458+ goto out;
15459+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15460+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15461+ if (IS_ERR(h_file)) {
15462+ err = PTR_ERR(h_file);
15463+ h_file = NULL;
15464+ } else
1e00d052 15465+ err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
4a4d8108
AM
15466+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
15467+ mutex_unlock(h_mtx);
15468+ au_h_open_post(src_dentry, a->bsrc, h_file);
15469+ au_unpin(&a->pin);
15470+
4f0767ce 15471+out:
4a4d8108
AM
15472+ di_read_unlock(a->src_parent, AuLock_IR);
15473+ return err;
15474+}
15475+
15476+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
15477+{
15478+ int err;
15479+ unsigned char plink;
15480+ struct inode *h_inode, *inode;
15481+ struct dentry *h_src_dentry;
15482+ struct super_block *sb;
15483+ struct file *h_file;
15484+
15485+ plink = 0;
15486+ h_inode = NULL;
15487+ sb = src_dentry->d_sb;
15488+ inode = src_dentry->d_inode;
15489+ if (au_ibstart(inode) <= a->bdst)
15490+ h_inode = au_h_iptr(inode, a->bdst);
15491+ if (!h_inode || !h_inode->i_nlink) {
15492+ /* copyup src_dentry as the name of dentry. */
15493+ au_set_dbstart(src_dentry, a->bdst);
15494+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
15495+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
15496+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
15497+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15498+ if (IS_ERR(h_file)) {
15499+ err = PTR_ERR(h_file);
15500+ h_file = NULL;
15501+ } else
15502+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
15503+ -1, AuCpup_KEEPLINO,
15504+ a->parent);
15505+ mutex_unlock(&h_inode->i_mutex);
15506+ au_h_open_post(src_dentry, a->bsrc, h_file);
15507+ au_set_h_dptr(src_dentry, a->bdst, NULL);
15508+ au_set_dbstart(src_dentry, a->bsrc);
15509+ } else {
15510+ /* the inode of src_dentry already exists on a.bdst branch */
15511+ h_src_dentry = d_find_alias(h_inode);
15512+ if (!h_src_dentry && au_plink_test(inode)) {
15513+ plink = 1;
15514+ h_src_dentry = au_plink_lkup(inode, a->bdst);
15515+ err = PTR_ERR(h_src_dentry);
15516+ if (IS_ERR(h_src_dentry))
15517+ goto out;
15518+
15519+ if (unlikely(!h_src_dentry->d_inode)) {
15520+ dput(h_src_dentry);
15521+ h_src_dentry = NULL;
15522+ }
15523+
15524+ }
15525+ if (h_src_dentry) {
15526+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15527+ &a->h_path);
15528+ dput(h_src_dentry);
15529+ } else {
15530+ AuIOErr("no dentry found for hi%lu on b%d\n",
15531+ h_inode->i_ino, a->bdst);
15532+ err = -EIO;
15533+ }
15534+ }
15535+
15536+ if (!err && !plink)
15537+ au_plink_append(inode, a->bdst, a->h_path.dentry);
15538+
15539+out:
2cbb1c4b 15540+ AuTraceErr(err);
4a4d8108
AM
15541+ return err;
15542+}
15543+
15544+int aufs_link(struct dentry *src_dentry, struct inode *dir,
15545+ struct dentry *dentry)
15546+{
15547+ int err, rerr;
15548+ struct au_dtime dt;
15549+ struct au_link_args *a;
15550+ struct dentry *wh_dentry, *h_src_dentry;
15551+ struct inode *inode;
15552+ struct super_block *sb;
15553+ struct au_wr_dir_args wr_dir_args = {
15554+ /* .force_btgt = -1, */
15555+ .flags = AuWrDir_ADD_ENTRY
15556+ };
15557+
15558+ IMustLock(dir);
15559+ inode = src_dentry->d_inode;
15560+ IMustLock(inode);
15561+
4a4d8108
AM
15562+ err = -ENOMEM;
15563+ a = kzalloc(sizeof(*a), GFP_NOFS);
15564+ if (unlikely(!a))
15565+ goto out;
15566+
15567+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15568+ err = aufs_read_and_write_lock2(dentry, src_dentry,
15569+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
15570+ if (unlikely(err))
15571+ goto out_kfree;
027c5e7a
AM
15572+ err = au_d_hashed_positive(src_dentry);
15573+ if (unlikely(err))
15574+ goto out_unlock;
15575+ err = au_d_may_add(dentry);
15576+ if (unlikely(err))
15577+ goto out_unlock;
e49829fe 15578+
4a4d8108 15579+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 15580+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
15581+
15582+ di_write_lock_parent(a->parent);
15583+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
15584+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
15585+ &wr_dir_args);
15586+ err = PTR_ERR(wh_dentry);
15587+ if (IS_ERR(wh_dentry))
027c5e7a 15588+ goto out_parent;
4a4d8108
AM
15589+
15590+ err = 0;
15591+ sb = dentry->d_sb;
15592+ a->bdst = au_dbstart(dentry);
15593+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
15594+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
15595+ a->bsrc = au_ibstart(inode);
15596+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15597+ if (!h_src_dentry) {
15598+ a->bsrc = au_dbstart(src_dentry);
15599+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15600+ AuDebugOn(!h_src_dentry);
15601+ } else if (IS_ERR(h_src_dentry))
15602+ goto out_parent;
15603+
4a4d8108
AM
15604+ if (au_opt_test(au_mntflags(sb), PLINK)) {
15605+ if (a->bdst < a->bsrc
15606+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
15607+ err = au_cpup_or_link(src_dentry, a);
2cbb1c4b 15608+ else
4a4d8108
AM
15609+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15610+ &a->h_path);
2cbb1c4b 15611+ dput(h_src_dentry);
4a4d8108
AM
15612+ } else {
15613+ /*
15614+ * copyup src_dentry to the branch we process,
15615+ * and then link(2) to it.
15616+ */
2cbb1c4b 15617+ dput(h_src_dentry);
4a4d8108
AM
15618+ if (a->bdst < a->bsrc
15619+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
15620+ au_unpin(&a->pin);
15621+ di_write_unlock(a->parent);
15622+ err = au_cpup_before_link(src_dentry, a);
15623+ di_write_lock_parent(a->parent);
15624+ if (!err)
15625+ err = au_pin(&a->pin, dentry, a->bdst,
15626+ au_opt_udba(sb),
15627+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15628+ if (unlikely(err))
15629+ goto out_wh;
15630+ }
15631+ if (!err) {
15632+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
15633+ err = -ENOENT;
15634+ if (h_src_dentry && h_src_dentry->d_inode)
15635+ err = vfsub_link(h_src_dentry,
15636+ au_pinned_h_dir(&a->pin),
15637+ &a->h_path);
15638+ }
15639+ }
15640+ if (unlikely(err))
15641+ goto out_unpin;
15642+
15643+ if (wh_dentry) {
15644+ a->h_path.dentry = wh_dentry;
15645+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
15646+ dentry);
15647+ if (unlikely(err))
15648+ goto out_revert;
15649+ }
15650+
15651+ dir->i_version++;
15652+ if (au_ibstart(dir) == au_dbstart(dentry))
15653+ au_cpup_attr_timesizes(dir);
15654+ inc_nlink(inode);
15655+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
15656+ d_instantiate(dentry, au_igrab(inode));
15657+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
15658+ /* some filesystem calls d_drop() */
15659+ d_drop(dentry);
15660+ goto out_unpin; /* success */
15661+
4f0767ce 15662+out_revert:
4a4d8108 15663+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
027c5e7a
AM
15664+ if (unlikely(rerr)) {
15665+ AuIOErr("%.*s reverting failed(%d, %d)\n",
15666+ AuDLNPair(dentry), err, rerr);
15667+ err = -EIO;
15668+ }
4a4d8108 15669+ au_dtime_revert(&dt);
4f0767ce 15670+out_unpin:
4a4d8108 15671+ au_unpin(&a->pin);
4f0767ce 15672+out_wh:
4a4d8108 15673+ dput(wh_dentry);
027c5e7a
AM
15674+out_parent:
15675+ di_write_unlock(a->parent);
15676+ dput(a->src_parent);
4f0767ce 15677+out_unlock:
4a4d8108
AM
15678+ if (unlikely(err)) {
15679+ au_update_dbstart(dentry);
15680+ d_drop(dentry);
15681+ }
4a4d8108 15682+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 15683+out_kfree:
4a4d8108 15684+ kfree(a);
4f0767ce 15685+out:
4a4d8108
AM
15686+ return err;
15687+}
15688+
15689+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
15690+{
15691+ int err, rerr;
15692+ aufs_bindex_t bindex;
15693+ unsigned char diropq;
15694+ struct path h_path;
15695+ struct dentry *wh_dentry, *parent, *opq_dentry;
15696+ struct mutex *h_mtx;
15697+ struct super_block *sb;
15698+ struct {
15699+ struct au_pin pin;
15700+ struct au_dtime dt;
15701+ } *a; /* reduce the stack usage */
15702+ struct au_wr_dir_args wr_dir_args = {
15703+ .force_btgt = -1,
15704+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
15705+ };
15706+
15707+ IMustLock(dir);
15708+
15709+ err = -ENOMEM;
15710+ a = kmalloc(sizeof(*a), GFP_NOFS);
15711+ if (unlikely(!a))
15712+ goto out;
15713+
027c5e7a
AM
15714+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15715+ if (unlikely(err))
15716+ goto out_free;
15717+ err = au_d_may_add(dentry);
15718+ if (unlikely(err))
15719+ goto out_unlock;
15720+
4a4d8108
AM
15721+ parent = dentry->d_parent; /* dir inode is locked */
15722+ di_write_lock_parent(parent);
15723+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
15724+ &a->pin, &wr_dir_args);
15725+ err = PTR_ERR(wh_dentry);
15726+ if (IS_ERR(wh_dentry))
027c5e7a 15727+ goto out_parent;
4a4d8108
AM
15728+
15729+ sb = dentry->d_sb;
15730+ bindex = au_dbstart(dentry);
15731+ h_path.dentry = au_h_dptr(dentry, bindex);
15732+ h_path.mnt = au_sbr_mnt(sb, bindex);
15733+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
15734+ if (unlikely(err))
027c5e7a 15735+ goto out_unpin;
4a4d8108
AM
15736+
15737+ /* make the dir opaque */
15738+ diropq = 0;
15739+ h_mtx = &h_path.dentry->d_inode->i_mutex;
15740+ if (wh_dentry
15741+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
15742+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15743+ opq_dentry = au_diropq_create(dentry, bindex);
15744+ mutex_unlock(h_mtx);
15745+ err = PTR_ERR(opq_dentry);
15746+ if (IS_ERR(opq_dentry))
15747+ goto out_dir;
15748+ dput(opq_dentry);
15749+ diropq = 1;
15750+ }
15751+
15752+ err = epilog(dir, bindex, wh_dentry, dentry);
15753+ if (!err) {
15754+ inc_nlink(dir);
027c5e7a 15755+ goto out_unpin; /* success */
4a4d8108
AM
15756+ }
15757+
15758+ /* revert */
15759+ if (diropq) {
15760+ AuLabel(revert opq);
15761+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15762+ rerr = au_diropq_remove(dentry, bindex);
15763+ mutex_unlock(h_mtx);
15764+ if (rerr) {
15765+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
15766+ AuDLNPair(dentry), err, rerr);
15767+ err = -EIO;
15768+ }
15769+ }
15770+
4f0767ce 15771+out_dir:
4a4d8108
AM
15772+ AuLabel(revert dir);
15773+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
15774+ if (rerr) {
15775+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
15776+ AuDLNPair(dentry), err, rerr);
15777+ err = -EIO;
15778+ }
4a4d8108 15779+ au_dtime_revert(&a->dt);
027c5e7a 15780+out_unpin:
4a4d8108
AM
15781+ au_unpin(&a->pin);
15782+ dput(wh_dentry);
027c5e7a
AM
15783+out_parent:
15784+ di_write_unlock(parent);
15785+out_unlock:
4a4d8108
AM
15786+ if (unlikely(err)) {
15787+ au_update_dbstart(dentry);
15788+ d_drop(dentry);
15789+ }
4a4d8108 15790+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15791+out_free:
4a4d8108 15792+ kfree(a);
4f0767ce 15793+out:
4a4d8108
AM
15794+ return err;
15795+}
7f207e10
AM
15796diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
15797--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
15798+++ linux/fs/aufs/i_op.c 2012-02-13 21:54:56.969771692 +0100
15799@@ -0,0 +1,992 @@
4a4d8108 15800+/*
f6c5ef8b 15801+ * Copyright (C) 2005-2012 Junjiro R. Okajima
4a4d8108
AM
15802+ *
15803+ * This program, aufs is free software; you can redistribute it and/or modify
15804+ * it under the terms of the GNU General Public License as published by
15805+ * the Free Software Foundation; either version 2 of the License, or
15806+ * (at your option) any later version.
15807+ *
15808+ * This program is distributed in the hope that it will be useful,
15809+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15810+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15811+ * GNU General Public License for more details.
15812+ *
15813+ * You should have received a copy of the GNU General Public License
15814+ * along with this program; if not, write to the Free Software
15815+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15816+ */
1facf9fc 15817+
1308ab2a 15818+/*
4a4d8108 15819+ * inode operations (except add/del/rename)
1308ab2a 15820+ */
4a4d8108
AM
15821+
15822+#include <linux/device_cgroup.h>
15823+#include <linux/fs_stack.h>
4a4d8108
AM
15824+#include <linux/namei.h>
15825+#include <linux/security.h>
4a4d8108
AM
15826+#include "aufs.h"
15827+
1e00d052 15828+static int h_permission(struct inode *h_inode, int mask,
4a4d8108 15829+ struct vfsmount *h_mnt, int brperm)
1facf9fc 15830+{
1308ab2a 15831+ int err;
4a4d8108 15832+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 15833+
4a4d8108
AM
15834+ err = -EACCES;
15835+ if ((write_mask && IS_IMMUTABLE(h_inode))
15836+ || ((mask & MAY_EXEC)
15837+ && S_ISREG(h_inode->i_mode)
15838+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
15839+ || !(h_inode->i_mode & S_IXUGO))))
15840+ goto out;
15841+
15842+ /*
15843+ * - skip the lower fs test in the case of write to ro branch.
15844+ * - nfs dir permission write check is optimized, but a policy for
15845+ * link/rename requires a real check.
15846+ */
15847+ if ((write_mask && !au_br_writable(brperm))
15848+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
15849+ && write_mask && !(mask & MAY_READ))
15850+ || !h_inode->i_op->permission) {
15851+ /* AuLabel(generic_permission); */
1e00d052 15852+ err = generic_permission(h_inode, mask);
1308ab2a 15853+ } else {
4a4d8108 15854+ /* AuLabel(h_inode->permission); */
1e00d052 15855+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
15856+ AuTraceErr(err);
15857+ }
1facf9fc 15858+
4a4d8108
AM
15859+ if (!err)
15860+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 15861+ if (!err)
4a4d8108 15862+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
15863+
15864+#if 0
15865+ if (!err) {
15866+ /* todo: do we need to call ima_path_check()? */
15867+ struct path h_path = {
15868+ .dentry =
15869+ .mnt = h_mnt
15870+ };
15871+ err = ima_path_check(&h_path,
15872+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
15873+ IMA_COUNT_LEAVE);
1308ab2a 15874+ }
4a4d8108 15875+#endif
dece6358 15876+
4f0767ce 15877+out:
1308ab2a 15878+ return err;
15879+}
dece6358 15880+
1e00d052 15881+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 15882+{
15883+ int err;
4a4d8108
AM
15884+ aufs_bindex_t bindex, bend;
15885+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
15886+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
15887+ struct inode *h_inode;
15888+ struct super_block *sb;
15889+ struct au_branch *br;
1facf9fc 15890+
027c5e7a 15891+ /* todo: support rcu-walk? */
1e00d052 15892+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
15893+ return -ECHILD;
15894+
4a4d8108
AM
15895+ sb = inode->i_sb;
15896+ si_read_lock(sb, AuLock_FLUSH);
15897+ ii_read_lock_child(inode);
027c5e7a
AM
15898+#if 0
15899+ err = au_iigen_test(inode, au_sigen(sb));
15900+ if (unlikely(err))
15901+ goto out;
15902+#endif
dece6358 15903+
4a4d8108
AM
15904+ if (!isdir || write_mask) {
15905+ err = au_busy_or_stale();
15906+ h_inode = au_h_iptr(inode, au_ibstart(inode));
15907+ if (unlikely(!h_inode
15908+ || (h_inode->i_mode & S_IFMT)
15909+ != (inode->i_mode & S_IFMT)))
15910+ goto out;
1facf9fc 15911+
4a4d8108
AM
15912+ err = 0;
15913+ bindex = au_ibstart(inode);
15914+ br = au_sbr(sb, bindex);
1e00d052 15915+ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
4a4d8108
AM
15916+ if (write_mask
15917+ && !err
15918+ && !special_file(h_inode->i_mode)) {
15919+ /* test whether the upper writable branch exists */
15920+ err = -EROFS;
15921+ for (; bindex >= 0; bindex--)
15922+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
15923+ err = 0;
15924+ break;
15925+ }
15926+ }
15927+ goto out;
15928+ }
dece6358 15929+
4a4d8108 15930+ /* non-write to dir */
1308ab2a 15931+ err = 0;
4a4d8108
AM
15932+ bend = au_ibend(inode);
15933+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
15934+ h_inode = au_h_iptr(inode, bindex);
15935+ if (h_inode) {
15936+ err = au_busy_or_stale();
15937+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
15938+ break;
15939+
15940+ br = au_sbr(sb, bindex);
1e00d052 15941+ err = h_permission(h_inode, mask, br->br_mnt,
4a4d8108
AM
15942+ br->br_perm);
15943+ }
15944+ }
1308ab2a 15945+
4f0767ce 15946+out:
4a4d8108
AM
15947+ ii_read_unlock(inode);
15948+ si_read_unlock(sb);
1308ab2a 15949+ return err;
15950+}
15951+
4a4d8108 15952+/* ---------------------------------------------------------------------- */
1facf9fc 15953+
4a4d8108
AM
15954+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
15955+ struct nameidata *nd)
15956+{
15957+ struct dentry *ret, *parent;
b752ccd1 15958+ struct inode *inode;
4a4d8108 15959+ struct super_block *sb;
9dbd164d 15960+ int err, npositive, lc_idx;
dece6358 15961+
4a4d8108 15962+ IMustLock(dir);
1308ab2a 15963+
4a4d8108 15964+ sb = dir->i_sb;
7f207e10
AM
15965+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
15966+ ret = ERR_PTR(err);
15967+ if (unlikely(err))
15968+ goto out;
15969+
4a4d8108
AM
15970+ ret = ERR_PTR(-ENAMETOOLONG);
15971+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
7f207e10 15972+ goto out_si;
4a4d8108
AM
15973+ err = au_di_init(dentry);
15974+ ret = ERR_PTR(err);
15975+ if (unlikely(err))
7f207e10 15976+ goto out_si;
1308ab2a 15977+
9dbd164d 15978+ inode = NULL;
027c5e7a 15979+ npositive = 0; /* suppress a warning */
4a4d8108
AM
15980+ parent = dentry->d_parent; /* dir inode is locked */
15981+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
15982+ err = au_alive_dir(parent);
15983+ if (!err)
15984+ err = au_digen_test(parent, au_sigen(sb));
15985+ if (!err) {
15986+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
15987+ /*type*/0, nd);
15988+ err = npositive;
15989+ }
4a4d8108 15990+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
15991+ ret = ERR_PTR(err);
15992+ if (unlikely(err < 0))
15993+ goto out_unlock;
1308ab2a 15994+
4a4d8108 15995+ if (npositive) {
b752ccd1 15996+ inode = au_new_inode(dentry, /*must_new*/0);
4a4d8108 15997+ ret = (void *)inode;
1facf9fc 15998+ }
9dbd164d
AM
15999+ if (IS_ERR(inode)) {
16000+ inode = NULL;
4a4d8108 16001+ goto out_unlock;
9dbd164d 16002+ }
4a4d8108
AM
16003+
16004+ ret = d_splice_alias(inode, dentry);
7f207e10 16005+ if (unlikely(IS_ERR(ret) && inode)) {
4a4d8108 16006+ ii_write_unlock(inode);
9dbd164d
AM
16007+ lc_idx = AuLcNonDir_IIINFO;
16008+ if (S_ISLNK(inode->i_mode))
16009+ lc_idx = AuLcSymlink_IIINFO;
16010+ else if (S_ISDIR(inode->i_mode))
16011+ lc_idx = AuLcDir_IIINFO;
16012+ au_rw_class(&au_ii(inode)->ii_rwsem, au_lc_key + lc_idx);
7f207e10
AM
16013+ iput(inode);
16014+ }
1facf9fc 16015+
4f0767ce 16016+out_unlock:
4a4d8108 16017+ di_write_unlock(dentry);
9dbd164d
AM
16018+ if (unlikely(IS_ERR(ret) && inode)) {
16019+ lc_idx = AuLcNonDir_DIINFO;
16020+ if (S_ISLNK(inode->i_mode))
16021+ lc_idx = AuLcSymlink_DIINFO;
16022+ else if (S_ISDIR(inode->i_mode))
16023+ lc_idx = AuLcDir_DIINFO;
16024+ au_rw_class(&au_di(dentry)->di_rwsem, au_lc_key + lc_idx);
16025+ }
7f207e10 16026+out_si:
4a4d8108 16027+ si_read_unlock(sb);
7f207e10 16028+out:
4a4d8108
AM
16029+ return ret;
16030+}
1facf9fc 16031+
4a4d8108 16032+/* ---------------------------------------------------------------------- */
1facf9fc 16033+
4a4d8108
AM
16034+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
16035+ const unsigned char add_entry, aufs_bindex_t bcpup,
16036+ aufs_bindex_t bstart)
16037+{
16038+ int err;
16039+ struct dentry *h_parent;
16040+ struct inode *h_dir;
1facf9fc 16041+
027c5e7a 16042+ if (add_entry)
4a4d8108 16043+ IMustLock(parent->d_inode);
027c5e7a 16044+ else
4a4d8108
AM
16045+ di_write_lock_parent(parent);
16046+
16047+ err = 0;
16048+ if (!au_h_dptr(parent, bcpup)) {
16049+ if (bstart < bcpup)
16050+ err = au_cpdown_dirs(dentry, bcpup);
16051+ else
16052+ err = au_cpup_dirs(dentry, bcpup);
16053+ }
16054+ if (!err && add_entry) {
16055+ h_parent = au_h_dptr(parent, bcpup);
16056+ h_dir = h_parent->d_inode;
16057+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
16058+ err = au_lkup_neg(dentry, bcpup);
16059+ /* todo: no unlock here */
16060+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
16061+
16062+ AuDbg("bcpup %d\n", bcpup);
16063+ if (!err) {
16064+ if (!dentry->d_inode)
16065+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
16066+ au_update_dbrange(dentry, /*do_put_zero*/0);
16067+ }
1308ab2a 16068+ }
1facf9fc 16069+
4a4d8108
AM
16070+ if (!add_entry)
16071+ di_write_unlock(parent);
16072+ if (!err)
16073+ err = bcpup; /* success */
1308ab2a 16074+
027c5e7a 16075+ AuTraceErr(err);
4a4d8108
AM
16076+ return err;
16077+}
1facf9fc 16078+
4a4d8108
AM
16079+/*
16080+ * decide the branch and the parent dir where we will create a new entry.
16081+ * returns new bindex or an error.
16082+ * copyup the parent dir if needed.
16083+ */
16084+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16085+ struct au_wr_dir_args *args)
16086+{
16087+ int err;
16088+ aufs_bindex_t bcpup, bstart, src_bstart;
16089+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
16090+ ADD_ENTRY);
16091+ struct super_block *sb;
16092+ struct dentry *parent;
16093+ struct au_sbinfo *sbinfo;
1facf9fc 16094+
4a4d8108
AM
16095+ sb = dentry->d_sb;
16096+ sbinfo = au_sbi(sb);
16097+ parent = dget_parent(dentry);
16098+ bstart = au_dbstart(dentry);
16099+ bcpup = bstart;
16100+ if (args->force_btgt < 0) {
16101+ if (src_dentry) {
16102+ src_bstart = au_dbstart(src_dentry);
16103+ if (src_bstart < bstart)
16104+ bcpup = src_bstart;
16105+ } else if (add_entry) {
16106+ err = AuWbrCreate(sbinfo, dentry,
16107+ au_ftest_wrdir(args->flags, ISDIR));
16108+ bcpup = err;
16109+ }
1facf9fc 16110+
4a4d8108
AM
16111+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
16112+ if (add_entry)
16113+ err = AuWbrCopyup(sbinfo, dentry);
16114+ else {
16115+ if (!IS_ROOT(dentry)) {
16116+ di_read_lock_parent(parent, !AuLock_IR);
16117+ err = AuWbrCopyup(sbinfo, dentry);
16118+ di_read_unlock(parent, !AuLock_IR);
16119+ } else
16120+ err = AuWbrCopyup(sbinfo, dentry);
16121+ }
16122+ bcpup = err;
16123+ if (unlikely(err < 0))
16124+ goto out;
16125+ }
16126+ } else {
16127+ bcpup = args->force_btgt;
16128+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1308ab2a 16129+ }
027c5e7a 16130+
4a4d8108
AM
16131+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
16132+ err = bcpup;
16133+ if (bcpup == bstart)
16134+ goto out; /* success */
4a4d8108
AM
16135+
16136+ /* copyup the new parent into the branch we process */
16137+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a
AM
16138+ if (err >= 0) {
16139+ if (!dentry->d_inode) {
16140+ au_set_h_dptr(dentry, bstart, NULL);
16141+ au_set_dbstart(dentry, bcpup);
16142+ au_set_dbend(dentry, bcpup);
16143+ }
16144+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
16145+ }
4a4d8108 16146+
4f0767ce 16147+out:
4a4d8108 16148+ dput(parent);
dece6358
AM
16149+ return err;
16150+}
1facf9fc 16151+
1308ab2a 16152+/* ---------------------------------------------------------------------- */
16153+
4a4d8108 16154+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 16155+{
4a4d8108
AM
16156+ if (pin && pin->parent)
16157+ return au_h_dptr(pin->parent, pin->bindex);
16158+ return NULL;
dece6358 16159+}
1facf9fc 16160+
4a4d8108 16161+void au_unpin(struct au_pin *p)
dece6358 16162+{
e49829fe 16163+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
4a4d8108
AM
16164+ mnt_drop_write(p->h_mnt);
16165+ if (!p->hdir)
16166+ return;
1facf9fc 16167+
4a4d8108
AM
16168+ au_hn_imtx_unlock(p->hdir);
16169+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16170+ di_read_unlock(p->parent, AuLock_IR);
16171+ iput(p->hdir->hi_inode);
16172+ dput(p->parent);
16173+ p->parent = NULL;
16174+ p->hdir = NULL;
16175+ p->h_mnt = NULL;
16176+}
1308ab2a 16177+
4a4d8108
AM
16178+int au_do_pin(struct au_pin *p)
16179+{
16180+ int err;
16181+ struct super_block *sb;
16182+ struct dentry *h_dentry, *h_parent;
16183+ struct au_branch *br;
16184+ struct inode *h_dir;
16185+
16186+ err = 0;
16187+ sb = p->dentry->d_sb;
16188+ br = au_sbr(sb, p->bindex);
16189+ if (IS_ROOT(p->dentry)) {
16190+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16191+ p->h_mnt = br->br_mnt;
16192+ err = mnt_want_write(p->h_mnt);
16193+ if (unlikely(err)) {
16194+ au_fclr_pin(p->flags, MNT_WRITE);
16195+ goto out_err;
16196+ }
16197+ }
dece6358 16198+ goto out;
1facf9fc 16199+ }
16200+
4a4d8108
AM
16201+ h_dentry = NULL;
16202+ if (p->bindex <= au_dbend(p->dentry))
16203+ h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 16204+
4a4d8108
AM
16205+ p->parent = dget_parent(p->dentry);
16206+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16207+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 16208+
4a4d8108
AM
16209+ h_dir = NULL;
16210+ h_parent = au_h_dptr(p->parent, p->bindex);
16211+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
16212+ if (p->hdir)
16213+ h_dir = p->hdir->hi_inode;
dece6358 16214+
b752ccd1
AM
16215+ /*
16216+ * udba case, or
16217+ * if DI_LOCKED is not set, then p->parent may be different
16218+ * and h_parent can be NULL.
16219+ */
16220+ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
e49829fe 16221+ err = -EBUSY;
4a4d8108
AM
16222+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16223+ di_read_unlock(p->parent, AuLock_IR);
16224+ dput(p->parent);
16225+ p->parent = NULL;
16226+ goto out_err;
16227+ }
1308ab2a 16228+
4a4d8108
AM
16229+ au_igrab(h_dir);
16230+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
1308ab2a 16231+
4a4d8108
AM
16232+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
16233+ err = -EBUSY;
16234+ goto out_unpin;
16235+ }
16236+ if (h_dentry) {
16237+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
16238+ if (unlikely(err)) {
16239+ au_fclr_pin(p->flags, MNT_WRITE);
16240+ goto out_unpin;
16241+ }
1facf9fc 16242+ }
dece6358 16243+
4a4d8108
AM
16244+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16245+ p->h_mnt = br->br_mnt;
16246+ err = mnt_want_write(p->h_mnt);
dece6358 16247+ if (unlikely(err)) {
4a4d8108
AM
16248+ au_fclr_pin(p->flags, MNT_WRITE);
16249+ goto out_unpin;
dece6358
AM
16250+ }
16251+ }
4a4d8108
AM
16252+ goto out; /* success */
16253+
4f0767ce 16254+out_unpin:
4a4d8108 16255+ au_unpin(p);
4f0767ce 16256+out_err:
4a4d8108
AM
16257+ pr_err("err %d\n", err);
16258+ err = au_busy_or_stale();
4f0767ce 16259+out:
1facf9fc 16260+ return err;
16261+}
16262+
4a4d8108
AM
16263+void au_pin_init(struct au_pin *p, struct dentry *dentry,
16264+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16265+ unsigned int udba, unsigned char flags)
16266+{
16267+ p->dentry = dentry;
16268+ p->udba = udba;
16269+ p->lsc_di = lsc_di;
16270+ p->lsc_hi = lsc_hi;
16271+ p->flags = flags;
16272+ p->bindex = bindex;
16273+
16274+ p->parent = NULL;
16275+ p->hdir = NULL;
16276+ p->h_mnt = NULL;
16277+}
16278+
16279+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16280+ unsigned int udba, unsigned char flags)
16281+{
16282+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
16283+ udba, flags);
16284+ return au_do_pin(pin);
16285+}
16286+
dece6358
AM
16287+/* ---------------------------------------------------------------------- */
16288+
1308ab2a 16289+/*
4a4d8108
AM
16290+ * ->setattr() and ->getattr() are called in various cases.
16291+ * chmod, stat: dentry is revalidated.
16292+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
16293+ * unhashed.
16294+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 16295+ */
027c5e7a 16296+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
4a4d8108 16297+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 16298+{
4a4d8108
AM
16299+ int err;
16300+ struct inode *inode;
16301+ struct dentry *parent;
1facf9fc 16302+
1308ab2a 16303+ err = 0;
4a4d8108 16304+ inode = dentry->d_inode;
027c5e7a 16305+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
16306+ parent = dget_parent(dentry);
16307+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 16308+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
16309+ di_read_unlock(parent, AuLock_IR);
16310+ dput(parent);
dece6358 16311+ }
1facf9fc 16312+
4a4d8108 16313+ AuTraceErr(err);
1308ab2a 16314+ return err;
16315+}
dece6358 16316+
4a4d8108
AM
16317+#define AuIcpup_DID_CPUP 1
16318+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
7f207e10
AM
16319+#define au_fset_icpup(flags, name) \
16320+ do { (flags) |= AuIcpup_##name; } while (0)
16321+#define au_fclr_icpup(flags, name) \
16322+ do { (flags) &= ~AuIcpup_##name; } while (0)
1308ab2a 16323+
4a4d8108
AM
16324+struct au_icpup_args {
16325+ unsigned char flags;
16326+ unsigned char pin_flags;
16327+ aufs_bindex_t btgt;
16328+ unsigned int udba;
16329+ struct au_pin pin;
16330+ struct path h_path;
16331+ struct inode *h_inode;
16332+};
1308ab2a 16333+
4a4d8108
AM
16334+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16335+ struct au_icpup_args *a)
1308ab2a 16336+{
16337+ int err;
4a4d8108 16338+ loff_t sz;
e49829fe 16339+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
16340+ struct dentry *hi_wh, *parent;
16341+ struct inode *inode;
16342+ struct file *h_file;
16343+ struct au_wr_dir_args wr_dir_args = {
16344+ .force_btgt = -1,
16345+ .flags = 0
16346+ };
16347+
16348+ bstart = au_dbstart(dentry);
16349+ inode = dentry->d_inode;
16350+ if (S_ISDIR(inode->i_mode))
16351+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
16352+ /* plink or hi_wh() case */
e49829fe 16353+ ibstart = au_ibstart(inode);
027c5e7a 16354+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 16355+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
16356+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
16357+ if (unlikely(err < 0))
16358+ goto out;
16359+ a->btgt = err;
16360+ if (err != bstart)
16361+ au_fset_icpup(a->flags, DID_CPUP);
16362+
16363+ err = 0;
16364+ a->pin_flags = AuPin_MNT_WRITE;
16365+ parent = NULL;
16366+ if (!IS_ROOT(dentry)) {
16367+ au_fset_pin(a->pin_flags, DI_LOCKED);
16368+ parent = dget_parent(dentry);
16369+ di_write_lock_parent(parent);
16370+ }
16371+
16372+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
16373+ if (unlikely(err))
16374+ goto out_parent;
16375+
16376+ a->h_path.dentry = au_h_dptr(dentry, bstart);
16377+ a->h_inode = a->h_path.dentry->d_inode;
16378+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16379+ sz = -1;
16380+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
16381+ sz = ia->ia_size;
16382+
16383+ h_file = NULL;
16384+ hi_wh = NULL;
027c5e7a 16385+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
16386+ hi_wh = au_hi_wh(inode, a->btgt);
16387+ if (!hi_wh) {
16388+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
16389+ if (unlikely(err))
16390+ goto out_unlock;
16391+ hi_wh = au_hi_wh(inode, a->btgt);
16392+ /* todo: revalidate hi_wh? */
16393+ }
16394+ }
16395+
16396+ if (parent) {
16397+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
16398+ di_downgrade_lock(parent, AuLock_IR);
16399+ dput(parent);
16400+ parent = NULL;
16401+ }
16402+ if (!au_ftest_icpup(a->flags, DID_CPUP))
16403+ goto out; /* success */
16404+
16405+ if (!d_unhashed(dentry)) {
16406+ h_file = au_h_open_pre(dentry, bstart);
16407+ if (IS_ERR(h_file)) {
16408+ err = PTR_ERR(h_file);
16409+ h_file = NULL;
16410+ } else
16411+ err = au_sio_cpup_simple(dentry, a->btgt, sz,
16412+ AuCpup_DTIME);
16413+ if (!err)
16414+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16415+ } else if (!hi_wh)
16416+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16417+ else
16418+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 16419+
4f0767ce 16420+out_unlock:
4a4d8108
AM
16421+ mutex_unlock(&a->h_inode->i_mutex);
16422+ au_h_open_post(dentry, bstart, h_file);
16423+ a->h_inode = a->h_path.dentry->d_inode;
dece6358 16424+ if (!err) {
4a4d8108 16425+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
dece6358 16426+ goto out; /* success */
1facf9fc 16427+ }
dece6358 16428+
4a4d8108 16429+ au_unpin(&a->pin);
4f0767ce 16430+out_parent:
4a4d8108
AM
16431+ if (parent) {
16432+ di_write_unlock(parent);
16433+ dput(parent);
16434+ }
4f0767ce 16435+out:
1facf9fc 16436+ return err;
16437+}
16438+
4a4d8108 16439+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 16440+{
4a4d8108
AM
16441+ int err;
16442+ struct inode *inode;
16443+ struct super_block *sb;
16444+ struct file *file;
16445+ struct au_icpup_args *a;
1facf9fc 16446+
4a4d8108
AM
16447+ inode = dentry->d_inode;
16448+ IMustLock(inode);
dece6358 16449+
4a4d8108
AM
16450+ err = -ENOMEM;
16451+ a = kzalloc(sizeof(*a), GFP_NOFS);
16452+ if (unlikely(!a))
16453+ goto out;
1facf9fc 16454+
4a4d8108
AM
16455+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
16456+ ia->ia_valid &= ~ATTR_MODE;
dece6358 16457+
4a4d8108
AM
16458+ file = NULL;
16459+ sb = dentry->d_sb;
e49829fe
JR
16460+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16461+ if (unlikely(err))
16462+ goto out_kfree;
16463+
4a4d8108
AM
16464+ if (ia->ia_valid & ATTR_FILE) {
16465+ /* currently ftruncate(2) only */
16466+ AuDebugOn(!S_ISREG(inode->i_mode));
16467+ file = ia->ia_file;
16468+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
16469+ if (unlikely(err))
16470+ goto out_si;
16471+ ia->ia_file = au_hf_top(file);
16472+ a->udba = AuOpt_UDBA_NONE;
16473+ } else {
16474+ /* fchmod() doesn't pass ia_file */
16475+ a->udba = au_opt_udba(sb);
027c5e7a
AM
16476+ di_write_lock_child(dentry);
16477+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
16478+ if (d_unhashed(dentry))
16479+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
16480+ if (a->udba != AuOpt_UDBA_NONE) {
16481+ AuDebugOn(IS_ROOT(dentry));
16482+ err = au_reval_for_attr(dentry, au_sigen(sb));
16483+ if (unlikely(err))
16484+ goto out_dentry;
16485+ }
dece6358 16486+ }
dece6358 16487+
4a4d8108
AM
16488+ err = au_pin_and_icpup(dentry, ia, a);
16489+ if (unlikely(err < 0))
16490+ goto out_dentry;
16491+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
16492+ ia->ia_file = NULL;
16493+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 16494+ }
dece6358 16495+
4a4d8108
AM
16496+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
16497+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
16498+ == (ATTR_MODE | ATTR_CTIME)) {
16499+ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
16500+ ia->ia_mode);
16501+ if (unlikely(err))
16502+ goto out_unlock;
16503+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
16504+ && (ia->ia_valid & ATTR_CTIME)) {
16505+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
16506+ if (unlikely(err))
16507+ goto out_unlock;
16508+ }
dece6358 16509+
4a4d8108
AM
16510+ if (ia->ia_valid & ATTR_SIZE) {
16511+ struct file *f;
1308ab2a 16512+
953406b4 16513+ if (ia->ia_size < i_size_read(inode))
4a4d8108 16514+ /* unmap only */
953406b4 16515+ truncate_setsize(inode, ia->ia_size);
1308ab2a 16516+
4a4d8108
AM
16517+ f = NULL;
16518+ if (ia->ia_valid & ATTR_FILE)
16519+ f = ia->ia_file;
16520+ mutex_unlock(&a->h_inode->i_mutex);
16521+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
16522+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16523+ } else
16524+ err = vfsub_notify_change(&a->h_path, ia);
16525+ if (!err)
16526+ au_cpup_attr_changeable(inode);
1308ab2a 16527+
4f0767ce 16528+out_unlock:
4a4d8108
AM
16529+ mutex_unlock(&a->h_inode->i_mutex);
16530+ au_unpin(&a->pin);
027c5e7a
AM
16531+ if (unlikely(err))
16532+ au_update_dbstart(dentry);
4f0767ce 16533+out_dentry:
4a4d8108
AM
16534+ di_write_unlock(dentry);
16535+ if (file) {
16536+ fi_write_unlock(file);
16537+ ia->ia_file = file;
16538+ ia->ia_valid |= ATTR_FILE;
16539+ }
4f0767ce 16540+out_si:
4a4d8108 16541+ si_read_unlock(sb);
e49829fe 16542+out_kfree:
4a4d8108 16543+ kfree(a);
4f0767ce 16544+out:
4a4d8108
AM
16545+ AuTraceErr(err);
16546+ return err;
1facf9fc 16547+}
16548+
4a4d8108
AM
16549+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
16550+ unsigned int nlink)
1facf9fc 16551+{
9dbd164d
AM
16552+ unsigned int n;
16553+
4a4d8108
AM
16554+ inode->i_mode = st->mode;
16555+ inode->i_uid = st->uid;
16556+ inode->i_gid = st->gid;
16557+ inode->i_atime = st->atime;
16558+ inode->i_mtime = st->mtime;
16559+ inode->i_ctime = st->ctime;
1facf9fc 16560+
4a4d8108
AM
16561+ au_cpup_attr_nlink(inode, /*force*/0);
16562+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
16563+ n = inode->i_nlink;
16564+ n -= nlink;
16565+ n += st->nlink;
16566+ set_nlink(inode, n);
4a4d8108 16567+ }
1facf9fc 16568+
4a4d8108
AM
16569+ spin_lock(&inode->i_lock);
16570+ inode->i_blocks = st->blocks;
16571+ i_size_write(inode, st->size);
16572+ spin_unlock(&inode->i_lock);
1facf9fc 16573+}
16574+
4a4d8108
AM
16575+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
16576+ struct dentry *dentry, struct kstat *st)
1facf9fc 16577+{
4a4d8108
AM
16578+ int err;
16579+ unsigned int mnt_flags;
16580+ aufs_bindex_t bindex;
16581+ unsigned char udba_none, positive;
16582+ struct super_block *sb, *h_sb;
16583+ struct inode *inode;
16584+ struct vfsmount *h_mnt;
16585+ struct dentry *h_dentry;
1facf9fc 16586+
4a4d8108
AM
16587+ sb = dentry->d_sb;
16588+ inode = dentry->d_inode;
7f207e10
AM
16589+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16590+ if (unlikely(err))
16591+ goto out;
4a4d8108
AM
16592+ mnt_flags = au_mntflags(sb);
16593+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 16594+
4a4d8108 16595+ /* support fstat(2) */
027c5e7a 16596+ if (!d_unlinked(dentry) && !udba_none) {
4a4d8108 16597+ unsigned int sigen = au_sigen(sb);
027c5e7a
AM
16598+ err = au_digen_test(dentry, sigen);
16599+ if (!err) {
4a4d8108 16600+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a
AM
16601+ err = au_dbrange_test(dentry);
16602+ if (unlikely(err))
16603+ goto out_unlock;
16604+ } else {
4a4d8108
AM
16605+ AuDebugOn(IS_ROOT(dentry));
16606+ di_write_lock_child(dentry);
027c5e7a
AM
16607+ err = au_dbrange_test(dentry);
16608+ if (!err)
16609+ err = au_reval_for_attr(dentry, sigen);
4a4d8108
AM
16610+ di_downgrade_lock(dentry, AuLock_IR);
16611+ if (unlikely(err))
7f207e10 16612+ goto out_unlock;
4a4d8108
AM
16613+ }
16614+ } else
16615+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 16616+
4a4d8108
AM
16617+ bindex = au_ibstart(inode);
16618+ h_mnt = au_sbr_mnt(sb, bindex);
16619+ h_sb = h_mnt->mnt_sb;
16620+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
16621+ goto out_fill; /* success */
1facf9fc 16622+
4a4d8108
AM
16623+ h_dentry = NULL;
16624+ if (au_dbstart(dentry) == bindex)
16625+ h_dentry = dget(au_h_dptr(dentry, bindex));
16626+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
16627+ h_dentry = au_plink_lkup(inode, bindex);
16628+ if (IS_ERR(h_dentry))
16629+ goto out_fill; /* pretending success */
16630+ }
16631+ /* illegally overlapped or something */
16632+ if (unlikely(!h_dentry))
16633+ goto out_fill; /* pretending success */
16634+
16635+ positive = !!h_dentry->d_inode;
16636+ if (positive)
16637+ err = vfs_getattr(h_mnt, h_dentry, st);
16638+ dput(h_dentry);
16639+ if (!err) {
16640+ if (positive)
16641+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
16642+ goto out_fill; /* success */
1facf9fc 16643+ }
7f207e10
AM
16644+ AuTraceErr(err);
16645+ goto out_unlock;
4a4d8108 16646+
4f0767ce 16647+out_fill:
4a4d8108 16648+ generic_fillattr(inode, st);
7f207e10 16649+out_unlock:
4a4d8108
AM
16650+ di_read_unlock(dentry, AuLock_IR);
16651+ si_read_unlock(sb);
7f207e10
AM
16652+out:
16653+ AuTraceErr(err);
4a4d8108 16654+ return err;
1facf9fc 16655+}
16656+
16657+/* ---------------------------------------------------------------------- */
16658+
4a4d8108
AM
16659+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
16660+ int bufsiz)
1facf9fc 16661+{
16662+ int err;
4a4d8108
AM
16663+ struct super_block *sb;
16664+ struct dentry *h_dentry;
1facf9fc 16665+
4a4d8108
AM
16666+ err = -EINVAL;
16667+ h_dentry = au_h_dptr(dentry, bindex);
16668+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
16669+ goto out;
1facf9fc 16670+
4a4d8108
AM
16671+ err = security_inode_readlink(h_dentry);
16672+ if (unlikely(err))
dece6358 16673+ goto out;
1facf9fc 16674+
4a4d8108
AM
16675+ sb = dentry->d_sb;
16676+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
16677+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
16678+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
1facf9fc 16679+ }
4a4d8108 16680+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 16681+
4f0767ce 16682+out:
4a4d8108
AM
16683+ return err;
16684+}
1facf9fc 16685+
4a4d8108
AM
16686+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
16687+{
16688+ int err;
1facf9fc 16689+
027c5e7a
AM
16690+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16691+ if (unlikely(err))
16692+ goto out;
16693+ err = au_d_hashed_positive(dentry);
16694+ if (!err)
16695+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 16696+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16697+
027c5e7a 16698+out:
4a4d8108
AM
16699+ return err;
16700+}
1facf9fc 16701+
4a4d8108
AM
16702+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
16703+{
16704+ int err;
4a4d8108 16705+ mm_segment_t old_fs;
b752ccd1
AM
16706+ union {
16707+ char *k;
16708+ char __user *u;
16709+ } buf;
1facf9fc 16710+
4a4d8108 16711+ err = -ENOMEM;
b752ccd1
AM
16712+ buf.k = __getname_gfp(GFP_NOFS);
16713+ if (unlikely(!buf.k))
4a4d8108 16714+ goto out;
1facf9fc 16715+
027c5e7a
AM
16716+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16717+ if (unlikely(err))
16718+ goto out_name;
16719+
16720+ err = au_d_hashed_positive(dentry);
16721+ if (!err) {
16722+ old_fs = get_fs();
16723+ set_fs(KERNEL_DS);
16724+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
16725+ set_fs(old_fs);
16726+ }
4a4d8108 16727+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16728+
4a4d8108 16729+ if (err >= 0) {
b752ccd1 16730+ buf.k[err] = 0;
4a4d8108 16731+ /* will be freed by put_link */
b752ccd1 16732+ nd_set_link(nd, buf.k);
4a4d8108 16733+ return NULL; /* success */
1308ab2a 16734+ }
1facf9fc 16735+
027c5e7a
AM
16736+out_name:
16737+ __putname(buf.k);
4f0767ce 16738+out:
4a4d8108
AM
16739+ path_put(&nd->path);
16740+ AuTraceErr(err);
16741+ return ERR_PTR(err);
16742+}
1facf9fc 16743+
4a4d8108
AM
16744+static void aufs_put_link(struct dentry *dentry __maybe_unused,
16745+ struct nameidata *nd, void *cookie __maybe_unused)
16746+{
16747+ __putname(nd_get_link(nd));
16748+}
1facf9fc 16749+
4a4d8108 16750+/* ---------------------------------------------------------------------- */
1facf9fc 16751+
4a4d8108
AM
16752+static void aufs_truncate_range(struct inode *inode __maybe_unused,
16753+ loff_t start __maybe_unused,
16754+ loff_t end __maybe_unused)
16755+{
16756+ AuUnsupport();
16757+}
1facf9fc 16758+
4a4d8108 16759+/* ---------------------------------------------------------------------- */
1308ab2a 16760+
4a4d8108
AM
16761+struct inode_operations aufs_symlink_iop = {
16762+ .permission = aufs_permission,
16763+ .setattr = aufs_setattr,
16764+ .getattr = aufs_getattr,
16765+ .readlink = aufs_readlink,
16766+ .follow_link = aufs_follow_link,
16767+ .put_link = aufs_put_link
16768+};
16769+
16770+struct inode_operations aufs_dir_iop = {
16771+ .create = aufs_create,
16772+ .lookup = aufs_lookup,
16773+ .link = aufs_link,
16774+ .unlink = aufs_unlink,
16775+ .symlink = aufs_symlink,
16776+ .mkdir = aufs_mkdir,
16777+ .rmdir = aufs_rmdir,
16778+ .mknod = aufs_mknod,
16779+ .rename = aufs_rename,
16780+
16781+ .permission = aufs_permission,
16782+ .setattr = aufs_setattr,
16783+ .getattr = aufs_getattr
16784+};
16785+
16786+struct inode_operations aufs_iop = {
16787+ .permission = aufs_permission,
16788+ .setattr = aufs_setattr,
16789+ .getattr = aufs_getattr,
16790+ .truncate_range = aufs_truncate_range
16791+};
7f207e10
AM
16792diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
16793--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 16794+++ linux/fs/aufs/i_op_del.c 2012-02-13 21:54:56.969771692 +0100
53392da6 16795@@ -0,0 +1,478 @@
1facf9fc 16796+/*
f6c5ef8b 16797+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 16798+ *
16799+ * This program, aufs is free software; you can redistribute it and/or modify
16800+ * it under the terms of the GNU General Public License as published by
16801+ * the Free Software Foundation; either version 2 of the License, or
16802+ * (at your option) any later version.
dece6358
AM
16803+ *
16804+ * This program is distributed in the hope that it will be useful,
16805+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16806+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16807+ * GNU General Public License for more details.
16808+ *
16809+ * You should have received a copy of the GNU General Public License
16810+ * along with this program; if not, write to the Free Software
16811+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 16812+ */
16813+
16814+/*
4a4d8108 16815+ * inode operations (del entry)
1308ab2a 16816+ */
dece6358 16817+
1308ab2a 16818+#include "aufs.h"
dece6358 16819+
4a4d8108
AM
16820+/*
16821+ * decide if a new whiteout for @dentry is necessary or not.
16822+ * when it is necessary, prepare the parent dir for the upper branch whose
16823+ * branch index is @bcpup for creation. the actual creation of the whiteout will
16824+ * be done by caller.
16825+ * return value:
16826+ * 0: wh is unnecessary
16827+ * plus: wh is necessary
16828+ * minus: error
16829+ */
16830+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 16831+{
4a4d8108
AM
16832+ int need_wh, err;
16833+ aufs_bindex_t bstart;
16834+ struct super_block *sb;
dece6358 16835+
4a4d8108
AM
16836+ sb = dentry->d_sb;
16837+ bstart = au_dbstart(dentry);
16838+ if (*bcpup < 0) {
16839+ *bcpup = bstart;
16840+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
16841+ err = AuWbrCopyup(au_sbi(sb), dentry);
16842+ *bcpup = err;
16843+ if (unlikely(err < 0))
16844+ goto out;
16845+ }
16846+ } else
16847+ AuDebugOn(bstart < *bcpup
16848+ || au_test_ro(sb, *bcpup, dentry->d_inode));
16849+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 16850+
4a4d8108
AM
16851+ if (*bcpup != bstart) {
16852+ err = au_cpup_dirs(dentry, *bcpup);
16853+ if (unlikely(err))
16854+ goto out;
16855+ need_wh = 1;
16856+ } else {
027c5e7a 16857+ struct au_dinfo *dinfo, *tmp;
4a4d8108 16858+
027c5e7a
AM
16859+ need_wh = -ENOMEM;
16860+ dinfo = au_di(dentry);
16861+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
16862+ if (tmp) {
16863+ au_di_cp(tmp, dinfo);
16864+ au_di_swap(tmp, dinfo);
16865+ /* returns the number of positive dentries */
16866+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
16867+ /*nd*/NULL);
16868+ au_di_swap(tmp, dinfo);
16869+ au_rw_write_unlock(&tmp->di_rwsem);
16870+ au_di_free(tmp);
4a4d8108
AM
16871+ }
16872+ }
16873+ AuDbg("need_wh %d\n", need_wh);
16874+ err = need_wh;
16875+
4f0767ce 16876+out:
4a4d8108 16877+ return err;
1facf9fc 16878+}
16879+
4a4d8108
AM
16880+/*
16881+ * simple tests for the del-entry operations.
16882+ * following the checks in vfs, plus the parent-child relationship.
16883+ */
16884+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16885+ struct dentry *h_parent, int isdir)
1facf9fc 16886+{
4a4d8108
AM
16887+ int err;
16888+ umode_t h_mode;
16889+ struct dentry *h_dentry, *h_latest;
1308ab2a 16890+ struct inode *h_inode;
1facf9fc 16891+
4a4d8108
AM
16892+ h_dentry = au_h_dptr(dentry, bindex);
16893+ h_inode = h_dentry->d_inode;
16894+ if (dentry->d_inode) {
16895+ err = -ENOENT;
16896+ if (unlikely(!h_inode || !h_inode->i_nlink))
16897+ goto out;
1facf9fc 16898+
4a4d8108
AM
16899+ h_mode = h_inode->i_mode;
16900+ if (!isdir) {
16901+ err = -EISDIR;
16902+ if (unlikely(S_ISDIR(h_mode)))
16903+ goto out;
16904+ } else if (unlikely(!S_ISDIR(h_mode))) {
16905+ err = -ENOTDIR;
16906+ goto out;
16907+ }
16908+ } else {
16909+ /* rename(2) case */
16910+ err = -EIO;
16911+ if (unlikely(h_inode))
16912+ goto out;
16913+ }
1facf9fc 16914+
4a4d8108
AM
16915+ err = -ENOENT;
16916+ /* expected parent dir is locked */
16917+ if (unlikely(h_parent != h_dentry->d_parent))
16918+ goto out;
16919+ err = 0;
16920+
16921+ /*
16922+ * rmdir a dir may break the consistency on some filesystem.
16923+ * let's try heavy test.
16924+ */
16925+ err = -EACCES;
16926+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
16927+ goto out;
16928+
16929+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
16930+ au_sbr(dentry->d_sb, bindex));
16931+ err = -EIO;
16932+ if (IS_ERR(h_latest))
16933+ goto out;
16934+ if (h_latest == h_dentry)
16935+ err = 0;
16936+ dput(h_latest);
16937+
4f0767ce 16938+out:
4a4d8108 16939+ return err;
1308ab2a 16940+}
1facf9fc 16941+
4a4d8108
AM
16942+/*
16943+ * decide the branch where we operate for @dentry. the branch index will be set
16944+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
16945+ * dir for reverting.
16946+ * when a new whiteout is necessary, create it.
16947+ */
16948+static struct dentry*
16949+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
16950+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 16951+{
4a4d8108
AM
16952+ struct dentry *wh_dentry;
16953+ struct super_block *sb;
16954+ struct path h_path;
16955+ int err, need_wh;
16956+ unsigned int udba;
16957+ aufs_bindex_t bcpup;
dece6358 16958+
4a4d8108
AM
16959+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
16960+ wh_dentry = ERR_PTR(need_wh);
16961+ if (unlikely(need_wh < 0))
16962+ goto out;
16963+
16964+ sb = dentry->d_sb;
16965+ udba = au_opt_udba(sb);
16966+ bcpup = *rbcpup;
16967+ err = au_pin(pin, dentry, bcpup, udba,
16968+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
16969+ wh_dentry = ERR_PTR(err);
16970+ if (unlikely(err))
16971+ goto out;
16972+
16973+ h_path.dentry = au_pinned_h_parent(pin);
16974+ if (udba != AuOpt_UDBA_NONE
16975+ && au_dbstart(dentry) == bcpup) {
16976+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
16977+ wh_dentry = ERR_PTR(err);
16978+ if (unlikely(err))
16979+ goto out_unpin;
16980+ }
16981+
16982+ h_path.mnt = au_sbr_mnt(sb, bcpup);
16983+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
16984+ wh_dentry = NULL;
16985+ if (!need_wh)
16986+ goto out; /* success, no need to create whiteout */
16987+
16988+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
16989+ if (IS_ERR(wh_dentry))
16990+ goto out_unpin;
16991+
16992+ /* returns with the parent is locked and wh_dentry is dget-ed */
16993+ goto out; /* success */
16994+
4f0767ce 16995+out_unpin:
4a4d8108 16996+ au_unpin(pin);
4f0767ce 16997+out:
4a4d8108 16998+ return wh_dentry;
1facf9fc 16999+}
17000+
4a4d8108
AM
17001+/*
17002+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
17003+ * in order to be revertible and save time for removing many child whiteouts
17004+ * under the dir.
17005+ * returns 1 when there are too many child whiteout and caller should remove
17006+ * them asynchronously. returns 0 when the number of children is enough small to
17007+ * remove now or the branch fs is a remote fs.
17008+ * otherwise return an error.
17009+ */
17010+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
17011+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 17012+{
4a4d8108
AM
17013+ int rmdir_later, err, dirwh;
17014+ struct dentry *h_dentry;
17015+ struct super_block *sb;
17016+
17017+ sb = dentry->d_sb;
17018+ SiMustAnyLock(sb);
17019+ h_dentry = au_h_dptr(dentry, bindex);
17020+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
17021+ if (unlikely(err))
17022+ goto out;
17023+
17024+ /* stop monitoring */
17025+ au_hn_free(au_hi(dentry->d_inode, bindex));
17026+
17027+ if (!au_test_fs_remote(h_dentry->d_sb)) {
17028+ dirwh = au_sbi(sb)->si_dirwh;
17029+ rmdir_later = (dirwh <= 1);
17030+ if (!rmdir_later)
17031+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
17032+ dirwh);
17033+ if (rmdir_later)
17034+ return rmdir_later;
17035+ }
1facf9fc 17036+
4a4d8108
AM
17037+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
17038+ if (unlikely(err)) {
17039+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
17040+ AuDLNPair(h_dentry), bindex, err);
17041+ err = 0;
17042+ }
dece6358 17043+
4f0767ce 17044+out:
4a4d8108
AM
17045+ AuTraceErr(err);
17046+ return err;
17047+}
1308ab2a 17048+
4a4d8108
AM
17049+/*
17050+ * final procedure for deleting a entry.
17051+ * maintain dentry and iattr.
17052+ */
17053+static void epilog(struct inode *dir, struct dentry *dentry,
17054+ aufs_bindex_t bindex)
17055+{
17056+ struct inode *inode;
1308ab2a 17057+
4a4d8108
AM
17058+ inode = dentry->d_inode;
17059+ d_drop(dentry);
17060+ inode->i_ctime = dir->i_ctime;
1308ab2a 17061+
4a4d8108
AM
17062+ if (au_ibstart(dir) == bindex)
17063+ au_cpup_attr_timesizes(dir);
17064+ dir->i_version++;
1facf9fc 17065+}
17066+
4a4d8108
AM
17067+/*
17068+ * when an error happened, remove the created whiteout and revert everything.
17069+ */
7f207e10
AM
17070+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
17071+ aufs_bindex_t bwh, struct dentry *wh_dentry,
17072+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 17073+{
4a4d8108
AM
17074+ int rerr;
17075+ struct path h_path = {
17076+ .dentry = wh_dentry,
7f207e10 17077+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 17078+ };
dece6358 17079+
7f207e10 17080+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
17081+ if (!rerr) {
17082+ au_set_dbwh(dentry, bwh);
17083+ au_dtime_revert(dt);
17084+ return 0;
17085+ }
dece6358 17086+
4a4d8108
AM
17087+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
17088+ AuDLNPair(dentry), err, rerr);
17089+ return -EIO;
1facf9fc 17090+}
17091+
4a4d8108 17092+/* ---------------------------------------------------------------------- */
1facf9fc 17093+
4a4d8108 17094+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 17095+{
4a4d8108
AM
17096+ int err;
17097+ aufs_bindex_t bwh, bindex, bstart;
17098+ struct au_dtime dt;
17099+ struct au_pin pin;
17100+ struct path h_path;
17101+ struct inode *inode, *h_dir;
17102+ struct dentry *parent, *wh_dentry;
1facf9fc 17103+
4a4d8108 17104+ IMustLock(dir);
027c5e7a
AM
17105+
17106+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17107+ if (unlikely(err))
17108+ goto out;
17109+ err = au_d_hashed_positive(dentry);
17110+ if (unlikely(err))
17111+ goto out_unlock;
4a4d8108 17112+ inode = dentry->d_inode;
4a4d8108 17113+ IMustLock(inode);
027c5e7a
AM
17114+ err = -EISDIR;
17115+ if (unlikely(S_ISDIR(inode->i_mode)))
17116+ goto out_unlock; /* possible? */
1facf9fc 17117+
4a4d8108
AM
17118+ bstart = au_dbstart(dentry);
17119+ bwh = au_dbwh(dentry);
17120+ bindex = -1;
027c5e7a
AM
17121+ parent = dentry->d_parent; /* dir inode is locked */
17122+ di_write_lock_parent(parent);
4a4d8108
AM
17123+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
17124+ err = PTR_ERR(wh_dentry);
17125+ if (IS_ERR(wh_dentry))
027c5e7a 17126+ goto out_parent;
1facf9fc 17127+
4a4d8108
AM
17128+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
17129+ h_path.dentry = au_h_dptr(dentry, bstart);
17130+ dget(h_path.dentry);
17131+ if (bindex == bstart) {
17132+ h_dir = au_pinned_h_dir(&pin);
17133+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17134+ } else {
17135+ /* dir inode is locked */
17136+ h_dir = wh_dentry->d_parent->d_inode;
17137+ IMustLock(h_dir);
17138+ err = 0;
17139+ }
dece6358 17140+
4a4d8108 17141+ if (!err) {
7f207e10 17142+ vfsub_drop_nlink(inode);
4a4d8108
AM
17143+ epilog(dir, dentry, bindex);
17144+
17145+ /* update target timestamps */
17146+ if (bindex == bstart) {
17147+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
17148+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
17149+ } else
17150+ /* todo: this timestamp may be reverted later */
17151+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 17152+ goto out_unpin; /* success */
1facf9fc 17153+ }
17154+
4a4d8108
AM
17155+ /* revert */
17156+ if (wh_dentry) {
17157+ int rerr;
17158+
7f207e10 17159+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17160+ if (rerr)
17161+ err = rerr;
dece6358 17162+ }
1facf9fc 17163+
027c5e7a 17164+out_unpin:
4a4d8108
AM
17165+ au_unpin(&pin);
17166+ dput(wh_dentry);
17167+ dput(h_path.dentry);
027c5e7a 17168+out_parent:
4a4d8108 17169+ di_write_unlock(parent);
027c5e7a 17170+out_unlock:
4a4d8108 17171+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 17172+out:
4a4d8108 17173+ return err;
dece6358
AM
17174+}
17175+
4a4d8108 17176+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 17177+{
4a4d8108
AM
17178+ int err, rmdir_later;
17179+ aufs_bindex_t bwh, bindex, bstart;
17180+ struct au_dtime dt;
17181+ struct au_pin pin;
17182+ struct inode *inode;
17183+ struct dentry *parent, *wh_dentry, *h_dentry;
17184+ struct au_whtmp_rmdir *args;
1facf9fc 17185+
4a4d8108 17186+ IMustLock(dir);
027c5e7a
AM
17187+
17188+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
17189+ if (unlikely(err))
4a4d8108 17190+ goto out;
53392da6
AM
17191+ err = au_alive_dir(dentry);
17192+ if (unlikely(err))
027c5e7a 17193+ goto out_unlock;
53392da6 17194+ inode = dentry->d_inode;
4a4d8108 17195+ IMustLock(inode);
027c5e7a
AM
17196+ err = -ENOTDIR;
17197+ if (unlikely(!S_ISDIR(inode->i_mode)))
17198+ goto out_unlock; /* possible? */
dece6358 17199+
4a4d8108
AM
17200+ err = -ENOMEM;
17201+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
17202+ if (unlikely(!args))
17203+ goto out_unlock;
dece6358 17204+
4a4d8108
AM
17205+ parent = dentry->d_parent; /* dir inode is locked */
17206+ di_write_lock_parent(parent);
17207+ err = au_test_empty(dentry, &args->whlist);
17208+ if (unlikely(err))
027c5e7a 17209+ goto out_parent;
1facf9fc 17210+
4a4d8108
AM
17211+ bstart = au_dbstart(dentry);
17212+ bwh = au_dbwh(dentry);
17213+ bindex = -1;
17214+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
17215+ err = PTR_ERR(wh_dentry);
17216+ if (IS_ERR(wh_dentry))
027c5e7a 17217+ goto out_parent;
1facf9fc 17218+
4a4d8108
AM
17219+ h_dentry = au_h_dptr(dentry, bstart);
17220+ dget(h_dentry);
17221+ rmdir_later = 0;
17222+ if (bindex == bstart) {
17223+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
17224+ if (err > 0) {
17225+ rmdir_later = err;
17226+ err = 0;
17227+ }
17228+ } else {
17229+ /* stop monitoring */
17230+ au_hn_free(au_hi(inode, bstart));
17231+
17232+ /* dir inode is locked */
17233+ IMustLock(wh_dentry->d_parent->d_inode);
1facf9fc 17234+ err = 0;
17235+ }
17236+
4a4d8108 17237+ if (!err) {
027c5e7a 17238+ vfsub_dead_dir(inode);
4a4d8108
AM
17239+ au_set_dbdiropq(dentry, -1);
17240+ epilog(dir, dentry, bindex);
1308ab2a 17241+
4a4d8108
AM
17242+ if (rmdir_later) {
17243+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
17244+ args = NULL;
17245+ }
1308ab2a 17246+
4a4d8108 17247+ goto out_unpin; /* success */
1facf9fc 17248+ }
17249+
4a4d8108
AM
17250+ /* revert */
17251+ AuLabel(revert);
17252+ if (wh_dentry) {
17253+ int rerr;
1308ab2a 17254+
7f207e10 17255+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17256+ if (rerr)
17257+ err = rerr;
1facf9fc 17258+ }
17259+
4f0767ce 17260+out_unpin:
4a4d8108
AM
17261+ au_unpin(&pin);
17262+ dput(wh_dentry);
17263+ dput(h_dentry);
027c5e7a 17264+out_parent:
4a4d8108
AM
17265+ di_write_unlock(parent);
17266+ if (args)
17267+ au_whtmp_rmdir_free(args);
4f0767ce 17268+out_unlock:
4a4d8108 17269+ aufs_read_unlock(dentry, AuLock_DW);
4f0767ce 17270+out:
4a4d8108
AM
17271+ AuTraceErr(err);
17272+ return err;
dece6358 17273+}
7f207e10
AM
17274diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
17275--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 17276+++ linux/fs/aufs/i_op_ren.c 2012-02-13 21:54:56.969771692 +0100
027c5e7a 17277@@ -0,0 +1,1017 @@
1facf9fc 17278+/*
f6c5ef8b 17279+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 17280+ *
17281+ * This program, aufs is free software; you can redistribute it and/or modify
17282+ * it under the terms of the GNU General Public License as published by
17283+ * the Free Software Foundation; either version 2 of the License, or
17284+ * (at your option) any later version.
dece6358
AM
17285+ *
17286+ * This program is distributed in the hope that it will be useful,
17287+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17288+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17289+ * GNU General Public License for more details.
17290+ *
17291+ * You should have received a copy of the GNU General Public License
17292+ * along with this program; if not, write to the Free Software
17293+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17294+ */
17295+
17296+/*
4a4d8108
AM
17297+ * inode operation (rename entry)
17298+ * todo: this is crazy monster
1facf9fc 17299+ */
17300+
17301+#include "aufs.h"
17302+
4a4d8108
AM
17303+enum { AuSRC, AuDST, AuSrcDst };
17304+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 17305+
4a4d8108
AM
17306+#define AuRen_ISDIR 1
17307+#define AuRen_ISSAMEDIR (1 << 1)
17308+#define AuRen_WHSRC (1 << 2)
17309+#define AuRen_WHDST (1 << 3)
17310+#define AuRen_MNT_WRITE (1 << 4)
17311+#define AuRen_DT_DSTDIR (1 << 5)
17312+#define AuRen_DIROPQ (1 << 6)
17313+#define AuRen_CPUP (1 << 7)
17314+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
17315+#define au_fset_ren(flags, name) \
17316+ do { (flags) |= AuRen_##name; } while (0)
17317+#define au_fclr_ren(flags, name) \
17318+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 17319+
4a4d8108
AM
17320+struct au_ren_args {
17321+ struct {
17322+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
17323+ *wh_dentry;
17324+ struct inode *dir, *inode;
17325+ struct au_hinode *hdir;
17326+ struct au_dtime dt[AuParentChild];
17327+ aufs_bindex_t bstart;
17328+ } sd[AuSrcDst];
1facf9fc 17329+
4a4d8108
AM
17330+#define src_dentry sd[AuSRC].dentry
17331+#define src_dir sd[AuSRC].dir
17332+#define src_inode sd[AuSRC].inode
17333+#define src_h_dentry sd[AuSRC].h_dentry
17334+#define src_parent sd[AuSRC].parent
17335+#define src_h_parent sd[AuSRC].h_parent
17336+#define src_wh_dentry sd[AuSRC].wh_dentry
17337+#define src_hdir sd[AuSRC].hdir
17338+#define src_h_dir sd[AuSRC].hdir->hi_inode
17339+#define src_dt sd[AuSRC].dt
17340+#define src_bstart sd[AuSRC].bstart
1facf9fc 17341+
4a4d8108
AM
17342+#define dst_dentry sd[AuDST].dentry
17343+#define dst_dir sd[AuDST].dir
17344+#define dst_inode sd[AuDST].inode
17345+#define dst_h_dentry sd[AuDST].h_dentry
17346+#define dst_parent sd[AuDST].parent
17347+#define dst_h_parent sd[AuDST].h_parent
17348+#define dst_wh_dentry sd[AuDST].wh_dentry
17349+#define dst_hdir sd[AuDST].hdir
17350+#define dst_h_dir sd[AuDST].hdir->hi_inode
17351+#define dst_dt sd[AuDST].dt
17352+#define dst_bstart sd[AuDST].bstart
17353+
17354+ struct dentry *h_trap;
17355+ struct au_branch *br;
17356+ struct au_hinode *src_hinode;
17357+ struct path h_path;
17358+ struct au_nhash whlist;
027c5e7a 17359+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 17360+
1308ab2a 17361+ unsigned int flags;
1facf9fc 17362+
4a4d8108
AM
17363+ struct au_whtmp_rmdir *thargs;
17364+ struct dentry *h_dst;
17365+};
1308ab2a 17366+
4a4d8108 17367+/* ---------------------------------------------------------------------- */
1308ab2a 17368+
4a4d8108
AM
17369+/*
17370+ * functions for reverting.
17371+ * when an error happened in a single rename systemcall, we should revert
17372+ * everything as if nothing happened.
17373+ * we don't need to revert the copied-up/down the parent dir since they are
17374+ * harmless.
17375+ */
1facf9fc 17376+
4a4d8108
AM
17377+#define RevertFailure(fmt, ...) do { \
17378+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
17379+ ##__VA_ARGS__, err, rerr); \
17380+ err = -EIO; \
17381+} while (0)
1facf9fc 17382+
4a4d8108 17383+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 17384+{
4a4d8108 17385+ int rerr;
1facf9fc 17386+
4a4d8108
AM
17387+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17388+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
17389+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 17390+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108
AM
17391+ if (rerr)
17392+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
17393+}
1facf9fc 17394+
4a4d8108
AM
17395+static void au_ren_rev_rename(int err, struct au_ren_args *a)
17396+{
17397+ int rerr;
1facf9fc 17398+
4a4d8108
AM
17399+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
17400+ a->br, /*nd*/NULL);
17401+ rerr = PTR_ERR(a->h_path.dentry);
17402+ if (IS_ERR(a->h_path.dentry)) {
17403+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
17404+ return;
1facf9fc 17405+ }
17406+
4a4d8108
AM
17407+ rerr = vfsub_rename(a->dst_h_dir,
17408+ au_h_dptr(a->src_dentry, a->btgt),
17409+ a->src_h_dir, &a->h_path);
17410+ d_drop(a->h_path.dentry);
17411+ dput(a->h_path.dentry);
17412+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
17413+ if (rerr)
17414+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
1facf9fc 17415+}
17416+
4a4d8108 17417+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
1facf9fc 17418+{
4a4d8108 17419+ int rerr;
1facf9fc 17420+
4a4d8108
AM
17421+ a->h_path.dentry = a->dst_h_dentry;
17422+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
17423+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
17424+ au_set_dbstart(a->src_dentry, a->src_bstart);
17425+ if (rerr)
17426+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
1facf9fc 17427+}
17428+
4a4d8108 17429+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 17430+{
4a4d8108 17431+ int rerr;
dece6358 17432+
4a4d8108
AM
17433+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
17434+ a->br, /*nd*/NULL);
17435+ rerr = PTR_ERR(a->h_path.dentry);
17436+ if (IS_ERR(a->h_path.dentry)) {
17437+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
17438+ return;
17439+ }
17440+ if (a->h_path.dentry->d_inode) {
17441+ d_drop(a->h_path.dentry);
17442+ dput(a->h_path.dentry);
17443+ return;
dece6358
AM
17444+ }
17445+
4a4d8108
AM
17446+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
17447+ d_drop(a->h_path.dentry);
17448+ dput(a->h_path.dentry);
17449+ if (!rerr)
17450+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
17451+ else
17452+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
17453+}
1308ab2a 17454+
4a4d8108
AM
17455+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
17456+{
17457+ int rerr;
1308ab2a 17458+
4a4d8108
AM
17459+ a->h_path.dentry = a->src_wh_dentry;
17460+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 17461+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108
AM
17462+ if (rerr)
17463+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
17464+}
4a4d8108 17465+#undef RevertFailure
1facf9fc 17466+
1308ab2a 17467+/* ---------------------------------------------------------------------- */
17468+
4a4d8108
AM
17469+/*
17470+ * when we have to copyup the renaming entry, do it with the rename-target name
17471+ * in order to minimize the cost (the later actual rename is unnecessary).
17472+ * otherwise rename it on the target branch.
17473+ */
17474+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 17475+{
dece6358 17476+ int err;
4a4d8108 17477+ struct dentry *d;
1facf9fc 17478+
4a4d8108
AM
17479+ d = a->src_dentry;
17480+ if (au_dbstart(d) == a->btgt) {
17481+ a->h_path.dentry = a->dst_h_dentry;
17482+ if (au_ftest_ren(a->flags, DIROPQ)
17483+ && au_dbdiropq(d) == a->btgt)
17484+ au_fclr_ren(a->flags, DIROPQ);
17485+ AuDebugOn(au_dbstart(d) != a->btgt);
17486+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
17487+ a->dst_h_dir, &a->h_path);
17488+ } else {
17489+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17490+ struct file *h_file;
1308ab2a 17491+
4a4d8108
AM
17492+ au_fset_ren(a->flags, CPUP);
17493+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17494+ au_set_dbstart(d, a->btgt);
17495+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
17496+ h_file = au_h_open_pre(d, a->src_bstart);
17497+ if (IS_ERR(h_file)) {
17498+ err = PTR_ERR(h_file);
17499+ h_file = NULL;
17500+ } else
17501+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
17502+ !AuCpup_DTIME, a->dst_parent);
17503+ mutex_unlock(h_mtx);
17504+ au_h_open_post(d, a->src_bstart, h_file);
17505+ if (!err) {
17506+ d = a->dst_dentry;
17507+ au_set_h_dptr(d, a->btgt, NULL);
17508+ au_update_dbstart(d);
17509+ } else {
17510+ au_set_h_dptr(d, a->btgt, NULL);
17511+ au_set_dbstart(d, a->src_bstart);
17512+ }
1308ab2a 17513+ }
027c5e7a
AM
17514+ if (!err && a->h_dst)
17515+ /* it will be set to dinfo later */
17516+ dget(a->h_dst);
1facf9fc 17517+
dece6358
AM
17518+ return err;
17519+}
1facf9fc 17520+
4a4d8108
AM
17521+/* cf. aufs_rmdir() */
17522+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 17523+{
4a4d8108
AM
17524+ int err;
17525+ struct inode *dir;
1facf9fc 17526+
4a4d8108
AM
17527+ dir = a->dst_dir;
17528+ SiMustAnyLock(dir->i_sb);
17529+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
17530+ au_sbi(dir->i_sb)->si_dirwh)
17531+ || au_test_fs_remote(a->h_dst->d_sb)) {
17532+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
17533+ if (unlikely(err))
17534+ pr_warning("failed removing whtmp dir %.*s (%d), "
17535+ "ignored.\n", AuDLNPair(a->h_dst), err);
17536+ } else {
17537+ au_nhash_wh_free(&a->thargs->whlist);
17538+ a->thargs->whlist = a->whlist;
17539+ a->whlist.nh_num = 0;
17540+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
17541+ dput(a->h_dst);
17542+ a->thargs = NULL;
17543+ }
17544+
17545+ return 0;
1308ab2a 17546+}
1facf9fc 17547+
4a4d8108
AM
17548+/* make it 'opaque' dir. */
17549+static int au_ren_diropq(struct au_ren_args *a)
17550+{
17551+ int err;
17552+ struct dentry *diropq;
1facf9fc 17553+
4a4d8108 17554+ err = 0;
027c5e7a 17555+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
17556+ a->src_hinode = au_hi(a->src_inode, a->btgt);
17557+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17558+ diropq = au_diropq_create(a->src_dentry, a->btgt);
17559+ au_hn_imtx_unlock(a->src_hinode);
17560+ if (IS_ERR(diropq))
17561+ err = PTR_ERR(diropq);
17562+ dput(diropq);
1facf9fc 17563+
4a4d8108
AM
17564+ return err;
17565+}
1facf9fc 17566+
4a4d8108
AM
17567+static int do_rename(struct au_ren_args *a)
17568+{
17569+ int err;
17570+ struct dentry *d, *h_d;
1facf9fc 17571+
4a4d8108
AM
17572+ /* prepare workqueue args for asynchronous rmdir */
17573+ h_d = a->dst_h_dentry;
17574+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
17575+ err = -ENOMEM;
17576+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
17577+ if (unlikely(!a->thargs))
17578+ goto out;
17579+ a->h_dst = dget(h_d);
17580+ }
1facf9fc 17581+
4a4d8108
AM
17582+ /* create whiteout for src_dentry */
17583+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
17584+ a->src_bwh = au_dbwh(a->src_dentry);
17585+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
17586+ a->src_wh_dentry
17587+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
17588+ err = PTR_ERR(a->src_wh_dentry);
17589+ if (IS_ERR(a->src_wh_dentry))
17590+ goto out_thargs;
17591+ }
1facf9fc 17592+
4a4d8108
AM
17593+ /* lookup whiteout for dentry */
17594+ if (au_ftest_ren(a->flags, WHDST)) {
17595+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
17596+ a->br);
17597+ err = PTR_ERR(h_d);
17598+ if (IS_ERR(h_d))
17599+ goto out_whsrc;
17600+ if (!h_d->d_inode)
17601+ dput(h_d);
17602+ else
17603+ a->dst_wh_dentry = h_d;
17604+ }
1facf9fc 17605+
4a4d8108
AM
17606+ /* rename dentry to tmpwh */
17607+ if (a->thargs) {
17608+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
17609+ if (unlikely(err))
17610+ goto out_whdst;
dece6358 17611+
4a4d8108
AM
17612+ d = a->dst_dentry;
17613+ au_set_h_dptr(d, a->btgt, NULL);
17614+ err = au_lkup_neg(d, a->btgt);
17615+ if (unlikely(err))
17616+ goto out_whtmp;
17617+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
17618+ }
1facf9fc 17619+
4a4d8108
AM
17620+ /* cpup src */
17621+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
17622+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17623+ struct file *h_file;
1facf9fc 17624+
4a4d8108
AM
17625+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17626+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
17627+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
17628+ if (IS_ERR(h_file)) {
17629+ err = PTR_ERR(h_file);
17630+ h_file = NULL;
17631+ } else
17632+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
17633+ !AuCpup_DTIME);
17634+ mutex_unlock(h_mtx);
17635+ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
17636+ if (unlikely(err))
17637+ goto out_whtmp;
17638+ }
1facf9fc 17639+
4a4d8108
AM
17640+ /* rename by vfs_rename or cpup */
17641+ d = a->dst_dentry;
17642+ if (au_ftest_ren(a->flags, ISDIR)
17643+ && (a->dst_wh_dentry
17644+ || au_dbdiropq(d) == a->btgt
17645+ /* hide the lower to keep xino */
17646+ || a->btgt < au_dbend(d)
17647+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
17648+ au_fset_ren(a->flags, DIROPQ);
17649+ err = au_ren_or_cpup(a);
17650+ if (unlikely(err))
17651+ /* leave the copied-up one */
17652+ goto out_whtmp;
1308ab2a 17653+
4a4d8108
AM
17654+ /* make dir opaque */
17655+ if (au_ftest_ren(a->flags, DIROPQ)) {
17656+ err = au_ren_diropq(a);
17657+ if (unlikely(err))
17658+ goto out_rename;
17659+ }
1308ab2a 17660+
4a4d8108
AM
17661+ /* update target timestamps */
17662+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
17663+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
17664+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
17665+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
1facf9fc 17666+
4a4d8108
AM
17667+ /* remove whiteout for dentry */
17668+ if (a->dst_wh_dentry) {
17669+ a->h_path.dentry = a->dst_wh_dentry;
17670+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
17671+ a->dst_dentry);
17672+ if (unlikely(err))
17673+ goto out_diropq;
17674+ }
1facf9fc 17675+
4a4d8108
AM
17676+ /* remove whtmp */
17677+ if (a->thargs)
17678+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 17679+
4a4d8108
AM
17680+ err = 0;
17681+ goto out_success;
17682+
4f0767ce 17683+out_diropq:
4a4d8108
AM
17684+ if (au_ftest_ren(a->flags, DIROPQ))
17685+ au_ren_rev_diropq(err, a);
4f0767ce 17686+out_rename:
4a4d8108
AM
17687+ if (!au_ftest_ren(a->flags, CPUP))
17688+ au_ren_rev_rename(err, a);
17689+ else
17690+ au_ren_rev_cpup(err, a);
027c5e7a 17691+ dput(a->h_dst);
4f0767ce 17692+out_whtmp:
4a4d8108
AM
17693+ if (a->thargs)
17694+ au_ren_rev_whtmp(err, a);
4f0767ce 17695+out_whdst:
4a4d8108
AM
17696+ dput(a->dst_wh_dentry);
17697+ a->dst_wh_dentry = NULL;
4f0767ce 17698+out_whsrc:
4a4d8108
AM
17699+ if (a->src_wh_dentry)
17700+ au_ren_rev_whsrc(err, a);
4f0767ce 17701+out_success:
4a4d8108
AM
17702+ dput(a->src_wh_dentry);
17703+ dput(a->dst_wh_dentry);
4f0767ce 17704+out_thargs:
4a4d8108
AM
17705+ if (a->thargs) {
17706+ dput(a->h_dst);
17707+ au_whtmp_rmdir_free(a->thargs);
17708+ a->thargs = NULL;
17709+ }
4f0767ce 17710+out:
4a4d8108 17711+ return err;
dece6358 17712+}
1facf9fc 17713+
1308ab2a 17714+/* ---------------------------------------------------------------------- */
1facf9fc 17715+
4a4d8108
AM
17716+/*
17717+ * test if @dentry dir can be rename destination or not.
17718+ * success means, it is a logically empty dir.
17719+ */
17720+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 17721+{
4a4d8108 17722+ return au_test_empty(dentry, whlist);
1308ab2a 17723+}
1facf9fc 17724+
4a4d8108
AM
17725+/*
17726+ * test if @dentry dir can be rename source or not.
17727+ * if it can, return 0 and @children is filled.
17728+ * success means,
17729+ * - it is a logically empty dir.
17730+ * - or, it exists on writable branch and has no children including whiteouts
17731+ * on the lower branch.
17732+ */
17733+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
17734+{
17735+ int err;
17736+ unsigned int rdhash;
17737+ aufs_bindex_t bstart;
1facf9fc 17738+
4a4d8108
AM
17739+ bstart = au_dbstart(dentry);
17740+ if (bstart != btgt) {
17741+ struct au_nhash whlist;
dece6358 17742+
4a4d8108
AM
17743+ SiMustAnyLock(dentry->d_sb);
17744+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
17745+ if (!rdhash)
17746+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
17747+ dentry));
17748+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
17749+ if (unlikely(err))
17750+ goto out;
17751+ err = au_test_empty(dentry, &whlist);
17752+ au_nhash_wh_free(&whlist);
17753+ goto out;
17754+ }
dece6358 17755+
4a4d8108
AM
17756+ if (bstart == au_dbtaildir(dentry))
17757+ return 0; /* success */
dece6358 17758+
4a4d8108 17759+ err = au_test_empty_lower(dentry);
1facf9fc 17760+
4f0767ce 17761+out:
4a4d8108
AM
17762+ if (err == -ENOTEMPTY) {
17763+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
17764+ " is not supported\n");
17765+ err = -EXDEV;
17766+ }
17767+ return err;
17768+}
1308ab2a 17769+
4a4d8108
AM
17770+/* side effect: sets whlist and h_dentry */
17771+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 17772+{
4a4d8108
AM
17773+ int err;
17774+ unsigned int rdhash;
17775+ struct dentry *d;
1facf9fc 17776+
4a4d8108
AM
17777+ d = a->dst_dentry;
17778+ SiMustAnyLock(d->d_sb);
1facf9fc 17779+
4a4d8108
AM
17780+ err = 0;
17781+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
17782+ rdhash = au_sbi(d->d_sb)->si_rdhash;
17783+ if (!rdhash)
17784+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
17785+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
17786+ if (unlikely(err))
17787+ goto out;
1308ab2a 17788+
4a4d8108
AM
17789+ au_set_dbstart(d, a->dst_bstart);
17790+ err = may_rename_dstdir(d, &a->whlist);
17791+ au_set_dbstart(d, a->btgt);
17792+ }
17793+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
17794+ if (unlikely(err))
17795+ goto out;
17796+
17797+ d = a->src_dentry;
17798+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
17799+ if (au_ftest_ren(a->flags, ISDIR)) {
17800+ err = may_rename_srcdir(d, a->btgt);
17801+ if (unlikely(err)) {
17802+ au_nhash_wh_free(&a->whlist);
17803+ a->whlist.nh_num = 0;
17804+ }
17805+ }
4f0767ce 17806+out:
4a4d8108 17807+ return err;
1facf9fc 17808+}
17809+
4a4d8108 17810+/* ---------------------------------------------------------------------- */
1facf9fc 17811+
4a4d8108
AM
17812+/*
17813+ * simple tests for rename.
17814+ * following the checks in vfs, plus the parent-child relationship.
17815+ */
17816+static int au_may_ren(struct au_ren_args *a)
17817+{
17818+ int err, isdir;
17819+ struct inode *h_inode;
1facf9fc 17820+
4a4d8108
AM
17821+ if (a->src_bstart == a->btgt) {
17822+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
17823+ au_ftest_ren(a->flags, ISDIR));
17824+ if (unlikely(err))
17825+ goto out;
17826+ err = -EINVAL;
17827+ if (unlikely(a->src_h_dentry == a->h_trap))
17828+ goto out;
17829+ }
1facf9fc 17830+
4a4d8108
AM
17831+ err = 0;
17832+ if (a->dst_bstart != a->btgt)
17833+ goto out;
1facf9fc 17834+
027c5e7a
AM
17835+ err = -ENOTEMPTY;
17836+ if (unlikely(a->dst_h_dentry == a->h_trap))
17837+ goto out;
17838+
4a4d8108
AM
17839+ err = -EIO;
17840+ h_inode = a->dst_h_dentry->d_inode;
17841+ isdir = !!au_ftest_ren(a->flags, ISDIR);
17842+ if (!a->dst_dentry->d_inode) {
17843+ if (unlikely(h_inode))
17844+ goto out;
17845+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
17846+ isdir);
17847+ } else {
17848+ if (unlikely(!h_inode || !h_inode->i_nlink))
17849+ goto out;
17850+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
17851+ isdir);
17852+ if (unlikely(err))
17853+ goto out;
4a4d8108 17854+ }
1facf9fc 17855+
4f0767ce 17856+out:
4a4d8108
AM
17857+ if (unlikely(err == -ENOENT || err == -EEXIST))
17858+ err = -EIO;
17859+ AuTraceErr(err);
17860+ return err;
17861+}
1facf9fc 17862+
1308ab2a 17863+/* ---------------------------------------------------------------------- */
1facf9fc 17864+
4a4d8108
AM
17865+/*
17866+ * locking order
17867+ * (VFS)
17868+ * - src_dir and dir by lock_rename()
17869+ * - inode if exists
17870+ * (aufs)
17871+ * - lock all
17872+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
17873+ * + si_read_lock
17874+ * + di_write_lock2_child()
17875+ * + di_write_lock_child()
17876+ * + ii_write_lock_child()
17877+ * + di_write_lock_child2()
17878+ * + ii_write_lock_child2()
17879+ * + src_parent and parent
17880+ * + di_write_lock_parent()
17881+ * + ii_write_lock_parent()
17882+ * + di_write_lock_parent2()
17883+ * + ii_write_lock_parent2()
17884+ * + lower src_dir and dir by vfsub_lock_rename()
17885+ * + verify every relationship between child and parent. if any
17886+ * of them failed, unlock all and return -EBUSY.
17887+ */
17888+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 17889+{
4a4d8108
AM
17890+ struct super_block *sb;
17891+
17892+ sb = a->dst_dentry->d_sb;
17893+ if (au_ftest_ren(a->flags, MNT_WRITE))
17894+ mnt_drop_write(a->br->br_mnt);
17895+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
17896+ a->dst_h_parent, a->dst_hdir);
1308ab2a 17897+}
17898+
4a4d8108 17899+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 17900+{
4a4d8108
AM
17901+ int err;
17902+ unsigned int udba;
1308ab2a 17903+
4a4d8108
AM
17904+ err = 0;
17905+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
17906+ a->src_hdir = au_hi(a->src_dir, a->btgt);
17907+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
17908+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
17909+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
17910+ a->dst_h_parent, a->dst_hdir);
17911+ udba = au_opt_udba(a->src_dentry->d_sb);
17912+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
17913+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
17914+ err = au_busy_or_stale();
17915+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
17916+ err = au_h_verify(a->src_h_dentry, udba,
17917+ a->src_h_parent->d_inode, a->src_h_parent,
17918+ a->br);
17919+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
17920+ err = au_h_verify(a->dst_h_dentry, udba,
17921+ a->dst_h_parent->d_inode, a->dst_h_parent,
17922+ a->br);
17923+ if (!err) {
17924+ err = mnt_want_write(a->br->br_mnt);
17925+ if (unlikely(err))
17926+ goto out_unlock;
17927+ au_fset_ren(a->flags, MNT_WRITE);
17928+ goto out; /* success */
17929+ }
17930+
17931+ err = au_busy_or_stale();
17932+
4f0767ce 17933+out_unlock:
4a4d8108 17934+ au_ren_unlock(a);
4f0767ce 17935+out:
4a4d8108 17936+ return err;
1facf9fc 17937+}
17938+
17939+/* ---------------------------------------------------------------------- */
17940+
4a4d8108 17941+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 17942+{
4a4d8108 17943+ struct inode *dir;
dece6358 17944+
4a4d8108
AM
17945+ dir = a->dst_dir;
17946+ dir->i_version++;
17947+ if (au_ftest_ren(a->flags, ISDIR)) {
17948+ /* is this updating defined in POSIX? */
17949+ au_cpup_attr_timesizes(a->src_inode);
17950+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 17951+ }
027c5e7a 17952+
4a4d8108
AM
17953+ if (au_ibstart(dir) == a->btgt)
17954+ au_cpup_attr_timesizes(dir);
dece6358 17955+
4a4d8108
AM
17956+ if (au_ftest_ren(a->flags, ISSAMEDIR))
17957+ return;
dece6358 17958+
4a4d8108
AM
17959+ dir = a->src_dir;
17960+ dir->i_version++;
17961+ if (au_ftest_ren(a->flags, ISDIR))
17962+ au_cpup_attr_nlink(dir, /*force*/1);
17963+ if (au_ibstart(dir) == a->btgt)
17964+ au_cpup_attr_timesizes(dir);
1facf9fc 17965+}
17966+
4a4d8108 17967+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 17968+{
4a4d8108
AM
17969+ aufs_bindex_t bend, bindex;
17970+ struct dentry *d, *h_d;
17971+ struct inode *i, *h_i;
17972+ struct super_block *sb;
dece6358 17973+
027c5e7a
AM
17974+ d = a->dst_dentry;
17975+ d_drop(d);
17976+ if (a->h_dst)
17977+ /* already dget-ed by au_ren_or_cpup() */
17978+ au_set_h_dptr(d, a->btgt, a->h_dst);
17979+
17980+ i = a->dst_inode;
17981+ if (i) {
17982+ if (!au_ftest_ren(a->flags, ISDIR))
17983+ vfsub_drop_nlink(i);
17984+ else {
17985+ vfsub_dead_dir(i);
17986+ au_cpup_attr_timesizes(i);
17987+ }
17988+ au_update_dbrange(d, /*do_put_zero*/1);
17989+ } else {
17990+ bend = a->btgt;
17991+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
17992+ au_set_h_dptr(d, bindex, NULL);
17993+ bend = au_dbend(d);
17994+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
17995+ au_set_h_dptr(d, bindex, NULL);
17996+ au_update_dbrange(d, /*do_put_zero*/0);
17997+ }
17998+
4a4d8108
AM
17999+ d = a->src_dentry;
18000+ au_set_dbwh(d, -1);
18001+ bend = au_dbend(d);
18002+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
18003+ h_d = au_h_dptr(d, bindex);
18004+ if (h_d)
18005+ au_set_h_dptr(d, bindex, NULL);
18006+ }
18007+ au_set_dbend(d, a->btgt);
18008+
18009+ sb = d->d_sb;
18010+ i = a->src_inode;
18011+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
18012+ return; /* success */
18013+
18014+ bend = au_ibend(i);
18015+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
18016+ h_i = au_h_iptr(i, bindex);
18017+ if (h_i) {
18018+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
18019+ /* ignore this error */
18020+ au_set_h_iptr(i, bindex, NULL, 0);
18021+ }
18022+ }
18023+ au_set_ibend(i, a->btgt);
1308ab2a 18024+}
dece6358 18025+
4a4d8108
AM
18026+/* ---------------------------------------------------------------------- */
18027+
18028+/* mainly for link(2) and rename(2) */
18029+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 18030+{
4a4d8108
AM
18031+ aufs_bindex_t bdiropq, bwh;
18032+ struct dentry *parent;
18033+ struct au_branch *br;
18034+
18035+ parent = dentry->d_parent;
18036+ IMustLock(parent->d_inode); /* dir is locked */
18037+
18038+ bdiropq = au_dbdiropq(parent);
18039+ bwh = au_dbwh(dentry);
18040+ br = au_sbr(dentry->d_sb, btgt);
18041+ if (au_br_rdonly(br)
18042+ || (0 <= bdiropq && bdiropq < btgt)
18043+ || (0 <= bwh && bwh < btgt))
18044+ btgt = -1;
18045+
18046+ AuDbg("btgt %d\n", btgt);
18047+ return btgt;
1facf9fc 18048+}
18049+
4a4d8108
AM
18050+/* sets src_bstart, dst_bstart and btgt */
18051+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 18052+{
4a4d8108
AM
18053+ int err;
18054+ struct au_wr_dir_args wr_dir_args = {
18055+ /* .force_btgt = -1, */
18056+ .flags = AuWrDir_ADD_ENTRY
18057+ };
dece6358 18058+
4a4d8108
AM
18059+ a->src_bstart = au_dbstart(a->src_dentry);
18060+ a->dst_bstart = au_dbstart(a->dst_dentry);
18061+ if (au_ftest_ren(a->flags, ISDIR))
18062+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
18063+ wr_dir_args.force_btgt = a->src_bstart;
18064+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
18065+ wr_dir_args.force_btgt = a->dst_bstart;
18066+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
18067+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
18068+ a->btgt = err;
dece6358 18069+
4a4d8108 18070+ return err;
1facf9fc 18071+}
18072+
4a4d8108 18073+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 18074+{
4a4d8108
AM
18075+ a->h_path.dentry = a->src_h_parent;
18076+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
18077+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
18078+ a->h_path.dentry = a->dst_h_parent;
18079+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
18080+ }
1facf9fc 18081+
4a4d8108
AM
18082+ au_fclr_ren(a->flags, DT_DSTDIR);
18083+ if (!au_ftest_ren(a->flags, ISDIR))
18084+ return;
dece6358 18085+
4a4d8108
AM
18086+ a->h_path.dentry = a->src_h_dentry;
18087+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
18088+ if (a->dst_h_dentry->d_inode) {
18089+ au_fset_ren(a->flags, DT_DSTDIR);
18090+ a->h_path.dentry = a->dst_h_dentry;
18091+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
18092+ }
1308ab2a 18093+}
dece6358 18094+
4a4d8108 18095+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 18096+{
4a4d8108
AM
18097+ struct dentry *h_d;
18098+ struct mutex *h_mtx;
18099+
18100+ au_dtime_revert(a->src_dt + AuPARENT);
18101+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
18102+ au_dtime_revert(a->dst_dt + AuPARENT);
18103+
18104+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
18105+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
18106+ h_mtx = &h_d->d_inode->i_mutex;
18107+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18108+ au_dtime_revert(a->src_dt + AuCHILD);
18109+ mutex_unlock(h_mtx);
18110+
18111+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
18112+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
18113+ h_mtx = &h_d->d_inode->i_mutex;
18114+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18115+ au_dtime_revert(a->dst_dt + AuCHILD);
18116+ mutex_unlock(h_mtx);
1facf9fc 18117+ }
18118+ }
18119+}
18120+
4a4d8108
AM
18121+/* ---------------------------------------------------------------------- */
18122+
18123+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
18124+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 18125+{
e49829fe 18126+ int err, flags;
4a4d8108
AM
18127+ /* reduce stack space */
18128+ struct au_ren_args *a;
18129+
18130+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
18131+ IMustLock(_src_dir);
18132+ IMustLock(_dst_dir);
18133+
18134+ err = -ENOMEM;
18135+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
18136+ a = kzalloc(sizeof(*a), GFP_NOFS);
18137+ if (unlikely(!a))
18138+ goto out;
18139+
18140+ a->src_dir = _src_dir;
18141+ a->src_dentry = _src_dentry;
18142+ a->src_inode = a->src_dentry->d_inode;
18143+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
18144+ a->dst_dir = _dst_dir;
18145+ a->dst_dentry = _dst_dentry;
18146+ a->dst_inode = a->dst_dentry->d_inode;
18147+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
18148+ if (a->dst_inode) {
18149+ IMustLock(a->dst_inode);
18150+ au_igrab(a->dst_inode);
1facf9fc 18151+ }
1facf9fc 18152+
4a4d8108 18153+ err = -ENOTDIR;
027c5e7a 18154+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
4a4d8108
AM
18155+ if (S_ISDIR(a->src_inode->i_mode)) {
18156+ au_fset_ren(a->flags, ISDIR);
18157+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
18158+ goto out_free;
e49829fe
JR
18159+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18160+ AuLock_DIR | flags);
4a4d8108 18161+ } else
e49829fe
JR
18162+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18163+ flags);
18164+ if (unlikely(err))
18165+ goto out_free;
1facf9fc 18166+
027c5e7a
AM
18167+ err = au_d_hashed_positive(a->src_dentry);
18168+ if (unlikely(err))
18169+ goto out_unlock;
18170+ err = -ENOENT;
18171+ if (a->dst_inode) {
18172+ /*
18173+ * If it is a dir, VFS unhash dst_dentry before this
18174+ * function. It means we cannot rely upon d_unhashed().
18175+ */
18176+ if (unlikely(!a->dst_inode->i_nlink))
18177+ goto out_unlock;
18178+ if (!S_ISDIR(a->dst_inode->i_mode)) {
18179+ err = au_d_hashed_positive(a->dst_dentry);
18180+ if (unlikely(err))
18181+ goto out_unlock;
18182+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
18183+ goto out_unlock;
18184+ } else if (unlikely(d_unhashed(a->dst_dentry)))
18185+ goto out_unlock;
18186+
4a4d8108
AM
18187+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
18188+ di_write_lock_parent(a->dst_parent);
1facf9fc 18189+
4a4d8108
AM
18190+ /* which branch we process */
18191+ err = au_ren_wbr(a);
18192+ if (unlikely(err < 0))
027c5e7a 18193+ goto out_parent;
4a4d8108
AM
18194+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
18195+ a->h_path.mnt = a->br->br_mnt;
1facf9fc 18196+
4a4d8108
AM
18197+ /* are they available to be renamed */
18198+ err = au_ren_may_dir(a);
18199+ if (unlikely(err))
18200+ goto out_children;
1facf9fc 18201+
4a4d8108
AM
18202+ /* prepare the writable parent dir on the same branch */
18203+ if (a->dst_bstart == a->btgt) {
18204+ au_fset_ren(a->flags, WHDST);
18205+ } else {
18206+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
18207+ if (unlikely(err))
18208+ goto out_children;
18209+ }
1facf9fc 18210+
4a4d8108
AM
18211+ if (a->src_dir != a->dst_dir) {
18212+ /*
18213+ * this temporary unlock is safe,
18214+ * because both dir->i_mutex are locked.
18215+ */
18216+ di_write_unlock(a->dst_parent);
18217+ di_write_lock_parent(a->src_parent);
18218+ err = au_wr_dir_need_wh(a->src_dentry,
18219+ au_ftest_ren(a->flags, ISDIR),
18220+ &a->btgt);
18221+ di_write_unlock(a->src_parent);
18222+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
18223+ au_fclr_ren(a->flags, ISSAMEDIR);
18224+ } else
18225+ err = au_wr_dir_need_wh(a->src_dentry,
18226+ au_ftest_ren(a->flags, ISDIR),
18227+ &a->btgt);
18228+ if (unlikely(err < 0))
18229+ goto out_children;
18230+ if (err)
18231+ au_fset_ren(a->flags, WHSRC);
1facf9fc 18232+
4a4d8108
AM
18233+ /* lock them all */
18234+ err = au_ren_lock(a);
18235+ if (unlikely(err))
18236+ goto out_children;
1facf9fc 18237+
4a4d8108
AM
18238+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
18239+ err = au_may_ren(a);
18240+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
18241+ err = -ENAMETOOLONG;
18242+ if (unlikely(err))
18243+ goto out_hdir;
1facf9fc 18244+
4a4d8108
AM
18245+ /* store timestamps to be revertible */
18246+ au_ren_dt(a);
1facf9fc 18247+
4a4d8108
AM
18248+ /* here we go */
18249+ err = do_rename(a);
18250+ if (unlikely(err))
18251+ goto out_dt;
18252+
18253+ /* update dir attributes */
18254+ au_ren_refresh_dir(a);
18255+
18256+ /* dput/iput all lower dentries */
18257+ au_ren_refresh(a);
18258+
18259+ goto out_hdir; /* success */
18260+
4f0767ce 18261+out_dt:
4a4d8108 18262+ au_ren_rev_dt(err, a);
4f0767ce 18263+out_hdir:
4a4d8108 18264+ au_ren_unlock(a);
4f0767ce 18265+out_children:
4a4d8108 18266+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
18267+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
18268+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
18269+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
18270+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 18271+ }
027c5e7a 18272+out_parent:
4a4d8108
AM
18273+ if (!err)
18274+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
18275+ else {
18276+ au_update_dbstart(a->dst_dentry);
18277+ if (!a->dst_inode)
18278+ d_drop(a->dst_dentry);
18279+ }
4a4d8108
AM
18280+ if (au_ftest_ren(a->flags, ISSAMEDIR))
18281+ di_write_unlock(a->dst_parent);
18282+ else
18283+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 18284+out_unlock:
4a4d8108 18285+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 18286+out_free:
4a4d8108
AM
18287+ iput(a->dst_inode);
18288+ if (a->thargs)
18289+ au_whtmp_rmdir_free(a->thargs);
18290+ kfree(a);
4f0767ce 18291+out:
4a4d8108
AM
18292+ AuTraceErr(err);
18293+ return err;
1308ab2a 18294+}
7f207e10
AM
18295diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
18296--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18297+++ linux/fs/aufs/Kconfig 2012-02-13 21:54:56.966438287 +0100
2cbb1c4b 18298@@ -0,0 +1,203 @@
4a4d8108
AM
18299+config AUFS_FS
18300+ tristate "Aufs (Advanced multi layered unification filesystem) support"
18301+ depends on EXPERIMENTAL
18302+ help
18303+ Aufs is a stackable unification filesystem such as Unionfs,
18304+ which unifies several directories and provides a merged single
18305+ directory.
18306+ In the early days, aufs was entirely re-designed and
18307+ re-implemented Unionfs Version 1.x series. Introducing many
18308+ original ideas, approaches and improvements, it becomes totally
18309+ different from Unionfs while keeping the basic features.
1facf9fc 18310+
4a4d8108
AM
18311+if AUFS_FS
18312+choice
18313+ prompt "Maximum number of branches"
18314+ default AUFS_BRANCH_MAX_127
18315+ help
18316+ Specifies the maximum number of branches (or member directories)
18317+ in a single aufs. The larger value consumes more system
18318+ resources and has a minor impact to performance.
18319+config AUFS_BRANCH_MAX_127
18320+ bool "127"
18321+ help
18322+ Specifies the maximum number of branches (or member directories)
18323+ in a single aufs. The larger value consumes more system
18324+ resources and has a minor impact to performance.
18325+config AUFS_BRANCH_MAX_511
18326+ bool "511"
18327+ help
18328+ Specifies the maximum number of branches (or member directories)
18329+ in a single aufs. The larger value consumes more system
18330+ resources and has a minor impact to performance.
18331+config AUFS_BRANCH_MAX_1023
18332+ bool "1023"
18333+ help
18334+ Specifies the maximum number of branches (or member directories)
18335+ in a single aufs. The larger value consumes more system
18336+ resources and has a minor impact to performance.
18337+config AUFS_BRANCH_MAX_32767
18338+ bool "32767"
18339+ help
18340+ Specifies the maximum number of branches (or member directories)
18341+ in a single aufs. The larger value consumes more system
18342+ resources and has a minor impact to performance.
18343+endchoice
1facf9fc 18344+
e49829fe
JR
18345+config AUFS_SBILIST
18346+ bool
18347+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
18348+ default y
18349+ help
18350+ Automatic configuration for internal use.
18351+ When aufs supports Magic SysRq or /proc, enabled automatically.
18352+
4a4d8108
AM
18353+config AUFS_HNOTIFY
18354+ bool "Detect direct branch access (bypassing aufs)"
18355+ help
18356+ If you want to modify files on branches directly, eg. bypassing aufs,
18357+ and want aufs to detect the changes of them fully, then enable this
18358+ option and use 'udba=notify' mount option.
7f207e10 18359+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
18360+ It will have a negative impact to the performance.
18361+ See detail in aufs.5.
dece6358 18362+
4a4d8108
AM
18363+choice
18364+ prompt "method" if AUFS_HNOTIFY
18365+ default AUFS_HFSNOTIFY
18366+config AUFS_HFSNOTIFY
18367+ bool "fsnotify"
18368+ select FSNOTIFY
4a4d8108 18369+endchoice
1facf9fc 18370+
4a4d8108
AM
18371+config AUFS_EXPORT
18372+ bool "NFS-exportable aufs"
2cbb1c4b 18373+ depends on EXPORTFS
4a4d8108
AM
18374+ help
18375+ If you want to export your mounted aufs via NFS, then enable this
18376+ option. There are several requirements for this configuration.
18377+ See detail in aufs.5.
1facf9fc 18378+
4a4d8108
AM
18379+config AUFS_INO_T_64
18380+ bool
18381+ depends on AUFS_EXPORT
18382+ depends on 64BIT && !(ALPHA || S390)
18383+ default y
18384+ help
18385+ Automatic configuration for internal use.
18386+ /* typedef unsigned long/int __kernel_ino_t */
18387+ /* alpha and s390x are int */
1facf9fc 18388+
4a4d8108
AM
18389+config AUFS_RDU
18390+ bool "Readdir in userspace"
18391+ help
18392+ Aufs has two methods to provide a merged view for a directory,
18393+ by a user-space library and by kernel-space natively. The latter
18394+ is always enabled but sometimes large and slow.
18395+ If you enable this option, install the library in aufs2-util
18396+ package, and set some environment variables for your readdir(3),
18397+ then the work will be handled in user-space which generally
18398+ shows better performance in most cases.
18399+ See detail in aufs.5.
1facf9fc 18400+
2cbb1c4b
JR
18401+config AUFS_PROC_MAP
18402+ bool "support for /proc/maps and lsof(1)"
18403+ depends on PROC_FS
18404+ help
18405+ When you issue mmap(2) in aufs, it is actually a direct mmap(2)
18406+ call to the file on the branch fs since the file in aufs is
18407+ purely virtual. And the file path printed in /proc/maps (and
18408+ others) will be the path on the branch fs. In most cases, it
18409+ does no harm. But some utilities like lsof(1) may be confused since
18410+ the utility or user may expect the file path in aufs to be
18411+ printed.
18412+ To address this issue, aufs provides a patch which introduces a
18413+ new member called vm_prfile into struct vm_area_struct. The patch
18414+ is meaningless without enabling this configuration since nobody
18415+ sets the new vm_prfile member.
18416+ If you don't apply the patch, then enabling this configuration
18417+ will cause a compile error.
18418+ This approach is fragile since if someone else makes some changes
18419+ around vm_file, then vm_prfile may not work anymore. As a
18420+ workaround for such a case, aufs provides this configuration. If you
18421+ disable it, then lsof(1) may produce incorrect result but the
18422+ problem will be gone even if the aufs patch is applied (I hope).
18423+
4a4d8108
AM
18424+config AUFS_SP_IATTR
18425+ bool "Respect the attributes (mtime/ctime mainly) of special files"
18426+ help
18427+ When you write something to a special file, some attributes of it
18428+ (mtime/ctime mainly) may be updated. Generally such updates are
18429+ less important (actually some device drivers and NFS ignore
18430+ it). But some applications (such as test programs) require
18431+ such updates. If you need these updates, then enable this
18432+ configuration which introduces some overhead.
18433+ Currently this configuration handles FIFO only.
1facf9fc 18434+
4a4d8108
AM
18435+config AUFS_SHWH
18436+ bool "Show whiteouts"
18437+ help
18438+ If you want to make the whiteouts in aufs visible, then enable
18439+ this option and specify 'shwh' mount option. Although it may
18440+ sound like philosophy or something, technically it
18441+ simply shows the names of whiteouts while keeping their behaviour.
1facf9fc 18442+
4a4d8108
AM
18443+config AUFS_BR_RAMFS
18444+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
18445+ help
18446+ If you want to use ramfs as an aufs branch fs, then enable this
18447+ option. Generally tmpfs is recommended.
18448+ Aufs prohibits them from being a branch fs by default, because
18449+ initramfs becomes unusable after switch_root or something
18450+ generally. If you set initramfs as an aufs branch and boot your
18451+ system by switch_root, you will meet a problem easily since the
18452+ files in initramfs may be inaccessible.
18453+ Unless you are going to use ramfs as an aufs branch fs without
18454+ switch_root or something, leave it N.
1facf9fc 18455+
4a4d8108
AM
18456+config AUFS_BR_FUSE
18457+ bool "Fuse fs as an aufs branch"
18458+ depends on FUSE_FS
18459+ select AUFS_POLL
18460+ help
18461+ If you want to use fuse-based userspace filesystem as an aufs
18462+ branch fs, then enable this option.
18463+ It implements the internal poll(2) operation which is
18464+ implemented by fuse only (currently).
1facf9fc 18465+
4a4d8108
AM
18466+config AUFS_POLL
18467+ bool
18468+ help
18469+ Automatic configuration for internal use.
1facf9fc 18470+
4a4d8108
AM
18471+config AUFS_BR_HFSPLUS
18472+ bool "Hfsplus as an aufs branch"
18473+ depends on HFSPLUS_FS
18474+ default y
18475+ help
18476+ If you want to use hfsplus fs as an aufs branch fs, then enable
18477+ this option. This option introduces a small overhead at
18478+ copying-up a file on hfsplus.
1facf9fc 18479+
4a4d8108
AM
18480+config AUFS_BDEV_LOOP
18481+ bool
18482+ depends on BLK_DEV_LOOP
18483+ default y
18484+ help
18485+ Automatic configuration for internal use.
18486+ Convert =[ym] into =y.
1308ab2a 18487+
4a4d8108
AM
18488+config AUFS_DEBUG
18489+ bool "Debug aufs"
18490+ help
18491+ Enable this to compile aufs internal debug code.
18492+ It will have a negative impact to the performance.
18493+
18494+config AUFS_MAGIC_SYSRQ
18495+ bool
18496+ depends on AUFS_DEBUG && MAGIC_SYSRQ
18497+ default y
18498+ help
18499+ Automatic configuration for internal use.
18500+ When aufs supports Magic SysRq, enabled automatically.
18501+endif
7f207e10
AM
18502diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
18503--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18504+++ linux/fs/aufs/loop.c 2012-02-13 21:54:56.969771692 +0100
87a755f4 18505@@ -0,0 +1,133 @@
1facf9fc 18506+/*
f6c5ef8b 18507+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 18508+ *
18509+ * This program, aufs is free software; you can redistribute it and/or modify
18510+ * it under the terms of the GNU General Public License as published by
18511+ * the Free Software Foundation; either version 2 of the License, or
18512+ * (at your option) any later version.
dece6358
AM
18513+ *
18514+ * This program is distributed in the hope that it will be useful,
18515+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18516+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18517+ * GNU General Public License for more details.
18518+ *
18519+ * You should have received a copy of the GNU General Public License
18520+ * along with this program; if not, write to the Free Software
18521+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18522+ */
18523+
18524+/*
18525+ * support for loopback block device as a branch
18526+ */
18527+
18528+#include <linux/loop.h>
18529+#include "aufs.h"
18530+
18531+/*
18532+ * test if two lower dentries have overlapping branches.
18533+ */
b752ccd1 18534+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 18535+{
b752ccd1 18536+ struct super_block *h_sb;
1facf9fc 18537+ struct loop_device *l;
18538+
b752ccd1
AM
18539+ h_sb = h_adding->d_sb;
18540+ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
1facf9fc 18541+ return 0;
18542+
b752ccd1
AM
18543+ l = h_sb->s_bdev->bd_disk->private_data;
18544+ h_adding = l->lo_backing_file->f_dentry;
18545+ /*
18546+ * h_adding can be local NFS.
18547+ * in this case aufs cannot detect the loop.
18548+ */
18549+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 18550+ return 1;
b752ccd1 18551+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 18552+}
18553+
18554+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
18555+int au_test_loopback_kthread(void)
18556+{
b752ccd1
AM
18557+ int ret;
18558+ struct task_struct *tsk = current;
18559+
18560+ ret = 0;
18561+ if (tsk->flags & PF_KTHREAD) {
18562+ const char c = tsk->comm[4];
18563+ ret = ('0' <= c && c <= '9'
18564+ && !strncmp(tsk->comm, "loop", 4));
18565+ }
1facf9fc 18566+
b752ccd1 18567+ return ret;
1facf9fc 18568+}
87a755f4
AM
18569+
18570+/* ---------------------------------------------------------------------- */
18571+
18572+#define au_warn_loopback_step 16
18573+static int au_warn_loopback_nelem = au_warn_loopback_step;
18574+static unsigned long *au_warn_loopback_array;
18575+
18576+void au_warn_loopback(struct super_block *h_sb)
18577+{
18578+ int i, new_nelem;
18579+ unsigned long *a, magic;
18580+ static DEFINE_SPINLOCK(spin);
18581+
18582+ magic = h_sb->s_magic;
18583+ spin_lock(&spin);
18584+ a = au_warn_loopback_array;
18585+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
18586+ if (a[i] == magic) {
18587+ spin_unlock(&spin);
18588+ return;
18589+ }
18590+
18591+ /* h_sb is new to us, print it */
18592+ if (i < au_warn_loopback_nelem) {
18593+ a[i] = magic;
18594+ goto pr;
18595+ }
18596+
18597+ /* expand the array */
18598+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
18599+ a = au_kzrealloc(au_warn_loopback_array,
18600+ au_warn_loopback_nelem * sizeof(unsigned long),
18601+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
18602+ if (a) {
18603+ au_warn_loopback_nelem = new_nelem;
18604+ au_warn_loopback_array = a;
18605+ a[i] = magic;
18606+ goto pr;
18607+ }
18608+
18609+ spin_unlock(&spin);
18610+ AuWarn1("realloc failed, ignored\n");
18611+ return;
18612+
18613+pr:
18614+ spin_unlock(&spin);
18615+ pr_warning("you may want to try another patch for loopback file "
18616+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
18617+}
18618+
18619+int au_loopback_init(void)
18620+{
18621+ int err;
18622+ struct super_block *sb __maybe_unused;
18623+
18624+ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long));
18625+
18626+ err = 0;
18627+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
18628+ sizeof(unsigned long), GFP_NOFS);
18629+ if (unlikely(!au_warn_loopback_array))
18630+ err = -ENOMEM;
18631+
18632+ return err;
18633+}
18634+
18635+void au_loopback_fin(void)
18636+{
18637+ kfree(au_warn_loopback_array);
18638+}
7f207e10
AM
18639diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
18640--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18641+++ linux/fs/aufs/loop.h 2012-02-13 21:54:56.969771692 +0100
87a755f4 18642@@ -0,0 +1,50 @@
1facf9fc 18643+/*
f6c5ef8b 18644+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 18645+ *
18646+ * This program, aufs is free software; you can redistribute it and/or modify
18647+ * it under the terms of the GNU General Public License as published by
18648+ * the Free Software Foundation; either version 2 of the License, or
18649+ * (at your option) any later version.
dece6358
AM
18650+ *
18651+ * This program is distributed in the hope that it will be useful,
18652+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18653+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18654+ * GNU General Public License for more details.
18655+ *
18656+ * You should have received a copy of the GNU General Public License
18657+ * along with this program; if not, write to the Free Software
18658+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18659+ */
18660+
18661+/*
18662+ * support for loopback mount as a branch
18663+ */
18664+
18665+#ifndef __AUFS_LOOP_H__
18666+#define __AUFS_LOOP_H__
18667+
18668+#ifdef __KERNEL__
18669+
dece6358
AM
18670+struct dentry;
18671+struct super_block;
1facf9fc 18672+
18673+#ifdef CONFIG_AUFS_BDEV_LOOP
18674+/* loop.c */
b752ccd1 18675+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 18676+int au_test_loopback_kthread(void);
87a755f4
AM
18677+void au_warn_loopback(struct super_block *h_sb);
18678+
18679+int au_loopback_init(void);
18680+void au_loopback_fin(void);
1facf9fc 18681+#else
4a4d8108 18682+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 18683+ struct dentry *h_adding)
4a4d8108 18684+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
18685+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
18686+
18687+AuStubInt0(au_loopback_init, void)
18688+AuStubVoid(au_loopback_fin, void)
1facf9fc 18689+#endif /* CONFIG_AUFS_BDEV_LOOP */
18690+
18691+#endif /* __KERNEL__ */
18692+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
18693diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
18694--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18695+++ linux/fs/aufs/magic.mk 2012-02-13 21:54:56.969771692 +0100
4a4d8108 18696@@ -0,0 +1,54 @@
1facf9fc 18697+
18698+# defined in ${srctree}/fs/fuse/inode.c
18699+# tristate
18700+ifdef CONFIG_FUSE_FS
18701+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
18702+endif
18703+
18704+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
18705+# tristate
18706+ifdef CONFIG_OCFS2_FS
18707+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
18708+endif
18709+
18710+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
18711+# tristate
18712+ifdef CONFIG_OCFS2_FS_O2CB
18713+ccflags-y += -DDLMFS_MAGIC=0x76a9f425
18714+endif
18715+
1facf9fc 18716+# defined in ${srctree}/fs/cifs/cifsfs.c
18717+# tristate
18718+ifdef CONFIG_CIFS_FS
18719+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
18720+endif
18721+
18722+# defined in ${srctree}/fs/xfs/xfs_sb.h
18723+# tristate
18724+ifdef CONFIG_XFS_FS
18725+ccflags-y += -DXFS_SB_MAGIC=0x58465342
18726+endif
18727+
18728+# defined in ${srctree}/fs/configfs/mount.c
18729+# tristate
18730+ifdef CONFIG_CONFIGFS_FS
18731+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
18732+endif
18733+
18734+# defined in ${srctree}/fs/9p/v9fs.h
18735+# tristate
18736+ifdef CONFIG_9P_FS
18737+ccflags-y += -DV9FS_MAGIC=0x01021997
18738+endif
18739+
18740+# defined in ${srctree}/fs/ubifs/ubifs.h
18741+# tristate
18742+ifdef CONFIG_UBIFS_FS
18743+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
18744+endif
4a4d8108
AM
18745+
18746+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
18747+# tristate
18748+ifdef CONFIG_HFSPLUS_FS
18749+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
18750+endif
7f207e10
AM
18751diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
18752--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
18753+++ linux/fs/aufs/Makefile 2012-02-13 21:54:56.966438287 +0100
18754@@ -0,0 +1,42 @@
4a4d8108
AM
18755+
18756+include ${src}/magic.mk
18757+ifeq (${CONFIG_AUFS_FS},m)
18758+include ${src}/conf.mk
18759+endif
18760+-include ${src}/priv_def.mk
18761+
18762+# cf. include/linux/kernel.h
18763+# enable pr_debug
18764+ccflags-y += -DDEBUG
f6c5ef8b
AM
18765+# sparse requires the full pathname
18766+ifdef M
18767+ccflags-y += -include ${M}/../../include/linux/aufs_type.h
18768+else
18769+ccflags-y += -include ${srctree}/include/linux/aufs_type.h
18770+endif
4a4d8108
AM
18771+
18772+obj-$(CONFIG_AUFS_FS) += aufs.o
18773+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
18774+ wkq.o vfsub.o dcsub.o \
e49829fe 18775+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
18776+ dinfo.o dentry.o \
18777+ dynop.o \
18778+ finfo.o file.o f_op.o \
18779+ dir.o vdir.o \
18780+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
18781+ ioctl.o
18782+
18783+# all are boolean
e49829fe 18784+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
18785+aufs-$(CONFIG_SYSFS) += sysfs.o
18786+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
18787+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
18788+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
18789+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108
AM
18790+aufs-$(CONFIG_AUFS_EXPORT) += export.o
18791+aufs-$(CONFIG_AUFS_POLL) += poll.o
18792+aufs-$(CONFIG_AUFS_RDU) += rdu.o
18793+aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
18794+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
18795+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
18796+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
18797diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
18798--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18799+++ linux/fs/aufs/module.c 2012-02-13 21:54:56.969771692 +0100
9dbd164d 18800@@ -0,0 +1,195 @@
1facf9fc 18801+/*
f6c5ef8b 18802+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 18803+ *
18804+ * This program, aufs is free software; you can redistribute it and/or modify
18805+ * it under the terms of the GNU General Public License as published by
18806+ * the Free Software Foundation; either version 2 of the License, or
18807+ * (at your option) any later version.
dece6358
AM
18808+ *
18809+ * This program is distributed in the hope that it will be useful,
18810+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18811+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18812+ * GNU General Public License for more details.
18813+ *
18814+ * You should have received a copy of the GNU General Public License
18815+ * along with this program; if not, write to the Free Software
18816+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18817+ */
18818+
18819+/*
18820+ * module global variables and operations
18821+ */
18822+
18823+#include <linux/module.h>
18824+#include <linux/seq_file.h>
18825+#include "aufs.h"
18826+
18827+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
18828+{
18829+ if (new_sz <= nused)
18830+ return p;
18831+
18832+ p = krealloc(p, new_sz, gfp);
18833+ if (p)
18834+ memset(p + nused, 0, new_sz - nused);
18835+ return p;
18836+}
18837+
18838+/* ---------------------------------------------------------------------- */
18839+
18840+/*
18841+ * aufs caches
18842+ */
18843+struct kmem_cache *au_cachep[AuCache_Last];
18844+static int __init au_cache_init(void)
18845+{
4a4d8108 18846+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 18847+ if (au_cachep[AuCache_DINFO])
027c5e7a 18848+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
18849+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
18850+ au_icntnr_init_once);
1facf9fc 18851+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
18852+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
18853+ au_fi_init_once);
1facf9fc 18854+ if (au_cachep[AuCache_FINFO])
18855+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
18856+ if (au_cachep[AuCache_VDIR])
18857+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
18858+ if (au_cachep[AuCache_DEHSTR])
18859+ return 0;
18860+
18861+ return -ENOMEM;
18862+}
18863+
18864+static void au_cache_fin(void)
18865+{
18866+ int i;
4a4d8108
AM
18867+
18868+ /* including AuCache_HNOTIFY */
1facf9fc 18869+ for (i = 0; i < AuCache_Last; i++)
18870+ if (au_cachep[i]) {
18871+ kmem_cache_destroy(au_cachep[i]);
18872+ au_cachep[i] = NULL;
18873+ }
18874+}
18875+
18876+/* ---------------------------------------------------------------------- */
18877+
18878+int au_dir_roflags;
18879+
e49829fe 18880+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
18881+/*
18882+ * iterate_supers_type() doesn't protect us from
18883+ * remounting (branch management)
18884+ */
e49829fe
JR
18885+struct au_splhead au_sbilist;
18886+#endif
18887+
9dbd164d
AM
18888+struct lock_class_key au_lc_key[AuLcKey_Last];
18889+
1facf9fc 18890+/*
18891+ * functions for module interface.
18892+ */
18893+MODULE_LICENSE("GPL");
18894+/* MODULE_LICENSE("GPL v2"); */
dece6358 18895+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 18896+MODULE_DESCRIPTION(AUFS_NAME
18897+ " -- Advanced multi layered unification filesystem");
18898+MODULE_VERSION(AUFS_VERSION);
18899+
1facf9fc 18900+/* this module parameter has no meaning when SYSFS is disabled */
18901+int sysaufs_brs = 1;
18902+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
18903+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
18904+
18905+/* ---------------------------------------------------------------------- */
18906+
18907+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
18908+
18909+int au_seq_path(struct seq_file *seq, struct path *path)
18910+{
18911+ return seq_path(seq, path, au_esc_chars);
18912+}
18913+
18914+/* ---------------------------------------------------------------------- */
18915+
18916+static int __init aufs_init(void)
18917+{
18918+ int err, i;
18919+ char *p;
18920+
18921+ p = au_esc_chars;
18922+ for (i = 1; i <= ' '; i++)
18923+ *p++ = i;
18924+ *p++ = '\\';
18925+ *p++ = '\x7f';
18926+ *p = 0;
18927+
18928+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
18929+
e49829fe 18930+ au_sbilist_init();
1facf9fc 18931+ sysaufs_brs_init();
18932+ au_debug_init();
4a4d8108 18933+ au_dy_init();
1facf9fc 18934+ err = sysaufs_init();
18935+ if (unlikely(err))
18936+ goto out;
e49829fe 18937+ err = au_procfs_init();
4f0767ce 18938+ if (unlikely(err))
953406b4 18939+ goto out_sysaufs;
e49829fe
JR
18940+ err = au_wkq_init();
18941+ if (unlikely(err))
18942+ goto out_procfs;
87a755f4 18943+ err = au_loopback_init();
1facf9fc 18944+ if (unlikely(err))
18945+ goto out_wkq;
87a755f4
AM
18946+ err = au_hnotify_init();
18947+ if (unlikely(err))
18948+ goto out_loopback;
1facf9fc 18949+ err = au_sysrq_init();
18950+ if (unlikely(err))
18951+ goto out_hin;
18952+ err = au_cache_init();
18953+ if (unlikely(err))
18954+ goto out_sysrq;
18955+ err = register_filesystem(&aufs_fs_type);
18956+ if (unlikely(err))
18957+ goto out_cache;
4a4d8108
AM
18958+ /* since we define pr_fmt, call printk directly */
18959+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 18960+ goto out; /* success */
18961+
4f0767ce 18962+out_cache:
1facf9fc 18963+ au_cache_fin();
4f0767ce 18964+out_sysrq:
1facf9fc 18965+ au_sysrq_fin();
4f0767ce 18966+out_hin:
4a4d8108 18967+ au_hnotify_fin();
87a755f4
AM
18968+out_loopback:
18969+ au_loopback_fin();
4f0767ce 18970+out_wkq:
1facf9fc 18971+ au_wkq_fin();
e49829fe
JR
18972+out_procfs:
18973+ au_procfs_fin();
4f0767ce 18974+out_sysaufs:
1facf9fc 18975+ sysaufs_fin();
4a4d8108 18976+ au_dy_fin();
4f0767ce 18977+out:
1facf9fc 18978+ return err;
18979+}
18980+
18981+static void __exit aufs_exit(void)
18982+{
18983+ unregister_filesystem(&aufs_fs_type);
18984+ au_cache_fin();
18985+ au_sysrq_fin();
4a4d8108 18986+ au_hnotify_fin();
87a755f4 18987+ au_loopback_fin();
1facf9fc 18988+ au_wkq_fin();
e49829fe 18989+ au_procfs_fin();
1facf9fc 18990+ sysaufs_fin();
4a4d8108 18991+ au_dy_fin();
1facf9fc 18992+}
18993+
18994+module_init(aufs_init);
18995+module_exit(aufs_exit);
7f207e10
AM
18996diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
18997--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 18998+++ linux/fs/aufs/module.h 2012-02-13 21:54:56.969771692 +0100
9dbd164d 18999@@ -0,0 +1,107 @@
1facf9fc 19000+/*
f6c5ef8b 19001+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 19002+ *
19003+ * This program, aufs is free software; you can redistribute it and/or modify
19004+ * it under the terms of the GNU General Public License as published by
19005+ * the Free Software Foundation; either version 2 of the License, or
19006+ * (at your option) any later version.
dece6358
AM
19007+ *
19008+ * This program is distributed in the hope that it will be useful,
19009+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19010+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19011+ * GNU General Public License for more details.
19012+ *
19013+ * You should have received a copy of the GNU General Public License
19014+ * along with this program; if not, write to the Free Software
19015+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19016+ */
19017+
19018+/*
19019+ * module initialization and module-global
19020+ */
19021+
19022+#ifndef __AUFS_MODULE_H__
19023+#define __AUFS_MODULE_H__
19024+
19025+#ifdef __KERNEL__
19026+
19027+#include <linux/slab.h>
19028+
dece6358
AM
19029+struct path;
19030+struct seq_file;
19031+
1facf9fc 19032+/* module parameters */
1facf9fc 19033+extern int sysaufs_brs;
19034+
19035+/* ---------------------------------------------------------------------- */
19036+
19037+extern int au_dir_roflags;
19038+
9dbd164d
AM
19039+enum {
19040+ AuLcNonDir_FIINFO,
19041+ AuLcNonDir_DIINFO,
19042+ AuLcNonDir_IIINFO,
19043+
19044+ AuLcDir_FIINFO,
19045+ AuLcDir_DIINFO,
19046+ AuLcDir_IIINFO,
19047+
19048+ AuLcSymlink_DIINFO,
19049+ AuLcSymlink_IIINFO,
19050+
19051+ AuLcKey_Last
19052+};
19053+extern struct lock_class_key au_lc_key[AuLcKey_Last];
19054+
1facf9fc 19055+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
19056+int au_seq_path(struct seq_file *seq, struct path *path);
19057+
e49829fe
JR
19058+#ifdef CONFIG_PROC_FS
19059+/* procfs.c */
19060+int __init au_procfs_init(void);
19061+void au_procfs_fin(void);
19062+#else
19063+AuStubInt0(au_procfs_init, void);
19064+AuStubVoid(au_procfs_fin, void);
19065+#endif
19066+
4f0767ce
JR
19067+/* ---------------------------------------------------------------------- */
19068+
19069+/* kmem cache */
/*
 * Indices into au_cachep[]; AuCacheFuncs() below pastes these names.
 * The HNOTIFY slot exists only when CONFIG_AUFS_HNOTIFY is enabled, so
 * AuCache_Last (the entry count) depends on the configuration.
 */
enum {
	AuCache_DINFO,
	AuCache_ICNTNR,
	AuCache_FINFO,
	AuCache_VDIR,
	AuCache_DEHSTR,
#ifdef CONFIG_AUFS_HNOTIFY
	AuCache_HNOTIFY,
#endif
	AuCache_Last
};
19081+
4a4d8108
AM
/* slab flags shared by every aufs kmem cache */
#define AuCacheFlags	(SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)

/* create a cache for "struct type" without a constructor */
#define AuCache(type)	KMEM_CACHE(type, AuCacheFlags)

/* same as AuCache() but with a constructor; the cache is named #type */
#define AuCacheCtor(type, ctor)						\
	kmem_cache_create(#type, sizeof(struct type),			\
			  __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 19087+
19088+extern struct kmem_cache *au_cachep[];
19089+
19090+#define AuCacheFuncs(name, index) \
4a4d8108 19091+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 19092+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 19093+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 19094+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
19095+
19096+AuCacheFuncs(dinfo, DINFO);
19097+AuCacheFuncs(icntnr, ICNTNR);
19098+AuCacheFuncs(finfo, FINFO);
19099+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
19100+AuCacheFuncs(vdir_dehstr, DEHSTR);
19101+#ifdef CONFIG_AUFS_HNOTIFY
19102+AuCacheFuncs(hnotify, HNOTIFY);
19103+#endif
1facf9fc 19104+
4a4d8108
AM
19105+#endif /* __KERNEL__ */
19106+#endif /* __AUFS_MODULE_H__ */
7f207e10
AM
19107diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
19108--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
19109+++ linux/fs/aufs/opts.c 2012-02-13 21:54:56.973105100 +0100
19110@@ -0,0 +1,1677 @@
1facf9fc 19111+/*
f6c5ef8b 19112+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 19113+ *
19114+ * This program, aufs is free software; you can redistribute it and/or modify
19115+ * it under the terms of the GNU General Public License as published by
19116+ * the Free Software Foundation; either version 2 of the License, or
19117+ * (at your option) any later version.
dece6358
AM
19118+ *
19119+ * This program is distributed in the hope that it will be useful,
19120+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19121+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19122+ * GNU General Public License for more details.
19123+ *
19124+ * You should have received a copy of the GNU General Public License
19125+ * along with this program; if not, write to the Free Software
19126+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19127+ */
19128+
19129+/*
19130+ * mount options/flags
19131+ */
19132+
dece6358 19133+#include <linux/namei.h>
1facf9fc 19134+#include <linux/types.h> /* a distribution requires */
19135+#include <linux/parser.h>
19136+#include "aufs.h"
19137+
19138+/* ---------------------------------------------------------------------- */
19139+
/*
 * Token ids for the mount option parser.
 * They are used both as match_token() results for the "options" table
 * and as the "type" tag of struct au_opt. Opt_tail terminates an
 * au_opt array. The numeric values follow declaration order, so do not
 * reorder.
 */
enum {
	Opt_br,

	Opt_add,
	Opt_del,
	Opt_mod,
	Opt_reorder,
	Opt_append,
	Opt_prepend,

	Opt_idel,
	Opt_imod,
	Opt_ireorder,

	Opt_dirwh,
	Opt_rdcache,
	Opt_rdblk,
	Opt_rdhash,
	Opt_rendir,

	Opt_rdblk_def,
	Opt_rdhash_def,

	Opt_xino,
	Opt_zxino,
	Opt_noxino,

	Opt_trunc_xino,
	Opt_trunc_xino_v,
	Opt_notrunc_xino,

	Opt_trunc_xino_path,
	Opt_itrunc_xino,

	Opt_trunc_xib,
	Opt_notrunc_xib,

	Opt_shwh,
	Opt_noshwh,

	Opt_plink,
	Opt_noplink,
	Opt_list_plink,

	Opt_udba,

	Opt_dio,
	Opt_nodio,

	/* Opt_lock, Opt_unlock, */
	Opt_cmd,
	Opt_cmd_args,

	Opt_diropq_a,
	Opt_diropq_w,

	Opt_warn_perm,
	Opt_nowarn_perm,

	Opt_wbr_copyup,
	Opt_wbr_create,

	Opt_refrof,
	Opt_norefrof,

	Opt_verbose,
	Opt_noverbose,

	Opt_sum,
	Opt_nosum,
	Opt_wsum,

	Opt_tail,
	Opt_ignore,
	Opt_ignore_silent,
	Opt_err
};
19164+
19165+static match_table_t options = {
19166+ {Opt_br, "br=%s"},
19167+ {Opt_br, "br:%s"},
19168+
19169+ {Opt_add, "add=%d:%s"},
19170+ {Opt_add, "add:%d:%s"},
19171+ {Opt_add, "ins=%d:%s"},
19172+ {Opt_add, "ins:%d:%s"},
19173+ {Opt_append, "append=%s"},
19174+ {Opt_append, "append:%s"},
19175+ {Opt_prepend, "prepend=%s"},
19176+ {Opt_prepend, "prepend:%s"},
19177+
19178+ {Opt_del, "del=%s"},
19179+ {Opt_del, "del:%s"},
19180+ /* {Opt_idel, "idel:%d"}, */
19181+ {Opt_mod, "mod=%s"},
19182+ {Opt_mod, "mod:%s"},
19183+ /* {Opt_imod, "imod:%d:%s"}, */
19184+
19185+ {Opt_dirwh, "dirwh=%d"},
19186+
19187+ {Opt_xino, "xino=%s"},
19188+ {Opt_noxino, "noxino"},
19189+ {Opt_trunc_xino, "trunc_xino"},
19190+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
19191+ {Opt_notrunc_xino, "notrunc_xino"},
19192+ {Opt_trunc_xino_path, "trunc_xino=%s"},
19193+ {Opt_itrunc_xino, "itrunc_xino=%d"},
19194+ /* {Opt_zxino, "zxino=%s"}, */
19195+ {Opt_trunc_xib, "trunc_xib"},
19196+ {Opt_notrunc_xib, "notrunc_xib"},
19197+
e49829fe 19198+#ifdef CONFIG_PROC_FS
1facf9fc 19199+ {Opt_plink, "plink"},
e49829fe
JR
19200+#else
19201+ {Opt_ignore_silent, "plink"},
19202+#endif
19203+
1facf9fc 19204+ {Opt_noplink, "noplink"},
e49829fe 19205+
1facf9fc 19206+#ifdef CONFIG_AUFS_DEBUG
19207+ {Opt_list_plink, "list_plink"},
19208+#endif
19209+
19210+ {Opt_udba, "udba=%s"},
19211+
4a4d8108
AM
19212+ {Opt_dio, "dio"},
19213+ {Opt_nodio, "nodio"},
19214+
1facf9fc 19215+ {Opt_diropq_a, "diropq=always"},
19216+ {Opt_diropq_a, "diropq=a"},
19217+ {Opt_diropq_w, "diropq=whiteouted"},
19218+ {Opt_diropq_w, "diropq=w"},
19219+
19220+ {Opt_warn_perm, "warn_perm"},
19221+ {Opt_nowarn_perm, "nowarn_perm"},
19222+
19223+ /* keep them temporary */
19224+ {Opt_ignore_silent, "coo=%s"},
19225+ {Opt_ignore_silent, "nodlgt"},
19226+ {Opt_ignore_silent, "nodirperm1"},
1facf9fc 19227+ {Opt_ignore_silent, "clean_plink"},
19228+
dece6358
AM
19229+#ifdef CONFIG_AUFS_SHWH
19230+ {Opt_shwh, "shwh"},
19231+#endif
19232+ {Opt_noshwh, "noshwh"},
19233+
1facf9fc 19234+ {Opt_rendir, "rendir=%d"},
19235+
19236+ {Opt_refrof, "refrof"},
19237+ {Opt_norefrof, "norefrof"},
19238+
19239+ {Opt_verbose, "verbose"},
19240+ {Opt_verbose, "v"},
19241+ {Opt_noverbose, "noverbose"},
19242+ {Opt_noverbose, "quiet"},
19243+ {Opt_noverbose, "q"},
19244+ {Opt_noverbose, "silent"},
19245+
19246+ {Opt_sum, "sum"},
19247+ {Opt_nosum, "nosum"},
19248+ {Opt_wsum, "wsum"},
19249+
19250+ {Opt_rdcache, "rdcache=%d"},
19251+ {Opt_rdblk, "rdblk=%d"},
dece6358 19252+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 19253+ {Opt_rdhash, "rdhash=%d"},
dece6358 19254+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 19255+
19256+ {Opt_wbr_create, "create=%s"},
19257+ {Opt_wbr_create, "create_policy=%s"},
19258+ {Opt_wbr_copyup, "cpup=%s"},
19259+ {Opt_wbr_copyup, "copyup=%s"},
19260+ {Opt_wbr_copyup, "copyup_policy=%s"},
19261+
19262+ /* internal use for the scripts */
19263+ {Opt_ignore_silent, "si=%s"},
19264+
19265+ {Opt_br, "dirs=%s"},
19266+ {Opt_ignore, "debug=%d"},
19267+ {Opt_ignore, "delete=whiteout"},
19268+ {Opt_ignore, "delete=all"},
19269+ {Opt_ignore, "imap=%s"},
19270+
1308ab2a 19271+ /* temporary workaround, due to old mount(8)? */
19272+ {Opt_ignore_silent, "relatime"},
19273+
1facf9fc 19274+ {Opt_err, NULL}
19275+};
19276+
19277+/* ---------------------------------------------------------------------- */
19278+
19279+static const char *au_parser_pattern(int val, struct match_token *token)
19280+{
19281+ while (token->pattern) {
19282+ if (token->token == val)
19283+ return token->pattern;
19284+ token++;
19285+ }
19286+ BUG();
19287+ return "??";
19288+}
19289+
19290+/* ---------------------------------------------------------------------- */
19291+
1e00d052 19292+static match_table_t brperm = {
1facf9fc 19293+ {AuBrPerm_RO, AUFS_BRPERM_RO},
19294+ {AuBrPerm_RR, AUFS_BRPERM_RR},
19295+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
19296+ {0, NULL}
19297+};
1facf9fc 19298+
1e00d052
AM
19299+static match_table_t brrattr = {
19300+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
19301+ {0, NULL}
19302+};
1facf9fc 19303+
1e00d052
AM
19304+static match_table_t brwattr = {
19305+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
19306+ {0, NULL}
1facf9fc 19307+};
19308+
1e00d052
AM
19309+#define AuBrStr_LONGEST AUFS_BRPERM_RW "+" AUFS_BRWATTR_NLWH
19310+
19311+static int br_attr_val(char *str, match_table_t table, substring_t args[])
19312+{
19313+ int attr, v;
19314+ char *p;
19315+
19316+ attr = 0;
19317+ do {
19318+ p = strchr(str, '+');
19319+ if (p)
19320+ *p = 0;
19321+ v = match_token(str, table, args);
19322+ if (v)
19323+ attr |= v;
19324+ else {
19325+ if (p)
19326+ *p = '+';
19327+ pr_warning("ignored branch attribute %s\n", str);
19328+ break;
19329+ }
19330+ if (p)
19331+ str = p + 1;
19332+ } while (p);
19333+
19334+ return attr;
19335+}
19336+
4a4d8108 19337+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 19338+{
19339+ int val;
1e00d052 19340+ char *p;
1facf9fc 19341+ substring_t args[MAX_OPT_ARGS];
19342+
1e00d052
AM
19343+ p = strchr(perm, '+');
19344+ if (p)
19345+ *p = 0;
19346+ val = match_token(perm, brperm, args);
19347+ if (!val) {
19348+ if (p)
19349+ *p = '+';
19350+ pr_warning("ignored branch permission %s\n", perm);
19351+ val = AuBrPerm_RO;
19352+ goto out;
19353+ }
19354+ if (!p)
19355+ goto out;
19356+
19357+ switch (val) {
19358+ case AuBrPerm_RO:
19359+ case AuBrPerm_RR:
19360+ val |= br_attr_val(p + 1, brrattr, args);
19361+ break;
19362+ case AuBrPerm_RW:
19363+ val |= br_attr_val(p + 1, brwattr, args);
19364+ break;
19365+ }
19366+
19367+out:
1facf9fc 19368+ return val;
19369+}
19370+
1e00d052
AM
19371+/* Caller should free the return value */
19372+char *au_optstr_br_perm(int brperm)
1facf9fc 19373+{
1e00d052
AM
19374+ char *p, a[sizeof(AuBrStr_LONGEST)];
19375+ int sz;
19376+
19377+#define SetPerm(str) do { \
19378+ sz = sizeof(str); \
19379+ memcpy(a, str, sz); \
19380+ p = a + sz - 1; \
19381+ } while (0)
19382+
19383+#define AppendAttr(flag, str) do { \
19384+ if (brperm & flag) { \
19385+ sz = sizeof(str); \
19386+ *p++ = '+'; \
19387+ memcpy(p, str, sz); \
19388+ p += sz - 1; \
19389+ } \
19390+ } while (0)
19391+
19392+ switch (brperm & AuBrPerm_Mask) {
19393+ case AuBrPerm_RO:
19394+ SetPerm(AUFS_BRPERM_RO);
19395+ break;
19396+ case AuBrPerm_RR:
19397+ SetPerm(AUFS_BRPERM_RR);
19398+ break;
19399+ case AuBrPerm_RW:
19400+ SetPerm(AUFS_BRPERM_RW);
19401+ break;
19402+ default:
19403+ AuDebugOn(1);
19404+ }
19405+
19406+ AppendAttr(AuBrRAttr_WH, AUFS_BRRATTR_WH);
19407+ AppendAttr(AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH);
19408+
19409+ AuDebugOn(strlen(a) >= sizeof(a));
19410+ return kstrdup(a, GFP_NOFS);
19411+#undef SetPerm
19412+#undef AppendAttr
1facf9fc 19413+}
19414+
19415+/* ---------------------------------------------------------------------- */
19416+
19417+static match_table_t udbalevel = {
19418+ {AuOpt_UDBA_REVAL, "reval"},
19419+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
19420+#ifdef CONFIG_AUFS_HNOTIFY
19421+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
19422+#ifdef CONFIG_AUFS_HFSNOTIFY
19423+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 19424+#endif
1facf9fc 19425+#endif
19426+ {-1, NULL}
19427+};
19428+
4a4d8108 19429+static int noinline_for_stack udba_val(char *str)
1facf9fc 19430+{
19431+ substring_t args[MAX_OPT_ARGS];
19432+
7f207e10 19433+ return match_token(str, udbalevel, args);
1facf9fc 19434+}
19435+
19436+const char *au_optstr_udba(int udba)
19437+{
19438+ return au_parser_pattern(udba, (void *)udbalevel);
19439+}
19440+
19441+/* ---------------------------------------------------------------------- */
19442+
19443+static match_table_t au_wbr_create_policy = {
19444+ {AuWbrCreate_TDP, "tdp"},
19445+ {AuWbrCreate_TDP, "top-down-parent"},
19446+ {AuWbrCreate_RR, "rr"},
19447+ {AuWbrCreate_RR, "round-robin"},
19448+ {AuWbrCreate_MFS, "mfs"},
19449+ {AuWbrCreate_MFS, "most-free-space"},
19450+ {AuWbrCreate_MFSV, "mfs:%d"},
19451+ {AuWbrCreate_MFSV, "most-free-space:%d"},
19452+
19453+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
19454+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
19455+ {AuWbrCreate_PMFS, "pmfs"},
19456+ {AuWbrCreate_PMFSV, "pmfs:%d"},
19457+
19458+ {-1, NULL}
19459+};
19460+
dece6358
AM
19461+/*
19462+ * cf. linux/lib/parser.c and cmdline.c
19463+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 19464+ * kstrto...().
dece6358 19465+ */
4a4d8108
AM
19466+static int noinline_for_stack
19467+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 19468+{
19469+ int err;
19470+ unsigned int len;
19471+ char a[32];
19472+
19473+ err = -ERANGE;
19474+ len = s->to - s->from;
19475+ if (len + 1 <= sizeof(a)) {
19476+ memcpy(a, s->from, len);
19477+ a[len] = '\0';
9dbd164d 19478+ err = kstrtoull(a, 0, result);
1facf9fc 19479+ }
19480+ return err;
19481+}
19482+
19483+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
19484+ struct au_opt_wbr_create *create)
19485+{
19486+ int err;
19487+ unsigned long long ull;
19488+
19489+ err = 0;
19490+ if (!au_match_ull(arg, &ull))
19491+ create->mfsrr_watermark = ull;
19492+ else {
4a4d8108 19493+ pr_err("bad integer in %s\n", str);
1facf9fc 19494+ err = -EINVAL;
19495+ }
19496+
19497+ return err;
19498+}
19499+
19500+static int au_wbr_mfs_sec(substring_t *arg, char *str,
19501+ struct au_opt_wbr_create *create)
19502+{
19503+ int n, err;
19504+
19505+ err = 0;
027c5e7a 19506+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 19507+ create->mfs_second = n;
19508+ else {
4a4d8108 19509+ pr_err("bad integer in %s\n", str);
1facf9fc 19510+ err = -EINVAL;
19511+ }
19512+
19513+ return err;
19514+}
19515+
4a4d8108
AM
19516+static int noinline_for_stack
19517+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 19518+{
19519+ int err, e;
19520+ substring_t args[MAX_OPT_ARGS];
19521+
19522+ err = match_token(str, au_wbr_create_policy, args);
19523+ create->wbr_create = err;
19524+ switch (err) {
19525+ case AuWbrCreate_MFSRRV:
19526+ e = au_wbr_mfs_wmark(&args[0], str, create);
19527+ if (!e)
19528+ e = au_wbr_mfs_sec(&args[1], str, create);
19529+ if (unlikely(e))
19530+ err = e;
19531+ break;
19532+ case AuWbrCreate_MFSRR:
19533+ e = au_wbr_mfs_wmark(&args[0], str, create);
19534+ if (unlikely(e)) {
19535+ err = e;
19536+ break;
19537+ }
19538+ /*FALLTHROUGH*/
19539+ case AuWbrCreate_MFS:
19540+ case AuWbrCreate_PMFS:
027c5e7a 19541+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 19542+ break;
19543+ case AuWbrCreate_MFSV:
19544+ case AuWbrCreate_PMFSV:
19545+ e = au_wbr_mfs_sec(&args[0], str, create);
19546+ if (unlikely(e))
19547+ err = e;
19548+ break;
19549+ }
19550+
19551+ return err;
19552+}
19553+
19554+const char *au_optstr_wbr_create(int wbr_create)
19555+{
19556+ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
19557+}
19558+
19559+static match_table_t au_wbr_copyup_policy = {
19560+ {AuWbrCopyup_TDP, "tdp"},
19561+ {AuWbrCopyup_TDP, "top-down-parent"},
19562+ {AuWbrCopyup_BUP, "bup"},
19563+ {AuWbrCopyup_BUP, "bottom-up-parent"},
19564+ {AuWbrCopyup_BU, "bu"},
19565+ {AuWbrCopyup_BU, "bottom-up"},
19566+ {-1, NULL}
19567+};
19568+
4a4d8108 19569+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 19570+{
19571+ substring_t args[MAX_OPT_ARGS];
19572+
19573+ return match_token(str, au_wbr_copyup_policy, args);
19574+}
19575+
19576+const char *au_optstr_wbr_copyup(int wbr_copyup)
19577+{
19578+ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
19579+}
19580+
19581+/* ---------------------------------------------------------------------- */
19582+
19583+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
19584+
/*
 * Debug helper: print every parsed option in @opts->opt up to the
 * Opt_tail terminator. Compiles to an empty function unless
 * CONFIG_AUFS_DEBUG is set. An option type without a case here is
 * treated as a programming error (BUG()).
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch manipulation */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* numeric tunables */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino / xib */
		case Opt_xino:
			AuDbg("xino {%s %.*s}\n",
			      opt->xino.path,
			      AuDLNPair(opt->xino.file->f_dentry));
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* simple flags */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* writable branch policies */
		case Opt_wbr_create:
			AuDbg("create %d, %s\n", opt->wbr_create.wbr_create,
			      au_optstr_wbr_create(opt->wbr_create.wbr_create));
			/* policies with arguments print them as well */
			switch (opt->wbr_create.wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", opt->wbr_create.mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      opt->wbr_create.mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      opt->wbr_create.mfsrr_watermark,
				      opt->wbr_create.mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		default:
			BUG();
		}
	}
#endif
}
19766+
19767+void au_opts_free(struct au_opts *opts)
19768+{
19769+ struct au_opt *opt;
19770+
19771+ opt = opts->opt;
19772+ while (opt->type != Opt_tail) {
19773+ switch (opt->type) {
19774+ case Opt_add:
19775+ case Opt_append:
19776+ case Opt_prepend:
19777+ path_put(&opt->add.path);
19778+ break;
19779+ case Opt_del:
19780+ case Opt_idel:
19781+ path_put(&opt->del.h_path);
19782+ break;
19783+ case Opt_mod:
19784+ case Opt_imod:
19785+ dput(opt->mod.h_root);
19786+ break;
19787+ case Opt_xino:
19788+ fput(opt->xino.file);
19789+ break;
19790+ }
19791+ opt++;
19792+ }
19793+}
19794+
19795+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
19796+ aufs_bindex_t bindex)
19797+{
19798+ int err;
19799+ struct au_opt_add *add = &opt->add;
19800+ char *p;
19801+
19802+ add->bindex = bindex;
1e00d052 19803+ add->perm = AuBrPerm_RO;
1facf9fc 19804+ add->pathname = opt_str;
19805+ p = strchr(opt_str, '=');
19806+ if (p) {
19807+ *p++ = 0;
19808+ if (*p)
19809+ add->perm = br_perm_val(p);
19810+ }
19811+
19812+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
19813+ if (!err) {
19814+ if (!p) {
19815+ add->perm = AuBrPerm_RO;
19816+ if (au_test_fs_rr(add->path.dentry->d_sb))
19817+ add->perm = AuBrPerm_RR;
19818+ else if (!bindex && !(sb_flags & MS_RDONLY))
19819+ add->perm = AuBrPerm_RW;
19820+ }
19821+ opt->type = Opt_add;
19822+ goto out;
19823+ }
4a4d8108 19824+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 19825+ err = -EINVAL;
19826+
4f0767ce 19827+out:
1facf9fc 19828+ return err;
19829+}
19830+
19831+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
19832+{
19833+ int err;
19834+
19835+ del->pathname = args[0].from;
19836+ AuDbg("del path %s\n", del->pathname);
19837+
19838+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
19839+ if (unlikely(err))
4a4d8108 19840+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 19841+
19842+ return err;
19843+}
19844+
#if 0 /* reserved for future use */
/*
 * Index-based variant of au_opts_parse_del(): take references on the
 * dentry and mount of branch @bindex under the aufs read lock.
 * Returns 0, or -EINVAL after logging when @bindex is outside
 * 0..au_sbend(sb).
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err = -EINVAL;
	struct dentry *root = sb->s_root;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && au_sbend(sb) >= bindex) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else
		pr_err("out of bounds, %d\n", bindex);
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19869+
4a4d8108
AM
19870+static int noinline_for_stack
19871+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 19872+{
19873+ int err;
19874+ struct path path;
19875+ char *p;
19876+
19877+ err = -EINVAL;
19878+ mod->path = args[0].from;
19879+ p = strchr(mod->path, '=');
19880+ if (unlikely(!p)) {
4a4d8108 19881+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 19882+ goto out;
19883+ }
19884+
19885+ *p++ = 0;
19886+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
19887+ if (unlikely(err)) {
4a4d8108 19888+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 19889+ goto out;
19890+ }
19891+
19892+ mod->perm = br_perm_val(p);
19893+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
19894+ mod->h_root = dget(path.dentry);
19895+ path_put(&path);
19896+
4f0767ce 19897+out:
1facf9fc 19898+ return err;
19899+}
19900+
#if 0 /* reserved for future use */
/*
 * Index-based variant of au_opts_parse_mod(): the permission comes from
 * args[1] and the root dentry from branch @bindex, under the aufs read
 * lock. Returns 0, or -EINVAL after logging when @bindex is outside
 * 0..au_sbend(sb).
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err = -EINVAL;
	struct dentry *root = sb->s_root;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && au_sbend(sb) >= bindex) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else
		pr_err("out of bounds, %d\n", bindex);
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19927+
19928+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
19929+ substring_t args[])
19930+{
19931+ int err;
19932+ struct file *file;
19933+
19934+ file = au_xino_create(sb, args[0].from, /*silent*/0);
19935+ err = PTR_ERR(file);
19936+ if (IS_ERR(file))
19937+ goto out;
19938+
19939+ err = -EINVAL;
19940+ if (unlikely(file->f_dentry->d_sb == sb)) {
19941+ fput(file);
4a4d8108 19942+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 19943+ goto out;
19944+ }
19945+
19946+ err = 0;
19947+ xino->file = file;
19948+ xino->path = args[0].from;
19949+
4f0767ce 19950+out:
1facf9fc 19951+ return err;
19952+}
19953+
4a4d8108
AM
19954+static int noinline_for_stack
19955+au_opts_parse_xino_itrunc_path(struct super_block *sb,
19956+ struct au_opt_xino_itrunc *xino_itrunc,
19957+ substring_t args[])
1facf9fc 19958+{
19959+ int err;
19960+ aufs_bindex_t bend, bindex;
19961+ struct path path;
19962+ struct dentry *root;
19963+
19964+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
19965+ if (unlikely(err)) {
4a4d8108 19966+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 19967+ goto out;
19968+ }
19969+
19970+ xino_itrunc->bindex = -1;
19971+ root = sb->s_root;
19972+ aufs_read_lock(root, AuLock_FLUSH);
19973+ bend = au_sbend(sb);
19974+ for (bindex = 0; bindex <= bend; bindex++) {
19975+ if (au_h_dptr(root, bindex) == path.dentry) {
19976+ xino_itrunc->bindex = bindex;
19977+ break;
19978+ }
19979+ }
19980+ aufs_read_unlock(root, !AuLock_IR);
19981+ path_put(&path);
19982+
19983+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 19984+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 19985+ err = -EINVAL;
19986+ }
19987+
4f0767ce 19988+out:
1facf9fc 19989+ return err;
19990+}
19991+
19992+/* called without aufs lock */
19993+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
19994+{
19995+ int err, n, token;
19996+ aufs_bindex_t bindex;
19997+ unsigned char skipped;
19998+ struct dentry *root;
19999+ struct au_opt *opt, *opt_tail;
20000+ char *opt_str;
20001+ /* reduce the stack space */
20002+ union {
20003+ struct au_opt_xino_itrunc *xino_itrunc;
20004+ struct au_opt_wbr_create *create;
20005+ } u;
20006+ struct {
20007+ substring_t args[MAX_OPT_ARGS];
20008+ } *a;
20009+
20010+ err = -ENOMEM;
20011+ a = kmalloc(sizeof(*a), GFP_NOFS);
20012+ if (unlikely(!a))
20013+ goto out;
20014+
20015+ root = sb->s_root;
20016+ err = 0;
20017+ bindex = 0;
20018+ opt = opts->opt;
20019+ opt_tail = opt + opts->max_opt - 1;
20020+ opt->type = Opt_tail;
20021+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
20022+ err = -EINVAL;
20023+ skipped = 0;
20024+ token = match_token(opt_str, options, a->args);
20025+ switch (token) {
20026+ case Opt_br:
20027+ err = 0;
20028+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
20029+ && *opt_str) {
20030+ err = opt_add(opt, opt_str, opts->sb_flags,
20031+ bindex++);
20032+ if (unlikely(!err && ++opt > opt_tail)) {
20033+ err = -E2BIG;
20034+ break;
20035+ }
20036+ opt->type = Opt_tail;
20037+ skipped = 1;
20038+ }
20039+ break;
20040+ case Opt_add:
20041+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 20042+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20043+ break;
20044+ }
20045+ bindex = n;
20046+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
20047+ bindex);
20048+ if (!err)
20049+ opt->type = token;
20050+ break;
20051+ case Opt_append:
20052+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
20053+ /*dummy bindex*/1);
20054+ if (!err)
20055+ opt->type = token;
20056+ break;
20057+ case Opt_prepend:
20058+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
20059+ /*bindex*/0);
20060+ if (!err)
20061+ opt->type = token;
20062+ break;
20063+ case Opt_del:
20064+ err = au_opts_parse_del(&opt->del, a->args);
20065+ if (!err)
20066+ opt->type = token;
20067+ break;
20068+#if 0 /* reserved for future use */
20069+ case Opt_idel:
20070+ del->pathname = "(indexed)";
20071+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 20072+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20073+ break;
20074+ }
20075+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
20076+ if (!err)
20077+ opt->type = token;
20078+ break;
20079+#endif
20080+ case Opt_mod:
20081+ err = au_opts_parse_mod(&opt->mod, a->args);
20082+ if (!err)
20083+ opt->type = token;
20084+ break;
20085+#ifdef IMOD /* reserved for future use */
20086+ case Opt_imod:
20087+ u.mod->path = "(indexed)";
20088+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 20089+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20090+ break;
20091+ }
20092+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
20093+ if (!err)
20094+ opt->type = token;
20095+ break;
20096+#endif
20097+ case Opt_xino:
20098+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
20099+ if (!err)
20100+ opt->type = token;
20101+ break;
20102+
20103+ case Opt_trunc_xino_path:
20104+ err = au_opts_parse_xino_itrunc_path
20105+ (sb, &opt->xino_itrunc, a->args);
20106+ if (!err)
20107+ opt->type = token;
20108+ break;
20109+
20110+ case Opt_itrunc_xino:
20111+ u.xino_itrunc = &opt->xino_itrunc;
20112+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 20113+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20114+ break;
20115+ }
20116+ u.xino_itrunc->bindex = n;
20117+ aufs_read_lock(root, AuLock_FLUSH);
20118+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 20119+ pr_err("out of bounds, %d\n", n);
1facf9fc 20120+ aufs_read_unlock(root, !AuLock_IR);
20121+ break;
20122+ }
20123+ aufs_read_unlock(root, !AuLock_IR);
20124+ err = 0;
20125+ opt->type = token;
20126+ break;
20127+
20128+ case Opt_dirwh:
20129+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
20130+ break;
20131+ err = 0;
20132+ opt->type = token;
20133+ break;
20134+
20135+ case Opt_rdcache:
027c5e7a
AM
20136+ if (unlikely(match_int(&a->args[0], &n))) {
20137+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20138+ break;
027c5e7a
AM
20139+ }
20140+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
20141+ pr_err("rdcache must be smaller than %d\n",
20142+ AUFS_RDCACHE_MAX);
20143+ break;
20144+ }
20145+ opt->rdcache = n;
1facf9fc 20146+ err = 0;
20147+ opt->type = token;
20148+ break;
20149+ case Opt_rdblk:
20150+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 20151+ || n < 0
1facf9fc 20152+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 20153+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20154+ break;
20155+ }
1308ab2a 20156+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
20157+ pr_err("rdblk must be larger than %d\n",
20158+ NAME_MAX);
1facf9fc 20159+ break;
20160+ }
20161+ opt->rdblk = n;
20162+ err = 0;
20163+ opt->type = token;
20164+ break;
20165+ case Opt_rdhash:
20166+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 20167+ || n < 0
1facf9fc 20168+ || n * sizeof(struct hlist_head)
20169+ > KMALLOC_MAX_SIZE)) {
4a4d8108 20170+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20171+ break;
20172+ }
20173+ opt->rdhash = n;
20174+ err = 0;
20175+ opt->type = token;
20176+ break;
20177+
20178+ case Opt_trunc_xino:
20179+ case Opt_notrunc_xino:
20180+ case Opt_noxino:
20181+ case Opt_trunc_xib:
20182+ case Opt_notrunc_xib:
dece6358
AM
20183+ case Opt_shwh:
20184+ case Opt_noshwh:
1facf9fc 20185+ case Opt_plink:
20186+ case Opt_noplink:
20187+ case Opt_list_plink:
4a4d8108
AM
20188+ case Opt_dio:
20189+ case Opt_nodio:
1facf9fc 20190+ case Opt_diropq_a:
20191+ case Opt_diropq_w:
20192+ case Opt_warn_perm:
20193+ case Opt_nowarn_perm:
20194+ case Opt_refrof:
20195+ case Opt_norefrof:
20196+ case Opt_verbose:
20197+ case Opt_noverbose:
20198+ case Opt_sum:
20199+ case Opt_nosum:
20200+ case Opt_wsum:
dece6358
AM
20201+ case Opt_rdblk_def:
20202+ case Opt_rdhash_def:
1facf9fc 20203+ err = 0;
20204+ opt->type = token;
20205+ break;
20206+
20207+ case Opt_udba:
20208+ opt->udba = udba_val(a->args[0].from);
20209+ if (opt->udba >= 0) {
20210+ err = 0;
20211+ opt->type = token;
20212+ } else
4a4d8108 20213+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20214+ break;
20215+
20216+ case Opt_wbr_create:
20217+ u.create = &opt->wbr_create;
20218+ u.create->wbr_create
20219+ = au_wbr_create_val(a->args[0].from, u.create);
20220+ if (u.create->wbr_create >= 0) {
20221+ err = 0;
20222+ opt->type = token;
20223+ } else
4a4d8108 20224+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20225+ break;
20226+ case Opt_wbr_copyup:
20227+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
20228+ if (opt->wbr_copyup >= 0) {
20229+ err = 0;
20230+ opt->type = token;
20231+ } else
4a4d8108 20232+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20233+ break;
20234+
20235+ case Opt_ignore:
4a4d8108 20236+ pr_warning("ignored %s\n", opt_str);
1facf9fc 20237+ /*FALLTHROUGH*/
20238+ case Opt_ignore_silent:
20239+ skipped = 1;
20240+ err = 0;
20241+ break;
20242+ case Opt_err:
4a4d8108 20243+ pr_err("unknown option %s\n", opt_str);
1facf9fc 20244+ break;
20245+ }
20246+
20247+ if (!err && !skipped) {
20248+ if (unlikely(++opt > opt_tail)) {
20249+ err = -E2BIG;
20250+ opt--;
20251+ opt->type = Opt_tail;
20252+ break;
20253+ }
20254+ opt->type = Opt_tail;
20255+ }
20256+ }
20257+
20258+ kfree(a);
20259+ dump_opts(opts);
20260+ if (unlikely(err))
20261+ au_opts_free(opts);
20262+
4f0767ce 20263+out:
1facf9fc 20264+ return err;
20265+}
20266+
20267+static int au_opt_wbr_create(struct super_block *sb,
20268+ struct au_opt_wbr_create *create)
20269+{
20270+ int err;
20271+ struct au_sbinfo *sbinfo;
20272+
dece6358
AM
20273+ SiMustWriteLock(sb);
20274+
1facf9fc 20275+ err = 1; /* handled */
20276+ sbinfo = au_sbi(sb);
20277+ if (sbinfo->si_wbr_create_ops->fin) {
20278+ err = sbinfo->si_wbr_create_ops->fin(sb);
20279+ if (!err)
20280+ err = 1;
20281+ }
20282+
20283+ sbinfo->si_wbr_create = create->wbr_create;
20284+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
20285+ switch (create->wbr_create) {
20286+ case AuWbrCreate_MFSRRV:
20287+ case AuWbrCreate_MFSRR:
20288+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
20289+ /*FALLTHROUGH*/
20290+ case AuWbrCreate_MFS:
20291+ case AuWbrCreate_MFSV:
20292+ case AuWbrCreate_PMFS:
20293+ case AuWbrCreate_PMFSV:
e49829fe
JR
20294+ sbinfo->si_wbr_mfs.mfs_expire
20295+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 20296+ break;
20297+ }
20298+
20299+ if (sbinfo->si_wbr_create_ops->init)
20300+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
20301+
20302+ return err;
20303+}
20304+
20305+/*
20306+ * returns,
20307+ * plus: processed without an error
20308+ * zero: unprocessed
20309+ */
20310+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
20311+ struct au_opts *opts)
20312+{
20313+ int err;
20314+ struct au_sbinfo *sbinfo;
20315+
dece6358
AM
20316+ SiMustWriteLock(sb);
20317+
1facf9fc 20318+ err = 1; /* handled */
20319+ sbinfo = au_sbi(sb);
20320+ switch (opt->type) {
20321+ case Opt_udba:
20322+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
20323+ sbinfo->si_mntflags |= opt->udba;
20324+ opts->given_udba |= opt->udba;
20325+ break;
20326+
20327+ case Opt_plink:
20328+ au_opt_set(sbinfo->si_mntflags, PLINK);
20329+ break;
20330+ case Opt_noplink:
20331+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 20332+ au_plink_put(sb, /*verbose*/1);
1facf9fc 20333+ au_opt_clr(sbinfo->si_mntflags, PLINK);
20334+ break;
20335+ case Opt_list_plink:
20336+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
20337+ au_plink_list(sb);
20338+ break;
20339+
4a4d8108
AM
20340+ case Opt_dio:
20341+ au_opt_set(sbinfo->si_mntflags, DIO);
20342+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20343+ break;
20344+ case Opt_nodio:
20345+ au_opt_clr(sbinfo->si_mntflags, DIO);
20346+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20347+ break;
20348+
1facf9fc 20349+ case Opt_diropq_a:
20350+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20351+ break;
20352+ case Opt_diropq_w:
20353+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20354+ break;
20355+
20356+ case Opt_warn_perm:
20357+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
20358+ break;
20359+ case Opt_nowarn_perm:
20360+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
20361+ break;
20362+
20363+ case Opt_refrof:
20364+ au_opt_set(sbinfo->si_mntflags, REFROF);
20365+ break;
20366+ case Opt_norefrof:
20367+ au_opt_clr(sbinfo->si_mntflags, REFROF);
20368+ break;
20369+
20370+ case Opt_verbose:
20371+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
20372+ break;
20373+ case Opt_noverbose:
20374+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
20375+ break;
20376+
20377+ case Opt_sum:
20378+ au_opt_set(sbinfo->si_mntflags, SUM);
20379+ break;
20380+ case Opt_wsum:
20381+ au_opt_clr(sbinfo->si_mntflags, SUM);
20382+ au_opt_set(sbinfo->si_mntflags, SUM_W);
20383+ case Opt_nosum:
20384+ au_opt_clr(sbinfo->si_mntflags, SUM);
20385+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
20386+ break;
20387+
20388+ case Opt_wbr_create:
20389+ err = au_opt_wbr_create(sb, &opt->wbr_create);
20390+ break;
20391+ case Opt_wbr_copyup:
20392+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
20393+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
20394+ break;
20395+
20396+ case Opt_dirwh:
20397+ sbinfo->si_dirwh = opt->dirwh;
20398+ break;
20399+
20400+ case Opt_rdcache:
e49829fe
JR
20401+ sbinfo->si_rdcache
20402+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 20403+ break;
20404+ case Opt_rdblk:
20405+ sbinfo->si_rdblk = opt->rdblk;
20406+ break;
dece6358
AM
20407+ case Opt_rdblk_def:
20408+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
20409+ break;
1facf9fc 20410+ case Opt_rdhash:
20411+ sbinfo->si_rdhash = opt->rdhash;
20412+ break;
dece6358
AM
20413+ case Opt_rdhash_def:
20414+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
20415+ break;
20416+
20417+ case Opt_shwh:
20418+ au_opt_set(sbinfo->si_mntflags, SHWH);
20419+ break;
20420+ case Opt_noshwh:
20421+ au_opt_clr(sbinfo->si_mntflags, SHWH);
20422+ break;
1facf9fc 20423+
20424+ case Opt_trunc_xino:
20425+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
20426+ break;
20427+ case Opt_notrunc_xino:
20428+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
20429+ break;
20430+
20431+ case Opt_trunc_xino_path:
20432+ case Opt_itrunc_xino:
20433+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
20434+ if (!err)
20435+ err = 1;
20436+ break;
20437+
20438+ case Opt_trunc_xib:
20439+ au_fset_opts(opts->flags, TRUNC_XIB);
20440+ break;
20441+ case Opt_notrunc_xib:
20442+ au_fclr_opts(opts->flags, TRUNC_XIB);
20443+ break;
20444+
20445+ default:
20446+ err = 0;
20447+ break;
20448+ }
20449+
20450+ return err;
20451+}
20452+
20453+/*
20454+ * returns tri-state.
20455+ * plus: processed without an error
20456+ * zero: unprocessed
20457+ * minus: error
20458+ */
20459+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
20460+ struct au_opts *opts)
20461+{
20462+ int err, do_refresh;
20463+
20464+ err = 0;
20465+ switch (opt->type) {
20466+ case Opt_append:
20467+ opt->add.bindex = au_sbend(sb) + 1;
20468+ if (opt->add.bindex < 0)
20469+ opt->add.bindex = 0;
20470+ goto add;
20471+ case Opt_prepend:
20472+ opt->add.bindex = 0;
20473+ add:
20474+ case Opt_add:
20475+ err = au_br_add(sb, &opt->add,
20476+ au_ftest_opts(opts->flags, REMOUNT));
20477+ if (!err) {
20478+ err = 1;
027c5e7a 20479+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20480+ }
20481+ break;
20482+
20483+ case Opt_del:
20484+ case Opt_idel:
20485+ err = au_br_del(sb, &opt->del,
20486+ au_ftest_opts(opts->flags, REMOUNT));
20487+ if (!err) {
20488+ err = 1;
20489+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 20490+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20491+ }
20492+ break;
20493+
20494+ case Opt_mod:
20495+ case Opt_imod:
20496+ err = au_br_mod(sb, &opt->mod,
20497+ au_ftest_opts(opts->flags, REMOUNT),
20498+ &do_refresh);
20499+ if (!err) {
20500+ err = 1;
027c5e7a
AM
20501+ if (do_refresh)
20502+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20503+ }
20504+ break;
20505+ }
20506+
20507+ return err;
20508+}
20509+
20510+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
20511+ struct au_opt_xino **opt_xino,
20512+ struct au_opts *opts)
20513+{
20514+ int err;
20515+ aufs_bindex_t bend, bindex;
20516+ struct dentry *root, *parent, *h_root;
20517+
20518+ err = 0;
20519+ switch (opt->type) {
20520+ case Opt_xino:
20521+ err = au_xino_set(sb, &opt->xino,
20522+ !!au_ftest_opts(opts->flags, REMOUNT));
20523+ if (unlikely(err))
20524+ break;
20525+
20526+ *opt_xino = &opt->xino;
20527+ au_xino_brid_set(sb, -1);
20528+
20529+ /* safe d_parent access */
20530+ parent = opt->xino.file->f_dentry->d_parent;
20531+ root = sb->s_root;
20532+ bend = au_sbend(sb);
20533+ for (bindex = 0; bindex <= bend; bindex++) {
20534+ h_root = au_h_dptr(root, bindex);
20535+ if (h_root == parent) {
20536+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
20537+ break;
20538+ }
20539+ }
20540+ break;
20541+
20542+ case Opt_noxino:
20543+ au_xino_clr(sb);
20544+ au_xino_brid_set(sb, -1);
20545+ *opt_xino = (void *)-1;
20546+ break;
20547+ }
20548+
20549+ return err;
20550+}
20551+
20552+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20553+ unsigned int pending)
20554+{
20555+ int err;
20556+ aufs_bindex_t bindex, bend;
20557+ unsigned char do_plink, skip, do_free;
20558+ struct au_branch *br;
20559+ struct au_wbr *wbr;
20560+ struct dentry *root;
20561+ struct inode *dir, *h_dir;
20562+ struct au_sbinfo *sbinfo;
20563+ struct au_hinode *hdir;
20564+
dece6358
AM
20565+ SiMustAnyLock(sb);
20566+
1facf9fc 20567+ sbinfo = au_sbi(sb);
20568+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
20569+
dece6358
AM
20570+ if (!(sb_flags & MS_RDONLY)) {
20571+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
4a4d8108 20572+ pr_warning("first branch should be rw\n");
dece6358 20573+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
4a4d8108 20574+ pr_warning("shwh should be used with ro\n");
dece6358 20575+ }
1facf9fc 20576+
4a4d8108 20577+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 20578+ && !au_opt_test(sbinfo->si_mntflags, XINO))
4a4d8108 20579+ pr_warning("udba=*notify requires xino\n");
1facf9fc 20580+
20581+ err = 0;
20582+ root = sb->s_root;
4a4d8108 20583+ dir = root->d_inode;
1facf9fc 20584+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
20585+ bend = au_sbend(sb);
20586+ for (bindex = 0; !err && bindex <= bend; bindex++) {
20587+ skip = 0;
20588+ h_dir = au_h_iptr(dir, bindex);
20589+ br = au_sbr(sb, bindex);
20590+ do_free = 0;
20591+
20592+ wbr = br->br_wbr;
20593+ if (wbr)
20594+ wbr_wh_read_lock(wbr);
20595+
1e00d052 20596+ if (!au_br_writable(br->br_perm)) {
1facf9fc 20597+ do_free = !!wbr;
20598+ skip = (!wbr
20599+ || (!wbr->wbr_whbase
20600+ && !wbr->wbr_plink
20601+ && !wbr->wbr_orph));
1e00d052 20602+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 20603+ /* skip = (!br->br_whbase && !br->br_orph); */
20604+ skip = (!wbr || !wbr->wbr_whbase);
20605+ if (skip && wbr) {
20606+ if (do_plink)
20607+ skip = !!wbr->wbr_plink;
20608+ else
20609+ skip = !wbr->wbr_plink;
20610+ }
1e00d052 20611+ } else {
1facf9fc 20612+ /* skip = (br->br_whbase && br->br_ohph); */
20613+ skip = (wbr && wbr->wbr_whbase);
20614+ if (skip) {
20615+ if (do_plink)
20616+ skip = !!wbr->wbr_plink;
20617+ else
20618+ skip = !wbr->wbr_plink;
20619+ }
1facf9fc 20620+ }
20621+ if (wbr)
20622+ wbr_wh_read_unlock(wbr);
20623+
20624+ if (skip)
20625+ continue;
20626+
20627+ hdir = au_hi(dir, bindex);
4a4d8108 20628+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 20629+ if (wbr)
20630+ wbr_wh_write_lock(wbr);
20631+ err = au_wh_init(au_h_dptr(root, bindex), br, sb);
20632+ if (wbr)
20633+ wbr_wh_write_unlock(wbr);
4a4d8108 20634+ au_hn_imtx_unlock(hdir);
1facf9fc 20635+
20636+ if (!err && do_free) {
20637+ kfree(wbr);
20638+ br->br_wbr = NULL;
20639+ }
20640+ }
20641+
20642+ return err;
20643+}
20644+
20645+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
20646+{
20647+ int err;
20648+ unsigned int tmp;
027c5e7a 20649+ aufs_bindex_t bindex, bend;
1facf9fc 20650+ struct au_opt *opt;
20651+ struct au_opt_xino *opt_xino, xino;
20652+ struct au_sbinfo *sbinfo;
027c5e7a 20653+ struct au_branch *br;
1facf9fc 20654+
dece6358
AM
20655+ SiMustWriteLock(sb);
20656+
1facf9fc 20657+ err = 0;
20658+ opt_xino = NULL;
20659+ opt = opts->opt;
20660+ while (err >= 0 && opt->type != Opt_tail)
20661+ err = au_opt_simple(sb, opt++, opts);
20662+ if (err > 0)
20663+ err = 0;
20664+ else if (unlikely(err < 0))
20665+ goto out;
20666+
20667+ /* disable xino and udba temporary */
20668+ sbinfo = au_sbi(sb);
20669+ tmp = sbinfo->si_mntflags;
20670+ au_opt_clr(sbinfo->si_mntflags, XINO);
20671+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
20672+
20673+ opt = opts->opt;
20674+ while (err >= 0 && opt->type != Opt_tail)
20675+ err = au_opt_br(sb, opt++, opts);
20676+ if (err > 0)
20677+ err = 0;
20678+ else if (unlikely(err < 0))
20679+ goto out;
20680+
20681+ bend = au_sbend(sb);
20682+ if (unlikely(bend < 0)) {
20683+ err = -EINVAL;
4a4d8108 20684+ pr_err("no branches\n");
1facf9fc 20685+ goto out;
20686+ }
20687+
20688+ if (au_opt_test(tmp, XINO))
20689+ au_opt_set(sbinfo->si_mntflags, XINO);
20690+ opt = opts->opt;
20691+ while (!err && opt->type != Opt_tail)
20692+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
20693+ if (unlikely(err))
20694+ goto out;
20695+
20696+ err = au_opts_verify(sb, sb->s_flags, tmp);
20697+ if (unlikely(err))
20698+ goto out;
20699+
20700+ /* restore xino */
20701+ if (au_opt_test(tmp, XINO) && !opt_xino) {
20702+ xino.file = au_xino_def(sb);
20703+ err = PTR_ERR(xino.file);
20704+ if (IS_ERR(xino.file))
20705+ goto out;
20706+
20707+ err = au_xino_set(sb, &xino, /*remount*/0);
20708+ fput(xino.file);
20709+ if (unlikely(err))
20710+ goto out;
20711+ }
20712+
20713+ /* restore udba */
027c5e7a 20714+ tmp &= AuOptMask_UDBA;
1facf9fc 20715+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
20716+ sbinfo->si_mntflags |= tmp;
20717+ bend = au_sbend(sb);
20718+ for (bindex = 0; bindex <= bend; bindex++) {
20719+ br = au_sbr(sb, bindex);
20720+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
20721+ if (unlikely(err))
20722+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
20723+ bindex, err);
20724+ /* go on even if err */
20725+ }
4a4d8108 20726+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
1facf9fc 20727+ struct inode *dir = sb->s_root->d_inode;
4a4d8108 20728+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 20729+ }
20730+
4f0767ce 20731+out:
1facf9fc 20732+ return err;
20733+}
20734+
20735+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
20736+{
20737+ int err, rerr;
20738+ struct inode *dir;
20739+ struct au_opt_xino *opt_xino;
20740+ struct au_opt *opt;
20741+ struct au_sbinfo *sbinfo;
20742+
dece6358
AM
20743+ SiMustWriteLock(sb);
20744+
1facf9fc 20745+ dir = sb->s_root->d_inode;
20746+ sbinfo = au_sbi(sb);
20747+ err = 0;
20748+ opt_xino = NULL;
20749+ opt = opts->opt;
20750+ while (err >= 0 && opt->type != Opt_tail) {
20751+ err = au_opt_simple(sb, opt, opts);
20752+ if (!err)
20753+ err = au_opt_br(sb, opt, opts);
20754+ if (!err)
20755+ err = au_opt_xino(sb, opt, &opt_xino, opts);
20756+ opt++;
20757+ }
20758+ if (err > 0)
20759+ err = 0;
20760+ AuTraceErr(err);
20761+ /* go on even err */
20762+
20763+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
20764+ if (unlikely(rerr && !err))
20765+ err = rerr;
20766+
20767+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
20768+ rerr = au_xib_trunc(sb);
20769+ if (unlikely(rerr && !err))
20770+ err = rerr;
20771+ }
20772+
20773+ /* will be handled by the caller */
027c5e7a 20774+ if (!au_ftest_opts(opts->flags, REFRESH)
1facf9fc 20775+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
027c5e7a 20776+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20777+
20778+ AuDbg("status 0x%x\n", opts->flags);
20779+ return err;
20780+}
20781+
20782+/* ---------------------------------------------------------------------- */
20783+
20784+unsigned int au_opt_udba(struct super_block *sb)
20785+{
20786+ return au_mntflags(sb) & AuOptMask_UDBA;
20787+}
7f207e10
AM
20788diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
20789--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
20790+++ linux/fs/aufs/opts.h 2012-02-13 21:54:56.973105100 +0100
20791@@ -0,0 +1,209 @@
1facf9fc 20792+/*
f6c5ef8b 20793+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 20794+ *
20795+ * This program, aufs is free software; you can redistribute it and/or modify
20796+ * it under the terms of the GNU General Public License as published by
20797+ * the Free Software Foundation; either version 2 of the License, or
20798+ * (at your option) any later version.
dece6358
AM
20799+ *
20800+ * This program is distributed in the hope that it will be useful,
20801+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20802+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20803+ * GNU General Public License for more details.
20804+ *
20805+ * You should have received a copy of the GNU General Public License
20806+ * along with this program; if not, write to the Free Software
20807+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20808+ */
20809+
20810+/*
20811+ * mount options/flags
20812+ */
20813+
20814+#ifndef __AUFS_OPTS_H__
20815+#define __AUFS_OPTS_H__
20816+
20817+#ifdef __KERNEL__
20818+
dece6358 20819+#include <linux/path.h>
1facf9fc 20820+
dece6358
AM
20821+struct file;
20822+struct super_block;
20823+
1facf9fc 20824+/* ---------------------------------------------------------------------- */
20825+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* unimplemented */
#define AuOpt_REFROF		(1 << 8)	/* unimplemented */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/* a flag of an unconfigured feature becomes zero, its test is always false */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test, set and clear a mount flag by its short name.
 * @flags is parenthesized in every macro, so an expression such as "a | b"
 * can be passed as it is.
 * the udba flags are selected exclusively. au_opt_set() refuses them at build
 * time, use au_opt_set_udba() which clears the other udba flags first.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)

/* @mntflags as it is, but PLINK is dropped when procfs is not configured */
static inline unsigned int au_opts_plink(unsigned int mntflags)
{
#ifdef CONFIG_PROC_FS
	return mntflags;
#else
	return mntflags & ~AuOpt_PLINK;
#endif
}
20883+
1facf9fc 20884+/* ---------------------------------------------------------------------- */
20885+
/*
 * policies to select one among multiple writable branches.
 * the value is used as an index into au_wbr_create_ops[], so the order
 * matters.
 */
enum {
	AuWbrCreate_TDP		= 0,	/* top down parent */
	AuWbrCreate_RR		= 1,	/* round robin */
	AuWbrCreate_MFS		= 2,	/* most free space */
	AuWbrCreate_MFSV	= 3,	/* mfs with seconds */
	AuWbrCreate_MFSRR	= 4,	/* mfs then rr */
	AuWbrCreate_MFSRRV	= 5,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS	= 6,	/* parent and mfs */
	AuWbrCreate_PMFSV	= 7,	/* parent and mfs with seconds */

	AuWbrCreate_Def		= AuWbrCreate_TDP
};

/*
 * policies for copy-up.
 * the value is used as an index into au_wbr_copyup_ops[] in the same way.
 */
enum {
	AuWbrCopyup_TDP		= 0,	/* top down parent */
	AuWbrCopyup_BUP		= 1,	/* bottom up parent */
	AuWbrCopyup_BU		= 2,	/* bottom up */

	AuWbrCopyup_Def		= AuWbrCopyup_TDP
};
20907+
20908+/* ---------------------------------------------------------------------- */
20909+
20910+struct au_opt_add {
20911+ aufs_bindex_t bindex;
20912+ char *pathname;
20913+ int perm;
20914+ struct path path;
20915+};
20916+
20917+struct au_opt_del {
20918+ char *pathname;
20919+ struct path h_path;
20920+};
20921+
20922+struct au_opt_mod {
20923+ char *path;
20924+ int perm;
20925+ struct dentry *h_root;
20926+};
20927+
20928+struct au_opt_xino {
20929+ char *path;
20930+ struct file *file;
20931+};
20932+
20933+struct au_opt_xino_itrunc {
20934+ aufs_bindex_t bindex;
20935+};
20936+
20937+struct au_opt_wbr_create {
20938+ int wbr_create;
20939+ int mfs_second;
20940+ unsigned long long mfsrr_watermark;
20941+};
20942+
20943+struct au_opt {
20944+ int type;
20945+ union {
20946+ struct au_opt_xino xino;
20947+ struct au_opt_xino_itrunc xino_itrunc;
20948+ struct au_opt_add add;
20949+ struct au_opt_del del;
20950+ struct au_opt_mod mod;
20951+ int dirwh;
20952+ int rdcache;
20953+ unsigned int rdblk;
20954+ unsigned int rdhash;
20955+ int udba;
20956+ struct au_opt_wbr_create wbr_create;
20957+ int wbr_copyup;
20958+ };
20959+};
20960+
/* opts flags, kept in au_opts.flags while the options are applied */
#define AuOpts_REMOUNT		(1 << 0)
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)

/* test, set and clear an opts flag by its short name */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) \
	do { (flags) |= AuOpts_##name; } while (0)
#define au_fclr_opts(flags, name) \
	do { (flags) &= ~AuOpts_##name; } while (0)

/* the whole set of the parsed options */
struct au_opts {
	struct au_opt	*opt;		/* array, terminated by Opt_tail */
	int		max_opt;	/* presumably the capacity of @opt */

	unsigned int	given_udba;	/* the udba values given, or-ed */
	unsigned int	flags;		/* AuOpts_xxx */
	unsigned long	sb_flags;	/* given to au_opts_verify() at
					   remount */
};
20980+
20981+/* ---------------------------------------------------------------------- */
20982+
1e00d052 20983+char *au_optstr_br_perm(int brperm);
1facf9fc 20984+const char *au_optstr_udba(int udba);
20985+const char *au_optstr_wbr_copyup(int wbr_copyup);
20986+const char *au_optstr_wbr_create(int wbr_create);
20987+
20988+void au_opts_free(struct au_opts *opts);
20989+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
20990+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20991+ unsigned int pending);
20992+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
20993+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
20994+
20995+unsigned int au_opt_udba(struct super_block *sb);
20996+
20997+/* ---------------------------------------------------------------------- */
20998+
20999+#endif /* __KERNEL__ */
21000+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
21001diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
21002--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 21003+++ linux/fs/aufs/plink.c 2012-02-13 21:54:56.973105100 +0100
027c5e7a 21004@@ -0,0 +1,515 @@
1facf9fc 21005+/*
f6c5ef8b 21006+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 21007+ *
21008+ * This program, aufs is free software; you can redistribute it and/or modify
21009+ * it under the terms of the GNU General Public License as published by
21010+ * the Free Software Foundation; either version 2 of the License, or
21011+ * (at your option) any later version.
dece6358
AM
21012+ *
21013+ * This program is distributed in the hope that it will be useful,
21014+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21015+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21016+ * GNU General Public License for more details.
21017+ *
21018+ * You should have received a copy of the GNU General Public License
21019+ * along with this program; if not, write to the Free Software
21020+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 21021+ */
21022+
21023+/*
21024+ * pseudo-link
21025+ */
21026+
21027+#include "aufs.h"
21028+
21029+/*
e49829fe 21030+ * the pseudo-link maintenance mode.
1facf9fc 21031+ * during a user process maintains the pseudo-links,
21032+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
21033+ *
21034+ * Flags
21035+ * NOPLM:
21036+ * For entry functions which will handle plink, and i_mutex is already held
21037+ * in VFS.
21038+ * They cannot wait and should return an error at once.
21039+ * Callers has to check the error.
21040+ * NOPLMW:
21041+ * For entry functions which will handle plink, but i_mutex is not held
21042+ * in VFS.
21043+ * They can wait the plink maintenance mode to finish.
21044+ *
21045+ * They behave like F_SETLK and F_SETLKW.
21046+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 21047+ */
e49829fe
JR
21048+
21049+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 21050+{
e49829fe
JR
21051+ int err;
21052+ pid_t pid, ppid;
21053+ struct au_sbinfo *sbi;
dece6358
AM
21054+
21055+ SiMustAnyLock(sb);
21056+
e49829fe
JR
21057+ err = 0;
21058+ if (!au_opt_test(au_mntflags(sb), PLINK))
21059+ goto out;
21060+
21061+ sbi = au_sbi(sb);
21062+ pid = sbi->si_plink_maint_pid;
21063+ if (!pid || pid == current->pid)
21064+ goto out;
21065+
21066+ /* todo: it highly depends upon /sbin/mount.aufs */
21067+ rcu_read_lock();
21068+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
21069+ rcu_read_unlock();
21070+ if (pid == ppid)
21071+ goto out;
21072+
21073+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
21074+ /* if there is no i_mutex lock in VFS, we don't need to wait */
21075+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
21076+ while (sbi->si_plink_maint_pid) {
21077+ si_read_unlock(sb);
21078+ /* gave up wake_up_bit() */
21079+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
21080+
21081+ if (au_ftest_lock(flags, FLUSH))
21082+ au_nwt_flush(&sbi->si_nowait);
21083+ si_noflush_read_lock(sb);
21084+ }
21085+ } else if (au_ftest_lock(flags, NOPLM)) {
21086+ AuDbg("ppid %d, pid %d\n", ppid, pid);
21087+ err = -EAGAIN;
21088+ }
21089+
21090+out:
21091+ return err;
4a4d8108
AM
21092+}
21093+
e49829fe 21094+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 21095+{
4a4d8108 21096+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 21097+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 21098+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 21099+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
21100+}
21101+
e49829fe 21102+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
21103+{
21104+ int err;
4a4d8108
AM
21105+ struct au_sbinfo *sbinfo;
21106+
21107+ err = 0;
4a4d8108
AM
21108+ sbinfo = au_sbi(sb);
21109+ /* make sure i am the only one in this fs */
e49829fe
JR
21110+ si_write_lock(sb, AuLock_FLUSH);
21111+ if (au_opt_test(au_mntflags(sb), PLINK)) {
21112+ spin_lock(&sbinfo->si_plink_maint_lock);
21113+ if (!sbinfo->si_plink_maint_pid)
21114+ sbinfo->si_plink_maint_pid = current->pid;
21115+ else
21116+ err = -EBUSY;
21117+ spin_unlock(&sbinfo->si_plink_maint_lock);
21118+ }
4a4d8108
AM
21119+ si_write_unlock(sb);
21120+
21121+ return err;
1facf9fc 21122+}
21123+
21124+/* ---------------------------------------------------------------------- */
21125+
21126+struct pseudo_link {
4a4d8108
AM
21127+ union {
21128+ struct list_head list;
21129+ struct rcu_head rcu;
21130+ };
1facf9fc 21131+ struct inode *inode;
21132+};
21133+
#ifdef CONFIG_AUFS_DEBUG
/* print the inode number of every pseudo-linked inode as a debug message */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbi;
	struct list_head *head;
	struct pseudo_link *entry;

	SiMustAnyLock(sb);

	sbi = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	head = &sbi->si_plink.head;
	rcu_read_lock();
	list_for_each_entry_rcu(entry, head, list) {
		AuDbg("%lu\n", entry->inode->i_ino);
	}
	rcu_read_unlock();
}
#endif
21154+
21155+/* is the inode pseudo-linked? */
21156+int au_plink_test(struct inode *inode)
21157+{
21158+ int found;
21159+ struct au_sbinfo *sbinfo;
21160+ struct list_head *plink_list;
21161+ struct pseudo_link *plink;
21162+
21163+ sbinfo = au_sbi(inode->i_sb);
dece6358 21164+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 21165+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 21166+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 21167+
21168+ found = 0;
21169+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21170+ rcu_read_lock();
21171+ list_for_each_entry_rcu(plink, plink_list, list)
1facf9fc 21172+ if (plink->inode == inode) {
21173+ found = 1;
21174+ break;
21175+ }
4a4d8108 21176+ rcu_read_unlock();
1facf9fc 21177+ return found;
21178+}
21179+
21180+/* ---------------------------------------------------------------------- */
21181+
21182+/*
21183+ * generate a name for plink.
21184+ * the file will be stored under AUFS_WH_PLINKDIR.
21185+ */
21186+/* 20 is max digits length of ulong 64 */
21187+#define PLINK_NAME_LEN ((20 + 1) * 2)
21188+
21189+static int plink_name(char *name, int len, struct inode *inode,
21190+ aufs_bindex_t bindex)
21191+{
21192+ int rlen;
21193+ struct inode *h_inode;
21194+
21195+ h_inode = au_h_iptr(inode, bindex);
21196+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
21197+ return rlen;
21198+}
21199+
7f207e10
AM
21200+struct au_do_plink_lkup_args {
21201+ struct dentry **errp;
21202+ struct qstr *tgtname;
21203+ struct dentry *h_parent;
21204+ struct au_branch *br;
21205+};
21206+
21207+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
21208+ struct dentry *h_parent,
21209+ struct au_branch *br)
21210+{
21211+ struct dentry *h_dentry;
21212+ struct mutex *h_mtx;
21213+
21214+ h_mtx = &h_parent->d_inode->i_mutex;
21215+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
21216+ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL);
21217+ mutex_unlock(h_mtx);
21218+ return h_dentry;
21219+}
21220+
21221+static void au_call_do_plink_lkup(void *args)
21222+{
21223+ struct au_do_plink_lkup_args *a = args;
21224+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
21225+}
21226+
1facf9fc 21227+/* lookup the plink-ed @inode under the branch at @bindex */
21228+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
21229+{
21230+ struct dentry *h_dentry, *h_parent;
21231+ struct au_branch *br;
21232+ struct inode *h_dir;
7f207e10 21233+ int wkq_err;
1facf9fc 21234+ char a[PLINK_NAME_LEN];
21235+ struct qstr tgtname = {
21236+ .name = a
21237+ };
21238+
e49829fe
JR
21239+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
21240+
1facf9fc 21241+ br = au_sbr(inode->i_sb, bindex);
21242+ h_parent = br->br_wbr->wbr_plink;
21243+ h_dir = h_parent->d_inode;
21244+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21245+
7f207e10
AM
21246+ if (current_fsuid()) {
21247+ struct au_do_plink_lkup_args args = {
21248+ .errp = &h_dentry,
21249+ .tgtname = &tgtname,
21250+ .h_parent = h_parent,
21251+ .br = br
21252+ };
21253+
21254+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
21255+ if (unlikely(wkq_err))
21256+ h_dentry = ERR_PTR(wkq_err);
21257+ } else
21258+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
21259+
1facf9fc 21260+ return h_dentry;
21261+}
21262+
21263+/* create a pseudo-link */
21264+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
21265+ struct dentry *h_dentry, struct au_branch *br)
21266+{
21267+ int err;
21268+ struct path h_path = {
21269+ .mnt = br->br_mnt
21270+ };
21271+ struct inode *h_dir;
21272+
21273+ h_dir = h_parent->d_inode;
7f207e10 21274+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 21275+again:
1facf9fc 21276+ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
21277+ err = PTR_ERR(h_path.dentry);
21278+ if (IS_ERR(h_path.dentry))
21279+ goto out;
21280+
21281+ err = 0;
21282+ /* wh.plink dir is not monitored */
7f207e10 21283+ /* todo: is it really safe? */
1facf9fc 21284+ if (h_path.dentry->d_inode
21285+ && h_path.dentry->d_inode != h_dentry->d_inode) {
21286+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
21287+ dput(h_path.dentry);
21288+ h_path.dentry = NULL;
21289+ if (!err)
21290+ goto again;
21291+ }
21292+ if (!err && !h_path.dentry->d_inode)
21293+ err = vfsub_link(h_dentry, h_dir, &h_path);
21294+ dput(h_path.dentry);
21295+
4f0767ce 21296+out:
7f207e10 21297+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 21298+ return err;
21299+}
21300+
21301+struct do_whplink_args {
21302+ int *errp;
21303+ struct qstr *tgt;
21304+ struct dentry *h_parent;
21305+ struct dentry *h_dentry;
21306+ struct au_branch *br;
21307+};
21308+
21309+static void call_do_whplink(void *args)
21310+{
21311+ struct do_whplink_args *a = args;
21312+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
21313+}
21314+
21315+static int whplink(struct dentry *h_dentry, struct inode *inode,
21316+ aufs_bindex_t bindex, struct au_branch *br)
21317+{
21318+ int err, wkq_err;
21319+ struct au_wbr *wbr;
21320+ struct dentry *h_parent;
21321+ struct inode *h_dir;
21322+ char a[PLINK_NAME_LEN];
21323+ struct qstr tgtname = {
21324+ .name = a
21325+ };
21326+
21327+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
21328+ h_parent = wbr->wbr_plink;
21329+ h_dir = h_parent->d_inode;
21330+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21331+
21332+ /* always superio. */
b752ccd1 21333+ if (current_fsuid()) {
1facf9fc 21334+ struct do_whplink_args args = {
21335+ .errp = &err,
21336+ .tgt = &tgtname,
21337+ .h_parent = h_parent,
21338+ .h_dentry = h_dentry,
21339+ .br = br
21340+ };
21341+ wkq_err = au_wkq_wait(call_do_whplink, &args);
21342+ if (unlikely(wkq_err))
21343+ err = wkq_err;
21344+ } else
21345+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 21346+
21347+ return err;
21348+}
21349+
21350+/* free a single plink */
21351+static void do_put_plink(struct pseudo_link *plink, int do_del)
21352+{
1facf9fc 21353+ if (do_del)
21354+ list_del(&plink->list);
4a4d8108
AM
21355+ iput(plink->inode);
21356+ kfree(plink);
21357+}
21358+
21359+static void do_put_plink_rcu(struct rcu_head *rcu)
21360+{
21361+ struct pseudo_link *plink;
21362+
21363+ plink = container_of(rcu, struct pseudo_link, rcu);
21364+ iput(plink->inode);
1facf9fc 21365+ kfree(plink);
21366+}
21367+
21368+/*
21369+ * create a new pseudo-link for @h_dentry on @bindex.
21370+ * the linked inode is held in aufs @inode.
21371+ */
21372+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
21373+ struct dentry *h_dentry)
21374+{
21375+ struct super_block *sb;
21376+ struct au_sbinfo *sbinfo;
21377+ struct list_head *plink_list;
4a4d8108 21378+ struct pseudo_link *plink, *tmp;
1facf9fc 21379+ int found, err, cnt;
21380+
21381+ sb = inode->i_sb;
21382+ sbinfo = au_sbi(sb);
21383+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21384+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21385+
1facf9fc 21386+ cnt = 0;
21387+ found = 0;
21388+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21389+ rcu_read_lock();
21390+ list_for_each_entry_rcu(plink, plink_list, list) {
1facf9fc 21391+ cnt++;
21392+ if (plink->inode == inode) {
21393+ found = 1;
21394+ break;
21395+ }
21396+ }
4a4d8108
AM
21397+ rcu_read_unlock();
21398+ if (found)
1facf9fc 21399+ return;
4a4d8108
AM
21400+
21401+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
21402+ if (tmp)
21403+ tmp->inode = au_igrab(inode);
21404+ else {
21405+ err = -ENOMEM;
21406+ goto out;
1facf9fc 21407+ }
21408+
4a4d8108
AM
21409+ spin_lock(&sbinfo->si_plink.spin);
21410+ list_for_each_entry(plink, plink_list, list) {
21411+ if (plink->inode == inode) {
21412+ found = 1;
21413+ break;
21414+ }
1facf9fc 21415+ }
4a4d8108
AM
21416+ if (!found)
21417+ list_add_rcu(&tmp->list, plink_list);
1facf9fc 21418+ spin_unlock(&sbinfo->si_plink.spin);
4a4d8108
AM
21419+ if (!found) {
21420+ cnt++;
21421+ WARN_ONCE(cnt > AUFS_PLINK_WARN,
21422+ "unexpectedly many pseudo links, %d\n", cnt);
1facf9fc 21423+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
21424+ } else {
21425+ do_put_plink(tmp, 0);
21426+ return;
1facf9fc 21427+ }
21428+
4a4d8108 21429+out:
1facf9fc 21430+ if (unlikely(err)) {
4a4d8108
AM
21431+ pr_warning("err %d, damaged pseudo link.\n", err);
21432+ if (tmp) {
21433+ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink);
21434+ call_rcu(&tmp->rcu, do_put_plink_rcu);
21435+ }
1facf9fc 21436+ }
21437+}
21438+
21439+/* free all plinks */
e49829fe 21440+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 21441+{
21442+ struct au_sbinfo *sbinfo;
21443+ struct list_head *plink_list;
21444+ struct pseudo_link *plink, *tmp;
21445+
dece6358
AM
21446+ SiMustWriteLock(sb);
21447+
1facf9fc 21448+ sbinfo = au_sbi(sb);
21449+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21450+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21451+
21452+ plink_list = &sbinfo->si_plink.head;
21453+ /* no spin_lock since sbinfo is write-locked */
e49829fe 21454+ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed");
1facf9fc 21455+ list_for_each_entry_safe(plink, tmp, plink_list, list)
21456+ do_put_plink(plink, 0);
21457+ INIT_LIST_HEAD(plink_list);
21458+}
21459+
e49829fe
JR
21460+void au_plink_clean(struct super_block *sb, int verbose)
21461+{
21462+ struct dentry *root;
21463+
21464+ root = sb->s_root;
21465+ aufs_write_lock(root);
21466+ if (au_opt_test(au_mntflags(sb), PLINK))
21467+ au_plink_put(sb, verbose);
21468+ aufs_write_unlock(root);
21469+}
21470+
1facf9fc 21471+/* free the plinks on a branch specified by @br_id */
21472+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
21473+{
21474+ struct au_sbinfo *sbinfo;
21475+ struct list_head *plink_list;
21476+ struct pseudo_link *plink, *tmp;
21477+ struct inode *inode;
21478+ aufs_bindex_t bstart, bend, bindex;
21479+ unsigned char do_put;
21480+
dece6358
AM
21481+ SiMustWriteLock(sb);
21482+
1facf9fc 21483+ sbinfo = au_sbi(sb);
21484+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21485+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21486+
21487+ plink_list = &sbinfo->si_plink.head;
21488+ /* no spin_lock since sbinfo is write-locked */
21489+ list_for_each_entry_safe(plink, tmp, plink_list, list) {
21490+ do_put = 0;
21491+ inode = au_igrab(plink->inode);
21492+ ii_write_lock_child(inode);
21493+ bstart = au_ibstart(inode);
21494+ bend = au_ibend(inode);
21495+ if (bstart >= 0) {
21496+ for (bindex = bstart; bindex <= bend; bindex++) {
21497+ if (!au_h_iptr(inode, bindex)
21498+ || au_ii_br_id(inode, bindex) != br_id)
21499+ continue;
21500+ au_set_h_iptr(inode, bindex, NULL, 0);
21501+ do_put = 1;
21502+ break;
21503+ }
21504+ } else
21505+ do_put_plink(plink, 1);
21506+
dece6358
AM
21507+ if (do_put) {
21508+ for (bindex = bstart; bindex <= bend; bindex++)
21509+ if (au_h_iptr(inode, bindex)) {
21510+ do_put = 0;
21511+ break;
21512+ }
21513+ if (do_put)
21514+ do_put_plink(plink, 1);
21515+ }
21516+ ii_write_unlock(inode);
21517+ iput(inode);
21518+ }
21519+}
7f207e10
AM
21520diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
21521--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 21522+++ linux/fs/aufs/poll.c 2012-02-13 21:54:56.973105100 +0100
dece6358
AM
21523@@ -0,0 +1,56 @@
21524+/*
f6c5ef8b 21525+ * Copyright (C) 2005-2012 Junjiro R. Okajima
dece6358
AM
21526+ *
21527+ * This program, aufs is free software; you can redistribute it and/or modify
21528+ * it under the terms of the GNU General Public License as published by
21529+ * the Free Software Foundation; either version 2 of the License, or
21530+ * (at your option) any later version.
21531+ *
21532+ * This program is distributed in the hope that it will be useful,
21533+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21534+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21535+ * GNU General Public License for more details.
21536+ *
21537+ * You should have received a copy of the GNU General Public License
21538+ * along with this program; if not, write to the Free Software
21539+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21540+ */
21541+
1308ab2a 21542+/*
21543+ * poll operation
21544+ * There is only one filesystem which implements ->poll operation, currently.
21545+ */
21546+
21547+#include "aufs.h"
21548+
21549+unsigned int aufs_poll(struct file *file, poll_table *wait)
21550+{
21551+ unsigned int mask;
21552+ int err;
21553+ struct file *h_file;
21554+ struct dentry *dentry;
21555+ struct super_block *sb;
21556+
21557+ /* We should pretend an error happened. */
21558+ mask = POLLERR /* | POLLIN | POLLOUT */;
21559+ dentry = file->f_dentry;
21560+ sb = dentry->d_sb;
e49829fe 21561+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
1308ab2a 21562+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
21563+ if (unlikely(err))
21564+ goto out;
21565+
21566+ /* it is not an error if h_file has no operation */
21567+ mask = DEFAULT_POLLMASK;
4a4d8108 21568+ h_file = au_hf_top(file);
1308ab2a 21569+ if (h_file->f_op && h_file->f_op->poll)
21570+ mask = h_file->f_op->poll(h_file, wait);
21571+
21572+ di_read_unlock(dentry, AuLock_IR);
21573+ fi_read_unlock(file);
21574+
4f0767ce 21575+out:
1308ab2a 21576+ si_read_unlock(sb);
21577+ AuTraceErr((int)mask);
21578+ return mask;
21579+}
7f207e10
AM
21580diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
21581--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 21582+++ linux/fs/aufs/procfs.c 2012-02-13 21:54:56.973105100 +0100
53392da6 21583@@ -0,0 +1,170 @@
e49829fe 21584+/*
f6c5ef8b 21585+ * Copyright (C) 2010-2012 Junjiro R. Okajima
e49829fe
JR
21586+ *
21587+ * This program, aufs is free software; you can redistribute it and/or modify
21588+ * it under the terms of the GNU General Public License as published by
21589+ * the Free Software Foundation; either version 2 of the License, or
21590+ * (at your option) any later version.
21591+ *
21592+ * This program is distributed in the hope that it will be useful,
21593+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21594+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21595+ * GNU General Public License for more details.
21596+ *
21597+ * You should have received a copy of the GNU General Public License
21598+ * along with this program; if not, write to the Free Software
21599+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21600+ */
21601+
21602+/*
21603+ * procfs interfaces
21604+ */
21605+
21606+#include <linux/proc_fs.h>
21607+#include "aufs.h"
21608+
21609+static int au_procfs_plm_release(struct inode *inode, struct file *file)
21610+{
21611+ struct au_sbinfo *sbinfo;
21612+
21613+ sbinfo = file->private_data;
21614+ if (sbinfo) {
21615+ au_plink_maint_leave(sbinfo);
21616+ kobject_put(&sbinfo->si_kobj);
21617+ }
21618+
21619+ return 0;
21620+}
21621+
21622+static void au_procfs_plm_write_clean(struct file *file)
21623+{
21624+ struct au_sbinfo *sbinfo;
21625+
21626+ sbinfo = file->private_data;
21627+ if (sbinfo)
21628+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
21629+}
21630+
21631+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
21632+{
21633+ int err;
21634+ struct super_block *sb;
21635+ struct au_sbinfo *sbinfo;
21636+
21637+ err = -EBUSY;
21638+ if (unlikely(file->private_data))
21639+ goto out;
21640+
21641+ sb = NULL;
53392da6 21642+ /* don't use au_sbilist_lock() here */
e49829fe
JR
21643+ spin_lock(&au_sbilist.spin);
21644+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
21645+ if (id == sysaufs_si_id(sbinfo)) {
21646+ kobject_get(&sbinfo->si_kobj);
21647+ sb = sbinfo->si_sb;
21648+ break;
21649+ }
21650+ spin_unlock(&au_sbilist.spin);
21651+
21652+ err = -EINVAL;
21653+ if (unlikely(!sb))
21654+ goto out;
21655+
21656+ err = au_plink_maint_enter(sb);
21657+ if (!err)
21658+ /* keep kobject_get() */
21659+ file->private_data = sbinfo;
21660+ else
21661+ kobject_put(&sbinfo->si_kobj);
21662+out:
21663+ return err;
21664+}
21665+
21666+/*
21667+ * Accept a valid "si=xxxx" only.
21668+ * Once it is accepted successfully, accept "clean" too.
21669+ */
21670+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
21671+ size_t count, loff_t *ppos)
21672+{
21673+ ssize_t err;
21674+ unsigned long id;
21675+ /* last newline is allowed */
21676+ char buf[3 + sizeof(unsigned long) * 2 + 1];
21677+
21678+ err = -EACCES;
21679+ if (unlikely(!capable(CAP_SYS_ADMIN)))
21680+ goto out;
21681+
21682+ err = -EINVAL;
21683+ if (unlikely(count > sizeof(buf)))
21684+ goto out;
21685+
21686+ err = copy_from_user(buf, ubuf, count);
21687+ if (unlikely(err)) {
21688+ err = -EFAULT;
21689+ goto out;
21690+ }
21691+ buf[count] = 0;
21692+
21693+ err = -EINVAL;
21694+ if (!strcmp("clean", buf)) {
21695+ au_procfs_plm_write_clean(file);
21696+ goto out_success;
21697+ } else if (unlikely(strncmp("si=", buf, 3)))
21698+ goto out;
21699+
9dbd164d 21700+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
21701+ if (unlikely(err))
21702+ goto out;
21703+
21704+ err = au_procfs_plm_write_si(file, id);
21705+ if (unlikely(err))
21706+ goto out;
21707+
21708+out_success:
21709+ err = count; /* success */
21710+out:
21711+ return err;
21712+}
21713+
21714+static const struct file_operations au_procfs_plm_fop = {
21715+ .write = au_procfs_plm_write,
21716+ .release = au_procfs_plm_release,
21717+ .owner = THIS_MODULE
21718+};
21719+
21720+/* ---------------------------------------------------------------------- */
21721+
21722+static struct proc_dir_entry *au_procfs_dir;
21723+
21724+void au_procfs_fin(void)
21725+{
21726+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
21727+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21728+}
21729+
21730+int __init au_procfs_init(void)
21731+{
21732+ int err;
21733+ struct proc_dir_entry *entry;
21734+
21735+ err = -ENOMEM;
21736+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
21737+ if (unlikely(!au_procfs_dir))
21738+ goto out;
21739+
21740+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
21741+ au_procfs_dir, &au_procfs_plm_fop);
21742+ if (unlikely(!entry))
21743+ goto out_dir;
21744+
21745+ err = 0;
21746+ goto out; /* success */
21747+
21748+
21749+out_dir:
21750+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21751+out:
21752+ return err;
21753+}
7f207e10
AM
21754diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
21755--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
21756+++ linux/fs/aufs/rdu.c 2012-02-13 21:54:56.973105100 +0100
21757@@ -0,0 +1,383 @@
1308ab2a 21758+/*
f6c5ef8b 21759+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1308ab2a 21760+ *
21761+ * This program, aufs is free software; you can redistribute it and/or modify
21762+ * it under the terms of the GNU General Public License as published by
21763+ * the Free Software Foundation; either version 2 of the License, or
21764+ * (at your option) any later version.
21765+ *
21766+ * This program is distributed in the hope that it will be useful,
21767+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21768+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21769+ * GNU General Public License for more details.
21770+ *
21771+ * You should have received a copy of the GNU General Public License
21772+ * along with this program; if not, write to the Free Software
21773+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21774+ */
21775+
21776+/*
21777+ * readdir in userspace.
21778+ */
21779+
b752ccd1 21780+#include <linux/compat.h>
4a4d8108 21781+#include <linux/fs_stack.h>
1308ab2a 21782+#include <linux/security.h>
1308ab2a 21783+#include "aufs.h"
21784+
21785+/* bits for struct aufs_rdu.flags */
21786+#define AuRdu_CALLED 1
21787+#define AuRdu_CONT (1 << 1)
21788+#define AuRdu_FULL (1 << 2)
21789+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
21790+#define au_fset_rdu(flags, name) \
21791+ do { (flags) |= AuRdu_##name; } while (0)
21792+#define au_fclr_rdu(flags, name) \
21793+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 21794+
21795+struct au_rdu_arg {
21796+ struct aufs_rdu *rdu;
21797+ union au_rdu_ent_ul ent;
21798+ unsigned long end;
21799+
21800+ struct super_block *sb;
21801+ int err;
21802+};
21803+
21804+static int au_rdu_fill(void *__arg, const char *name, int nlen,
21805+ loff_t offset, u64 h_ino, unsigned int d_type)
21806+{
21807+ int err, len;
21808+ struct au_rdu_arg *arg = __arg;
21809+ struct aufs_rdu *rdu = arg->rdu;
21810+ struct au_rdu_ent ent;
21811+
21812+ err = 0;
21813+ arg->err = 0;
21814+ au_fset_rdu(rdu->cookie.flags, CALLED);
21815+ len = au_rdu_len(nlen);
21816+ if (arg->ent.ul + len < arg->end) {
21817+ ent.ino = h_ino;
21818+ ent.bindex = rdu->cookie.bindex;
21819+ ent.type = d_type;
21820+ ent.nlen = nlen;
4a4d8108
AM
21821+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
21822+ ent.type = DT_UNKNOWN;
1308ab2a 21823+
9dbd164d 21824+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 21825+ err = -EFAULT;
21826+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
21827+ goto out;
21828+ if (copy_to_user(arg->ent.e->name, name, nlen))
21829+ goto out;
21830+ /* the terminating NULL */
21831+ if (__put_user(0, arg->ent.e->name + nlen))
21832+ goto out;
21833+ err = 0;
21834+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
21835+ arg->ent.ul += len;
21836+ rdu->rent++;
21837+ } else {
21838+ err = -EFAULT;
21839+ au_fset_rdu(rdu->cookie.flags, FULL);
21840+ rdu->full = 1;
21841+ rdu->tail = arg->ent;
21842+ }
21843+
4f0767ce 21844+out:
1308ab2a 21845+ /* AuTraceErr(err); */
21846+ return err;
21847+}
21848+
21849+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
21850+{
21851+ int err;
21852+ loff_t offset;
21853+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
21854+
21855+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
21856+ err = offset;
21857+ if (unlikely(offset != cookie->h_pos))
21858+ goto out;
21859+
21860+ err = 0;
21861+ do {
21862+ arg->err = 0;
21863+ au_fclr_rdu(cookie->flags, CALLED);
21864+ /* smp_mb(); */
21865+ err = vfsub_readdir(h_file, au_rdu_fill, arg);
21866+ if (err >= 0)
21867+ err = arg->err;
21868+ } while (!err
21869+ && au_ftest_rdu(cookie->flags, CALLED)
21870+ && !au_ftest_rdu(cookie->flags, FULL));
21871+ cookie->h_pos = h_file->f_pos;
21872+
4f0767ce 21873+out:
1308ab2a 21874+ AuTraceErr(err);
21875+ return err;
21876+}
21877+
21878+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
21879+{
21880+ int err;
21881+ aufs_bindex_t bend;
21882+ struct au_rdu_arg arg;
21883+ struct dentry *dentry;
21884+ struct inode *inode;
21885+ struct file *h_file;
21886+ struct au_rdu_cookie *cookie = &rdu->cookie;
21887+
21888+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
21889+ if (unlikely(err)) {
21890+ err = -EFAULT;
21891+ AuTraceErr(err);
21892+ goto out;
21893+ }
21894+ rdu->rent = 0;
21895+ rdu->tail = rdu->ent;
21896+ rdu->full = 0;
21897+ arg.rdu = rdu;
21898+ arg.ent = rdu->ent;
21899+ arg.end = arg.ent.ul;
21900+ arg.end += rdu->sz;
21901+
21902+ err = -ENOTDIR;
21903+ if (unlikely(!file->f_op || !file->f_op->readdir))
21904+ goto out;
21905+
21906+ err = security_file_permission(file, MAY_READ);
21907+ AuTraceErr(err);
21908+ if (unlikely(err))
21909+ goto out;
21910+
21911+ dentry = file->f_dentry;
21912+ inode = dentry->d_inode;
21913+#if 1
21914+ mutex_lock(&inode->i_mutex);
21915+#else
21916+ err = mutex_lock_killable(&inode->i_mutex);
21917+ AuTraceErr(err);
21918+ if (unlikely(err))
21919+ goto out;
21920+#endif
1308ab2a 21921+
21922+ arg.sb = inode->i_sb;
e49829fe
JR
21923+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
21924+ if (unlikely(err))
21925+ goto out_mtx;
027c5e7a
AM
21926+ err = au_alive_dir(dentry);
21927+ if (unlikely(err))
21928+ goto out_si;
e49829fe 21929+ /* todo: reval? */
1308ab2a 21930+ fi_read_lock(file);
21931+
21932+ err = -EAGAIN;
21933+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
21934+ && cookie->generation != au_figen(file)))
21935+ goto out_unlock;
21936+
21937+ err = 0;
21938+ if (!rdu->blk) {
21939+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
21940+ if (!rdu->blk)
21941+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
21942+ }
21943+ bend = au_fbstart(file);
21944+ if (cookie->bindex < bend)
21945+ cookie->bindex = bend;
4a4d8108 21946+ bend = au_fbend_dir(file);
1308ab2a 21947+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
21948+ for (; !err && cookie->bindex <= bend;
21949+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 21950+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 21951+ if (!h_file)
21952+ continue;
21953+
21954+ au_fclr_rdu(cookie->flags, FULL);
21955+ err = au_rdu_do(h_file, &arg);
21956+ AuTraceErr(err);
21957+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
21958+ break;
21959+ }
21960+ AuDbg("rent %llu\n", rdu->rent);
21961+
21962+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
21963+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
21964+ au_fset_rdu(cookie->flags, CONT);
21965+ cookie->generation = au_figen(file);
21966+ }
21967+
21968+ ii_read_lock_child(inode);
21969+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
21970+ ii_read_unlock(inode);
21971+
4f0767ce 21972+out_unlock:
1308ab2a 21973+ fi_read_unlock(file);
027c5e7a 21974+out_si:
1308ab2a 21975+ si_read_unlock(arg.sb);
4f0767ce 21976+out_mtx:
1308ab2a 21977+ mutex_unlock(&inode->i_mutex);
4f0767ce 21978+out:
1308ab2a 21979+ AuTraceErr(err);
21980+ return err;
21981+}
21982+
21983+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
21984+{
21985+ int err;
21986+ ino_t ino;
21987+ unsigned long long nent;
21988+ union au_rdu_ent_ul *u;
21989+ struct au_rdu_ent ent;
21990+ struct super_block *sb;
21991+
21992+ err = 0;
21993+ nent = rdu->nent;
21994+ u = &rdu->ent;
21995+ sb = file->f_dentry->d_sb;
21996+ si_read_lock(sb, AuLock_FLUSH);
21997+ while (nent-- > 0) {
9dbd164d 21998+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 21999+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
22000+ if (!err)
22001+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 22002+ if (unlikely(err)) {
22003+ err = -EFAULT;
22004+ AuTraceErr(err);
22005+ break;
22006+ }
22007+
22008+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
22009+ if (!ent.wh)
22010+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
22011+ else
22012+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
22013+ &ino);
22014+ if (unlikely(err)) {
22015+ AuTraceErr(err);
22016+ break;
22017+ }
22018+
22019+ err = __put_user(ino, &u->e->ino);
22020+ if (unlikely(err)) {
22021+ err = -EFAULT;
22022+ AuTraceErr(err);
22023+ break;
22024+ }
22025+ u->ul += au_rdu_len(ent.nlen);
22026+ }
22027+ si_read_unlock(sb);
22028+
22029+ return err;
22030+}
22031+
22032+/* ---------------------------------------------------------------------- */
22033+
22034+static int au_rdu_verify(struct aufs_rdu *rdu)
22035+{
b752ccd1 22036+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 22037+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 22038+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 22039+ rdu->blk,
22040+ rdu->rent, rdu->shwh, rdu->full,
22041+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
22042+ rdu->cookie.generation);
dece6358 22043+
b752ccd1 22044+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 22045+ return 0;
dece6358 22046+
b752ccd1
AM
22047+ AuDbg("%u:%u\n",
22048+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 22049+ return -EINVAL;
22050+}
22051+
22052+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 22053+{
1308ab2a 22054+ long err, e;
22055+ struct aufs_rdu rdu;
22056+ void __user *p = (void __user *)arg;
dece6358 22057+
1308ab2a 22058+ err = copy_from_user(&rdu, p, sizeof(rdu));
22059+ if (unlikely(err)) {
22060+ err = -EFAULT;
22061+ AuTraceErr(err);
22062+ goto out;
22063+ }
22064+ err = au_rdu_verify(&rdu);
dece6358
AM
22065+ if (unlikely(err))
22066+ goto out;
22067+
1308ab2a 22068+ switch (cmd) {
22069+ case AUFS_CTL_RDU:
22070+ err = au_rdu(file, &rdu);
22071+ if (unlikely(err))
22072+ break;
dece6358 22073+
1308ab2a 22074+ e = copy_to_user(p, &rdu, sizeof(rdu));
22075+ if (unlikely(e)) {
22076+ err = -EFAULT;
22077+ AuTraceErr(err);
22078+ }
22079+ break;
22080+ case AUFS_CTL_RDU_INO:
22081+ err = au_rdu_ino(file, &rdu);
22082+ break;
22083+
22084+ default:
4a4d8108 22085+ /* err = -ENOTTY; */
1308ab2a 22086+ err = -EINVAL;
22087+ }
dece6358 22088+
4f0767ce 22089+out:
1308ab2a 22090+ AuTraceErr(err);
22091+ return err;
1facf9fc 22092+}
b752ccd1
AM
22093+
22094+#ifdef CONFIG_COMPAT
22095+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
22096+{
22097+ long err, e;
22098+ struct aufs_rdu rdu;
22099+ void __user *p = compat_ptr(arg);
22100+
22101+ /* todo: get_user()? */
22102+ err = copy_from_user(&rdu, p, sizeof(rdu));
22103+ if (unlikely(err)) {
22104+ err = -EFAULT;
22105+ AuTraceErr(err);
22106+ goto out;
22107+ }
22108+ rdu.ent.e = compat_ptr(rdu.ent.ul);
22109+ err = au_rdu_verify(&rdu);
22110+ if (unlikely(err))
22111+ goto out;
22112+
22113+ switch (cmd) {
22114+ case AUFS_CTL_RDU:
22115+ err = au_rdu(file, &rdu);
22116+ if (unlikely(err))
22117+ break;
22118+
22119+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
22120+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
22121+ e = copy_to_user(p, &rdu, sizeof(rdu));
22122+ if (unlikely(e)) {
22123+ err = -EFAULT;
22124+ AuTraceErr(err);
22125+ }
22126+ break;
22127+ case AUFS_CTL_RDU_INO:
22128+ err = au_rdu_ino(file, &rdu);
22129+ break;
22130+
22131+ default:
22132+ /* err = -ENOTTY; */
22133+ err = -EINVAL;
22134+ }
22135+
4f0767ce 22136+out:
b752ccd1
AM
22137+ AuTraceErr(err);
22138+ return err;
22139+}
22140+#endif
7f207e10
AM
22141diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
22142--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
22143+++ linux/fs/aufs/rwsem.h 2012-02-13 21:54:56.973105100 +0100
22144@@ -0,0 +1,188 @@
1facf9fc 22145+/*
f6c5ef8b 22146+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 22147+ *
22148+ * This program, aufs is free software; you can redistribute it and/or modify
22149+ * it under the terms of the GNU General Public License as published by
22150+ * the Free Software Foundation; either version 2 of the License, or
22151+ * (at your option) any later version.
dece6358
AM
22152+ *
22153+ * This program is distributed in the hope that it will be useful,
22154+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22155+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22156+ * GNU General Public License for more details.
22157+ *
22158+ * You should have received a copy of the GNU General Public License
22159+ * along with this program; if not, write to the Free Software
22160+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22161+ */
22162+
22163+/*
22164+ * simple read-write semaphore wrappers
22165+ */
22166+
22167+#ifndef __AUFS_RWSEM_H__
22168+#define __AUFS_RWSEM_H__
22169+
22170+#ifdef __KERNEL__
22171+
4a4d8108 22172+#include "debug.h"
dece6358
AM
22173+
22174+struct au_rwsem {
22175+ struct rw_semaphore rwsem;
22176+#ifdef CONFIG_AUFS_DEBUG
22177+ /* just for debugging, not almighty counter */
22178+ atomic_t rcnt, wcnt;
22179+#endif
22180+};
22181+
22182+#ifdef CONFIG_AUFS_DEBUG
22183+#define AuDbgCntInit(rw) do { \
22184+ atomic_set(&(rw)->rcnt, 0); \
22185+ atomic_set(&(rw)->wcnt, 0); \
22186+ smp_mb(); /* atomic set */ \
22187+} while (0)
22188+
e49829fe 22189+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 22190+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
e49829fe 22191+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
22192+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
22193+#else
22194+#define AuDbgCntInit(rw) do {} while (0)
22195+#define AuDbgRcntInc(rw) do {} while (0)
22196+#define AuDbgRcntDec(rw) do {} while (0)
22197+#define AuDbgWcntInc(rw) do {} while (0)
22198+#define AuDbgWcntDec(rw) do {} while (0)
22199+#endif /* CONFIG_AUFS_DEBUG */
22200+
22201+/* to debug easier, do not make them inlined functions */
22202+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
22203+/* rwsem_is_locked() is unusable */
22204+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
22205+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
22206+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
22207+ && atomic_read(&(rw)->wcnt) <= 0)
22208+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
22209+ || atomic_read(&(rw)->wcnt))
22210+
e49829fe
JR
22211+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
22212+
dece6358
AM
22213+static inline void au_rw_init(struct au_rwsem *rw)
22214+{
22215+ AuDbgCntInit(rw);
22216+ init_rwsem(&rw->rwsem);
22217+}
22218+
22219+static inline void au_rw_init_wlock(struct au_rwsem *rw)
22220+{
22221+ au_rw_init(rw);
22222+ down_write(&rw->rwsem);
22223+ AuDbgWcntInc(rw);
22224+}
22225+
22226+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
22227+ unsigned int lsc)
22228+{
22229+ au_rw_init(rw);
22230+ down_write_nested(&rw->rwsem, lsc);
22231+ AuDbgWcntInc(rw);
22232+}
22233+
22234+static inline void au_rw_read_lock(struct au_rwsem *rw)
22235+{
22236+ down_read(&rw->rwsem);
22237+ AuDbgRcntInc(rw);
22238+}
22239+
22240+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
22241+{
22242+ down_read_nested(&rw->rwsem, lsc);
22243+ AuDbgRcntInc(rw);
22244+}
22245+
22246+static inline void au_rw_read_unlock(struct au_rwsem *rw)
22247+{
22248+ AuRwMustReadLock(rw);
22249+ AuDbgRcntDec(rw);
22250+ up_read(&rw->rwsem);
22251+}
22252+
22253+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
22254+{
22255+ AuRwMustWriteLock(rw);
22256+ AuDbgRcntInc(rw);
22257+ AuDbgWcntDec(rw);
22258+ downgrade_write(&rw->rwsem);
22259+}
22260+
22261+static inline void au_rw_write_lock(struct au_rwsem *rw)
22262+{
22263+ down_write(&rw->rwsem);
22264+ AuDbgWcntInc(rw);
22265+}
22266+
22267+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
22268+ unsigned int lsc)
22269+{
22270+ down_write_nested(&rw->rwsem, lsc);
22271+ AuDbgWcntInc(rw);
22272+}
1facf9fc 22273+
dece6358
AM
22274+static inline void au_rw_write_unlock(struct au_rwsem *rw)
22275+{
22276+ AuRwMustWriteLock(rw);
22277+ AuDbgWcntDec(rw);
22278+ up_write(&rw->rwsem);
22279+}
22280+
22281+/* why is a _nested version of trylock not defined? */
22282+static inline int au_rw_read_trylock(struct au_rwsem *rw)
22283+{
22284+ int ret = down_read_trylock(&rw->rwsem);
22285+ if (ret)
22286+ AuDbgRcntInc(rw);
22287+ return ret;
22288+}
22289+
22290+static inline int au_rw_write_trylock(struct au_rwsem *rw)
22291+{
22292+ int ret = down_write_trylock(&rw->rwsem);
22293+ if (ret)
22294+ AuDbgWcntInc(rw);
22295+ return ret;
22296+}
22297+
22298+#undef AuDbgCntInit
22299+#undef AuDbgRcntInc
22300+#undef AuDbgRcntDec
22301+#undef AuDbgWcntInc
22302+#undef AuDbgWcntDec
1facf9fc 22303+
22304+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
22305+static inline void prefix##_read_lock(param) \
dece6358 22306+{ au_rw_read_lock(rwsem); } \
1facf9fc 22307+static inline void prefix##_write_lock(param) \
dece6358 22308+{ au_rw_write_lock(rwsem); } \
1facf9fc 22309+static inline int prefix##_read_trylock(param) \
dece6358 22310+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 22311+static inline int prefix##_write_trylock(param) \
dece6358 22312+{ return au_rw_write_trylock(rwsem); }
1facf9fc 22313+/* why is a _nested version of trylock not defined? */
22314+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 22315+{ au_rw_read_trylock_nested(rwsem, lsc); }
1facf9fc 22316+static inline void prefix##_write_trylock_nested(param, lsc)
dece6358 22317+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 22318+
22319+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
22320+static inline void prefix##_read_unlock(param) \
dece6358 22321+{ au_rw_read_unlock(rwsem); } \
1facf9fc 22322+static inline void prefix##_write_unlock(param) \
dece6358 22323+{ au_rw_write_unlock(rwsem); } \
1facf9fc 22324+static inline void prefix##_downgrade_lock(param) \
dece6358 22325+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 22326+
22327+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
22328+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
22329+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
22330+
22331+#endif /* __KERNEL__ */
22332+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
22333diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
22334--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
22335+++ linux/fs/aufs/sbinfo.c 2012-02-13 21:54:56.973105100 +0100
22336@@ -0,0 +1,343 @@
1facf9fc 22337+/*
f6c5ef8b 22338+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 22339+ *
22340+ * This program, aufs is free software; you can redistribute it and/or modify
22341+ * it under the terms of the GNU General Public License as published by
22342+ * the Free Software Foundation; either version 2 of the License, or
22343+ * (at your option) any later version.
dece6358
AM
22344+ *
22345+ * This program is distributed in the hope that it will be useful,
22346+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22347+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22348+ * GNU General Public License for more details.
22349+ *
22350+ * You should have received a copy of the GNU General Public License
22351+ * along with this program; if not, write to the Free Software
22352+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22353+ */
22354+
22355+/*
22356+ * superblock private data
22357+ */
22358+
22359+#include "aufs.h"
22360+
22361+/*
22362+ * they are necessary even when sysfs is disabled.
22363+ */
22364+void au_si_free(struct kobject *kobj)
22365+{
22366+ struct au_sbinfo *sbinfo;
b752ccd1 22367+ char *locked __maybe_unused; /* debug only */
1facf9fc 22368+
22369+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); /* the au_sbinfo embedding @kobj; freed below */
22370+ AuDebugOn(!list_empty(&sbinfo->si_plink.head));
e49829fe 22371+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 22372+
e49829fe 22373+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 22374+ au_br_free(sbinfo);
e49829fe 22375+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
22376+
22377+ AuDebugOn(radix_tree_gang_lookup
22378+ (&sbinfo->au_si_pid.tree, (void **)&locked,
22379+ /*first_index*/PID_MAX_DEFAULT - 1,
22380+ /*max_items*/1)); /* 'locked' has room for one pointer only */
22381+
1facf9fc 22382+ kfree(sbinfo->si_branch);
b752ccd1 22383+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 22384+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 22385+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 22386+
22387+ kfree(sbinfo);
22388+}
22389+
22390+int au_si_alloc(struct super_block *sb)
22391+{
22392+ int err;
22393+ struct au_sbinfo *sbinfo;
e49829fe 22394+ static struct lock_class_key aufs_si;
1facf9fc 22395+
22396+ err = -ENOMEM;
4a4d8108 22397+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 22398+ if (unlikely(!sbinfo))
22399+ goto out;
22400+
b752ccd1
AM
22401+ BUILD_BUG_ON(sizeof(unsigned long) !=
22402+ sizeof(*sbinfo->au_si_pid.bitmap));
22403+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
22404+ sizeof(*sbinfo->au_si_pid.bitmap),
22405+ GFP_NOFS);
22406+ if (unlikely(!sbinfo->au_si_pid.bitmap))
22407+ goto out_sbinfo;
22408+
1facf9fc 22409+ /* will be reallocated separately */
22410+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
22411+ if (unlikely(!sbinfo->si_branch))
b752ccd1 22412+ goto out_pidmap;
1facf9fc 22413+
1facf9fc 22414+ err = sysaufs_si_init(sbinfo);
22415+ if (unlikely(err))
22416+ goto out_br;
22417+
22418+ au_nwt_init(&sbinfo->si_nowait);
dece6358 22419+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 22420+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
22421+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
22422+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
22423+
7f207e10 22424+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
22425+ atomic_long_set(&sbinfo->si_nfiles, 0);
22426+
1facf9fc 22427+ sbinfo->si_bend = -1;
1facf9fc 22428+
22429+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
22430+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
22431+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
22432+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 22433+
e49829fe 22434+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 22435+
1facf9fc 22436+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 22437+ sbinfo->si_xino_brid = -1;
22438+ /* leave si_xib_last_pindex and si_xib_next_bit */
22439+
e49829fe 22440+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 22441+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
22442+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
22443+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
22444+
22445+ au_spl_init(&sbinfo->si_plink);
22446+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 22447+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 22448+
22449+ /* leave other members for sysaufs and si_mnt. */
22450+ sbinfo->si_sb = sb;
22451+ sb->s_fs_info = sbinfo;
b752ccd1 22452+ si_pid_set(sb);
1facf9fc 22453+ au_debug_sbinfo_init(sbinfo);
22454+ return 0; /* success */
22455+
4f0767ce 22456+out_br:
1facf9fc 22457+ kfree(sbinfo->si_branch);
4f0767ce 22458+out_pidmap:
b752ccd1 22459+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 22460+out_sbinfo:
1facf9fc 22461+ kfree(sbinfo);
4f0767ce 22462+out:
1facf9fc 22463+ return err;
22464+}
22465+
22466+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
22467+{
22468+ int err, sz;
22469+ struct au_branch **brp;
22470+
dece6358
AM
22471+ AuRwMustWriteLock(&sbinfo->si_rwsem);
22472+
1facf9fc 22473+ err = -ENOMEM;
22474+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
22475+ if (unlikely(!sz))
22476+ sz = sizeof(*brp);
22477+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
22478+ if (brp) {
22479+ sbinfo->si_branch = brp;
22480+ err = 0;
22481+ }
22482+
22483+ return err;
22484+}
22485+
22486+/* ---------------------------------------------------------------------- */
22487+
22488+unsigned int au_sigen_inc(struct super_block *sb)
22489+{
22490+ unsigned int gen;
22491+
dece6358
AM
22492+ SiMustWriteLock(sb);
22493+
1facf9fc 22494+ gen = ++au_sbi(sb)->si_generation;
22495+ au_update_digen(sb->s_root);
22496+ au_update_iigen(sb->s_root->d_inode);
22497+ sb->s_root->d_inode->i_version++;
22498+ return gen;
22499+}
22500+
22501+aufs_bindex_t au_new_br_id(struct super_block *sb)
22502+{
22503+ aufs_bindex_t br_id;
22504+ int i;
22505+ struct au_sbinfo *sbinfo;
22506+
dece6358
AM
22507+ SiMustWriteLock(sb);
22508+
1facf9fc 22509+ sbinfo = au_sbi(sb);
22510+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
22511+ br_id = ++sbinfo->si_last_br_id;
7f207e10 22512+ AuDebugOn(br_id < 0);
1facf9fc 22513+ if (br_id && au_br_index(sb, br_id) < 0)
22514+ return br_id;
22515+ }
22516+
22517+ return -1;
22518+}
22519+
22520+/* ---------------------------------------------------------------------- */
22521+
e49829fe
JR
22522+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
22523+int si_read_lock(struct super_block *sb, int flags)
22524+{
22525+ int err;
22526+
22527+ err = 0;
22528+ if (au_ftest_lock(flags, FLUSH))
22529+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22530+
22531+ si_noflush_read_lock(sb);
22532+ err = au_plink_maint(sb, flags);
22533+ if (unlikely(err))
22534+ si_read_unlock(sb);
22535+
22536+ return err;
22537+}
22538+
22539+int si_write_lock(struct super_block *sb, int flags)
22540+{
22541+ int err;
22542+
22543+ if (au_ftest_lock(flags, FLUSH))
22544+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22545+
22546+ si_noflush_write_lock(sb);
22547+ err = au_plink_maint(sb, flags);
22548+ if (unlikely(err))
22549+ si_write_unlock(sb);
22550+
22551+ return err;
22552+}
22553+
1facf9fc 22554+/* dentry and super_block lock. call at entry point */
e49829fe 22555+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 22556+{
e49829fe 22557+ int err;
027c5e7a 22558+ struct super_block *sb;
e49829fe 22559+
027c5e7a
AM
22560+ sb = dentry->d_sb;
22561+ err = si_read_lock(sb, flags);
22562+ if (unlikely(err))
22563+ goto out;
22564+
22565+ if (au_ftest_lock(flags, DW))
22566+ di_write_lock_child(dentry);
22567+ else
22568+ di_read_lock_child(dentry, flags);
22569+
22570+ if (au_ftest_lock(flags, GEN)) {
22571+ err = au_digen_test(dentry, au_sigen(sb));
22572+ AuDebugOn(!err && au_dbrange_test(dentry));
22573+ if (unlikely(err))
22574+ aufs_read_unlock(dentry, flags);
e49829fe
JR
22575+ }
22576+
027c5e7a 22577+out:
e49829fe 22578+ return err;
1facf9fc 22579+}
22580+
22581+void aufs_read_unlock(struct dentry *dentry, int flags)
22582+{
22583+ if (au_ftest_lock(flags, DW))
22584+ di_write_unlock(dentry);
22585+ else
22586+ di_read_unlock(dentry, flags);
22587+ si_read_unlock(dentry->d_sb);
22588+}
22589+
22590+void aufs_write_lock(struct dentry *dentry)
22591+{
e49829fe 22592+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 22593+ di_write_lock_child(dentry);
22594+}
22595+
22596+void aufs_write_unlock(struct dentry *dentry)
22597+{
22598+ di_write_unlock(dentry);
22599+ si_write_unlock(dentry->d_sb);
22600+}
22601+
e49829fe 22602+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 22603+{
e49829fe 22604+ int err;
027c5e7a
AM
22605+ unsigned int sigen;
22606+ struct super_block *sb;
e49829fe 22607+
027c5e7a
AM
22608+ sb = d1->d_sb;
22609+ err = si_read_lock(sb, flags);
22610+ if (unlikely(err))
22611+ goto out;
22612+
22613+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
22614+
22615+ if (au_ftest_lock(flags, GEN)) {
22616+ sigen = au_sigen(sb);
22617+ err = au_digen_test(d1, sigen);
22618+ AuDebugOn(!err && au_dbrange_test(d1));
22619+ if (!err) {
22620+ err = au_digen_test(d2, sigen);
22621+ AuDebugOn(!err && au_dbrange_test(d2));
22622+ }
22623+ if (unlikely(err))
22624+ aufs_read_and_write_unlock2(d1, d2);
22625+ }
22626+
22627+out:
e49829fe 22628+ return err;
1facf9fc 22629+}
22630+
22631+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
22632+{
22633+ di_write_unlock2(d1, d2);
22634+ si_read_unlock(d1->d_sb);
22635+}
b752ccd1
AM
22636+
22637+/* ---------------------------------------------------------------------- */
22638+
22639+int si_pid_test_slow(struct super_block *sb)
22640+{
22641+ void *p;
22642+
22643+ rcu_read_lock();
22644+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
22645+ rcu_read_unlock();
22646+
027c5e7a 22647+ return (long)!!p;
b752ccd1
AM
22648+}
22649+
22650+void si_pid_set_slow(struct super_block *sb)
22651+{
22652+ int err;
22653+ struct au_sbinfo *sbinfo;
22654+
22655+ AuDebugOn(si_pid_test_slow(sb));
22656+
22657+ sbinfo = au_sbi(sb);
22658+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
22659+ AuDebugOn(err);
22660+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22661+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 22662+ /*any valid ptr*/sb);
b752ccd1
AM
22663+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
22664+ AuDebugOn(err);
22665+ radix_tree_preload_end();
22666+}
22667+
22668+void si_pid_clr_slow(struct super_block *sb)
22669+{
22670+ void *p;
22671+ struct au_sbinfo *sbinfo;
22672+
22673+ AuDebugOn(!si_pid_test_slow(sb));
22674+
22675+ sbinfo = au_sbi(sb);
22676+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22677+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
22678+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 22679+}
7f207e10
AM
22680diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
22681--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
22682+++ linux/fs/aufs/spl.h 2012-02-13 21:54:56.973105100 +0100
22683@@ -0,0 +1,62 @@
1facf9fc 22684+/*
f6c5ef8b 22685+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 22686+ *
22687+ * This program, aufs is free software; you can redistribute it and/or modify
22688+ * it under the terms of the GNU General Public License as published by
22689+ * the Free Software Foundation; either version 2 of the License, or
22690+ * (at your option) any later version.
dece6358
AM
22691+ *
22692+ * This program is distributed in the hope that it will be useful,
22693+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22694+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22695+ * GNU General Public License for more details.
22696+ *
22697+ * You should have received a copy of the GNU General Public License
22698+ * along with this program; if not, write to the Free Software
22699+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22700+ */
22701+
22702+/*
22703+ * simple list protected by a spinlock
22704+ */
22705+
22706+#ifndef __AUFS_SPL_H__
22707+#define __AUFS_SPL_H__
22708+
22709+#ifdef __KERNEL__
22710+
1facf9fc 22711+struct au_splhead {
22712+ spinlock_t spin;
22713+ struct list_head head;
22714+};
22715+
22716+static inline void au_spl_init(struct au_splhead *spl)
22717+{
22718+ spin_lock_init(&spl->spin);
22719+ INIT_LIST_HEAD(&spl->head);
22720+}
22721+
22722+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
22723+{
22724+ spin_lock(&spl->spin);
22725+ list_add(list, &spl->head);
22726+ spin_unlock(&spl->spin);
22727+}
22728+
22729+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
22730+{
22731+ spin_lock(&spl->spin);
22732+ list_del(list);
22733+ spin_unlock(&spl->spin);
22734+}
22735+
4a4d8108
AM
22736+static inline void au_spl_del_rcu(struct list_head *list,
22737+ struct au_splhead *spl)
22738+{
22739+ spin_lock(&spl->spin);
22740+ list_del_rcu(list);
22741+ spin_unlock(&spl->spin);
22742+}
22743+
1facf9fc 22744+#endif /* __KERNEL__ */
22745+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
22746diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
22747--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
22748+++ linux/fs/aufs/super.c 2012-02-13 21:54:56.973105100 +0100
22749@@ -0,0 +1,938 @@
1facf9fc 22750+/*
f6c5ef8b 22751+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 22752+ *
22753+ * This program, aufs is free software; you can redistribute it and/or modify
22754+ * it under the terms of the GNU General Public License as published by
22755+ * the Free Software Foundation; either version 2 of the License, or
22756+ * (at your option) any later version.
dece6358
AM
22757+ *
22758+ * This program is distributed in the hope that it will be useful,
22759+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22760+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22761+ * GNU General Public License for more details.
22762+ *
22763+ * You should have received a copy of the GNU General Public License
22764+ * along with this program; if not, write to the Free Software
22765+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22766+ */
22767+
22768+/*
22769+ * mount and super_block operations
22770+ */
22771+
f6c5ef8b 22772+#include <linux/mm.h>
dece6358 22773+#include <linux/module.h>
1facf9fc 22774+#include <linux/seq_file.h>
22775+#include <linux/statfs.h>
7f207e10
AM
22776+#include <linux/vmalloc.h>
22777+#include <linux/writeback.h>
1facf9fc 22778+#include "aufs.h"
22779+
22780+/*
22781+ * super_operations
22782+ */
22783+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
22784+{
22785+ struct au_icntnr *c;
22786+
22787+ c = au_cache_alloc_icntnr();
22788+ if (c) {
027c5e7a 22789+ au_icntnr_init(c);
1facf9fc 22790+ c->vfs_inode.i_version = 1; /* sigen(sb); */
22791+ c->iinfo.ii_hinode = NULL;
22792+ return &c->vfs_inode;
22793+ }
22794+ return NULL;
22795+}
22796+
027c5e7a
AM
22797+static void aufs_destroy_inode_cb(struct rcu_head *head)
22798+{
22799+ struct inode *inode = container_of(head, struct inode, i_rcu);
22800+
22801+ INIT_LIST_HEAD(&inode->i_dentry);
22802+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
22803+}
22804+
1facf9fc 22805+static void aufs_destroy_inode(struct inode *inode)
22806+{
22807+ au_iinfo_fin(inode);
027c5e7a 22808+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 22809+}
22810+
22811+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
22812+{
22813+ struct inode *inode;
22814+ int err;
22815+
22816+ inode = iget_locked(sb, ino);
22817+ if (unlikely(!inode)) {
22818+ inode = ERR_PTR(-ENOMEM);
22819+ goto out;
22820+ }
22821+ if (!(inode->i_state & I_NEW))
22822+ goto out;
22823+
22824+ err = au_xigen_new(inode);
22825+ if (!err)
22826+ err = au_iinfo_init(inode);
22827+ if (!err)
22828+ inode->i_version++;
22829+ else {
22830+ iget_failed(inode);
22831+ inode = ERR_PTR(err);
22832+ }
22833+
4f0767ce 22834+out:
1facf9fc 22835+ /* never return NULL */
22836+ AuDebugOn(!inode);
22837+ AuTraceErrPtr(inode);
22838+ return inode;
22839+}
22840+
22841+/* lock free root dinfo */
22842+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
22843+{
22844+ int err;
22845+ aufs_bindex_t bindex, bend;
22846+ struct path path;
4a4d8108 22847+ struct au_hdentry *hdp;
1facf9fc 22848+ struct au_branch *br;
1e00d052 22849+ char *perm;
1facf9fc 22850+
22851+ err = 0;
22852+ bend = au_sbend(sb);
4a4d8108 22853+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 22854+ for (bindex = 0; !err && bindex <= bend; bindex++) {
22855+ br = au_sbr(sb, bindex);
22856+ path.mnt = br->br_mnt;
4a4d8108 22857+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 22858+ err = au_seq_path(seq, &path);
1e00d052
AM
22859+ if (err > 0) {
22860+ perm = au_optstr_br_perm(br->br_perm);
22861+ if (perm) {
22862+ err = seq_printf(seq, "=%s", perm);
22863+ kfree(perm);
22864+ if (err == -1)
22865+ err = -E2BIG;
22866+ } else
22867+ err = -ENOMEM;
22868+ }
1facf9fc 22869+ if (!err && bindex != bend)
22870+ err = seq_putc(seq, ':');
22871+ }
22872+
22873+ return err;
22874+}
22875+
22876+static void au_show_wbr_create(struct seq_file *m, int v,
22877+ struct au_sbinfo *sbinfo)
22878+{
22879+ const char *pat;
22880+ /* appends ",create=" plus the text for wbr-create policy v to m */
dece6358
AM
22881+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22882+
1facf9fc 22883+ seq_printf(m, ",create=");
22884+ pat = au_optstr_wbr_create(v);
22885+ switch (v) {
22886+ case AuWbrCreate_TDP:
22887+ case AuWbrCreate_RR:
22888+ case AuWbrCreate_MFS:
22889+ case AuWbrCreate_PMFS:
22890+ seq_puts(m, pat); /* pat is data, never use it as a format string */
22891+ break;
22892+ case AuWbrCreate_MFSV:
22893+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
22894+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22895+ / MSEC_PER_SEC);
1facf9fc 22896+ break;
22897+ case AuWbrCreate_PMFSV:
22898+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
22899+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22900+ / MSEC_PER_SEC);
1facf9fc 22901+ break;
22902+ case AuWbrCreate_MFSRR:
22903+ seq_printf(m, /*pat*/"mfsrr:%llu",
22904+ sbinfo->si_wbr_mfs.mfsrr_watermark);
22905+ break;
22906+ case AuWbrCreate_MFSRRV:
22907+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
22908+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
22909+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22910+ / MSEC_PER_SEC);
1facf9fc 22911+ break;
22912+ }
22913+}
22914+
22915+static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
22916+{
22917+#ifdef CONFIG_SYSFS
22918+ return 0; /* with CONFIG_SYSFS nothing is printed from here */
22919+#else
22920+ int err;
22921+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
22922+ aufs_bindex_t bindex, brid;
22923+ struct super_block *sb;
22924+ struct qstr *name;
22925+ struct file *f;
22926+ struct dentry *d, *h_root;
4a4d8108 22927+ struct au_hdentry *hdp;
1facf9fc 22928+
dece6358
AM
22929+ AuRwMustAnyLock(&au_sbi(mnt->mnt_sb)->si_rwsem);
22930+ /* no local 'sbinfo' exists here: the rwsem is reached via the mount's sb */
1facf9fc 22931+ err = 0;
22932+ sb = mnt->mnt_sb;
22933+ f = au_sbi(sb)->si_xib;
22934+ if (!f)
22935+ goto out;
22936+
22937+ /* stop printing the default xino path on the first writable branch */
22938+ h_root = NULL;
22939+ brid = au_xino_brid(sb);
22940+ if (brid >= 0) {
22941+ bindex = au_br_index(sb, brid);
4a4d8108
AM
22942+ hdp = au_di(sb->s_root)->di_hdentry;
22943+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 22944+ }
22945+ d = f->f_dentry;
22946+ name = &d->d_name;
22947+ /* safe ->d_parent because the file is unlinked */
22948+ if (d->d_parent == h_root
22949+ && name->len == len
22950+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
22951+ goto out;
22952+
22953+ seq_puts(seq, ",xino=");
22954+ err = au_xino_path(seq, f);
22955+
4f0767ce 22956+out:
1facf9fc 22957+ return err;
22958+#endif
22959+}
22960+
22961+/* seq_file will re-call me in case of too long string */
22962+static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
22963+{
027c5e7a 22964+ int err;
1facf9fc 22965+ unsigned int mnt_flags, v;
22966+ struct super_block *sb;
22967+ struct au_sbinfo *sbinfo;
22968+
22969+#define AuBool(name, str) do { \
22970+ v = au_opt_test(mnt_flags, name); \
22971+ if (v != au_opt_test(AuOpt_Def, name)) \
22972+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
22973+} while (0)
22974+
22975+#define AuStr(name, str) do { \
22976+ v = mnt_flags & AuOptMask_##name; \
22977+ if (v != (AuOpt_Def & AuOptMask_##name)) \
22978+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
22979+} while (0)
22980+
22981+#define AuUInt(name, str, val) do { \
22982+ if (val != AUFS_##name##_DEF) \
22983+ seq_printf(m, "," #str "=%u", val); \
22984+} while (0)
22985+
22986+ /* lock free root dinfo */
22987+ sb = mnt->mnt_sb;
22988+ si_noflush_read_lock(sb);
22989+ sbinfo = au_sbi(sb);
22990+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
22991+
22992+ mnt_flags = au_mntflags(sb);
22993+ if (au_opt_test(mnt_flags, XINO)) {
22994+ err = au_show_xino(m, mnt);
22995+ if (unlikely(err))
22996+ goto out;
22997+ } else
22998+ seq_puts(m, ",noxino");
22999+
23000+ AuBool(TRUNC_XINO, trunc_xino);
23001+ AuStr(UDBA, udba);
dece6358 23002+ AuBool(SHWH, shwh);
1facf9fc 23003+ AuBool(PLINK, plink);
4a4d8108 23004+ AuBool(DIO, dio);
1facf9fc 23005+ /* AuBool(DIRPERM1, dirperm1); */
23006+ /* AuBool(REFROF, refrof); */
23007+
23008+ v = sbinfo->si_wbr_create;
23009+ if (v != AuWbrCreate_Def)
23010+ au_show_wbr_create(m, v, sbinfo);
23011+
23012+ v = sbinfo->si_wbr_copyup;
23013+ if (v != AuWbrCopyup_Def)
23014+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
23015+
23016+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
23017+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
23018+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
23019+
23020+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
23021+
027c5e7a
AM
23022+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
23023+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 23024+
23025+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
23026+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
23027+
23028+ AuBool(SUM, sum);
23029+ /* AuBool(SUM_W, wsum); */
23030+ AuBool(WARN_PERM, warn_perm);
23031+ AuBool(VERBOSE, verbose);
23032+
4f0767ce 23033+out:
1facf9fc 23034+ /* be sure to print "br:" last */
23035+ if (!sysaufs_brs) {
23036+ seq_puts(m, ",br:");
23037+ au_show_brs(m, sb);
23038+ }
23039+ si_read_unlock(sb);
23040+ return 0;
23041+
1facf9fc 23042+#undef AuBool
23043+#undef AuStr
4a4d8108 23044+#undef AuUInt
1facf9fc 23045+}
23046+
23047+/* ---------------------------------------------------------------------- */
23048+
23049+/* sum mode which returns the summation for statfs(2) */
23050+
23051+static u64 au_add_till_max(u64 a, u64 b)
23052+{
23053+ u64 old;
23054+ /* saturating add: returns a + b, or ULLONG_MAX if the sum wraps */
23055+ old = a;
23056+ a += b;
23057+ if (old <= a) /* '<=': b == 0 leaves a unchanged and is not a wrap */
23058+ return a;
23059+ return ULLONG_MAX;
23060+}
23061+
23062+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
23063+{
23064+ int err;
23065+ u64 blocks, bfree, bavail, files, ffree;
23066+ aufs_bindex_t bend, bindex, i;
23067+ unsigned char shared;
7f207e10 23068+ struct path h_path;
1facf9fc 23069+ struct super_block *h_sb;
23070+
23071+ blocks = 0;
23072+ bfree = 0;
23073+ bavail = 0;
23074+ files = 0;
23075+ ffree = 0;
23076+
23077+ err = 0;
23078+ bend = au_sbend(sb);
23079+ for (bindex = bend; bindex >= 0; bindex--) {
7f207e10
AM
23080+ h_path.mnt = au_sbr_mnt(sb, bindex);
23081+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 23082+ shared = 0;
23083+ for (i = bindex + 1; !shared && i <= bend; i++)
23084+ shared = (au_sbr_sb(sb, i) == h_sb);
23085+ if (shared)
23086+ continue;
23087+
23088+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
23089+ h_path.dentry = h_path.mnt->mnt_root;
23090+ err = vfs_statfs(&h_path, buf);
1facf9fc 23091+ if (unlikely(err))
23092+ goto out;
23093+
23094+ blocks = au_add_till_max(blocks, buf->f_blocks);
23095+ bfree = au_add_till_max(bfree, buf->f_bfree);
23096+ bavail = au_add_till_max(bavail, buf->f_bavail);
23097+ files = au_add_till_max(files, buf->f_files);
23098+ ffree = au_add_till_max(ffree, buf->f_ffree);
23099+ }
23100+
23101+ buf->f_blocks = blocks;
23102+ buf->f_bfree = bfree;
23103+ buf->f_bavail = bavail;
23104+ buf->f_files = files;
23105+ buf->f_ffree = ffree;
23106+
4f0767ce 23107+out:
1facf9fc 23108+ return err;
23109+}
23110+
23111+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
23112+{
23113+ int err;
7f207e10 23114+ struct path h_path;
1facf9fc 23115+ struct super_block *sb;
23116+
23117+ /* lock free root dinfo */
23118+ sb = dentry->d_sb;
23119+ si_noflush_read_lock(sb);
7f207e10 23120+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 23121+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
23122+ h_path.mnt = au_sbr_mnt(sb, 0);
23123+ h_path.dentry = h_path.mnt->mnt_root;
23124+ err = vfs_statfs(&h_path, buf);
23125+ } else
1facf9fc 23126+ err = au_statfs_sum(sb, buf);
23127+ si_read_unlock(sb);
23128+
23129+ if (!err) {
23130+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 23131+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 23132+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
23133+ }
23134+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
23135+
23136+ return err;
23137+}
23138+
23139+/* ---------------------------------------------------------------------- */
23140+
1facf9fc 23141+/* final actions when unmounting a file system */
23142+static void aufs_put_super(struct super_block *sb)
23143+{
23144+ struct au_sbinfo *sbinfo;
23145+
23146+ sbinfo = au_sbi(sb);
23147+ if (!sbinfo)
23148+ return;
23149+
1facf9fc 23150+ dbgaufs_si_fin(sbinfo);
23151+ kobject_put(&sbinfo->si_kobj);
23152+}
23153+
23154+/* ---------------------------------------------------------------------- */
23155+
7f207e10
AM
23156+void au_array_free(void *array)
23157+{
23158+ if (array) {
23159+ if (!is_vmalloc_addr(array))
23160+ kfree(array);
23161+ else
23162+ vfree(array);
23163+ }
23164+}
23165+
23166+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
23167+{
23168+ void *array;
23169+ unsigned long long n;
23170+
23171+ array = NULL;
23172+ n = 0;
23173+ if (!*hint)
23174+ goto out;
23175+
23176+ if (*hint > ULLONG_MAX / sizeof(array)) {
23177+ array = ERR_PTR(-EMFILE);
23178+ pr_err("hint %llu\n", *hint);
23179+ goto out;
23180+ }
23181+
23182+ array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
23183+ if (unlikely(!array))
23184+ array = vmalloc(sizeof(array) * *hint);
23185+ if (unlikely(!array)) {
23186+ array = ERR_PTR(-ENOMEM);
23187+ goto out;
23188+ }
23189+
23190+ n = cb(array, *hint, arg);
23191+ AuDebugOn(n > *hint);
23192+
23193+out:
23194+ *hint = n;
23195+ return array;
23196+}
23197+
23198+static unsigned long long au_iarray_cb(void *a,
23199+ unsigned long long max __maybe_unused,
23200+ void *arg)
23201+{
23202+ unsigned long long n;
23203+ struct inode **p, *inode;
23204+ struct list_head *head;
23205+
23206+ n = 0;
23207+ p = a;
23208+ head = arg;
2cbb1c4b 23209+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
23210+ list_for_each_entry(inode, head, i_sb_list) {
23211+ if (!is_bad_inode(inode)
23212+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
23213+ spin_lock(&inode->i_lock);
23214+ if (atomic_read(&inode->i_count)) {
23215+ au_igrab(inode);
23216+ *p++ = inode;
23217+ n++;
23218+ AuDebugOn(n > max);
23219+ }
23220+ spin_unlock(&inode->i_lock);
7f207e10
AM
23221+ }
23222+ }
2cbb1c4b 23223+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
23224+
23225+ return n;
23226+}
23227+
23228+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
23229+{
23230+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
23231+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
23232+}
23233+
/*
 * iput() the first @max inodes stored in @a, then release the array itself
 * with au_array_free().
 */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max)
		iput(a[idx++]);
	au_array_free(a);
}
23242+
23243+/* ---------------------------------------------------------------------- */
23244+
1facf9fc 23245+/*
23246+ * refresh dentry and inode at remount time.
23247+ */
027c5e7a
AM
23248+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
23249+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
23250+ struct dentry *parent)
1facf9fc 23251+{
23252+ int err;
1facf9fc 23253+
23254+ di_write_lock_child(dentry);
1facf9fc 23255+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
23256+ err = au_refresh_dentry(dentry, parent);
23257+ if (!err && dir_flags)
23258+ au_hn_reset(dentry->d_inode, dir_flags);
1facf9fc 23259+ di_read_unlock(parent, AuLock_IR);
1facf9fc 23260+ di_write_unlock(dentry);
23261+
23262+ return err;
23263+}
23264+
027c5e7a
AM
23265+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
23266+ struct au_sbinfo *sbinfo,
23267+ const unsigned int dir_flags)
1facf9fc 23268+{
027c5e7a
AM
23269+ int err;
23270+ struct dentry *parent;
23271+ struct inode *inode;
23272+
23273+ err = 0;
23274+ parent = dget_parent(dentry);
23275+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
23276+ inode = dentry->d_inode;
23277+ if (inode) {
23278+ if (!S_ISDIR(inode->i_mode))
23279+ err = au_do_refresh(dentry, /*dir_flags*/0,
23280+ parent);
23281+ else {
23282+ err = au_do_refresh(dentry, dir_flags, parent);
23283+ if (unlikely(err))
23284+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
23285+ }
23286+ } else
23287+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
23288+ AuDbgDentry(dentry);
23289+ }
23290+ dput(parent);
23291+
23292+ AuTraceErr(err);
23293+ return err;
1facf9fc 23294+}
23295+
027c5e7a 23296+static int au_refresh_d(struct super_block *sb)
1facf9fc 23297+{
23298+ int err, i, j, ndentry, e;
027c5e7a 23299+ unsigned int sigen;
1facf9fc 23300+ struct au_dcsub_pages dpages;
23301+ struct au_dpage *dpage;
027c5e7a
AM
23302+ struct dentry **dentries, *d;
23303+ struct au_sbinfo *sbinfo;
23304+ struct dentry *root = sb->s_root;
23305+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
1facf9fc 23306+
027c5e7a
AM
23307+ err = au_dpages_init(&dpages, GFP_NOFS);
23308+ if (unlikely(err))
1facf9fc 23309+ goto out;
027c5e7a
AM
23310+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
23311+ if (unlikely(err))
1facf9fc 23312+ goto out_dpages;
1facf9fc 23313+
027c5e7a
AM
23314+ sigen = au_sigen(sb);
23315+ sbinfo = au_sbi(sb);
23316+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 23317+ dpage = dpages.dpages + i;
23318+ dentries = dpage->dentries;
23319+ ndentry = dpage->ndentry;
027c5e7a 23320+ for (j = 0; j < ndentry; j++) {
1facf9fc 23321+ d = dentries[j];
027c5e7a
AM
23322+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
23323+ if (unlikely(e && !err))
23324+ err = e;
23325+ /* go on even err */
1facf9fc 23326+ }
23327+ }
23328+
4f0767ce 23329+out_dpages:
1facf9fc 23330+ au_dpages_free(&dpages);
4f0767ce 23331+out:
1facf9fc 23332+ return err;
23333+}
23334+
027c5e7a 23335+static int au_refresh_i(struct super_block *sb)
1facf9fc 23336+{
027c5e7a
AM
23337+ int err, e;
23338+ unsigned int sigen;
23339+ unsigned long long max, ull;
23340+ struct inode *inode, **array;
1facf9fc 23341+
027c5e7a
AM
23342+ array = au_iarray_alloc(sb, &max);
23343+ err = PTR_ERR(array);
23344+ if (IS_ERR(array))
23345+ goto out;
1facf9fc 23346+
23347+ err = 0;
027c5e7a
AM
23348+ sigen = au_sigen(sb);
23349+ for (ull = 0; ull < max; ull++) {
23350+ inode = array[ull];
23351+ if (au_iigen(inode) != sigen) {
1facf9fc 23352+ ii_write_lock_child(inode);
027c5e7a 23353+ e = au_refresh_hinode_self(inode);
1facf9fc 23354+ ii_write_unlock(inode);
23355+ if (unlikely(e)) {
027c5e7a 23356+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 23357+ if (!err)
23358+ err = e;
23359+ /* go on even if err */
23360+ }
23361+ }
1facf9fc 23362+ }
23363+
027c5e7a 23364+ au_iarray_free(array, max);
1facf9fc 23365+
4f0767ce 23366+out:
1facf9fc 23367+ return err;
23368+}
23369+
027c5e7a 23370+static void au_remount_refresh(struct super_block *sb)
1facf9fc 23371+{
027c5e7a
AM
23372+ int err, e;
23373+ unsigned int udba;
23374+ aufs_bindex_t bindex, bend;
1facf9fc 23375+ struct dentry *root;
23376+ struct inode *inode;
027c5e7a 23377+ struct au_branch *br;
1facf9fc 23378+
23379+ au_sigen_inc(sb);
027c5e7a 23380+ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
1facf9fc 23381+
23382+ root = sb->s_root;
23383+ DiMustNoWaiters(root);
23384+ inode = root->d_inode;
23385+ IiMustNoWaiters(inode);
1facf9fc 23386+
027c5e7a
AM
23387+ udba = au_opt_udba(sb);
23388+ bend = au_sbend(sb);
23389+ for (bindex = 0; bindex <= bend; bindex++) {
23390+ br = au_sbr(sb, bindex);
23391+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 23392+ if (unlikely(err))
027c5e7a
AM
23393+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
23394+ bindex, err);
23395+ /* go on even if err */
1facf9fc 23396+ }
027c5e7a 23397+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 23398+
027c5e7a
AM
23399+ di_write_unlock(root);
23400+ err = au_refresh_d(sb);
23401+ e = au_refresh_i(sb);
23402+ if (unlikely(e && !err))
23403+ err = e;
1facf9fc 23404+ /* aufs_write_lock() calls ..._child() */
23405+ di_write_lock_child(root);
027c5e7a
AM
23406+
23407+ au_cpup_attr_all(inode, /*force*/1);
23408+
23409+ if (unlikely(err))
23410+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 23411+}
23412+
/*
 * Stop mount(8) from over-interpreting errno and printing strange messages:
 * -ENOENT, -ENOTDIR, -EEXIST and -EIO are all reported as -EINVAL, any other
 * value is returned unchanged.
 */
static int cvt_err(int err)
{
	AuTraceErr(err);

	if (err == -ENOENT || err == -ENOTDIR
	    || err == -EEXIST || err == -EIO)
		return -EINVAL;
	return err;
}
23427+
23428+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
23429+{
4a4d8108
AM
23430+ int err, do_dx;
23431+ unsigned int mntflags;
1facf9fc 23432+ struct au_opts opts;
23433+ struct dentry *root;
23434+ struct inode *inode;
23435+ struct au_sbinfo *sbinfo;
23436+
23437+ err = 0;
23438+ root = sb->s_root;
23439+ if (!data || !*data) {
e49829fe
JR
23440+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23441+ if (!err) {
23442+ di_write_lock_child(root);
23443+ err = au_opts_verify(sb, *flags, /*pending*/0);
23444+ aufs_write_unlock(root);
23445+ }
1facf9fc 23446+ goto out;
23447+ }
23448+
23449+ err = -ENOMEM;
23450+ memset(&opts, 0, sizeof(opts));
23451+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23452+ if (unlikely(!opts.opt))
23453+ goto out;
23454+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23455+ opts.flags = AuOpts_REMOUNT;
23456+ opts.sb_flags = *flags;
23457+
23458+ /* parse it before aufs lock */
23459+ err = au_opts_parse(sb, data, &opts);
23460+ if (unlikely(err))
23461+ goto out_opts;
23462+
23463+ sbinfo = au_sbi(sb);
23464+ inode = root->d_inode;
23465+ mutex_lock(&inode->i_mutex);
e49829fe
JR
23466+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23467+ if (unlikely(err))
23468+ goto out_mtx;
23469+ di_write_lock_child(root);
1facf9fc 23470+
23471+ /* au_opts_remount() may return an error */
23472+ err = au_opts_remount(sb, &opts);
23473+ au_opts_free(&opts);
23474+
027c5e7a
AM
23475+ if (au_ftest_opts(opts.flags, REFRESH))
23476+ au_remount_refresh(sb);
1facf9fc 23477+
4a4d8108
AM
23478+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
23479+ mntflags = au_mntflags(sb);
23480+ do_dx = !!au_opt_test(mntflags, DIO);
23481+ au_dy_arefresh(do_dx);
23482+ }
23483+
1facf9fc 23484+ aufs_write_unlock(root);
953406b4 23485+
e49829fe
JR
23486+out_mtx:
23487+ mutex_unlock(&inode->i_mutex);
4f0767ce 23488+out_opts:
1facf9fc 23489+ free_page((unsigned long)opts.opt);
4f0767ce 23490+out:
1facf9fc 23491+ err = cvt_err(err);
23492+ AuTraceErr(err);
23493+ return err;
23494+}
23495+
4a4d8108 23496+static const struct super_operations aufs_sop = {
1facf9fc 23497+ .alloc_inode = aufs_alloc_inode,
23498+ .destroy_inode = aufs_destroy_inode,
b752ccd1 23499+ /* always deleting, no clearing */
1facf9fc 23500+ .drop_inode = generic_delete_inode,
23501+ .show_options = aufs_show_options,
23502+ .statfs = aufs_statfs,
23503+ .put_super = aufs_put_super,
23504+ .remount_fs = aufs_remount_fs
23505+};
23506+
23507+/* ---------------------------------------------------------------------- */
23508+
23509+static int alloc_root(struct super_block *sb)
23510+{
23511+ int err;
23512+ struct inode *inode;
23513+ struct dentry *root;
23514+
23515+ err = -ENOMEM;
23516+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
23517+ err = PTR_ERR(inode);
23518+ if (IS_ERR(inode))
23519+ goto out;
23520+
23521+ inode->i_op = &aufs_dir_iop;
23522+ inode->i_fop = &aufs_dir_fop;
23523+ inode->i_mode = S_IFDIR;
9dbd164d 23524+ set_nlink(inode, 2);
1facf9fc 23525+ unlock_new_inode(inode);
23526+
23527+ root = d_alloc_root(inode);
23528+ if (unlikely(!root))
23529+ goto out_iput;
23530+ err = PTR_ERR(root);
23531+ if (IS_ERR(root))
23532+ goto out_iput;
23533+
4a4d8108 23534+ err = au_di_init(root);
1facf9fc 23535+ if (!err) {
23536+ sb->s_root = root;
23537+ return 0; /* success */
23538+ }
23539+ dput(root);
23540+ goto out; /* do not iput */
23541+
4f0767ce 23542+out_iput:
1facf9fc 23543+ iget_failed(inode);
4f0767ce 23544+out:
1facf9fc 23545+ return err;
23546+
23547+}
23548+
23549+static int aufs_fill_super(struct super_block *sb, void *raw_data,
23550+ int silent __maybe_unused)
23551+{
23552+ int err;
23553+ struct au_opts opts;
23554+ struct dentry *root;
23555+ struct inode *inode;
23556+ char *arg = raw_data;
23557+
23558+ if (unlikely(!arg || !*arg)) {
23559+ err = -EINVAL;
4a4d8108 23560+ pr_err("no arg\n");
1facf9fc 23561+ goto out;
23562+ }
23563+
23564+ err = -ENOMEM;
23565+ memset(&opts, 0, sizeof(opts));
23566+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23567+ if (unlikely(!opts.opt))
23568+ goto out;
23569+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23570+ opts.sb_flags = sb->s_flags;
23571+
23572+ err = au_si_alloc(sb);
23573+ if (unlikely(err))
23574+ goto out_opts;
23575+
23576+ /* all timestamps always follow the ones on the branch */
23577+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23578+ sb->s_op = &aufs_sop;
027c5e7a 23579+ sb->s_d_op = &aufs_dop;
1facf9fc 23580+ sb->s_magic = AUFS_SUPER_MAGIC;
23581+ sb->s_maxbytes = 0;
23582+ au_export_init(sb);
23583+
23584+ err = alloc_root(sb);
23585+ if (unlikely(err)) {
23586+ si_write_unlock(sb);
23587+ goto out_info;
23588+ }
23589+ root = sb->s_root;
23590+ inode = root->d_inode;
23591+
23592+ /*
23593+ * actually we can parse options regardless aufs lock here.
23594+ * but at remount time, parsing must be done before aufs lock.
23595+ * so we follow the same rule.
23596+ */
23597+ ii_write_lock_parent(inode);
23598+ aufs_write_unlock(root);
23599+ err = au_opts_parse(sb, arg, &opts);
23600+ if (unlikely(err))
23601+ goto out_root;
23602+
23603+ /* lock vfs_inode first, then aufs. */
23604+ mutex_lock(&inode->i_mutex);
1facf9fc 23605+ aufs_write_lock(root);
23606+ err = au_opts_mount(sb, &opts);
23607+ au_opts_free(&opts);
1facf9fc 23608+ aufs_write_unlock(root);
23609+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
23610+ if (!err)
23611+ goto out_opts; /* success */
1facf9fc 23612+
4f0767ce 23613+out_root:
1facf9fc 23614+ dput(root);
23615+ sb->s_root = NULL;
4f0767ce 23616+out_info:
2cbb1c4b 23617+ dbgaufs_si_fin(au_sbi(sb));
1facf9fc 23618+ kobject_put(&au_sbi(sb)->si_kobj);
23619+ sb->s_fs_info = NULL;
4f0767ce 23620+out_opts:
1facf9fc 23621+ free_page((unsigned long)opts.opt);
4f0767ce 23622+out:
1facf9fc 23623+ AuTraceErr(err);
23624+ err = cvt_err(err);
23625+ AuTraceErr(err);
23626+ return err;
23627+}
23628+
23629+/* ---------------------------------------------------------------------- */
23630+
027c5e7a
AM
23631+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
23632+ const char *dev_name __maybe_unused,
23633+ void *raw_data)
1facf9fc 23634+{
027c5e7a 23635+ struct dentry *root;
1facf9fc 23636+ struct super_block *sb;
23637+
23638+ /* all timestamps always follow the ones on the branch */
23639+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
23640+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
23641+ if (IS_ERR(root))
23642+ goto out;
23643+
23644+ sb = root->d_sb;
23645+ si_write_lock(sb, !AuLock_FLUSH);
23646+ sysaufs_brs_add(sb, 0);
23647+ si_write_unlock(sb);
23648+ au_sbilist_add(sb);
23649+
23650+out:
23651+ return root;
1facf9fc 23652+}
23653+
e49829fe
JR
23654+static void aufs_kill_sb(struct super_block *sb)
23655+{
23656+ struct au_sbinfo *sbinfo;
23657+
23658+ sbinfo = au_sbi(sb);
23659+ if (sbinfo) {
23660+ au_sbilist_del(sb);
23661+ aufs_write_lock(sb->s_root);
23662+ if (sbinfo->si_wbr_create_ops->fin)
23663+ sbinfo->si_wbr_create_ops->fin(sb);
23664+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
23665+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
027c5e7a 23666+ au_remount_refresh(sb);
e49829fe
JR
23667+ }
23668+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
23669+ au_plink_put(sb, /*verbose*/1);
23670+ au_xino_clr(sb);
1e00d052 23671+ sbinfo->si_sb = NULL;
e49829fe 23672+ aufs_write_unlock(sb->s_root);
e49829fe
JR
23673+ au_nwt_flush(&sbinfo->si_nowait);
23674+ }
23675+ generic_shutdown_super(sb);
23676+}
23677+
1facf9fc 23678+struct file_system_type aufs_fs_type = {
23679+ .name = AUFS_FSTYPE,
23680+ .fs_flags =
23681+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */
23682+ | FS_REVAL_DOT, /* for NFS branch and udba */
027c5e7a 23683+ .mount = aufs_mount,
e49829fe 23684+ .kill_sb = aufs_kill_sb,
1facf9fc 23685+ /* no need to __module_get() and module_put(). */
23686+ .owner = THIS_MODULE,
23687+};
7f207e10
AM
23688diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
23689--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
23690+++ linux/fs/aufs/super.h 2012-02-13 21:54:56.973105100 +0100
23691@@ -0,0 +1,546 @@
1facf9fc 23692+/*
f6c5ef8b 23693+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 23694+ *
23695+ * This program, aufs is free software; you can redistribute it and/or modify
23696+ * it under the terms of the GNU General Public License as published by
23697+ * the Free Software Foundation; either version 2 of the License, or
23698+ * (at your option) any later version.
dece6358
AM
23699+ *
23700+ * This program is distributed in the hope that it will be useful,
23701+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23702+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23703+ * GNU General Public License for more details.
23704+ *
23705+ * You should have received a copy of the GNU General Public License
23706+ * along with this program; if not, write to the Free Software
23707+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 23708+ */
23709+
23710+/*
23711+ * super_block operations
23712+ */
23713+
23714+#ifndef __AUFS_SUPER_H__
23715+#define __AUFS_SUPER_H__
23716+
23717+#ifdef __KERNEL__
23718+
23719+#include <linux/fs.h>
1facf9fc 23720+#include "rwsem.h"
23721+#include "spl.h"
23722+#include "wkq.h"
23723+
23724+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
23725+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
23726+ loff_t *);
23727+
/* policies to select one among multiple writable branches */

/* copy-up policy: a single hook taking the target dentry */
struct au_wbr_copyup_operations {
	int (*copyup)(struct dentry *dentry);
};
23732+
/*
 * create policy hooks. aufs_kill_sb() tests ->fin for NULL before calling
 * it, so that hook is optional.
 */
struct au_wbr_create_operations {
	int (*create)(struct dentry *dentry, int isdir);
	int (*init)(struct super_block *sb);
	int (*fin)(struct super_block *sb);
};
23738+
23739+struct au_wbr_mfs {
23740+ struct mutex mfs_lock; /* protect this structure */
23741+ unsigned long mfs_jiffy;
23742+ unsigned long mfs_expire;
23743+ aufs_bindex_t mfs_bindex;
23744+
23745+ unsigned long long mfsrr_bytes;
23746+ unsigned long long mfsrr_watermark;
23747+};
23748+
1facf9fc 23749+struct au_branch;
23750+struct au_sbinfo {
23751+ /* nowait tasks in the system-wide workqueue */
23752+ struct au_nowait_tasks si_nowait;
23753+
b752ccd1
AM
23754+ /*
23755+ * tried sb->s_umount, but failed due to the dependecy between i_mutex.
23756+ * rwsem for au_sbinfo is necessary.
23757+ */
dece6358 23758+ struct au_rwsem si_rwsem;
1facf9fc 23759+
b752ccd1
AM
23760+ /* prevent recursive locking in deleting inode */
23761+ struct {
23762+ unsigned long *bitmap;
23763+ spinlock_t tree_lock;
23764+ struct radix_tree_root tree;
23765+ } au_si_pid;
23766+
7f207e10
AM
23767+ /*
23768+ * dirty approach to protect sb->sb_inodes and ->s_files from remount.
23769+ */
23770+ atomic_long_t si_ninodes, si_nfiles;
23771+
1facf9fc 23772+ /* branch management */
23773+ unsigned int si_generation;
23774+
23775+ /* see above flags */
23776+ unsigned char au_si_status;
23777+
23778+ aufs_bindex_t si_bend;
7f207e10
AM
23779+
23780+ /* dirty trick to keep br_id plus */
23781+ unsigned int si_last_br_id :
23782+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 23783+ struct au_branch **si_branch;
23784+
23785+ /* policy to select a writable branch */
23786+ unsigned char si_wbr_copyup;
23787+ unsigned char si_wbr_create;
23788+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
23789+ struct au_wbr_create_operations *si_wbr_create_ops;
23790+
23791+ /* round robin */
23792+ atomic_t si_wbr_rr_next;
23793+
23794+ /* most free space */
23795+ struct au_wbr_mfs si_wbr_mfs;
23796+
23797+ /* mount flags */
23798+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
23799+ unsigned int si_mntflags;
23800+
23801+ /* external inode number (bitmap and translation table) */
23802+ au_readf_t si_xread;
23803+ au_writef_t si_xwrite;
23804+ struct file *si_xib;
23805+ struct mutex si_xib_mtx; /* protect xib members */
23806+ unsigned long *si_xib_buf;
23807+ unsigned long si_xib_last_pindex;
23808+ int si_xib_next_bit;
23809+ aufs_bindex_t si_xino_brid;
23810+ /* reserved for future use */
23811+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
23812+
23813+#ifdef CONFIG_AUFS_EXPORT
23814+ /* i_generation */
23815+ struct file *si_xigen;
23816+ atomic_t si_xigen_next;
23817+#endif
23818+
23819+ /* vdir parameters */
e49829fe 23820+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 23821+ unsigned int si_rdblk; /* deblk size */
23822+ unsigned int si_rdhash; /* hash size */
23823+
23824+ /*
23825+ * If the number of whiteouts are larger than si_dirwh, leave all of
23826+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
23827+ * future fsck.aufs or kernel thread will remove them later.
23828+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
23829+ */
23830+ unsigned int si_dirwh;
23831+
23832+ /*
23833+ * rename(2) a directory with all children.
23834+ */
23835+ /* reserved for future use */
23836+ /* int si_rendir; */
23837+
23838+ /* pseudo_link list */
23839+ struct au_splhead si_plink;
23840+ wait_queue_head_t si_plink_wq;
4a4d8108 23841+ spinlock_t si_plink_maint_lock;
e49829fe 23842+ pid_t si_plink_maint_pid;
1facf9fc 23843+
23844+ /*
23845+ * sysfs and lifetime management.
23846+ * this is not a small structure and it may be a waste of memory in case
23847+ * of sysfs is disabled, particulary when many aufs-es are mounted.
23848+ * but using sysfs is majority.
23849+ */
23850+ struct kobject si_kobj;
23851+#ifdef CONFIG_DEBUG_FS
23852+ struct dentry *si_dbgaufs, *si_dbgaufs_xib;
23853+#ifdef CONFIG_AUFS_EXPORT
23854+ struct dentry *si_dbgaufs_xigen;
23855+#endif
23856+#endif
23857+
e49829fe
JR
23858+#ifdef CONFIG_AUFS_SBILIST
23859+ struct list_head si_list;
23860+#endif
23861+
1facf9fc 23862+ /* dirty, necessary for unmounting, sysfs and sysrq */
23863+ struct super_block *si_sb;
23864+};
23865+
dece6358
AM
23866+/* sbinfo status flags */
23867+/*
23868+ * set true when refresh_dirs() failed at remount time.
23869+ * then try refreshing dirs at access time again.
23870+ * if it is false, refreshing dirs at access time is unnecesary
23871+ */
027c5e7a 23872+#define AuSi_FAILED_REFRESH_DIR 1
dece6358
AM
23873+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
23874+ unsigned int flag)
23875+{
23876+ AuRwMustAnyLock(&sbi->si_rwsem);
23877+ return sbi->au_si_status & flag;
23878+}
/*
 * test/set/clear an AuSi_<name> status flag.
 * setting and clearing assert that si_rwsem is write-locked.
 */
#define au_ftest_si(sbinfo, name)	au_do_ftest_si(sbinfo, AuSi_##name)
#define au_fset_si(sbinfo, name) do { \
	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
	(sbinfo)->au_si_status |= AuSi_##name; \
} while (0)
#define au_fclr_si(sbinfo, name) do { \
	AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
	(sbinfo)->au_si_status &= ~AuSi_##name; \
} while (0)
23888+
1facf9fc 23889+/* ---------------------------------------------------------------------- */
23890+
/*
 * policy to select one among writable branches:
 * invoke the copyup/create hook currently installed in sbinfo.
 */
#define AuWbrCopyup(sbinfo, ...) \
	((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
#define AuWbrCreate(sbinfo, ...) \
	((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 23896+
/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
#define AuLock_DW		1		/* write-lock dentry */
#define AuLock_IR		(1 << 1)	/* read-lock inode */
#define AuLock_IW		(1 << 2)	/* write-lock inode */
#define AuLock_FLUSH		(1 << 3)	/* wait for 'nowait' tasks */
#define AuLock_DIR		(1 << 4)	/* target is a dir */
#define AuLock_NOPLM		(1 << 5)	/* return err in plm mode */
#define AuLock_NOPLMW		(1 << 6)	/* wait for plm mode ends */
#define AuLock_GEN		(1 << 7)	/* test digen/iigen */

/* test/set/clear an AuLock_<name> bit in a flags word */
#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
#define au_fset_lock(flags, name) \
	do { (flags) |= AuLock_##name; } while (0)
#define au_fclr_lock(flags, name) \
	do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 23911+
23912+/* ---------------------------------------------------------------------- */
23913+
23914+/* super.c */
23915+extern struct file_system_type aufs_fs_type;
23916+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
23917+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
23918+ void *arg);
23919+void au_array_free(void *array);
23920+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
23921+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
23922+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 23923+
23924+/* sbinfo.c */
23925+void au_si_free(struct kobject *kobj);
23926+int au_si_alloc(struct super_block *sb);
23927+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
23928+
23929+unsigned int au_sigen_inc(struct super_block *sb);
23930+aufs_bindex_t au_new_br_id(struct super_block *sb);
23931+
e49829fe
JR
23932+int si_read_lock(struct super_block *sb, int flags);
23933+int si_write_lock(struct super_block *sb, int flags);
23934+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 23935+void aufs_read_unlock(struct dentry *dentry, int flags);
23936+void aufs_write_lock(struct dentry *dentry);
23937+void aufs_write_unlock(struct dentry *dentry);
e49829fe 23938+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 23939+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
23940+
b752ccd1
AM
23941+int si_pid_test_slow(struct super_block *sb);
23942+void si_pid_set_slow(struct super_block *sb);
23943+void si_pid_clr_slow(struct super_block *sb);
23944+
1facf9fc 23945+/* wbr_policy.c */
23946+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
23947+extern struct au_wbr_create_operations au_wbr_create_ops[];
23948+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
23949+
23950+/* ---------------------------------------------------------------------- */
23951+
23952+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
23953+{
23954+ return sb->s_fs_info;
23955+}
23956+
23957+/* ---------------------------------------------------------------------- */
23958+
23959+#ifdef CONFIG_AUFS_EXPORT
23960+void au_export_init(struct super_block *sb);
23961+
b752ccd1 23962+static inline int au_test_nfsd(void)
1facf9fc 23963+{
b752ccd1
AM
23964+ struct task_struct *tsk = current;
23965+
23966+ return (tsk->flags & PF_KTHREAD)
23967+ && !strcmp(tsk->comm, "nfsd");
1facf9fc 23968+}
23969+
b752ccd1 23970+void au_xigen_inc(struct inode *inode);
1facf9fc 23971+int au_xigen_new(struct inode *inode);
23972+int au_xigen_set(struct super_block *sb, struct file *base);
23973+void au_xigen_clr(struct super_block *sb);
23974+
23975+static inline int au_busy_or_stale(void)
23976+{
b752ccd1 23977+ if (!au_test_nfsd())
1facf9fc 23978+ return -EBUSY;
23979+ return -ESTALE;
23980+}
23981+#else
4a4d8108 23982+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1
AM
23983+AuStubInt0(au_test_nfsd, void)
23984+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
23985+AuStubInt0(au_xigen_new, struct inode *inode)
23986+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
23987+AuStubVoid(au_xigen_clr, struct super_block *sb)
1facf9fc 23988+static inline int au_busy_or_stale(void)
23989+{
23990+ return -EBUSY;
23991+}
23992+#endif /* CONFIG_AUFS_EXPORT */
23993+
23994+/* ---------------------------------------------------------------------- */
23995+
e49829fe
JR
23996+#ifdef CONFIG_AUFS_SBILIST
23997+/* module.c */
23998+extern struct au_splhead au_sbilist;
23999+
24000+static inline void au_sbilist_init(void)
24001+{
24002+ au_spl_init(&au_sbilist);
24003+}
24004+
24005+static inline void au_sbilist_add(struct super_block *sb)
24006+{
24007+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
24008+}
24009+
24010+static inline void au_sbilist_del(struct super_block *sb)
24011+{
24012+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
24013+}
53392da6
AM
24014+
24015+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
24016+static inline void au_sbilist_lock(void)
24017+{
24018+ spin_lock(&au_sbilist.spin);
24019+}
24020+
24021+static inline void au_sbilist_unlock(void)
24022+{
24023+ spin_unlock(&au_sbilist.spin);
24024+}
24025+#define AuGFP_SBILIST GFP_ATOMIC
24026+#else
24027+AuStubVoid(au_sbilist_lock, void)
24028+AuStubVoid(au_sbilist_unlock, void)
24029+#define AuGFP_SBILIST GFP_NOFS
24030+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
24031+#else
24032+AuStubVoid(au_sbilist_init, void)
24033+AuStubVoid(au_sbilist_add, struct super_block*)
24034+AuStubVoid(au_sbilist_del, struct super_block*)
53392da6
AM
24035+AuStubVoid(au_sbilist_lock, void)
24036+AuStubVoid(au_sbilist_unlock, void)
24037+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
24038+#endif
24039+
24040+/* ---------------------------------------------------------------------- */
24041+
/* reset the debugfs dentry pointers of @sbinfo; a no-op without debugfs */
static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
{
	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
#ifdef CONFIG_DEBUG_FS
	sbinfo->si_dbgaufs = NULL;
	sbinfo->si_dbgaufs_xib = NULL;
#ifdef CONFIG_AUFS_EXPORT
	sbinfo->si_dbgaufs_xigen = NULL;
#endif
#endif
}
24057+
24058+/* ---------------------------------------------------------------------- */
24059+
b752ccd1
AM
24060+static inline pid_t si_pid_bit(void)
24061+{
24062+ /* the origin of pid is 1, but the bitmap's is 0 */
24063+ return current->pid - 1;
24064+}
24065+
24066+static inline int si_pid_test(struct super_block *sb)
24067+{
24068+ pid_t bit = si_pid_bit();
24069+ if (bit < PID_MAX_DEFAULT)
24070+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24071+ else
24072+ return si_pid_test_slow(sb);
24073+}
24074+
24075+static inline void si_pid_set(struct super_block *sb)
24076+{
24077+ pid_t bit = si_pid_bit();
24078+ if (bit < PID_MAX_DEFAULT) {
24079+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
24080+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24081+ /* smp_mb(); */
24082+ } else
24083+ si_pid_set_slow(sb);
24084+}
24085+
24086+static inline void si_pid_clr(struct super_block *sb)
24087+{
24088+ pid_t bit = si_pid_bit();
24089+ if (bit < PID_MAX_DEFAULT) {
24090+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
24091+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24092+ /* smp_mb(); */
24093+ } else
24094+ si_pid_clr_slow(sb);
24095+}
24096+
24097+/* ---------------------------------------------------------------------- */
24098+
1facf9fc 24099+/* lock superblock. mainly for entry point functions */
24100+/*
b752ccd1
AM
24101+ * __si_read_lock, __si_write_lock,
24102+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 24103+ */
b752ccd1 24104+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 24105+
dece6358
AM
24106+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
24107+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
24108+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
24109+
b752ccd1
AM
/* read-lock si_rwsem without flushing 'nowait' tasks, then record the pid */
static inline void si_noflush_read_lock(struct super_block *sb)
{
	__si_read_lock(sb);
	si_pid_set(sb);
}
24115+
/*
 * try to read-lock si_rwsem; the pid is recorded only when the lock was
 * obtained. Returns the result of __si_read_trylock().
 */
static inline int si_noflush_read_trylock(struct super_block *sb)
{
	int got = __si_read_trylock(sb);

	if (!got)
		return got;
	si_pid_set(sb);
	return got;
}
24123+
/* write-lock si_rwsem without flushing 'nowait' tasks, then record the pid */
static inline void si_noflush_write_lock(struct super_block *sb)
{
	__si_write_lock(sb);
	si_pid_set(sb);
}
24129+
/*
 * try to write-lock si_rwsem; the pid is recorded only when the lock was
 * obtained. Returns the result of __si_write_trylock().
 */
static inline int si_noflush_write_trylock(struct super_block *sb)
{
	int got = __si_write_trylock(sb);

	if (!got)
		return got;
	si_pid_set(sb);
	return got;
}
24137+
#if 0 /* unused */
/* optionally flush 'nowait' tasks (AuLock_FLUSH), then try the read lock */
static inline int si_read_trylock(struct super_block *sb, int flags)
{
	struct au_sbinfo *sbinfo;

	if (au_ftest_lock(flags, FLUSH)) {
		sbinfo = au_sbi(sb);
		au_nwt_flush(&sbinfo->si_nowait);
	}
	return si_noflush_read_trylock(sb);
}
#endif
1facf9fc 24146+
b752ccd1
AM
/* drop the pid record first, then release the read lock on si_rwsem */
static inline void si_read_unlock(struct super_block *sb)
{
	si_pid_clr(sb);
	__si_read_unlock(sb);
}
24152+
#if 0 /* unused */
/* optionally flush 'nowait' tasks (AuLock_FLUSH), then try the write lock */
static inline int si_write_trylock(struct super_block *sb, int flags)
{
	struct au_sbinfo *sbinfo;

	if (au_ftest_lock(flags, FLUSH)) {
		sbinfo = au_sbi(sb);
		au_nwt_flush(&sbinfo->si_nowait);
	}
	return si_noflush_write_trylock(sb);
}
#endif
24161+
/* drop the pid record first, then release the write lock on si_rwsem */
static inline void si_write_unlock(struct super_block *sb)
{
	si_pid_clr(sb);
	__si_write_unlock(sb);
}
24167+
#if 0 /* unused */
/* thin wrapper around __si_downgrade_lock() */
static inline void si_downgrade_lock(struct super_block *sb)
{
	__si_downgrade_lock(sb);
}
#endif
1facf9fc 24174+
24175+/* ---------------------------------------------------------------------- */
24176+
24177+static inline aufs_bindex_t au_sbend(struct super_block *sb)
24178+{
dece6358 24179+ SiMustAnyLock(sb);
1facf9fc 24180+ return au_sbi(sb)->si_bend;
24181+}
24182+
24183+static inline unsigned int au_mntflags(struct super_block *sb)
24184+{
dece6358 24185+ SiMustAnyLock(sb);
1facf9fc 24186+ return au_sbi(sb)->si_mntflags;
24187+}
24188+
24189+static inline unsigned int au_sigen(struct super_block *sb)
24190+{
dece6358 24191+ SiMustAnyLock(sb);
1facf9fc 24192+ return au_sbi(sb)->si_generation;
24193+}
24194+
7f207e10
AM
24195+static inline void au_ninodes_inc(struct super_block *sb)
24196+{
24197+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
24198+}
24199+
24200+static inline void au_ninodes_dec(struct super_block *sb)
24201+{
24202+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
24203+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
24204+}
24205+
24206+static inline void au_nfiles_inc(struct super_block *sb)
24207+{
24208+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
24209+}
24210+
24211+static inline void au_nfiles_dec(struct super_block *sb)
24212+{
24213+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
24214+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
24215+}
24216+
1facf9fc 24217+static inline struct au_branch *au_sbr(struct super_block *sb,
24218+ aufs_bindex_t bindex)
24219+{
dece6358 24220+ SiMustAnyLock(sb);
1facf9fc 24221+ return au_sbi(sb)->si_branch[0 + bindex];
24222+}
24223+
24224+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
24225+{
dece6358 24226+ SiMustWriteLock(sb);
1facf9fc 24227+ au_sbi(sb)->si_xino_brid = brid;
24228+}
24229+
24230+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
24231+{
dece6358 24232+ SiMustAnyLock(sb);
1facf9fc 24233+ return au_sbi(sb)->si_xino_brid;
24234+}
24235+
24236+#endif /* __KERNEL__ */
24237+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
24238diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
24239--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
24240+++ linux/fs/aufs/sysaufs.c 2012-02-13 21:54:56.973105100 +0100
24241@@ -0,0 +1,105 @@
1facf9fc 24242+/*
f6c5ef8b 24243+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 24244+ *
24245+ * This program, aufs is free software; you can redistribute it and/or modify
24246+ * it under the terms of the GNU General Public License as published by
24247+ * the Free Software Foundation; either version 2 of the License, or
24248+ * (at your option) any later version.
dece6358
AM
24249+ *
24250+ * This program is distributed in the hope that it will be useful,
24251+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24252+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24253+ * GNU General Public License for more details.
24254+ *
24255+ * You should have received a copy of the GNU General Public License
24256+ * along with this program; if not, write to the Free Software
24257+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24258+ */
24259+
24260+/*
24261+ * sysfs interface and lifetime management
24262+ * they are necessary regardless of whether sysfs is disabled.
24263+ */
24264+
1facf9fc 24265+#include <linux/random.h>
1facf9fc 24266+#include "aufs.h"
24267+
24268+unsigned long sysaufs_si_mask;
e49829fe 24269+struct kset *sysaufs_kset;
1facf9fc 24270+
24271+#define AuSiAttr(_name) { \
24272+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
24273+ .show = sysaufs_si_##_name, \
24274+}
24275+
24276+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
24277+struct attribute *sysaufs_si_attrs[] = {
24278+ &sysaufs_si_attr_xi_path.attr,
24279+ NULL,
24280+};
24281+
4a4d8108 24282+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 24283+ .show = sysaufs_si_show
24284+};
24285+
24286+static struct kobj_type au_sbi_ktype = {
24287+ .release = au_si_free,
24288+ .sysfs_ops = &au_sbi_ops,
24289+ .default_attrs = sysaufs_si_attrs
24290+};
24291+
24292+/* ---------------------------------------------------------------------- */
24293+
24294+int sysaufs_si_init(struct au_sbinfo *sbinfo)
24295+{
24296+ int err;
24297+
e49829fe 24298+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 24299+ /* cf. sysaufs_name() */
24300+ err = kobject_init_and_add
e49829fe 24301+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 24302+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
24303+
24304+ dbgaufs_si_null(sbinfo);
24305+ if (!err) {
24306+ err = dbgaufs_si_init(sbinfo);
24307+ if (unlikely(err))
24308+ kobject_put(&sbinfo->si_kobj);
24309+ }
24310+ return err;
24311+}
24312+
24313+void sysaufs_fin(void)
24314+{
24315+ dbgaufs_fin();
e49829fe
JR
24316+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
24317+ kset_unregister(sysaufs_kset);
1facf9fc 24318+}
24319+
24320+int __init sysaufs_init(void)
24321+{
24322+ int err;
24323+
24324+ do {
24325+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
24326+ } while (!sysaufs_si_mask);
24327+
4a4d8108 24328+ err = -EINVAL;
e49829fe
JR
24329+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
24330+ if (unlikely(!sysaufs_kset))
4a4d8108 24331+ goto out;
e49829fe
JR
24332+ err = PTR_ERR(sysaufs_kset);
24333+ if (IS_ERR(sysaufs_kset))
1facf9fc 24334+ goto out;
e49829fe 24335+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 24336+ if (unlikely(err)) {
e49829fe 24337+ kset_unregister(sysaufs_kset);
1facf9fc 24338+ goto out;
24339+ }
24340+
24341+ err = dbgaufs_init();
24342+ if (unlikely(err))
24343+ sysaufs_fin();
4f0767ce 24344+out:
1facf9fc 24345+ return err;
24346+}
7f207e10
AM
24347diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
24348--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
24349+++ linux/fs/aufs/sysaufs.h 2012-02-13 21:54:56.973105100 +0100
24350@@ -0,0 +1,104 @@
1facf9fc 24351+/*
f6c5ef8b 24352+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 24353+ *
24354+ * This program, aufs is free software; you can redistribute it and/or modify
24355+ * it under the terms of the GNU General Public License as published by
24356+ * the Free Software Foundation; either version 2 of the License, or
24357+ * (at your option) any later version.
dece6358
AM
24358+ *
24359+ * This program is distributed in the hope that it will be useful,
24360+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24361+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24362+ * GNU General Public License for more details.
24363+ *
24364+ * You should have received a copy of the GNU General Public License
24365+ * along with this program; if not, write to the Free Software
24366+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24367+ */
24368+
24369+/*
24370+ * sysfs interface and mount lifetime management
24371+ */
24372+
24373+#ifndef __SYSAUFS_H__
24374+#define __SYSAUFS_H__
24375+
24376+#ifdef __KERNEL__
24377+
1facf9fc 24378+#include <linux/sysfs.h>
1facf9fc 24379+#include "module.h"
24380+
dece6358
AM
24381+struct super_block;
24382+struct au_sbinfo;
24383+
1facf9fc 24384+struct sysaufs_si_attr {
24385+ struct attribute attr;
24386+ int (*show)(struct seq_file *seq, struct super_block *sb);
24387+};
24388+
24389+/* ---------------------------------------------------------------------- */
24390+
24391+/* sysaufs.c */
24392+extern unsigned long sysaufs_si_mask;
e49829fe 24393+extern struct kset *sysaufs_kset;
1facf9fc 24394+extern struct attribute *sysaufs_si_attrs[];
24395+int sysaufs_si_init(struct au_sbinfo *sbinfo);
24396+int __init sysaufs_init(void);
24397+void sysaufs_fin(void);
24398+
24399+/* ---------------------------------------------------------------------- */
24400+
24401+/* some people don't like to show a pointer in kernel */
24402+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
24403+{
24404+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
24405+}
24406+
24407+#define SysaufsSiNamePrefix "si_"
24408+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
24409+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
24410+{
24411+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
24412+ sysaufs_si_id(sbinfo));
24413+}
24414+
24415+struct au_branch;
24416+#ifdef CONFIG_SYSFS
24417+/* sysfs.c */
24418+extern struct attribute_group *sysaufs_attr_group;
24419+
24420+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
24421+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24422+ char *buf);
24423+
24424+void sysaufs_br_init(struct au_branch *br);
24425+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
24426+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
24427+
24428+#define sysaufs_brs_init() do {} while (0)
24429+
24430+#else
24431+#define sysaufs_attr_group NULL
24432+
4a4d8108 24433+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
1facf9fc 24434+
24435+static inline
24436+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24437+ char *buf)
24438+{
24439+ return 0;
24440+}
24441+
4a4d8108
AM
24442+AuStubVoid(sysaufs_br_init, struct au_branch *br)
24443+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
24444+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 24445+
24446+static inline void sysaufs_brs_init(void)
24447+{
24448+ sysaufs_brs = 0;
24449+}
24450+
24451+#endif /* CONFIG_SYSFS */
24452+
24453+#endif /* __KERNEL__ */
24454+#endif /* __SYSAUFS_H__ */
7f207e10
AM
24455diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
24456--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
24457+++ linux/fs/aufs/sysfs.c 2012-02-13 21:54:56.973105100 +0100
24458@@ -0,0 +1,257 @@
1facf9fc 24459+/*
f6c5ef8b 24460+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 24461+ *
24462+ * This program, aufs is free software; you can redistribute it and/or modify
24463+ * it under the terms of the GNU General Public License as published by
24464+ * the Free Software Foundation; either version 2 of the License, or
24465+ * (at your option) any later version.
dece6358
AM
24466+ *
24467+ * This program is distributed in the hope that it will be useful,
24468+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24469+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24470+ * GNU General Public License for more details.
24471+ *
24472+ * You should have received a copy of the GNU General Public License
24473+ * along with this program; if not, write to the Free Software
24474+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24475+ */
24476+
24477+/*
24478+ * sysfs interface
24479+ */
24480+
1facf9fc 24481+#include <linux/seq_file.h>
1facf9fc 24482+#include "aufs.h"
24483+
4a4d8108
AM
24484+#ifdef CONFIG_AUFS_FS_MODULE
24485+/* this entry violates the "one line per file" policy of sysfs */
24486+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
24487+ char *buf)
24488+{
24489+ ssize_t err;
24490+ static char *conf =
24491+/* this file is generated at compiling */
24492+#include "conf.str"
24493+ ;
24494+
24495+ err = snprintf(buf, PAGE_SIZE, conf);
24496+ if (unlikely(err >= PAGE_SIZE))
24497+ err = -EFBIG;
24498+ return err;
24499+}
24500+
24501+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
24502+#endif
24503+
1facf9fc 24504+static struct attribute *au_attr[] = {
4a4d8108
AM
24505+#ifdef CONFIG_AUFS_FS_MODULE
24506+ &au_config_attr.attr,
24507+#endif
1facf9fc 24508+ NULL, /* need to NULL terminate the list of attributes */
24509+};
24510+
24511+static struct attribute_group sysaufs_attr_group_body = {
24512+ .attrs = au_attr
24513+};
24514+
24515+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
24516+
24517+/* ---------------------------------------------------------------------- */
24518+
24519+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
24520+{
24521+ int err;
24522+
dece6358
AM
24523+ SiMustAnyLock(sb);
24524+
1facf9fc 24525+ err = 0;
24526+ if (au_opt_test(au_mntflags(sb), XINO)) {
24527+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
24528+ seq_putc(seq, '\n');
24529+ }
24530+ return err;
24531+}
24532+
24533+/*
24534+ * the lifetime of branch is independent from the entry under sysfs.
24535+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
24536+ * unlinked.
24537+ */
24538+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
24539+ aufs_bindex_t bindex)
24540+{
1e00d052 24541+ int err;
1facf9fc 24542+ struct path path;
24543+ struct dentry *root;
24544+ struct au_branch *br;
1e00d052 24545+ char *perm;
1facf9fc 24546+
24547+ AuDbg("b%d\n", bindex);
24548+
1e00d052 24549+ err = 0;
1facf9fc 24550+ root = sb->s_root;
24551+ di_read_lock_parent(root, !AuLock_IR);
24552+ br = au_sbr(sb, bindex);
24553+ path.mnt = br->br_mnt;
24554+ path.dentry = au_h_dptr(root, bindex);
24555+ au_seq_path(seq, &path);
24556+ di_read_unlock(root, !AuLock_IR);
1e00d052
AM
24557+ perm = au_optstr_br_perm(br->br_perm);
24558+ if (perm) {
24559+ err = seq_printf(seq, "=%s\n", perm);
24560+ kfree(perm);
24561+ if (err == -1)
24562+ err = -E2BIG;
24563+ } else
24564+ err = -ENOMEM;
24565+ return err;
1facf9fc 24566+}
24567+
24568+/* ---------------------------------------------------------------------- */
24569+
24570+static struct seq_file *au_seq(char *p, ssize_t len)
24571+{
24572+ struct seq_file *seq;
24573+
24574+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
24575+ if (seq) {
24576+ /* mutex_init(&seq.lock); */
24577+ seq->buf = p;
24578+ seq->size = len;
24579+ return seq; /* success */
24580+ }
24581+
24582+ seq = ERR_PTR(-ENOMEM);
24583+ return seq;
24584+}
24585+
24586+#define SysaufsBr_PREFIX "br"
24587+
24588+/* todo: file size may exceed PAGE_SIZE */
24589+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 24590+ char *buf)
1facf9fc 24591+{
24592+ ssize_t err;
24593+ long l;
24594+ aufs_bindex_t bend;
24595+ struct au_sbinfo *sbinfo;
24596+ struct super_block *sb;
24597+ struct seq_file *seq;
24598+ char *name;
24599+ struct attribute **cattr;
24600+
24601+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
24602+ sb = sbinfo->si_sb;
1308ab2a 24603+
24604+ /*
24605+ * prevent a race condition between sysfs and aufs.
24606+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
24607+ * prohibits maintaining the sysfs entries.
24608+ * here we acquire read lock after sysfs_get_active_two().
24609+ * on the other hand, the remount process may maintain the sysfs/aufs
24610+ * entries after acquiring write lock.
24611+ * it can cause a deadlock.
24612+ * so we simply give up processing the read here.
24613+ */
24614+ err = -EBUSY;
24615+ if (unlikely(!si_noflush_read_trylock(sb)))
24616+ goto out;
1facf9fc 24617+
24618+ seq = au_seq(buf, PAGE_SIZE);
24619+ err = PTR_ERR(seq);
24620+ if (IS_ERR(seq))
1308ab2a 24621+ goto out_unlock;
1facf9fc 24622+
24623+ name = (void *)attr->name;
24624+ cattr = sysaufs_si_attrs;
24625+ while (*cattr) {
24626+ if (!strcmp(name, (*cattr)->name)) {
24627+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
24628+ ->show(seq, sb);
24629+ goto out_seq;
24630+ }
24631+ cattr++;
24632+ }
24633+
24634+ bend = au_sbend(sb);
24635+ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
24636+ name += sizeof(SysaufsBr_PREFIX) - 1;
9dbd164d 24637+ err = kstrtol(name, 10, &l);
1facf9fc 24638+ if (!err) {
24639+ if (l <= bend)
24640+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
24641+ else
24642+ err = -ENOENT;
24643+ }
24644+ goto out_seq;
24645+ }
24646+ BUG();
24647+
4f0767ce 24648+out_seq:
1facf9fc 24649+ if (!err) {
24650+ err = seq->count;
24651+ /* sysfs limit */
24652+ if (unlikely(err == PAGE_SIZE))
24653+ err = -EFBIG;
24654+ }
24655+ kfree(seq);
4f0767ce 24656+out_unlock:
1facf9fc 24657+ si_read_unlock(sb);
4f0767ce 24658+out:
1facf9fc 24659+ return err;
24660+}
24661+
24662+/* ---------------------------------------------------------------------- */
24663+
24664+void sysaufs_br_init(struct au_branch *br)
24665+{
4a4d8108
AM
24666+ struct attribute *attr = &br->br_attr;
24667+
24668+ sysfs_attr_init(attr);
24669+ attr->name = br->br_name;
24670+ attr->mode = S_IRUGO;
1facf9fc 24671+}
24672+
24673+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
24674+{
24675+ struct au_branch *br;
24676+ struct kobject *kobj;
24677+ aufs_bindex_t bend;
24678+
24679+ dbgaufs_brs_del(sb, bindex);
24680+
24681+ if (!sysaufs_brs)
24682+ return;
24683+
24684+ kobj = &au_sbi(sb)->si_kobj;
24685+ bend = au_sbend(sb);
24686+ for (; bindex <= bend; bindex++) {
24687+ br = au_sbr(sb, bindex);
24688+ sysfs_remove_file(kobj, &br->br_attr);
24689+ }
24690+}
24691+
24692+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
24693+{
24694+ int err;
24695+ aufs_bindex_t bend;
24696+ struct kobject *kobj;
24697+ struct au_branch *br;
24698+
24699+ dbgaufs_brs_add(sb, bindex);
24700+
24701+ if (!sysaufs_brs)
24702+ return;
24703+
24704+ kobj = &au_sbi(sb)->si_kobj;
24705+ bend = au_sbend(sb);
24706+ for (; bindex <= bend; bindex++) {
24707+ br = au_sbr(sb, bindex);
24708+ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
24709+ "%d", bindex);
24710+ err = sysfs_create_file(kobj, &br->br_attr);
24711+ if (unlikely(err))
4a4d8108
AM
24712+ pr_warning("failed %s under sysfs(%d)\n",
24713+ br->br_name, err);
1facf9fc 24714+ }
24715+}
7f207e10
AM
24716diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
24717--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
24718+++ linux/fs/aufs/sysrq.c 2012-02-13 21:54:56.973105100 +0100
24719@@ -0,0 +1,148 @@
1facf9fc 24720+/*
f6c5ef8b 24721+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 24722+ *
24723+ * This program, aufs is free software; you can redistribute it and/or modify
24724+ * it under the terms of the GNU General Public License as published by
24725+ * the Free Software Foundation; either version 2 of the License, or
24726+ * (at your option) any later version.
dece6358
AM
24727+ *
24728+ * This program is distributed in the hope that it will be useful,
24729+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24730+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24731+ * GNU General Public License for more details.
24732+ *
24733+ * You should have received a copy of the GNU General Public License
24734+ * along with this program; if not, write to the Free Software
24735+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24736+ */
24737+
24738+/*
24739+ * magic sysrq handler
24740+ */
24741+
1facf9fc 24742+/* #include <linux/sysrq.h> */
027c5e7a 24743+#include <linux/writeback.h>
1facf9fc 24744+#include "aufs.h"
24745+
24746+/* ---------------------------------------------------------------------- */
24747+
24748+static void sysrq_sb(struct super_block *sb)
24749+{
24750+ char *plevel;
24751+ struct au_sbinfo *sbinfo;
24752+ struct file *file;
24753+
24754+ plevel = au_plevel;
24755+ au_plevel = KERN_WARNING;
1facf9fc 24756+
24757+ sbinfo = au_sbi(sb);
4a4d8108
AM
24758+ /* since we define pr_fmt, call printk directly */
24759+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
24760+ printk(KERN_WARNING AUFS_NAME ": superblock\n");
1facf9fc 24761+ au_dpri_sb(sb);
027c5e7a
AM
24762+
24763+#if 0
4a4d8108 24764+ printk(KERN_WARNING AUFS_NAME ": root dentry\n");
1facf9fc 24765+ au_dpri_dentry(sb->s_root);
4a4d8108 24766+ printk(KERN_WARNING AUFS_NAME ": root inode\n");
1facf9fc 24767+ au_dpri_inode(sb->s_root->d_inode);
027c5e7a
AM
24768+#endif
24769+
1facf9fc 24770+#if 0
027c5e7a
AM
24771+ do {
24772+ int err, i, j, ndentry;
24773+ struct au_dcsub_pages dpages;
24774+ struct au_dpage *dpage;
24775+
24776+ err = au_dpages_init(&dpages, GFP_ATOMIC);
24777+ if (unlikely(err))
24778+ break;
24779+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
24780+ if (!err)
24781+ for (i = 0; i < dpages.ndpage; i++) {
24782+ dpage = dpages.dpages + i;
24783+ ndentry = dpage->ndentry;
24784+ for (j = 0; j < ndentry; j++)
24785+ au_dpri_dentry(dpage->dentries[j]);
24786+ }
24787+ au_dpages_free(&dpages);
24788+ } while (0);
24789+#endif
24790+
24791+#if 1
24792+ {
24793+ struct inode *i;
24794+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n");
2cbb1c4b
JR
24795+ spin_lock(&inode_sb_list_lock);
24796+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
24797+ spin_lock(&i->i_lock);
027c5e7a
AM
24798+ if (1 || list_empty(&i->i_dentry))
24799+ au_dpri_inode(i);
2cbb1c4b
JR
24800+ spin_unlock(&i->i_lock);
24801+ }
24802+ spin_unlock(&inode_sb_list_lock);
027c5e7a 24803+ }
1facf9fc 24804+#endif
4a4d8108 24805+ printk(KERN_WARNING AUFS_NAME ": files\n");
0c5527e5
AM
24806+ lg_global_lock(files_lglock);
24807+ do_file_list_for_each_entry(sb, file) {
4a4d8108
AM
24808+ umode_t mode;
24809+ mode = file->f_dentry->d_inode->i_mode;
24810+ if (!special_file(mode) || au_special_file(mode))
1facf9fc 24811+ au_dpri_file(file);
0c5527e5
AM
24812+ } while_file_list_for_each_entry;
24813+ lg_global_unlock(files_lglock);
e49829fe 24814+ printk(KERN_WARNING AUFS_NAME ": done\n");
1facf9fc 24815+
24816+ au_plevel = plevel;
1facf9fc 24817+}
24818+
24819+/* ---------------------------------------------------------------------- */
24820+
24821+/* module parameter */
24822+static char *aufs_sysrq_key = "a";
24823+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
24824+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
24825+
0c5527e5 24826+static void au_sysrq(int key __maybe_unused)
1facf9fc 24827+{
1facf9fc 24828+ struct au_sbinfo *sbinfo;
24829+
027c5e7a 24830+ lockdep_off();
53392da6 24831+ au_sbilist_lock();
e49829fe 24832+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 24833+ sysrq_sb(sbinfo->si_sb);
53392da6 24834+ au_sbilist_unlock();
027c5e7a 24835+ lockdep_on();
1facf9fc 24836+}
24837+
24838+static struct sysrq_key_op au_sysrq_op = {
24839+ .handler = au_sysrq,
24840+ .help_msg = "Aufs",
24841+ .action_msg = "Aufs",
24842+ .enable_mask = SYSRQ_ENABLE_DUMP
24843+};
24844+
24845+/* ---------------------------------------------------------------------- */
24846+
24847+int __init au_sysrq_init(void)
24848+{
24849+ int err;
24850+ char key;
24851+
24852+ err = -1;
24853+ key = *aufs_sysrq_key;
24854+ if ('a' <= key && key <= 'z')
24855+ err = register_sysrq_key(key, &au_sysrq_op);
24856+ if (unlikely(err))
4a4d8108 24857+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 24858+ return err;
24859+}
24860+
24861+void au_sysrq_fin(void)
24862+{
24863+ int err;
24864+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
24865+ if (unlikely(err))
4a4d8108 24866+ pr_err("err %d (ignored)\n", err);
1facf9fc 24867+}
7f207e10
AM
24868diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
24869--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
24870+++ linux/fs/aufs/vdir.c 2012-02-13 21:54:56.973105100 +0100
24871@@ -0,0 +1,885 @@
1facf9fc 24872+/*
f6c5ef8b 24873+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 24874+ *
24875+ * This program, aufs is free software; you can redistribute it and/or modify
24876+ * it under the terms of the GNU General Public License as published by
24877+ * the Free Software Foundation; either version 2 of the License, or
24878+ * (at your option) any later version.
dece6358
AM
24879+ *
24880+ * This program is distributed in the hope that it will be useful,
24881+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24882+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24883+ * GNU General Public License for more details.
24884+ *
24885+ * You should have received a copy of the GNU General Public License
24886+ * along with this program; if not, write to the Free Software
24887+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24888+ */
24889+
24890+/*
24891+ * virtual or vertical directory
24892+ */
24893+
24894+#include "aufs.h"
24895+
dece6358 24896+static unsigned int calc_size(int nlen)
1facf9fc 24897+{
dece6358 24898+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 24899+}
24900+
24901+static int set_deblk_end(union au_vdir_deblk_p *p,
24902+ union au_vdir_deblk_p *deblk_end)
24903+{
24904+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
24905+ p->de->de_str.len = 0;
24906+ /* smp_mb(); */
24907+ return 0;
24908+ }
24909+ return -1; /* error */
24910+}
24911+
24912+/* returns true or false */
24913+static int is_deblk_end(union au_vdir_deblk_p *p,
24914+ union au_vdir_deblk_p *deblk_end)
24915+{
24916+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
24917+ return !p->de->de_str.len;
24918+ return 1;
24919+}
24920+
24921+static unsigned char *last_deblk(struct au_vdir *vdir)
24922+{
24923+ return vdir->vd_deblk[vdir->vd_nblk - 1];
24924+}
24925+
24926+/* ---------------------------------------------------------------------- */
24927+
1308ab2a 24928+/* estimate the appropriate size for name hash table */
24929+unsigned int au_rdhash_est(loff_t sz)
24930+{
24931+ unsigned int n;
24932+
24933+ n = UINT_MAX;
24934+ sz >>= 10;
24935+ if (sz < n)
24936+ n = sz;
24937+ if (sz < AUFS_RDHASH_DEF)
24938+ n = AUFS_RDHASH_DEF;
4a4d8108 24939+ /* pr_info("n %u\n", n); */
1308ab2a 24940+ return n;
24941+}
24942+
1facf9fc 24943+/*
24944+ * the allocated memory has to be freed by
dece6358 24945+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 24946+ */
dece6358 24947+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 24948+{
1facf9fc 24949+ struct hlist_head *head;
dece6358 24950+ unsigned int u;
1facf9fc 24951+
dece6358
AM
24952+ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp);
24953+ if (head) {
24954+ nhash->nh_num = num_hash;
24955+ nhash->nh_head = head;
24956+ for (u = 0; u < num_hash; u++)
1facf9fc 24957+ INIT_HLIST_HEAD(head++);
dece6358 24958+ return 0; /* success */
1facf9fc 24959+ }
1facf9fc 24960+
dece6358 24961+ return -ENOMEM;
1facf9fc 24962+}
24963+
dece6358
AM
24964+static void nhash_count(struct hlist_head *head)
24965+{
24966+#if 0
24967+ unsigned long n;
24968+ struct hlist_node *pos;
24969+
24970+ n = 0;
24971+ hlist_for_each(pos, head)
24972+ n++;
4a4d8108 24973+ pr_info("%lu\n", n);
dece6358
AM
24974+#endif
24975+}
24976+
24977+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 24978+{
1facf9fc 24979+ struct au_vdir_wh *tpos;
24980+ struct hlist_node *pos, *node;
24981+
dece6358
AM
24982+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
24983+ /* hlist_del(pos); */
24984+ kfree(tpos);
1facf9fc 24985+ }
24986+}
24987+
dece6358 24988+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 24989+{
dece6358
AM
24990+ struct au_vdir_dehstr *tpos;
24991+ struct hlist_node *pos, *node;
1facf9fc 24992+
dece6358
AM
24993+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
24994+ /* hlist_del(pos); */
4a4d8108 24995+ au_cache_free_vdir_dehstr(tpos);
1facf9fc 24996+ }
1facf9fc 24997+}
24998+
dece6358
AM
24999+static void au_nhash_do_free(struct au_nhash *nhash,
25000+ void (*free)(struct hlist_head *head))
1facf9fc 25001+{
1308ab2a 25002+ unsigned int n;
1facf9fc 25003+ struct hlist_head *head;
1facf9fc 25004+
dece6358 25005+ n = nhash->nh_num;
1308ab2a 25006+ if (!n)
25007+ return;
25008+
dece6358 25009+ head = nhash->nh_head;
1308ab2a 25010+ while (n-- > 0) {
dece6358
AM
25011+ nhash_count(head);
25012+ free(head++);
1facf9fc 25013+ }
dece6358 25014+ kfree(nhash->nh_head);
1facf9fc 25015+}
25016+
dece6358 25017+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 25018+{
dece6358
AM
25019+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
25020+}
1facf9fc 25021+
dece6358
AM
25022+static void au_nhash_de_free(struct au_nhash *delist)
25023+{
25024+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 25025+}
25026+
25027+/* ---------------------------------------------------------------------- */
25028+
25029+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
25030+ int limit)
25031+{
25032+ int num;
25033+ unsigned int u, n;
25034+ struct hlist_head *head;
25035+ struct au_vdir_wh *tpos;
25036+ struct hlist_node *pos;
25037+
25038+ num = 0;
25039+ n = whlist->nh_num;
25040+ head = whlist->nh_head;
1308ab2a 25041+ for (u = 0; u < n; u++, head++)
1facf9fc 25042+ hlist_for_each_entry(tpos, pos, head, wh_hash)
25043+ if (tpos->wh_bindex == btgt && ++num > limit)
25044+ return 1;
1facf9fc 25045+ return 0;
25046+}
25047+
25048+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 25049+ unsigned char *name,
1facf9fc 25050+ unsigned int len)
25051+{
dece6358
AM
25052+ unsigned int v;
25053+ /* const unsigned int magic_bit = 12; */
25054+
1308ab2a 25055+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
25056+
dece6358
AM
25057+ v = 0;
25058+ while (len--)
25059+ v += *name++;
25060+ /* v = hash_long(v, magic_bit); */
25061+ v %= nhash->nh_num;
25062+ return nhash->nh_head + v;
25063+}
25064+
25065+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
25066+ int nlen)
25067+{
25068+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 25069+}
25070+
25071+/* returns found or not */
dece6358 25072+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 25073+{
25074+ struct hlist_head *head;
25075+ struct au_vdir_wh *tpos;
25076+ struct hlist_node *pos;
25077+ struct au_vdir_destr *str;
25078+
dece6358 25079+ head = au_name_hash(whlist, name, nlen);
1facf9fc 25080+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
25081+ str = &tpos->wh_str;
25082+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
25083+ if (au_nhash_test_name(str, name, nlen))
25084+ return 1;
25085+ }
25086+ return 0;
25087+}
25088+
25089+/* returns found(true) or not */
25090+static int test_known(struct au_nhash *delist, char *name, int nlen)
25091+{
25092+ struct hlist_head *head;
25093+ struct au_vdir_dehstr *tpos;
25094+ struct hlist_node *pos;
25095+ struct au_vdir_destr *str;
25096+
25097+ head = au_name_hash(delist, name, nlen);
25098+ hlist_for_each_entry(tpos, pos, head, hash) {
25099+ str = tpos->str;
25100+ AuDbg("%.*s\n", str->len, str->name);
25101+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 25102+ return 1;
25103+ }
25104+ return 0;
25105+}
25106+
dece6358
AM
25107+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
25108+ unsigned char d_type)
25109+{
25110+#ifdef CONFIG_AUFS_SHWH
25111+ wh->wh_ino = ino;
25112+ wh->wh_type = d_type;
25113+#endif
25114+}
25115+
25116+/* ---------------------------------------------------------------------- */
25117+
25118+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
25119+ unsigned int d_type, aufs_bindex_t bindex,
25120+ unsigned char shwh)
1facf9fc 25121+{
25122+ int err;
25123+ struct au_vdir_destr *str;
25124+ struct au_vdir_wh *wh;
25125+
dece6358 25126+ AuDbg("%.*s\n", nlen, name);
1308ab2a 25127+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
25128+
1facf9fc 25129+ err = -ENOMEM;
dece6358 25130+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 25131+ if (unlikely(!wh))
25132+ goto out;
25133+
25134+ err = 0;
25135+ wh->wh_bindex = bindex;
dece6358
AM
25136+ if (shwh)
25137+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 25138+ str = &wh->wh_str;
dece6358
AM
25139+ str->len = nlen;
25140+ memcpy(str->name, name, nlen);
25141+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 25142+ /* smp_mb(); */
25143+
4f0767ce 25144+out:
1facf9fc 25145+ return err;
25146+}
25147+
1facf9fc 25148+static int append_deblk(struct au_vdir *vdir)
25149+{
25150+ int err;
dece6358 25151+ unsigned long ul;
1facf9fc 25152+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
25153+ union au_vdir_deblk_p p, deblk_end;
25154+ unsigned char **o;
25155+
25156+ err = -ENOMEM;
dece6358
AM
25157+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
25158+ GFP_NOFS);
1facf9fc 25159+ if (unlikely(!o))
25160+ goto out;
25161+
25162+ vdir->vd_deblk = o;
25163+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
25164+ if (p.deblk) {
25165+ ul = vdir->vd_nblk++;
25166+ vdir->vd_deblk[ul] = p.deblk;
25167+ vdir->vd_last.ul = ul;
25168+ vdir->vd_last.p.deblk = p.deblk;
25169+ deblk_end.deblk = p.deblk + deblk_sz;
25170+ err = set_deblk_end(&p, &deblk_end);
25171+ }
25172+
4f0767ce 25173+out:
1facf9fc 25174+ return err;
25175+}
25176+
dece6358
AM
25177+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
25178+ unsigned int d_type, struct au_nhash *delist)
25179+{
25180+ int err;
25181+ unsigned int sz;
25182+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
25183+ union au_vdir_deblk_p p, *room, deblk_end;
25184+ struct au_vdir_dehstr *dehstr;
25185+
25186+ p.deblk = last_deblk(vdir);
25187+ deblk_end.deblk = p.deblk + deblk_sz;
25188+ room = &vdir->vd_last.p;
25189+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
25190+ || !is_deblk_end(room, &deblk_end));
25191+
25192+ sz = calc_size(nlen);
25193+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
25194+ err = append_deblk(vdir);
25195+ if (unlikely(err))
25196+ goto out;
25197+
25198+ p.deblk = last_deblk(vdir);
25199+ deblk_end.deblk = p.deblk + deblk_sz;
25200+ /* smp_mb(); */
25201+ AuDebugOn(room->deblk != p.deblk);
25202+ }
25203+
25204+ err = -ENOMEM;
4a4d8108 25205+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
25206+ if (unlikely(!dehstr))
25207+ goto out;
25208+
25209+ dehstr->str = &room->de->de_str;
25210+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
25211+ room->de->de_ino = ino;
25212+ room->de->de_type = d_type;
25213+ room->de->de_str.len = nlen;
25214+ memcpy(room->de->de_str.name, name, nlen);
25215+
25216+ err = 0;
25217+ room->deblk += sz;
25218+ if (unlikely(set_deblk_end(room, &deblk_end)))
25219+ err = append_deblk(vdir);
25220+ /* smp_mb(); */
25221+
4f0767ce 25222+out:
dece6358
AM
25223+ return err;
25224+}
25225+
25226+/* ---------------------------------------------------------------------- */
25227+
25228+void au_vdir_free(struct au_vdir *vdir)
25229+{
25230+ unsigned char **deblk;
25231+
25232+ deblk = vdir->vd_deblk;
25233+ while (vdir->vd_nblk--)
25234+ kfree(*deblk++);
25235+ kfree(vdir->vd_deblk);
25236+ au_cache_free_vdir(vdir);
25237+}
25238+
1308ab2a 25239+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 25240+{
25241+ struct au_vdir *vdir;
1308ab2a 25242+ struct super_block *sb;
1facf9fc 25243+ int err;
25244+
1308ab2a 25245+ sb = file->f_dentry->d_sb;
dece6358
AM
25246+ SiMustAnyLock(sb);
25247+
1facf9fc 25248+ err = -ENOMEM;
25249+ vdir = au_cache_alloc_vdir();
25250+ if (unlikely(!vdir))
25251+ goto out;
25252+
25253+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
25254+ if (unlikely(!vdir->vd_deblk))
25255+ goto out_free;
25256+
25257+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 25258+ if (!vdir->vd_deblk_sz) {
25259+ /* si_rdblk is zero: estimate the appropriate size for deblk */
25260+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 25261+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 25262+ }
1facf9fc 25263+ vdir->vd_nblk = 0;
25264+ vdir->vd_version = 0;
25265+ vdir->vd_jiffy = 0;
25266+ err = append_deblk(vdir);
25267+ if (!err)
25268+ return vdir; /* success */
25269+
25270+ kfree(vdir->vd_deblk);
25271+
4f0767ce 25272+out_free:
1facf9fc 25273+ au_cache_free_vdir(vdir);
4f0767ce 25274+out:
1facf9fc 25275+ vdir = ERR_PTR(err);
25276+ return vdir;
25277+}
25278+
25279+static int reinit_vdir(struct au_vdir *vdir)
25280+{
25281+ int err;
25282+ union au_vdir_deblk_p p, deblk_end;
25283+
25284+ while (vdir->vd_nblk > 1) {
25285+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
25286+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
25287+ vdir->vd_nblk--;
25288+ }
25289+ p.deblk = vdir->vd_deblk[0];
25290+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
25291+ err = set_deblk_end(&p, &deblk_end);
25292+ /* keep vd_deblk_sz; only block 0 survives and the cursor is rewound */
25293+ vdir->vd_last.ul = 0;
25294+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25295+ vdir->vd_version = 0;
25296+ vdir->vd_jiffy = 0;
25297+ /* smp_mb(); */
25298+ return err;
25299+}
25300+
25301+/* ---------------------------------------------------------------------- */
25302+
1facf9fc 25303+#define AuFillVdir_CALLED 1
25304+#define AuFillVdir_WHABLE (1 << 1)
dece6358 25305+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 25306+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
25307+#define au_fset_fillvdir(flags, name) \
25308+ do { (flags) |= AuFillVdir_##name; } while (0)
25309+#define au_fclr_fillvdir(flags, name) \
25310+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 25311+
dece6358
AM
25312+#ifndef CONFIG_AUFS_SHWH
25313+#undef AuFillVdir_SHWH
25314+#define AuFillVdir_SHWH 0
25315+#endif
25316+
1facf9fc 25317+struct fillvdir_arg {
25318+ struct file *file;
25319+ struct au_vdir *vdir;
dece6358
AM
25320+ struct au_nhash delist;
25321+ struct au_nhash whlist;
1facf9fc 25322+ aufs_bindex_t bindex;
25323+ unsigned int flags;
25324+ int err;
25325+};
25326+
dece6358 25327+static int fillvdir(void *__arg, const char *__name, int nlen,
1facf9fc 25328+ loff_t offset __maybe_unused, u64 h_ino,
25329+ unsigned int d_type)
25330+{
25331+ struct fillvdir_arg *arg = __arg;
25332+ char *name = (void *)__name;
25333+ struct super_block *sb;
1facf9fc 25334+ ino_t ino = 0; /* a whiteout keeps 0 unless SHWH fills it below */
dece6358 25335+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 25336+
1facf9fc 25337+ arg->err = 0;
dece6358 25338+ sb = arg->file->f_dentry->d_sb;
1facf9fc 25339+ au_fset_fillvdir(arg->flags, CALLED);
25340+ /* smp_mb(); */
dece6358 25341+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 25342+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
25343+ if (test_known(&arg->delist, name, nlen)
25344+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
25345+ goto out; /* already exists or whiteouted */
1facf9fc 25346+
25347+ sb = arg->file->f_dentry->d_sb;
dece6358 25348+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
25349+ if (!arg->err) {
25350+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25351+ d_type = DT_UNKNOWN;
dece6358
AM
25352+ arg->err = append_de(arg->vdir, name, nlen, ino,
25353+ d_type, &arg->delist);
4a4d8108 25354+ }
1facf9fc 25355+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
25356+ name += AUFS_WH_PFX_LEN; /* record the name without the whiteout prefix */
dece6358
AM
25357+ nlen -= AUFS_WH_PFX_LEN;
25358+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
25359+ goto out; /* already whiteouted */
1facf9fc 25360+
dece6358
AM
25361+ if (shwh)
25362+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
25363+ &ino);
4a4d8108
AM
25364+ if (!arg->err) {
25365+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
25366+ d_type = DT_UNKNOWN; /* NOTE(review): the other branch tests nlen > AUFS_MAX_NAMELEN; confirm '<=' is intended */
1facf9fc 25367+ arg->err = au_nhash_append_wh
dece6358
AM
25368+ (&arg->whlist, name, nlen, ino, d_type,
25369+ arg->bindex, shwh);
4a4d8108 25370+ }
1facf9fc 25371+ }
25372+
4f0767ce 25373+out:
1facf9fc 25374+ if (!arg->err)
25375+ arg->vdir->vd_jiffy = jiffies;
25376+ /* smp_mb(); */
25377+ AuTraceErr(arg->err);
25378+ return arg->err;
25379+}
25380+
dece6358
AM
25381+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
25382+ struct au_nhash *whlist, struct au_nhash *delist)
25383+{
25384+#ifdef CONFIG_AUFS_SHWH
25385+ int err;
25386+ unsigned int nh, u;
25387+ struct hlist_head *head;
25388+ struct au_vdir_wh *tpos;
25389+ struct hlist_node *pos, *n;
25390+ char *p, *o;
25391+ struct au_vdir_destr *destr;
25392+
25393+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
25394+
25395+ err = -ENOMEM;
4a4d8108 25396+ o = p = __getname_gfp(GFP_NOFS);
dece6358
AM
25397+ if (unlikely(!p))
25398+ goto out;
25399+
25400+ err = 0;
25401+ nh = whlist->nh_num;
25402+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* o = prefix, p = where each name is pasted */
25403+ p += AUFS_WH_PFX_LEN;
25404+ for (u = 0; !err && u < nh; u++) {
25405+ head = whlist->nh_head + u;
25406+ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
25407+ destr = &tpos->wh_str;
25408+ memcpy(p, destr->name, destr->len);
25409+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
25410+ tpos->wh_ino, tpos->wh_type, delist);
25411+ if (unlikely(err))
25412+ break; /* the bucket loop also stops, so err is not overwritten */
25413+ }
25414+ }
25415+
25416+ __putname(o);
25417+
4f0767ce 25418+out:
dece6358
AM
25419+ AuTraceErr(err);
25420+ return err;
25421+#else
25422+ return 0;
25423+#endif
25424+}
25425+
1facf9fc 25426+static int au_do_read_vdir(struct fillvdir_arg *arg)
25427+{
25428+ int err;
dece6358 25429+ unsigned int rdhash;
1facf9fc 25430+ loff_t offset;
dece6358
AM
25431+ aufs_bindex_t bend, bindex, bstart;
25432+ unsigned char shwh;
1facf9fc 25433+ struct file *hf, *file;
25434+ struct super_block *sb;
25435+
1facf9fc 25436+ file = arg->file;
25437+ sb = file->f_dentry->d_sb;
dece6358
AM
25438+ SiMustAnyLock(sb);
25439+
25440+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 25441+ if (!rdhash)
25442+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
25443+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
25444+ if (unlikely(err))
1facf9fc 25445+ goto out;
dece6358
AM
25446+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
25447+ if (unlikely(err))
1facf9fc 25448+ goto out_delist;
25449+
25450+ err = 0;
25451+ arg->flags = 0;
dece6358
AM
25452+ shwh = 0;
25453+ if (au_opt_test(au_mntflags(sb), SHWH)) {
25454+ shwh = 1;
25455+ au_fset_fillvdir(arg->flags, SHWH);
25456+ }
25457+ bstart = au_fbstart(file);
4a4d8108 25458+ bend = au_fbend_dir(file);
dece6358 25459+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 25460+ hf = au_hf_dir(file, bindex);
1facf9fc 25461+ if (!hf)
25462+ continue;
25463+
25464+ offset = vfsub_llseek(hf, 0, SEEK_SET);
25465+ err = offset;
25466+ if (unlikely(offset))
25467+ break;
25468+
25469+ arg->bindex = bindex;
25470+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
25471+ if (shwh
25472+ || (bindex != bend
25473+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 25474+ au_fset_fillvdir(arg->flags, WHABLE);
25475+ do {
25476+ arg->err = 0;
25477+ au_fclr_fillvdir(arg->flags, CALLED);
25478+ /* smp_mb(); */
25479+ err = vfsub_readdir(hf, fillvdir, arg);
25480+ if (err >= 0)
25481+ err = arg->err;
25482+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
25483+ }
dece6358
AM
25484+
25485+ if (!err && shwh)
25486+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
25487+
25488+ au_nhash_wh_free(&arg->whlist);
1facf9fc 25489+
4f0767ce 25490+out_delist:
dece6358 25491+ au_nhash_de_free(&arg->delist);
4f0767ce 25492+out:
1facf9fc 25493+ return err;
25494+}
25495+
25496+static int read_vdir(struct file *file, int may_read)
25497+{
25498+ int err;
25499+ unsigned long expire;
25500+ unsigned char do_read;
25501+ struct fillvdir_arg arg;
25502+ struct inode *inode;
25503+ struct au_vdir *vdir, *allocated;
25504+
25505+ err = 0;
25506+ inode = file->f_dentry->d_inode;
25507+ IMustLock(inode);
dece6358
AM
25508+ SiMustAnyLock(inode->i_sb);
25509+
1facf9fc 25510+ allocated = NULL;
25511+ do_read = 0;
25512+ expire = au_sbi(inode->i_sb)->si_rdcache;
25513+ vdir = au_ivdir(inode);
25514+ if (!vdir) {
25515+ do_read = 1;
1308ab2a 25516+ vdir = alloc_vdir(file);
1facf9fc 25517+ err = PTR_ERR(vdir);
25518+ if (IS_ERR(vdir))
25519+ goto out;
25520+ err = 0;
25521+ allocated = vdir;
25522+ } else if (may_read
25523+ && (inode->i_version != vdir->vd_version
25524+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
25525+ do_read = 1;
25526+ err = reinit_vdir(vdir);
25527+ if (unlikely(err))
25528+ goto out;
25529+ }
25530+
25531+ if (!do_read)
25532+ return 0; /* success */
25533+
25534+ arg.file = file;
25535+ arg.vdir = vdir;
25536+ err = au_do_read_vdir(&arg);
25537+ if (!err) {
25538+ /* file->f_pos = 0; */
25539+ vdir->vd_version = inode->i_version;
25540+ vdir->vd_last.ul = 0;
25541+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25542+ if (allocated)
25543+ au_set_ivdir(inode, allocated);
25544+ } else if (allocated)
25545+ au_vdir_free(allocated);
25546+
4f0767ce 25547+out:
1facf9fc 25548+ return err;
25549+}
25550+
25551+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
25552+{
25553+ int err, rerr;
25554+ unsigned long ul, n;
25555+ const unsigned int deblk_sz = src->vd_deblk_sz;
25556+ /* copy src's blocks and cursor into tgt; on failure tgt is reinit'ed and an error returned */
25557+ AuDebugOn(tgt->vd_nblk != 1);
25558+
25559+ err = -ENOMEM;
25560+ if (tgt->vd_nblk < src->vd_nblk) {
25561+ unsigned char **p;
25562+
dece6358
AM
25563+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
25564+ GFP_NOFS);
1facf9fc 25565+ if (unlikely(!p))
25566+ goto out;
25567+ tgt->vd_deblk = p;
25568+ }
25569+
1308ab2a 25570+ if (tgt->vd_deblk_sz != deblk_sz) {
25571+ unsigned char *p;
25572+
25573+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
25574+ if (unlikely(!p))
25575+ goto out;
25576+ tgt->vd_deblk[0] = p;
25577+ tgt->vd_deblk_sz = deblk_sz; /* record the size only once block 0 really has it */
25578+ }
1facf9fc 25579+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 25580+ tgt->vd_version = src->vd_version;
25581+ tgt->vd_jiffy = src->vd_jiffy;
25582+
25583+ n = src->vd_nblk;
25584+ for (ul = 1; ul < n; ul++) {
dece6358
AM
25585+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
25586+ GFP_NOFS);
25587+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 25588+ goto out;
1308ab2a 25589+ tgt->vd_nblk++; /* lets reinit_vdir() free the copies made so far */
1facf9fc 25590+ }
1308ab2a 25591+ tgt->vd_nblk = n;
25592+ tgt->vd_last.ul = src->vd_last.ul; /* same block index as src, like the offset below */
25593+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
25594+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
25595+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 25596+ /* smp_mb(); */
25597+ return 0; /* success */
25598+
4f0767ce 25599+out:
1facf9fc 25600+ rerr = reinit_vdir(tgt);
25601+ BUG_ON(rerr);
25602+ return err;
25603+}
25604+
25605+int au_vdir_init(struct file *file)
25606+{
25607+ int err;
25608+ struct inode *inode;
25609+ struct au_vdir *vdir_cache, *allocated;
25610+
25611+ err = read_vdir(file, !file->f_pos);
25612+ if (unlikely(err))
25613+ goto out;
25614+
25615+ allocated = NULL;
25616+ vdir_cache = au_fvdir_cache(file);
25617+ if (!vdir_cache) {
1308ab2a 25618+ vdir_cache = alloc_vdir(file);
1facf9fc 25619+ err = PTR_ERR(vdir_cache);
25620+ if (IS_ERR(vdir_cache))
25621+ goto out;
25622+ allocated = vdir_cache;
25623+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
25624+ err = reinit_vdir(vdir_cache);
25625+ if (unlikely(err))
25626+ goto out;
25627+ } else
25628+ return 0; /* success */
25629+
25630+ inode = file->f_dentry->d_inode;
25631+ err = copy_vdir(vdir_cache, au_ivdir(inode));
25632+ if (!err) {
25633+ file->f_version = inode->i_version;
25634+ if (allocated)
25635+ au_set_fvdir_cache(file, allocated);
25636+ } else if (allocated)
25637+ au_vdir_free(allocated);
25638+
4f0767ce 25639+out:
1facf9fc 25640+ return err;
25641+}
25642+
25643+static loff_t calc_offset(struct au_vdir *vdir)
25644+{
25645+ loff_t offset;
25646+ union au_vdir_deblk_p p;
25647+
25648+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
25649+ offset = vdir->vd_last.p.deblk - p.deblk;
25650+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
25651+ return offset;
25652+}
25653+
25654+/* returns true or false */
25655+static int seek_vdir(struct file *file)
25656+{
25657+ int valid;
25658+ unsigned int deblk_sz;
25659+ unsigned long ul, n;
25660+ loff_t offset;
25661+ union au_vdir_deblk_p p, deblk_end;
25662+ struct au_vdir *vdir_cache;
25663+
25664+ valid = 1;
25665+ vdir_cache = au_fvdir_cache(file);
25666+ offset = calc_offset(vdir_cache);
25667+ AuDbg("offset %lld\n", offset);
25668+ if (file->f_pos == offset)
25669+ goto out;
25670+
25671+ vdir_cache->vd_last.ul = 0;
25672+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
25673+ if (!file->f_pos)
25674+ goto out;
25675+
25676+ valid = 0;
25677+ deblk_sz = vdir_cache->vd_deblk_sz;
25678+ ul = div64_u64(file->f_pos, deblk_sz);
25679+ AuDbg("ul %lu\n", ul);
25680+ if (ul >= vdir_cache->vd_nblk)
25681+ goto out;
25682+
25683+ n = vdir_cache->vd_nblk;
25684+ for (; ul < n; ul++) {
25685+ p.deblk = vdir_cache->vd_deblk[ul];
25686+ deblk_end.deblk = p.deblk + deblk_sz;
25687+ offset = ul;
25688+ offset *= deblk_sz;
25689+ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
25690+ unsigned int l;
25691+
25692+ l = calc_size(p.de->de_str.len);
25693+ offset += l;
25694+ p.deblk += l;
25695+ }
25696+ if (!is_deblk_end(&p, &deblk_end)) {
25697+ valid = 1;
25698+ vdir_cache->vd_last.ul = ul;
25699+ vdir_cache->vd_last.p = p;
25700+ break;
25701+ }
25702+ }
25703+
4f0767ce 25704+out:
1facf9fc 25705+ /* smp_mb(); */
25706+ AuTraceErr(!valid);
25707+ return valid;
25708+}
25709+
25710+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
25711+{
25712+ int err;
25713+ unsigned int l, deblk_sz;
25714+ union au_vdir_deblk_p deblk_end;
25715+ struct au_vdir *vdir_cache;
25716+ struct au_vdir_de *de;
25717+
25718+ vdir_cache = au_fvdir_cache(file);
25719+ if (!seek_vdir(file))
25720+ return 0;
25721+
25722+ deblk_sz = vdir_cache->vd_deblk_sz;
25723+ while (1) {
25724+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25725+ deblk_end.deblk += deblk_sz;
25726+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
25727+ de = vdir_cache->vd_last.p.de;
25728+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
25729+ de->de_str.len, de->de_str.name, file->f_pos,
25730+ (unsigned long)de->de_ino, de->de_type);
25731+ err = filldir(dirent, de->de_str.name, de->de_str.len,
25732+ file->f_pos, de->de_ino, de->de_type);
25733+ if (unlikely(err)) {
25734+ AuTraceErr(err);
25735+ /* todo: ignore the error caused by udba? */
25736+ /* return err; */
25737+ return 0;
25738+ }
25739+
25740+ l = calc_size(de->de_str.len);
25741+ vdir_cache->vd_last.p.deblk += l;
25742+ file->f_pos += l;
25743+ }
25744+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
25745+ vdir_cache->vd_last.ul++;
25746+ vdir_cache->vd_last.p.deblk
25747+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25748+ file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
25749+ continue;
25750+ }
25751+ break;
25752+ }
25753+
25754+ /* smp_mb(); */
25755+ return 0;
25756+}
7f207e10
AM
25757diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
25758--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
25759+++ linux/fs/aufs/vfsub.c 2012-02-13 21:54:56.973105100 +0100
25760@@ -0,0 +1,835 @@
1facf9fc 25761+/*
f6c5ef8b 25762+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 25763+ *
25764+ * This program, aufs is free software; you can redistribute it and/or modify
25765+ * it under the terms of the GNU General Public License as published by
25766+ * the Free Software Foundation; either version 2 of the License, or
25767+ * (at your option) any later version.
dece6358
AM
25768+ *
25769+ * This program is distributed in the hope that it will be useful,
25770+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25771+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25772+ * GNU General Public License for more details.
25773+ *
25774+ * You should have received a copy of the GNU General Public License
25775+ * along with this program; if not, write to the Free Software
25776+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 25777+ */
25778+
25779+/*
25780+ * sub-routines for VFS
25781+ */
25782+
1308ab2a 25783+#include <linux/ima.h>
dece6358
AM
25784+#include <linux/namei.h>
25785+#include <linux/security.h>
25786+#include <linux/splice.h>
1facf9fc 25787+#include "aufs.h"
25788+
25789+int vfsub_update_h_iattr(struct path *h_path, int *did)
25790+{
25791+ int err;
25792+ struct kstat st;
25793+ struct super_block *h_sb;
25794+
25795+ /* for remote fs, leave work for its getattr or d_revalidate */
25796+ /* for bad i_attr fs, handle them in aufs_getattr() */
25797+ /* still some fs may acquire i_mutex. we need to skip them */
25798+ err = 0;
25799+ if (!did)
25800+ did = &err;
25801+ h_sb = h_path->dentry->d_sb;
25802+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
25803+ if (*did)
25804+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
25805+
25806+ return err;
25807+}
25808+
25809+/* ---------------------------------------------------------------------- */
25810+
4a4d8108 25811+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 25812+{
25813+ struct file *file;
25814+
4a4d8108 25815+ path_get(path);
0c5527e5 25816+ file = dentry_open(path->dentry, path->mnt,
2cbb1c4b 25817+ flags /* | __FMODE_NONOTIFY */,
7f207e10 25818+ current_cred());
2cbb1c4b
JR
25819+ if (!IS_ERR_OR_NULL(file)
25820+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
25821+ i_readcount_inc(path->dentry->d_inode);
4a4d8108 25822+
1308ab2a 25823+ return file;
25824+}
25825+
1facf9fc 25826+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
25827+{
25828+ struct file *file;
25829+
2cbb1c4b 25830+ lockdep_off();
7f207e10 25831+ file = filp_open(path,
2cbb1c4b 25832+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 25833+ mode);
2cbb1c4b 25834+ lockdep_on();
1facf9fc 25835+ if (IS_ERR(file))
25836+ goto out;
25837+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
25838+
4f0767ce 25839+out:
1facf9fc 25840+ return file;
25841+}
25842+
25843+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
25844+{
25845+ int err;
25846+
1facf9fc 25847+ err = kern_path(name, flags, path);
1facf9fc 25848+ if (!err && path->dentry->d_inode)
25849+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
25850+ return err;
25851+}
25852+
25853+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
25854+ int len)
25855+{
25856+ struct path path = {
25857+ .mnt = NULL
25858+ };
25859+
1308ab2a 25860+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 25861+ IMustLock(parent->d_inode);
25862+
25863+ path.dentry = lookup_one_len(name, parent, len);
25864+ if (IS_ERR(path.dentry))
25865+ goto out;
25866+ if (path.dentry->d_inode)
25867+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25868+
4f0767ce 25869+out:
4a4d8108 25870+ AuTraceErrPtr(path.dentry);
1facf9fc 25871+ return path.dentry;
25872+}
25873+
25874+struct dentry *vfsub_lookup_hash(struct nameidata *nd)
25875+{
25876+ struct path path = {
25877+ .mnt = nd->path.mnt
25878+ };
25879+
25880+ IMustLock(nd->path.dentry->d_inode);
25881+
25882+ path.dentry = lookup_hash(nd);
4a4d8108
AM
25883+ if (IS_ERR(path.dentry))
25884+ goto out;
25885+ if (path.dentry->d_inode)
1facf9fc 25886+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25887+
4f0767ce 25888+out:
4a4d8108 25889+ AuTraceErrPtr(path.dentry);
1facf9fc 25890+ return path.dentry;
25891+}
25892+
2cbb1c4b
JR
25893+/*
25894+ * this is "VFS:__lookup_one_len()" which was removed and merged into
25895+ * VFS:lookup_one_len() by the commit.
25896+ * 6a96ba5 2011-03-14 kill __lookup_one_len()
25897+ * this function should always be equivalent to the corresponding part in
25898+ * VFS:lookup_one_len().
25899+ */
25900+int vfsub_name_hash(const char *name, struct qstr *this, int len)
25901+{
25902+ unsigned long hash;
25903+ unsigned int c;
25904+
25905+ this->name = name;
25906+ this->len = len;
25907+ if (!len)
25908+ return -EACCES;
25909+
25910+ hash = init_name_hash();
25911+ while (len--) {
25912+ c = *(const unsigned char *)name++;
25913+ if (c == '/' || c == '\0')
25914+ return -EACCES;
25915+ hash = partial_name_hash(c, hash);
25916+ }
25917+ this->hash = end_name_hash(hash);
25918+ return 0;
25919+}
25920+
1facf9fc 25921+/* ---------------------------------------------------------------------- */
25922+
25923+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
25924+ struct dentry *d2, struct au_hinode *hdir2)
25925+{
25926+ struct dentry *d;
25927+
2cbb1c4b 25928+ lockdep_off();
1facf9fc 25929+ d = lock_rename(d1, d2);
2cbb1c4b 25930+ lockdep_on();
4a4d8108 25931+ au_hn_suspend(hdir1);
1facf9fc 25932+ if (hdir1 != hdir2)
4a4d8108 25933+ au_hn_suspend(hdir2);
1facf9fc 25934+
25935+ return d;
25936+}
25937+
25938+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
25939+ struct dentry *d2, struct au_hinode *hdir2)
25940+{
4a4d8108 25941+ au_hn_resume(hdir1);
1facf9fc 25942+ if (hdir1 != hdir2)
4a4d8108 25943+ au_hn_resume(hdir2);
2cbb1c4b 25944+ lockdep_off();
1facf9fc 25945+ unlock_rename(d1, d2);
2cbb1c4b 25946+ lockdep_on();
1facf9fc 25947+}
25948+
25949+/* ---------------------------------------------------------------------- */
25950+
25951+int vfsub_create(struct inode *dir, struct path *path, int mode)
25952+{
25953+ int err;
25954+ struct dentry *d;
25955+
25956+ IMustLock(dir);
25957+
25958+ d = path->dentry;
25959+ path->dentry = d->d_parent;
b752ccd1 25960+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 25961+ path->dentry = d;
25962+ if (unlikely(err))
25963+ goto out;
25964+
25965+ if (au_test_fs_null_nd(dir->i_sb))
25966+ err = vfs_create(dir, path->dentry, mode, NULL);
25967+ else {
25968+ struct nameidata h_nd;
25969+
25970+ memset(&h_nd, 0, sizeof(h_nd));
25971+ h_nd.flags = LOOKUP_CREATE;
25972+ h_nd.intent.open.flags = O_CREAT
25973+ | vfsub_fmode_to_uint(FMODE_READ);
25974+ h_nd.intent.open.create_mode = mode;
25975+ h_nd.path.dentry = path->dentry->d_parent;
25976+ h_nd.path.mnt = path->mnt;
25977+ path_get(&h_nd.path);
25978+ err = vfs_create(dir, path->dentry, mode, &h_nd);
25979+ path_put(&h_nd.path);
25980+ }
25981+
25982+ if (!err) {
25983+ struct path tmp = *path;
25984+ int did;
25985+
25986+ vfsub_update_h_iattr(&tmp, &did);
25987+ if (did) {
25988+ tmp.dentry = path->dentry->d_parent;
25989+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25990+ }
25991+ /*ignore*/
25992+ }
25993+
4f0767ce 25994+out:
1facf9fc 25995+ return err;
25996+}
25997+
25998+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
25999+{
26000+ int err;
26001+ struct dentry *d;
26002+
26003+ IMustLock(dir);
26004+
26005+ d = path->dentry;
26006+ path->dentry = d->d_parent;
b752ccd1 26007+ err = security_path_symlink(path, d, symname);
1facf9fc 26008+ path->dentry = d;
26009+ if (unlikely(err))
26010+ goto out;
26011+
26012+ err = vfs_symlink(dir, path->dentry, symname);
26013+ if (!err) {
26014+ struct path tmp = *path;
26015+ int did;
26016+
26017+ vfsub_update_h_iattr(&tmp, &did);
26018+ if (did) {
26019+ tmp.dentry = path->dentry->d_parent;
26020+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26021+ }
26022+ /*ignore*/
26023+ }
26024+
4f0767ce 26025+out:
1facf9fc 26026+ return err;
26027+}
26028+
26029+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
26030+{
26031+ int err;
26032+ struct dentry *d;
26033+
26034+ IMustLock(dir);
26035+
26036+ d = path->dentry;
26037+ path->dentry = d->d_parent;
027c5e7a 26038+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 26039+ path->dentry = d;
26040+ if (unlikely(err))
26041+ goto out;
26042+
26043+ err = vfs_mknod(dir, path->dentry, mode, dev);
26044+ if (!err) {
26045+ struct path tmp = *path;
26046+ int did;
26047+
26048+ vfsub_update_h_iattr(&tmp, &did);
26049+ if (did) {
26050+ tmp.dentry = path->dentry->d_parent;
26051+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26052+ }
26053+ /*ignore*/
26054+ }
26055+
4f0767ce 26056+out:
1facf9fc 26057+ return err;
26058+}
26059+
26060+static int au_test_nlink(struct inode *inode)
26061+{
26062+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
26063+
26064+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
26065+ || inode->i_nlink < link_max)
26066+ return 0;
26067+ return -EMLINK;
26068+}
26069+
26070+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
26071+{
26072+ int err;
26073+ struct dentry *d;
26074+
26075+ IMustLock(dir);
26076+
26077+ err = au_test_nlink(src_dentry->d_inode);
26078+ if (unlikely(err))
26079+ return err;
26080+
26081+ d = path->dentry;
26082+ path->dentry = d->d_parent;
b752ccd1 26083+ err = security_path_link(src_dentry, path, d);
1facf9fc 26084+ path->dentry = d;
26085+ if (unlikely(err))
26086+ goto out;
26087+
2cbb1c4b 26088+ lockdep_off();
1facf9fc 26089+ err = vfs_link(src_dentry, dir, path->dentry);
2cbb1c4b 26090+ lockdep_on();
1facf9fc 26091+ if (!err) {
26092+ struct path tmp = *path;
26093+ int did;
26094+
26095+ /* fuse has different memory inode for the same inumber */
26096+ vfsub_update_h_iattr(&tmp, &did);
26097+ if (did) {
26098+ tmp.dentry = path->dentry->d_parent;
26099+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26100+ tmp.dentry = src_dentry;
26101+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26102+ }
26103+ /*ignore*/
26104+ }
26105+
4f0767ce 26106+out:
1facf9fc 26107+ return err;
26108+}
26109+
26110+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
26111+ struct inode *dir, struct path *path)
26112+{
26113+ int err;
26114+ struct path tmp = {
26115+ .mnt = path->mnt
26116+ };
26117+ struct dentry *d;
26118+
26119+ IMustLock(dir);
26120+ IMustLock(src_dir);
26121+
26122+ d = path->dentry;
26123+ path->dentry = d->d_parent;
26124+ tmp.dentry = src_dentry->d_parent;
b752ccd1 26125+ err = security_path_rename(&tmp, src_dentry, path, d);
1facf9fc 26126+ path->dentry = d;
26127+ if (unlikely(err))
26128+ goto out;
26129+
2cbb1c4b 26130+ lockdep_off();
1facf9fc 26131+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
2cbb1c4b 26132+ lockdep_on();
1facf9fc 26133+ if (!err) {
26134+ int did;
26135+
26136+ tmp.dentry = d->d_parent;
26137+ vfsub_update_h_iattr(&tmp, &did);
26138+ if (did) {
26139+ tmp.dentry = src_dentry;
26140+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26141+ tmp.dentry = src_dentry->d_parent;
26142+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26143+ }
26144+ /*ignore*/
26145+ }
26146+
4f0767ce 26147+out:
1facf9fc 26148+ return err;
26149+}
26150+
26151+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
26152+{
26153+ int err;
26154+ struct dentry *d;
26155+
26156+ IMustLock(dir);
26157+
26158+ d = path->dentry;
26159+ path->dentry = d->d_parent;
b752ccd1 26160+ err = security_path_mkdir(path, d, mode);
1facf9fc 26161+ path->dentry = d;
26162+ if (unlikely(err))
26163+ goto out;
26164+
26165+ err = vfs_mkdir(dir, path->dentry, mode);
26166+ if (!err) {
26167+ struct path tmp = *path;
26168+ int did;
26169+
26170+ vfsub_update_h_iattr(&tmp, &did);
26171+ if (did) {
26172+ tmp.dentry = path->dentry->d_parent;
26173+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26174+ }
26175+ /*ignore*/
26176+ }
26177+
4f0767ce 26178+out:
1facf9fc 26179+ return err;
26180+}
26181+
26182+int vfsub_rmdir(struct inode *dir, struct path *path)
26183+{
26184+ int err;
26185+ struct dentry *d;
26186+
26187+ IMustLock(dir);
26188+
26189+ d = path->dentry;
26190+ path->dentry = d->d_parent;
b752ccd1 26191+ err = security_path_rmdir(path, d);
1facf9fc 26192+ path->dentry = d;
26193+ if (unlikely(err))
26194+ goto out;
26195+
2cbb1c4b 26196+ lockdep_off();
1facf9fc 26197+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 26198+ lockdep_on();
1facf9fc 26199+ if (!err) {
26200+ struct path tmp = {
26201+ .dentry = path->dentry->d_parent,
26202+ .mnt = path->mnt
26203+ };
26204+
26205+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26206+ }
26207+
4f0767ce 26208+out:
1facf9fc 26209+ return err;
26210+}
26211+
26212+/* ---------------------------------------------------------------------- */
26213+
9dbd164d 26214+/* todo: support mmap_sem? */
1facf9fc 26215+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26216+ loff_t *ppos)
26217+{
26218+ ssize_t err;
26219+
2cbb1c4b 26220+ lockdep_off();
1facf9fc 26221+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 26222+ lockdep_on();
1facf9fc 26223+ if (err >= 0)
26224+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26225+ return err;
26226+}
26227+
26228+/* todo: kernel_read()? */
26229+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26230+ loff_t *ppos)
26231+{
26232+ ssize_t err;
26233+ mm_segment_t oldfs;
b752ccd1
AM
26234+ union {
26235+ void *k;
26236+ char __user *u;
26237+ } buf;
1facf9fc 26238+
b752ccd1 26239+ buf.k = kbuf;
1facf9fc 26240+ oldfs = get_fs();
26241+ set_fs(KERNEL_DS);
b752ccd1 26242+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 26243+ set_fs(oldfs);
26244+ return err;
26245+}
26246+
26247+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26248+ loff_t *ppos)
26249+{
26250+ ssize_t err;
26251+
2cbb1c4b 26252+ lockdep_off();
1facf9fc 26253+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 26254+ lockdep_on();
1facf9fc 26255+ if (err >= 0)
26256+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26257+ return err;
26258+}
26259+
26260+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
26261+{
26262+ ssize_t err;
26263+ mm_segment_t oldfs;
b752ccd1
AM
26264+ union {
26265+ void *k;
26266+ const char __user *u;
26267+ } buf;
1facf9fc 26268+
b752ccd1 26269+ buf.k = kbuf;
1facf9fc 26270+ oldfs = get_fs();
26271+ set_fs(KERNEL_DS);
b752ccd1 26272+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 26273+ set_fs(oldfs);
26274+ return err;
26275+}
26276+
4a4d8108
AM
26277+int vfsub_flush(struct file *file, fl_owner_t id)
26278+{
26279+ int err;
26280+
26281+ err = 0;
26282+ if (file->f_op && file->f_op->flush) {
2cbb1c4b
JR
26283+ if (!au_test_nfs(file->f_dentry->d_sb))
26284+ err = file->f_op->flush(file, id);
26285+ else {
26286+ lockdep_off();
26287+ err = file->f_op->flush(file, id);
26288+ lockdep_on();
26289+ }
4a4d8108
AM
26290+ if (!err)
26291+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
26292+ /*ignore*/
26293+ }
26294+ return err;
26295+}
26296+
1facf9fc 26297+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
26298+{
26299+ int err;
26300+
2cbb1c4b 26301+ lockdep_off();
1facf9fc 26302+ err = vfs_readdir(file, filldir, arg);
2cbb1c4b 26303+ lockdep_on();
1facf9fc 26304+ if (err >= 0)
26305+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26306+ return err;
26307+}
26308+
26309+long vfsub_splice_to(struct file *in, loff_t *ppos,
26310+ struct pipe_inode_info *pipe, size_t len,
26311+ unsigned int flags)
26312+{
26313+ long err;
26314+
2cbb1c4b 26315+ lockdep_off();
0fc653ad 26316+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 26317+ lockdep_on();
4a4d8108 26318+ file_accessed(in);
1facf9fc 26319+ if (err >= 0)
26320+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
26321+ return err;
26322+}
26323+
26324+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26325+ loff_t *ppos, size_t len, unsigned int flags)
26326+{
26327+ long err;
26328+
2cbb1c4b 26329+ lockdep_off();
0fc653ad 26330+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 26331+ lockdep_on();
1facf9fc 26332+ if (err >= 0)
26333+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
26334+ return err;
26335+}
26336+
53392da6
AM
26337+int vfsub_fsync(struct file *file, struct path *path, int datasync)
26338+{
26339+ int err;
26340+
26341+ /* file can be NULL */
26342+ lockdep_off();
26343+ err = vfs_fsync(file, datasync);
26344+ lockdep_on();
26345+ if (!err) {
26346+ if (!path) {
26347+ AuDebugOn(!file);
26348+ path = &file->f_path;
26349+ }
26350+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
26351+ }
26352+ return err;
26353+}
26354+
1facf9fc 26355+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
26356+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26357+ struct file *h_file)
26358+{
26359+ int err;
26360+ struct inode *h_inode;
26361+
26362+ h_inode = h_path->dentry->d_inode;
26363+ if (!h_file) {
26364+ err = mnt_want_write(h_path->mnt);
26365+ if (err)
26366+ goto out;
26367+ err = inode_permission(h_inode, MAY_WRITE);
26368+ if (err)
26369+ goto out_mnt;
26370+ err = get_write_access(h_inode);
26371+ if (err)
26372+ goto out_mnt;
4a4d8108 26373+ err = break_lease(h_inode, O_WRONLY);
1facf9fc 26374+ if (err)
26375+ goto out_inode;
26376+ }
26377+
26378+ err = locks_verify_truncate(h_inode, h_file, length);
26379+ if (!err)
953406b4 26380+ err = security_path_truncate(h_path);
2cbb1c4b
JR
26381+ if (!err) {
26382+ lockdep_off();
1facf9fc 26383+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
26384+ lockdep_on();
26385+ }
1facf9fc 26386+
4f0767ce 26387+out_inode:
1facf9fc 26388+ if (!h_file)
26389+ put_write_access(h_inode);
4f0767ce 26390+out_mnt:
1facf9fc 26391+ if (!h_file)
26392+ mnt_drop_write(h_path->mnt);
4f0767ce 26393+out:
1facf9fc 26394+ return err;
26395+}
26396+
26397+/* ---------------------------------------------------------------------- */
26398+
/* argument bundle handed to au_call_vfsub_mkdir() through a void pointer */
struct au_vfsub_mkdir_args {
	int *errp;		/* receives the result of vfsub_mkdir() */
	struct inode *dir;
	struct path *path;
	int mode;
};

/* unpack the bundle, run vfsub_mkdir() and store its result via ->errp */
static void au_call_vfsub_mkdir(void *args)
{
	struct au_vfsub_mkdir_args *mk = args;
	int ret;

	ret = vfsub_mkdir(mk->dir, mk->path, mk->mode);
	*mk->errp = ret;
}
26411+
26412+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
26413+{
26414+ int err, do_sio, wkq_err;
26415+
26416+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26417+ if (!do_sio)
26418+ err = vfsub_mkdir(dir, path, mode);
26419+ else {
26420+ struct au_vfsub_mkdir_args args = {
26421+ .errp = &err,
26422+ .dir = dir,
26423+ .path = path,
26424+ .mode = mode
26425+ };
26426+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
26427+ if (unlikely(wkq_err))
26428+ err = wkq_err;
26429+ }
26430+
26431+ return err;
26432+}
26433+
/* argument bundle handed to au_call_vfsub_rmdir() through a void pointer */
struct au_vfsub_rmdir_args {
	int *errp;		/* receives the result of vfsub_rmdir() */
	struct inode *dir;
	struct path *path;
};

/* unpack the bundle, run vfsub_rmdir() and store its result via ->errp */
static void au_call_vfsub_rmdir(void *args)
{
	struct au_vfsub_rmdir_args *rm = args;
	int ret;

	ret = vfsub_rmdir(rm->dir, rm->path);
	*rm->errp = ret;
}
26445+
26446+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
26447+{
26448+ int err, do_sio, wkq_err;
26449+
26450+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26451+ if (!do_sio)
26452+ err = vfsub_rmdir(dir, path);
26453+ else {
26454+ struct au_vfsub_rmdir_args args = {
26455+ .errp = &err,
26456+ .dir = dir,
26457+ .path = path
26458+ };
26459+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
26460+ if (unlikely(wkq_err))
26461+ err = wkq_err;
26462+ }
26463+
26464+ return err;
26465+}
26466+
26467+/* ---------------------------------------------------------------------- */
26468+
26469+struct notify_change_args {
26470+ int *errp;
26471+ struct path *path;
26472+ struct iattr *ia;
26473+};
26474+
26475+static void call_notify_change(void *args)
26476+{
26477+ struct notify_change_args *a = args;
26478+ struct inode *h_inode;
26479+
26480+ h_inode = a->path->dentry->d_inode;
26481+ IMustLock(h_inode);
26482+
26483+ *a->errp = -EPERM;
26484+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
1facf9fc 26485+ *a->errp = notify_change(a->path->dentry, a->ia);
1facf9fc 26486+ if (!*a->errp)
26487+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
26488+ }
26489+ AuTraceErr(*a->errp);
26490+}
26491+
26492+int vfsub_notify_change(struct path *path, struct iattr *ia)
26493+{
26494+ int err;
26495+ struct notify_change_args args = {
26496+ .errp = &err,
26497+ .path = path,
26498+ .ia = ia
26499+ };
26500+
26501+ call_notify_change(&args);
26502+
26503+ return err;
26504+}
26505+
26506+int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
26507+{
26508+ int err, wkq_err;
26509+ struct notify_change_args args = {
26510+ .errp = &err,
26511+ .path = path,
26512+ .ia = ia
26513+ };
26514+
26515+ wkq_err = au_wkq_wait(call_notify_change, &args);
26516+ if (unlikely(wkq_err))
26517+ err = wkq_err;
26518+
26519+ return err;
26520+}
26521+
26522+/* ---------------------------------------------------------------------- */
26523+
26524+struct unlink_args {
26525+ int *errp;
26526+ struct inode *dir;
26527+ struct path *path;
26528+};
26529+
26530+static void call_unlink(void *args)
26531+{
26532+ struct unlink_args *a = args;
26533+ struct dentry *d = a->path->dentry;
26534+ struct inode *h_inode;
26535+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
027c5e7a 26536+ && d->d_count == 1);
1facf9fc 26537+
26538+ IMustLock(a->dir);
26539+
26540+ a->path->dentry = d->d_parent;
26541+ *a->errp = security_path_unlink(a->path, d);
26542+ a->path->dentry = d;
26543+ if (unlikely(*a->errp))
26544+ return;
26545+
26546+ if (!stop_sillyrename)
26547+ dget(d);
26548+ h_inode = d->d_inode;
26549+ if (h_inode)
027c5e7a 26550+ ihold(h_inode);
1facf9fc 26551+
2cbb1c4b 26552+ lockdep_off();
1facf9fc 26553+ *a->errp = vfs_unlink(a->dir, d);
2cbb1c4b 26554+ lockdep_on();
1facf9fc 26555+ if (!*a->errp) {
26556+ struct path tmp = {
26557+ .dentry = d->d_parent,
26558+ .mnt = a->path->mnt
26559+ };
26560+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26561+ }
26562+
26563+ if (!stop_sillyrename)
26564+ dput(d);
26565+ if (h_inode)
26566+ iput(h_inode);
26567+
26568+ AuTraceErr(*a->errp);
26569+}
26570+
26571+/*
26572+ * @dir: must be locked.
26573+ * @dentry: target dentry.
26574+ */
26575+int vfsub_unlink(struct inode *dir, struct path *path, int force)
26576+{
26577+ int err;
26578+ struct unlink_args args = {
26579+ .errp = &err,
26580+ .dir = dir,
26581+ .path = path
26582+ };
26583+
26584+ if (!force)
26585+ call_unlink(&args);
26586+ else {
26587+ int wkq_err;
26588+
26589+ wkq_err = au_wkq_wait(call_unlink, &args);
26590+ if (unlikely(wkq_err))
26591+ err = wkq_err;
26592+ }
26593+
26594+ return err;
26595+}
7f207e10
AM
26596diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
26597--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 26598+++ linux/fs/aufs/vfsub.h 2012-02-13 21:54:56.973105100 +0100
53392da6 26599@@ -0,0 +1,232 @@
1facf9fc 26600+/*
f6c5ef8b 26601+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 26602+ *
26603+ * This program, aufs is free software; you can redistribute it and/or modify
26604+ * it under the terms of the GNU General Public License as published by
26605+ * the Free Software Foundation; either version 2 of the License, or
26606+ * (at your option) any later version.
dece6358
AM
26607+ *
26608+ * This program is distributed in the hope that it will be useful,
26609+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26610+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26611+ * GNU General Public License for more details.
26612+ *
26613+ * You should have received a copy of the GNU General Public License
26614+ * along with this program; if not, write to the Free Software
26615+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26616+ */
26617+
26618+/*
26619+ * sub-routines for VFS
26620+ */
26621+
26622+#ifndef __AUFS_VFSUB_H__
26623+#define __AUFS_VFSUB_H__
26624+
26625+#ifdef __KERNEL__
26626+
26627+#include <linux/fs.h>
0c5527e5 26628+#include <linux/lglock.h>
7f207e10 26629+#include "debug.h"
1facf9fc 26630+
7f207e10 26631+/* copied from linux/fs/internal.h */
2cbb1c4b 26632+/* todo: BAD approach!! */
0c5527e5 26633+DECLARE_BRLOCK(vfsmount_lock);
0c5527e5 26634+extern void file_sb_list_del(struct file *f);
2cbb1c4b 26635+extern spinlock_t inode_sb_list_lock;
0c5527e5 26636+
7f207e10
AM
26637+/* copied from linux/fs/file_table.c */
26638+DECLARE_LGLOCK(files_lglock);
0c5527e5
AM
#ifdef CONFIG_SMP
/*
 * These macros iterate all files on all CPUs for a given superblock.
 * files_lglock must be held globally.
 *
 * Usage:
 *	do_file_list_for_each_entry(sb, file) {
 *		...
 *	} while_file_list_for_each_entry;
 *
 * The opening macro leaves braces open which the closing macro terminates,
 * so the two must always be used as a pair.
 */
#define do_file_list_for_each_entry(__sb, __file)		\
{								\
	int i;							\
	for_each_possible_cpu(i) {				\
		struct list_head *list;				\
		list = per_cpu_ptr((__sb)->s_files, i);		\
		list_for_each_entry((__file), list, f_u.fu_list)

#define while_file_list_for_each_entry				\
	}							\
}

#else

/*
 * Uniprocessor variant: the superblock has a single file list.
 * The macro argument __sb is used here (the previous text referred to a
 * variable literally named "sb" and silently ignored the argument).
 */
#define do_file_list_for_each_entry(__sb, __file)		\
{								\
	struct list_head *list;					\
	list = &(__sb)->s_files;				\
	list_for_each_entry((__file), list, f_u.fu_list)

#define while_file_list_for_each_entry				\
}

#endif
26667+
26668+/* ---------------------------------------------------------------------- */
1facf9fc 26669+
26670+/* lock subclass for lower inode */
26671+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
26672+/* reduce? gave up. */
26673+enum {
26674+ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
26675+ AuLsc_I_PARENT, /* lower inode, parent first */
26676+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 26677+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 26678+ AuLsc_I_CHILD,
26679+ AuLsc_I_CHILD2,
26680+ AuLsc_I_End
26681+};
26682+
/*
 * Debug assertions that a mutex (or an inode's i_mutex) is currently locked.
 * They stay macros rather than inline functions to make debugging easier.
 */
#define MtxMustLock(mtx)	AuDebugOn(!mutex_is_locked(mtx))
#define IMustLock(i)		AuDebugOn(!mutex_is_locked(&(i)->i_mutex))
26686+
26687+/* ---------------------------------------------------------------------- */
26688+
7f207e10
AM
26689+static inline void vfsub_drop_nlink(struct inode *inode)
26690+{
26691+ AuDebugOn(!inode->i_nlink);
26692+ drop_nlink(inode);
26693+}
26694+
027c5e7a
AM
26695+static inline void vfsub_dead_dir(struct inode *inode)
26696+{
26697+ AuDebugOn(!S_ISDIR(inode->i_mode));
26698+ inode->i_flags |= S_DEAD;
26699+ clear_nlink(inode);
26700+}
26701+
7f207e10
AM
26702+/* ---------------------------------------------------------------------- */
26703+
26704+int vfsub_update_h_iattr(struct path *h_path, int *did);
26705+struct file *vfsub_dentry_open(struct path *path, int flags);
26706+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
1facf9fc 26707+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
26708+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
26709+ int len);
26710+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
2cbb1c4b 26711+int vfsub_name_hash(const char *name, struct qstr *this, int len);
1facf9fc 26712+
26713+/* ---------------------------------------------------------------------- */
26714+
26715+struct au_hinode;
26716+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
26717+ struct dentry *d2, struct au_hinode *hdir2);
26718+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
26719+ struct dentry *d2, struct au_hinode *hdir2);
26720+
26721+int vfsub_create(struct inode *dir, struct path *path, int mode);
26722+int vfsub_symlink(struct inode *dir, struct path *path,
26723+ const char *symname);
26724+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
26725+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
26726+ struct path *path);
26727+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
26728+ struct inode *hdir, struct path *path);
26729+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
26730+int vfsub_rmdir(struct inode *dir, struct path *path);
26731+
26732+/* ---------------------------------------------------------------------- */
26733+
26734+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26735+ loff_t *ppos);
26736+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26737+ loff_t *ppos);
26738+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26739+ loff_t *ppos);
26740+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
26741+ loff_t *ppos);
4a4d8108 26742+int vfsub_flush(struct file *file, fl_owner_t id);
1facf9fc 26743+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
26744+
4a4d8108
AM
26745+static inline unsigned int vfsub_file_flags(struct file *file)
26746+{
26747+ unsigned int flags;
26748+
26749+ spin_lock(&file->f_lock);
26750+ flags = file->f_flags;
26751+ spin_unlock(&file->f_lock);
26752+
26753+ return flags;
26754+}
1308ab2a 26755+
1facf9fc 26756+static inline void vfsub_file_accessed(struct file *h_file)
26757+{
26758+ file_accessed(h_file);
26759+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
26760+}
26761+
26762+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
26763+ struct dentry *h_dentry)
26764+{
26765+ struct path h_path = {
26766+ .dentry = h_dentry,
26767+ .mnt = h_mnt
26768+ };
26769+ touch_atime(h_mnt, h_dentry);
26770+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
26771+}
26772+
4a4d8108
AM
26773+long vfsub_splice_to(struct file *in, loff_t *ppos,
26774+ struct pipe_inode_info *pipe, size_t len,
26775+ unsigned int flags);
26776+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26777+ loff_t *ppos, size_t len, unsigned int flags);
26778+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26779+ struct file *h_file);
53392da6 26780+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 26781+
1facf9fc 26782+/* ---------------------------------------------------------------------- */
26783+
26784+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
26785+{
26786+ loff_t err;
26787+
2cbb1c4b 26788+ lockdep_off();
1facf9fc 26789+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 26790+ lockdep_on();
1facf9fc 26791+ return err;
26792+}
26793+
26794+/* ---------------------------------------------------------------------- */
26795+
26796+/* dirty workaround for strict type of fmode_t */
26797+union vfsub_fmu {
26798+ fmode_t fm;
26799+ unsigned int ui;
26800+};
26801+
26802+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
26803+{
26804+ union vfsub_fmu u = {
26805+ .fm = fm
26806+ };
26807+
26808+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui));
26809+
26810+ return u.ui;
26811+}
26812+
26813+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
26814+{
26815+ union vfsub_fmu u = {
26816+ .ui = ui
26817+ };
26818+
26819+ return u.fm;
26820+}
26821+
4a4d8108
AM
26822+/* ---------------------------------------------------------------------- */
26823+
26824+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
26825+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
26826+int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
26827+int vfsub_notify_change(struct path *path, struct iattr *ia);
26828+int vfsub_unlink(struct inode *dir, struct path *path, int force);
26829+
1facf9fc 26830+#endif /* __KERNEL__ */
26831+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
26832diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
26833--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 26834+++ linux/fs/aufs/wbr_policy.c 2012-02-13 21:54:56.973105100 +0100
027c5e7a 26835@@ -0,0 +1,700 @@
1facf9fc 26836+/*
f6c5ef8b 26837+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 26838+ *
26839+ * This program, aufs is free software; you can redistribute it and/or modify
26840+ * it under the terms of the GNU General Public License as published by
26841+ * the Free Software Foundation; either version 2 of the License, or
26842+ * (at your option) any later version.
dece6358
AM
26843+ *
26844+ * This program is distributed in the hope that it will be useful,
26845+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26846+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26847+ * GNU General Public License for more details.
26848+ *
26849+ * You should have received a copy of the GNU General Public License
26850+ * along with this program; if not, write to the Free Software
26851+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26852+ */
26853+
26854+/*
26855+ * policies for selecting one among multiple writable branches
26856+ */
26857+
26858+#include <linux/statfs.h>
26859+#include "aufs.h"
26860+
26861+/* subset of cpup_attr() */
26862+static noinline_for_stack
26863+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
26864+{
26865+ int err, sbits;
26866+ struct iattr ia;
26867+ struct inode *h_isrc;
26868+
26869+ h_isrc = h_src->d_inode;
26870+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
26871+ ia.ia_mode = h_isrc->i_mode;
26872+ ia.ia_uid = h_isrc->i_uid;
26873+ ia.ia_gid = h_isrc->i_gid;
26874+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
26875+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
26876+ err = vfsub_sio_notify_change(h_path, &ia);
26877+
26878+ /* is this nfs only? */
26879+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
26880+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
26881+ ia.ia_mode = h_isrc->i_mode;
26882+ err = vfsub_sio_notify_change(h_path, &ia);
26883+ }
26884+
26885+ return err;
26886+}
26887+
/* bit flags kept in au_cpdown_dir_args.flags */
#define AuCpdown_PARENT_OPQ	(1 << 0)
#define AuCpdown_WHED		(1 << 1)
#define AuCpdown_MADE_DIR	(1 << 2)
#define AuCpdown_DIROPQ		(1 << 3)

/* test, set and clear a flag given by the suffix of its AuCpdown_ name */
#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
#define au_fset_cpdown(flags, name) \
	do { (flags) |= AuCpdown_##name; } while (0)
#define au_fclr_cpdown(flags, name) \
	do { (flags) &= ~AuCpdown_##name; } while (0)

/* state passed to au_cpdown_dir() via its void pointer argument */
struct au_cpdown_dir_args {
	struct dentry *parent;
	unsigned int flags;	/* AuCpdown_* bits */
};
26902+
26903+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
26904+ struct au_cpdown_dir_args *a)
26905+{
26906+ int err;
26907+ struct dentry *opq_dentry;
26908+
26909+ opq_dentry = au_diropq_create(dentry, bdst);
26910+ err = PTR_ERR(opq_dentry);
26911+ if (IS_ERR(opq_dentry))
26912+ goto out;
26913+ dput(opq_dentry);
26914+ au_fset_cpdown(a->flags, DIROPQ);
26915+
4f0767ce 26916+out:
1facf9fc 26917+ return err;
26918+}
26919+
26920+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
26921+ struct inode *dir, aufs_bindex_t bdst)
26922+{
26923+ int err;
26924+ struct path h_path;
26925+ struct au_branch *br;
26926+
26927+ br = au_sbr(dentry->d_sb, bdst);
26928+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
26929+ err = PTR_ERR(h_path.dentry);
26930+ if (IS_ERR(h_path.dentry))
26931+ goto out;
26932+
26933+ err = 0;
26934+ if (h_path.dentry->d_inode) {
26935+ h_path.mnt = br->br_mnt;
26936+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
26937+ dentry);
26938+ }
26939+ dput(h_path.dentry);
26940+
4f0767ce 26941+out:
1facf9fc 26942+ return err;
26943+}
26944+
26945+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
26946+ struct dentry *h_parent, void *arg)
26947+{
26948+ int err, rerr;
4a4d8108 26949+ aufs_bindex_t bopq, bstart;
1facf9fc 26950+ struct path h_path;
26951+ struct dentry *parent;
26952+ struct inode *h_dir, *h_inode, *inode, *dir;
26953+ struct au_cpdown_dir_args *args = arg;
26954+
26955+ bstart = au_dbstart(dentry);
26956+ /* dentry is di-locked */
26957+ parent = dget_parent(dentry);
26958+ dir = parent->d_inode;
26959+ h_dir = h_parent->d_inode;
26960+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
26961+ IMustLock(h_dir);
26962+
26963+ err = au_lkup_neg(dentry, bdst);
26964+ if (unlikely(err < 0))
26965+ goto out;
26966+ h_path.dentry = au_h_dptr(dentry, bdst);
26967+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
26968+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
26969+ S_IRWXU | S_IRUGO | S_IXUGO);
26970+ if (unlikely(err))
26971+ goto out_put;
26972+ au_fset_cpdown(args->flags, MADE_DIR);
26973+
1facf9fc 26974+ bopq = au_dbdiropq(dentry);
26975+ au_fclr_cpdown(args->flags, WHED);
26976+ au_fclr_cpdown(args->flags, DIROPQ);
26977+ if (au_dbwh(dentry) == bdst)
26978+ au_fset_cpdown(args->flags, WHED);
26979+ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
26980+ au_fset_cpdown(args->flags, PARENT_OPQ);
1facf9fc 26981+ h_inode = h_path.dentry->d_inode;
26982+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26983+ if (au_ftest_cpdown(args->flags, WHED)) {
26984+ err = au_cpdown_dir_opq(dentry, bdst, args);
26985+ if (unlikely(err)) {
26986+ mutex_unlock(&h_inode->i_mutex);
26987+ goto out_dir;
26988+ }
26989+ }
26990+
26991+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
26992+ mutex_unlock(&h_inode->i_mutex);
26993+ if (unlikely(err))
26994+ goto out_opq;
26995+
26996+ if (au_ftest_cpdown(args->flags, WHED)) {
26997+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
26998+ if (unlikely(err))
26999+ goto out_opq;
27000+ }
27001+
27002+ inode = dentry->d_inode;
27003+ if (au_ibend(inode) < bdst)
27004+ au_set_ibend(inode, bdst);
27005+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
27006+ au_hi_flags(inode, /*isdir*/1));
27007+ goto out; /* success */
27008+
27009+ /* revert */
4f0767ce 27010+out_opq:
1facf9fc 27011+ if (au_ftest_cpdown(args->flags, DIROPQ)) {
27012+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
27013+ rerr = au_diropq_remove(dentry, bdst);
27014+ mutex_unlock(&h_inode->i_mutex);
27015+ if (unlikely(rerr)) {
27016+ AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
27017+ AuDLNPair(dentry), bdst, rerr);
27018+ err = -EIO;
27019+ goto out;
27020+ }
27021+ }
4f0767ce 27022+out_dir:
1facf9fc 27023+ if (au_ftest_cpdown(args->flags, MADE_DIR)) {
27024+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
27025+ if (unlikely(rerr)) {
27026+ AuIOErr("failed removing %.*s b%d (%d)\n",
27027+ AuDLNPair(dentry), bdst, rerr);
27028+ err = -EIO;
27029+ }
27030+ }
4f0767ce 27031+out_put:
1facf9fc 27032+ au_set_h_dptr(dentry, bdst, NULL);
27033+ if (au_dbend(dentry) == bdst)
27034+ au_update_dbend(dentry);
4f0767ce 27035+out:
1facf9fc 27036+ dput(parent);
27037+ return err;
27038+}
27039+
27040+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
27041+{
27042+ int err;
27043+ struct au_cpdown_dir_args args = {
27044+ .parent = dget_parent(dentry),
27045+ .flags = 0
27046+ };
27047+
27048+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
27049+ dput(args.parent);
27050+
27051+ return err;
27052+}
27053+
27054+/* ---------------------------------------------------------------------- */
27055+
27056+/* policies for create */
27057+
4a4d8108
AM
27058+static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
27059+{
27060+ int err, i, j, ndentry;
27061+ aufs_bindex_t bopq;
27062+ struct au_dcsub_pages dpages;
27063+ struct au_dpage *dpage;
27064+ struct dentry **dentries, *parent, *d;
27065+
27066+ err = au_dpages_init(&dpages, GFP_NOFS);
27067+ if (unlikely(err))
27068+ goto out;
27069+ parent = dget_parent(dentry);
027c5e7a 27070+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
27071+ if (unlikely(err))
27072+ goto out_free;
27073+
27074+ err = bindex;
27075+ for (i = 0; i < dpages.ndpage; i++) {
27076+ dpage = dpages.dpages + i;
27077+ dentries = dpage->dentries;
27078+ ndentry = dpage->ndentry;
27079+ for (j = 0; j < ndentry; j++) {
27080+ d = dentries[j];
27081+ di_read_lock_parent2(d, !AuLock_IR);
27082+ bopq = au_dbdiropq(d);
27083+ di_read_unlock(d, !AuLock_IR);
27084+ if (bopq >= 0 && bopq < err)
27085+ err = bopq;
27086+ }
27087+ }
27088+
27089+out_free:
27090+ dput(parent);
27091+ au_dpages_free(&dpages);
27092+out:
27093+ return err;
27094+}
27095+
1facf9fc 27096+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
27097+{
27098+ for (; bindex >= 0; bindex--)
27099+ if (!au_br_rdonly(au_sbr(sb, bindex)))
27100+ return bindex;
27101+ return -EROFS;
27102+}
27103+
27104+/* top down parent */
27105+static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
27106+{
27107+ int err;
27108+ aufs_bindex_t bstart, bindex;
27109+ struct super_block *sb;
27110+ struct dentry *parent, *h_parent;
27111+
27112+ sb = dentry->d_sb;
27113+ bstart = au_dbstart(dentry);
27114+ err = bstart;
27115+ if (!au_br_rdonly(au_sbr(sb, bstart)))
27116+ goto out;
27117+
27118+ err = -EROFS;
27119+ parent = dget_parent(dentry);
27120+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
27121+ h_parent = au_h_dptr(parent, bindex);
27122+ if (!h_parent || !h_parent->d_inode)
27123+ continue;
27124+
27125+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27126+ err = bindex;
27127+ break;
27128+ }
27129+ }
27130+ dput(parent);
27131+
27132+ /* bottom up here */
4a4d8108 27133+ if (unlikely(err < 0)) {
1facf9fc 27134+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
27135+ if (err >= 0)
27136+ err = au_wbr_nonopq(dentry, err);
27137+ }
1facf9fc 27138+
4f0767ce 27139+out:
1facf9fc 27140+ AuDbg("b%d\n", err);
27141+ return err;
27142+}
27143+
27144+/* ---------------------------------------------------------------------- */
27145+
27146+/* an exception for the policy other than tdp */
27147+static int au_wbr_create_exp(struct dentry *dentry)
27148+{
27149+ int err;
27150+ aufs_bindex_t bwh, bdiropq;
27151+ struct dentry *parent;
27152+
27153+ err = -1;
27154+ bwh = au_dbwh(dentry);
27155+ parent = dget_parent(dentry);
27156+ bdiropq = au_dbdiropq(parent);
27157+ if (bwh >= 0) {
27158+ if (bdiropq >= 0)
27159+ err = min(bdiropq, bwh);
27160+ else
27161+ err = bwh;
27162+ AuDbg("%d\n", err);
27163+ } else if (bdiropq >= 0) {
27164+ err = bdiropq;
27165+ AuDbg("%d\n", err);
27166+ }
27167+ dput(parent);
27168+
4a4d8108
AM
27169+ if (err >= 0)
27170+ err = au_wbr_nonopq(dentry, err);
27171+
1facf9fc 27172+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
27173+ err = -1;
27174+
27175+ AuDbg("%d\n", err);
27176+ return err;
27177+}
27178+
27179+/* ---------------------------------------------------------------------- */
27180+
27181+/* round robin */
27182+static int au_wbr_create_init_rr(struct super_block *sb)
27183+{
27184+ int err;
27185+
27186+ err = au_wbr_bu(sb, au_sbend(sb));
27187+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 27188+ /* smp_mb(); */
1facf9fc 27189+
27190+ AuDbg("b%d\n", err);
27191+ return err;
27192+}
27193+
27194+static int au_wbr_create_rr(struct dentry *dentry, int isdir)
27195+{
27196+ int err, nbr;
27197+ unsigned int u;
27198+ aufs_bindex_t bindex, bend;
27199+ struct super_block *sb;
27200+ atomic_t *next;
27201+
27202+ err = au_wbr_create_exp(dentry);
27203+ if (err >= 0)
27204+ goto out;
27205+
27206+ sb = dentry->d_sb;
27207+ next = &au_sbi(sb)->si_wbr_rr_next;
27208+ bend = au_sbend(sb);
27209+ nbr = bend + 1;
27210+ for (bindex = 0; bindex <= bend; bindex++) {
27211+ if (!isdir) {
27212+ err = atomic_dec_return(next) + 1;
27213+ /* modulo for 0 is meaningless */
27214+ if (unlikely(!err))
27215+ err = atomic_dec_return(next) + 1;
27216+ } else
27217+ err = atomic_read(next);
27218+ AuDbg("%d\n", err);
27219+ u = err;
27220+ err = u % nbr;
27221+ AuDbg("%d\n", err);
27222+ if (!au_br_rdonly(au_sbr(sb, err)))
27223+ break;
27224+ err = -EROFS;
27225+ }
27226+
4a4d8108
AM
27227+ if (err >= 0)
27228+ err = au_wbr_nonopq(dentry, err);
27229+
4f0767ce 27230+out:
1facf9fc 27231+ AuDbg("%d\n", err);
27232+ return err;
27233+}
27234+
27235+/* ---------------------------------------------------------------------- */
27236+
27237+/* most free space */
27238+static void au_mfs(struct dentry *dentry)
27239+{
27240+ struct super_block *sb;
27241+ struct au_branch *br;
27242+ struct au_wbr_mfs *mfs;
27243+ aufs_bindex_t bindex, bend;
27244+ int err;
27245+ unsigned long long b, bavail;
7f207e10 27246+ struct path h_path;
1facf9fc 27247+ /* reduce the stack usage */
27248+ struct kstatfs *st;
27249+
27250+ st = kmalloc(sizeof(*st), GFP_NOFS);
27251+ if (unlikely(!st)) {
27252+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
27253+ return;
27254+ }
27255+
27256+ bavail = 0;
27257+ sb = dentry->d_sb;
27258+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 27259+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 27260+ mfs->mfs_bindex = -EROFS;
27261+ mfs->mfsrr_bytes = 0;
27262+ bend = au_sbend(sb);
27263+ for (bindex = 0; bindex <= bend; bindex++) {
27264+ br = au_sbr(sb, bindex);
27265+ if (au_br_rdonly(br))
27266+ continue;
27267+
27268+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27269+ h_path.mnt = br->br_mnt;
27270+ h_path.dentry = h_path.mnt->mnt_root;
27271+ err = vfs_statfs(&h_path, st);
1facf9fc 27272+ if (unlikely(err)) {
27273+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
27274+ continue;
27275+ }
27276+
27277+ /* when the available size is equal, select the lower one */
27278+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
27279+ || sizeof(b) < sizeof(st->f_bsize));
27280+ b = st->f_bavail * st->f_bsize;
27281+ br->br_wbr->wbr_bytes = b;
27282+ if (b >= bavail) {
27283+ bavail = b;
27284+ mfs->mfs_bindex = bindex;
27285+ mfs->mfs_jiffy = jiffies;
27286+ }
27287+ }
27288+
27289+ mfs->mfsrr_bytes = bavail;
27290+ AuDbg("b%d\n", mfs->mfs_bindex);
27291+ kfree(st);
27292+}
27293+
27294+static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
27295+{
27296+ int err;
27297+ struct super_block *sb;
27298+ struct au_wbr_mfs *mfs;
27299+
27300+ err = au_wbr_create_exp(dentry);
27301+ if (err >= 0)
27302+ goto out;
27303+
27304+ sb = dentry->d_sb;
27305+ mfs = &au_sbi(sb)->si_wbr_mfs;
27306+ mutex_lock(&mfs->mfs_lock);
27307+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
27308+ || mfs->mfs_bindex < 0
27309+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
27310+ au_mfs(dentry);
27311+ mutex_unlock(&mfs->mfs_lock);
27312+ err = mfs->mfs_bindex;
27313+
4a4d8108
AM
27314+ if (err >= 0)
27315+ err = au_wbr_nonopq(dentry, err);
27316+
4f0767ce 27317+out:
1facf9fc 27318+ AuDbg("b%d\n", err);
27319+ return err;
27320+}
27321+
27322+static int au_wbr_create_init_mfs(struct super_block *sb)
27323+{
27324+ struct au_wbr_mfs *mfs;
27325+
27326+ mfs = &au_sbi(sb)->si_wbr_mfs;
27327+ mutex_init(&mfs->mfs_lock);
27328+ mfs->mfs_jiffy = 0;
27329+ mfs->mfs_bindex = -EROFS;
27330+
27331+ return 0;
27332+}
27333+
27334+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
27335+{
27336+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
27337+ return 0;
27338+}
27339+
27340+/* ---------------------------------------------------------------------- */
27341+
27342+/* most free space and then round robin */
27343+static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
27344+{
27345+ int err;
27346+ struct au_wbr_mfs *mfs;
27347+
27348+ err = au_wbr_create_mfs(dentry, isdir);
27349+ if (err >= 0) {
27350+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 27351+ mutex_lock(&mfs->mfs_lock);
1facf9fc 27352+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
27353+ err = au_wbr_create_rr(dentry, isdir);
dece6358 27354+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 27355+ }
27356+
27357+ AuDbg("b%d\n", err);
27358+ return err;
27359+}
27360+
27361+static int au_wbr_create_init_mfsrr(struct super_block *sb)
27362+{
27363+ int err;
27364+
27365+ au_wbr_create_init_mfs(sb); /* ignore */
27366+ err = au_wbr_create_init_rr(sb);
27367+
27368+ return err;
27369+}
27370+
27371+/* ---------------------------------------------------------------------- */
27372+
27373+/* top down parent and most free space */
27374+static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
27375+{
27376+ int err, e2;
27377+ unsigned long long b;
27378+ aufs_bindex_t bindex, bstart, bend;
27379+ struct super_block *sb;
27380+ struct dentry *parent, *h_parent;
27381+ struct au_branch *br;
27382+
27383+ err = au_wbr_create_tdp(dentry, isdir);
27384+ if (unlikely(err < 0))
27385+ goto out;
27386+ parent = dget_parent(dentry);
27387+ bstart = au_dbstart(parent);
27388+ bend = au_dbtaildir(parent);
27389+ if (bstart == bend)
27390+ goto out_parent; /* success */
27391+
27392+ e2 = au_wbr_create_mfs(dentry, isdir);
27393+ if (e2 < 0)
27394+ goto out_parent; /* success */
27395+
27396+ /* when the available size is equal, select upper one */
27397+ sb = dentry->d_sb;
27398+ br = au_sbr(sb, err);
27399+ b = br->br_wbr->wbr_bytes;
27400+ AuDbg("b%d, %llu\n", err, b);
27401+
27402+ for (bindex = bstart; bindex <= bend; bindex++) {
27403+ h_parent = au_h_dptr(parent, bindex);
27404+ if (!h_parent || !h_parent->d_inode)
27405+ continue;
27406+
27407+ br = au_sbr(sb, bindex);
27408+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
27409+ b = br->br_wbr->wbr_bytes;
27410+ err = bindex;
27411+ AuDbg("b%d, %llu\n", err, b);
27412+ }
27413+ }
27414+
4a4d8108
AM
27415+ if (err >= 0)
27416+ err = au_wbr_nonopq(dentry, err);
27417+
4f0767ce 27418+out_parent:
1facf9fc 27419+ dput(parent);
4f0767ce 27420+out:
1facf9fc 27421+ AuDbg("b%d\n", err);
27422+ return err;
27423+}
27424+
27425+/* ---------------------------------------------------------------------- */
27426+
27427+/* policies for copyup */
27428+
27429+/* top down parent */
27430+static int au_wbr_copyup_tdp(struct dentry *dentry)
27431+{
27432+ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
27433+}
27434+
27435+/* bottom up parent */
27436+static int au_wbr_copyup_bup(struct dentry *dentry)
27437+{
27438+ int err;
27439+ aufs_bindex_t bindex, bstart;
27440+ struct dentry *parent, *h_parent;
27441+ struct super_block *sb;
27442+
27443+ err = -EROFS;
27444+ sb = dentry->d_sb;
27445+ parent = dget_parent(dentry);
27446+ bstart = au_dbstart(parent);
27447+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
27448+ h_parent = au_h_dptr(parent, bindex);
27449+ if (!h_parent || !h_parent->d_inode)
27450+ continue;
27451+
27452+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27453+ err = bindex;
27454+ break;
27455+ }
27456+ }
27457+ dput(parent);
27458+
27459+ /* bottom up here */
27460+ if (unlikely(err < 0))
27461+ err = au_wbr_bu(sb, bstart - 1);
27462+
27463+ AuDbg("b%d\n", err);
27464+ return err;
27465+}
27466+
27467+/* bottom up */
27468+static int au_wbr_copyup_bu(struct dentry *dentry)
27469+{
27470+ int err;
4a4d8108 27471+ aufs_bindex_t bstart;
1facf9fc 27472+
4a4d8108
AM
27473+ bstart = au_dbstart(dentry);
27474+ err = au_wbr_bu(dentry->d_sb, bstart);
27475+ AuDbg("b%d\n", err);
27476+ if (err > bstart)
27477+ err = au_wbr_nonopq(dentry, err);
1facf9fc 27478+
27479+ AuDbg("b%d\n", err);
27480+ return err;
27481+}
27482+
27483+/* ---------------------------------------------------------------------- */
27484+
27485+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
27486+ [AuWbrCopyup_TDP] = {
27487+ .copyup = au_wbr_copyup_tdp
27488+ },
27489+ [AuWbrCopyup_BUP] = {
27490+ .copyup = au_wbr_copyup_bup
27491+ },
27492+ [AuWbrCopyup_BU] = {
27493+ .copyup = au_wbr_copyup_bu
27494+ }
27495+};
27496+
27497+struct au_wbr_create_operations au_wbr_create_ops[] = {
27498+ [AuWbrCreate_TDP] = {
27499+ .create = au_wbr_create_tdp
27500+ },
27501+ [AuWbrCreate_RR] = {
27502+ .create = au_wbr_create_rr,
27503+ .init = au_wbr_create_init_rr
27504+ },
27505+ [AuWbrCreate_MFS] = {
27506+ .create = au_wbr_create_mfs,
27507+ .init = au_wbr_create_init_mfs,
27508+ .fin = au_wbr_create_fin_mfs
27509+ },
27510+ [AuWbrCreate_MFSV] = {
27511+ .create = au_wbr_create_mfs,
27512+ .init = au_wbr_create_init_mfs,
27513+ .fin = au_wbr_create_fin_mfs
27514+ },
27515+ [AuWbrCreate_MFSRR] = {
27516+ .create = au_wbr_create_mfsrr,
27517+ .init = au_wbr_create_init_mfsrr,
27518+ .fin = au_wbr_create_fin_mfs
27519+ },
27520+ [AuWbrCreate_MFSRRV] = {
27521+ .create = au_wbr_create_mfsrr,
27522+ .init = au_wbr_create_init_mfsrr,
27523+ .fin = au_wbr_create_fin_mfs
27524+ },
27525+ [AuWbrCreate_PMFS] = {
27526+ .create = au_wbr_create_pmfs,
27527+ .init = au_wbr_create_init_mfs,
27528+ .fin = au_wbr_create_fin_mfs
27529+ },
27530+ [AuWbrCreate_PMFSV] = {
27531+ .create = au_wbr_create_pmfs,
27532+ .init = au_wbr_create_init_mfs,
27533+ .fin = au_wbr_create_fin_mfs
27534+ }
27535+};
7f207e10
AM
27536diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
27537--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
27538+++ linux/fs/aufs/whout.c 2012-02-13 21:54:56.973105100 +0100
27539@@ -0,0 +1,1049 @@
1facf9fc 27540+/*
f6c5ef8b 27541+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 27542+ *
27543+ * This program, aufs is free software; you can redistribute it and/or modify
27544+ * it under the terms of the GNU General Public License as published by
27545+ * the Free Software Foundation; either version 2 of the License, or
27546+ * (at your option) any later version.
dece6358
AM
27547+ *
27548+ * This program is distributed in the hope that it will be useful,
27549+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27550+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27551+ * GNU General Public License for more details.
27552+ *
27553+ * You should have received a copy of the GNU General Public License
27554+ * along with this program; if not, write to the Free Software
27555+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 27556+ */
27557+
27558+/*
27559+ * whiteout for logical deletion and opaque directory
27560+ */
27561+
1facf9fc 27562+#include "aufs.h"
27563+
27564+#define WH_MASK S_IRUGO
27565+
27566+/*
27567+ * If a directory contains this file, then it is opaque. We start with the
27568+ * .wh. flag so that it is blocked by lookup.
27569+ */
27570+static struct qstr diropq_name = {
27571+ .name = AUFS_WH_DIROPQ,
27572+ .len = sizeof(AUFS_WH_DIROPQ) - 1
27573+};
27574+
27575+/*
27576+ * generate whiteout name, which is NOT NUL-terminated.
27577+ * @name: original d_name.name
27578+ * @len: original d_name.len
27579+ * @wh: whiteout qstr
27580+ * returns zero when succeeds, otherwise error.
27581+ * on success, wh->name should be freed by kfree().
27582+ */
27583+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
27584+{
27585+ char *p;
27586+
27587+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
27588+ return -ENAMETOOLONG;
27589+
27590+ wh->len = name->len + AUFS_WH_PFX_LEN;
27591+ p = kmalloc(wh->len, GFP_NOFS);
27592+ wh->name = p;
27593+ if (p) {
27594+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
27595+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
27596+ /* smp_mb(); */
27597+ return 0;
27598+ }
27599+ return -ENOMEM;
27600+}
27601+
27602+/* ---------------------------------------------------------------------- */
27603+
27604+/*
27605+ * test if the @wh_name exists under @h_parent.
27606+ * @try_sio specifies the necessity of super-io.
27607+ */
27608+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
27609+ struct au_branch *br, int try_sio)
27610+{
27611+ int err;
27612+ struct dentry *wh_dentry;
1facf9fc 27613+
1facf9fc 27614+ if (!try_sio)
27615+ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
27616+ else
27617+ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
27618+ err = PTR_ERR(wh_dentry);
27619+ if (IS_ERR(wh_dentry))
27620+ goto out;
27621+
27622+ err = 0;
27623+ if (!wh_dentry->d_inode)
27624+ goto out_wh; /* success */
27625+
27626+ err = 1;
27627+ if (S_ISREG(wh_dentry->d_inode->i_mode))
27628+ goto out_wh; /* success */
27629+
27630+ err = -EIO;
27631+ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
27632+ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
27633+
4f0767ce 27634+out_wh:
1facf9fc 27635+ dput(wh_dentry);
4f0767ce 27636+out:
1facf9fc 27637+ return err;
27638+}
27639+
27640+/*
27641+ * test whether the @h_dentry is set opaque or not.
27642+ */
27643+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
27644+{
27645+ int err;
27646+ struct inode *h_dir;
27647+
27648+ h_dir = h_dentry->d_inode;
27649+ err = au_wh_test(h_dentry, &diropq_name, br,
27650+ au_test_h_perm_sio(h_dir, MAY_EXEC));
27651+ return err;
27652+}
27653+
27654+/*
27655+ * returns a negative dentry whose name is unique and temporary.
27656+ */
27657+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
27658+ struct qstr *prefix)
27659+{
1facf9fc 27660+ struct dentry *dentry;
27661+ int i;
027c5e7a 27662+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 27663+ *name, *p;
027c5e7a 27664+ /* strict atomic_t is unnecessary here */
1facf9fc 27665+ static unsigned short cnt;
27666+ struct qstr qs;
27667+
4a4d8108
AM
27668+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
27669+
1facf9fc 27670+ name = defname;
027c5e7a
AM
27671+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
27672+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 27673+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 27674+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 27675+ goto out;
27676+ dentry = ERR_PTR(-ENOMEM);
27677+ name = kmalloc(qs.len + 1, GFP_NOFS);
27678+ if (unlikely(!name))
27679+ goto out;
27680+ }
27681+
27682+ /* doubly whiteout-ed */
27683+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
27684+ p = name + AUFS_WH_PFX_LEN * 2;
27685+ memcpy(p, prefix->name, prefix->len);
27686+ p += prefix->len;
27687+ *p++ = '.';
4a4d8108 27688+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 27689+
27690+ qs.name = name;
27691+ for (i = 0; i < 3; i++) {
b752ccd1 27692+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
1facf9fc 27693+ dentry = au_sio_lkup_one(&qs, h_parent, br);
27694+ if (IS_ERR(dentry) || !dentry->d_inode)
27695+ goto out_name;
27696+ dput(dentry);
27697+ }
4a4d8108 27698+ /* pr_warning("could not get random name\n"); */
1facf9fc 27699+ dentry = ERR_PTR(-EEXIST);
27700+ AuDbg("%.*s\n", AuLNPair(&qs));
27701+ BUG();
27702+
4f0767ce 27703+out_name:
1facf9fc 27704+ if (name != defname)
27705+ kfree(name);
4f0767ce 27706+out:
4a4d8108 27707+ AuTraceErrPtr(dentry);
1facf9fc 27708+ return dentry;
1facf9fc 27709+}
27710+
27711+/*
27712+ * rename the @h_dentry on @br to the whiteouted temporary name.
27713+ */
27714+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
27715+{
27716+ int err;
27717+ struct path h_path = {
27718+ .mnt = br->br_mnt
27719+ };
27720+ struct inode *h_dir;
27721+ struct dentry *h_parent;
27722+
27723+ h_parent = h_dentry->d_parent; /* dir inode is locked */
27724+ h_dir = h_parent->d_inode;
27725+ IMustLock(h_dir);
27726+
27727+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
27728+ err = PTR_ERR(h_path.dentry);
27729+ if (IS_ERR(h_path.dentry))
27730+ goto out;
27731+
27732+ /* under the same dir, no need to lock_rename() */
27733+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
27734+ AuTraceErr(err);
27735+ dput(h_path.dentry);
27736+
4f0767ce 27737+out:
4a4d8108 27738+ AuTraceErr(err);
1facf9fc 27739+ return err;
27740+}
27741+
27742+/* ---------------------------------------------------------------------- */
27743+/*
27744+ * functions for removing a whiteout
27745+ */
27746+
27747+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
27748+{
27749+ int force;
27750+
27751+ /*
27752+ * forces superio when the dir has a sticky bit.
27753+ * this may be a violation of unix fs semantics.
27754+ */
27755+ force = (h_dir->i_mode & S_ISVTX)
27756+ && h_path->dentry->d_inode->i_uid != current_fsuid();
27757+ return vfsub_unlink(h_dir, h_path, force);
27758+}
27759+
27760+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
27761+ struct dentry *dentry)
27762+{
27763+ int err;
27764+
27765+ err = do_unlink_wh(h_dir, h_path);
27766+ if (!err && dentry)
27767+ au_set_dbwh(dentry, -1);
27768+
27769+ return err;
27770+}
27771+
27772+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
27773+ struct au_branch *br)
27774+{
27775+ int err;
27776+ struct path h_path = {
27777+ .mnt = br->br_mnt
27778+ };
27779+
27780+ err = 0;
27781+ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
27782+ if (IS_ERR(h_path.dentry))
27783+ err = PTR_ERR(h_path.dentry);
27784+ else {
27785+ if (h_path.dentry->d_inode
27786+ && S_ISREG(h_path.dentry->d_inode->i_mode))
27787+ err = do_unlink_wh(h_parent->d_inode, &h_path);
27788+ dput(h_path.dentry);
27789+ }
27790+
27791+ return err;
27792+}
27793+
27794+/* ---------------------------------------------------------------------- */
27795+/*
27796+ * initialize/clean whiteout for a branch
27797+ */
27798+
27799+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
27800+ const int isdir)
27801+{
27802+ int err;
27803+
27804+ if (!whpath->dentry->d_inode)
27805+ return;
27806+
27807+ err = mnt_want_write(whpath->mnt);
27808+ if (!err) {
27809+ if (isdir)
27810+ err = vfsub_rmdir(h_dir, whpath);
27811+ else
27812+ err = vfsub_unlink(h_dir, whpath, /*force*/0);
27813+ mnt_drop_write(whpath->mnt);
27814+ }
27815+ if (unlikely(err))
4a4d8108
AM
27816+ pr_warning("failed removing %.*s (%d), ignored.\n",
27817+ AuDLNPair(whpath->dentry), err);
1facf9fc 27818+}
27819+
27820+static int test_linkable(struct dentry *h_root)
27821+{
27822+ struct inode *h_dir = h_root->d_inode;
27823+
27824+ if (h_dir->i_op->link)
27825+ return 0;
27826+
4a4d8108
AM
27827+ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
27828+ AuDLNPair(h_root), au_sbtype(h_root->d_sb));
1facf9fc 27829+ return -ENOSYS;
27830+}
27831+
27832+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
27833+static int au_whdir(struct inode *h_dir, struct path *path)
27834+{
27835+ int err;
27836+
27837+ err = -EEXIST;
27838+ if (!path->dentry->d_inode) {
27839+ int mode = S_IRWXU;
27840+
27841+ if (au_test_nfs(path->dentry->d_sb))
27842+ mode |= S_IXUGO;
27843+ err = mnt_want_write(path->mnt);
27844+ if (!err) {
27845+ err = vfsub_mkdir(h_dir, path, mode);
27846+ mnt_drop_write(path->mnt);
27847+ }
27848+ } else if (S_ISDIR(path->dentry->d_inode->i_mode))
27849+ err = 0;
27850+ else
4a4d8108 27851+ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
1facf9fc 27852+
27853+ return err;
27854+}
27855+
27856+struct au_wh_base {
27857+ const struct qstr *name;
27858+ struct dentry *dentry;
27859+};
27860+
27861+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
27862+ struct path *h_path)
27863+{
27864+ h_path->dentry = base[AuBrWh_BASE].dentry;
27865+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27866+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27867+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27868+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27869+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27870+}
27871+
27872+/*
27873+ * returns tri-state,
27874+ * minus: error, caller should print the message
27875+ * zero: success
27876+ * plus: error, caller should NOT print the message
27877+ */
27878+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
27879+ int do_plink, struct au_wh_base base[],
27880+ struct path *h_path)
27881+{
27882+ int err;
27883+ struct inode *h_dir;
27884+
27885+ h_dir = h_root->d_inode;
27886+ h_path->dentry = base[AuBrWh_BASE].dentry;
27887+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27888+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27889+ if (do_plink) {
27890+ err = test_linkable(h_root);
27891+ if (unlikely(err)) {
27892+ err = 1;
27893+ goto out;
27894+ }
27895+
27896+ err = au_whdir(h_dir, h_path);
27897+ if (unlikely(err))
27898+ goto out;
27899+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27900+ } else
27901+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27902+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27903+ err = au_whdir(h_dir, h_path);
27904+ if (unlikely(err))
27905+ goto out;
27906+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27907+
4f0767ce 27908+out:
1facf9fc 27909+ return err;
27910+}
27911+
27912+/*
27913+ * for the moment, aufs supports the branch filesystem which does not support
27914+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
27915+ * copyup failed. finally, such filesystem will not be used as the writable
27916+ * branch.
27917+ *
27918+ * returns tri-state, see above.
27919+ */
27920+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
27921+ int do_plink, struct au_wh_base base[],
27922+ struct path *h_path)
27923+{
27924+ int err;
27925+ struct inode *h_dir;
27926+
1308ab2a 27927+ WbrWhMustWriteLock(wbr);
27928+
1facf9fc 27929+ err = test_linkable(h_root);
27930+ if (unlikely(err)) {
27931+ err = 1;
27932+ goto out;
27933+ }
27934+
27935+ /*
27936+ * todo: should this create be done in /sbin/mount.aufs helper?
27937+ */
27938+ err = -EEXIST;
27939+ h_dir = h_root->d_inode;
27940+ if (!base[AuBrWh_BASE].dentry->d_inode) {
27941+ err = mnt_want_write(h_path->mnt);
27942+ if (!err) {
27943+ h_path->dentry = base[AuBrWh_BASE].dentry;
27944+ err = vfsub_create(h_dir, h_path, WH_MASK);
27945+ mnt_drop_write(h_path->mnt);
27946+ }
27947+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
27948+ err = 0;
27949+ else
4a4d8108
AM
27950+ pr_err("unknown %.*s/%.*s exists\n",
27951+ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
1facf9fc 27952+ if (unlikely(err))
27953+ goto out;
27954+
27955+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27956+ if (do_plink) {
27957+ err = au_whdir(h_dir, h_path);
27958+ if (unlikely(err))
27959+ goto out;
27960+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27961+ } else
27962+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27963+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
27964+
27965+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27966+ err = au_whdir(h_dir, h_path);
27967+ if (unlikely(err))
27968+ goto out;
27969+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27970+
4f0767ce 27971+out:
1facf9fc 27972+ return err;
27973+}
27974+
27975+/*
27976+ * initialize the whiteout base file/dir for @br.
27977+ */
27978+int au_wh_init(struct dentry *h_root, struct au_branch *br,
27979+ struct super_block *sb)
27980+{
27981+ int err, i;
27982+ const unsigned char do_plink
27983+ = !!au_opt_test(au_mntflags(sb), PLINK);
27984+ struct path path = {
27985+ .mnt = br->br_mnt
27986+ };
27987+ struct inode *h_dir;
27988+ struct au_wbr *wbr = br->br_wbr;
27989+ static const struct qstr base_name[] = {
27990+ [AuBrWh_BASE] = {
27991+ .name = AUFS_BASE_NAME,
27992+ .len = sizeof(AUFS_BASE_NAME) - 1
27993+ },
27994+ [AuBrWh_PLINK] = {
27995+ .name = AUFS_PLINKDIR_NAME,
27996+ .len = sizeof(AUFS_PLINKDIR_NAME) - 1
27997+ },
27998+ [AuBrWh_ORPH] = {
27999+ .name = AUFS_ORPHDIR_NAME,
28000+ .len = sizeof(AUFS_ORPHDIR_NAME) - 1
28001+ }
28002+ };
28003+ struct au_wh_base base[] = {
28004+ [AuBrWh_BASE] = {
28005+ .name = base_name + AuBrWh_BASE,
28006+ .dentry = NULL
28007+ },
28008+ [AuBrWh_PLINK] = {
28009+ .name = base_name + AuBrWh_PLINK,
28010+ .dentry = NULL
28011+ },
28012+ [AuBrWh_ORPH] = {
28013+ .name = base_name + AuBrWh_ORPH,
28014+ .dentry = NULL
28015+ }
28016+ };
28017+
1308ab2a 28018+ if (wbr)
28019+ WbrWhMustWriteLock(wbr);
1facf9fc 28020+
1facf9fc 28021+ for (i = 0; i < AuBrWh_Last; i++) {
28022+ /* doubly whiteouted */
28023+ struct dentry *d;
28024+
28025+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
28026+ err = PTR_ERR(d);
28027+ if (IS_ERR(d))
28028+ goto out;
28029+
28030+ base[i].dentry = d;
28031+ AuDebugOn(wbr
28032+ && wbr->wbr_wh[i]
28033+ && wbr->wbr_wh[i] != base[i].dentry);
28034+ }
28035+
28036+ if (wbr)
28037+ for (i = 0; i < AuBrWh_Last; i++) {
28038+ dput(wbr->wbr_wh[i]);
28039+ wbr->wbr_wh[i] = NULL;
28040+ }
28041+
28042+ err = 0;
1e00d052 28043+ if (!au_br_writable(br->br_perm)) {
4a4d8108 28044+ h_dir = h_root->d_inode;
1facf9fc 28045+ au_wh_init_ro(h_dir, base, &path);
1e00d052 28046+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 28047+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
28048+ if (err > 0)
28049+ goto out;
28050+ else if (err)
28051+ goto out_err;
1e00d052 28052+ } else {
1facf9fc 28053+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
28054+ if (err > 0)
28055+ goto out;
28056+ else if (err)
28057+ goto out_err;
1facf9fc 28058+ }
28059+ goto out; /* success */
28060+
4f0767ce 28061+out_err:
4a4d8108
AM
28062+ pr_err("an error(%d) on the writable branch %.*s(%s)\n",
28063+ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
4f0767ce 28064+out:
1facf9fc 28065+ for (i = 0; i < AuBrWh_Last; i++)
28066+ dput(base[i].dentry);
28067+ return err;
28068+}
28069+
28070+/* ---------------------------------------------------------------------- */
28071+/*
28072+ * whiteouts are all hard-linked usually.
28073+ * when its link count reaches a ceiling, we create a new whiteout base
28074+ * asynchronously.
28075+ */
28076+
28077+struct reinit_br_wh {
28078+ struct super_block *sb;
28079+ struct au_branch *br;
28080+};
28081+
28082+static void reinit_br_wh(void *arg)
28083+{
28084+ int err;
28085+ aufs_bindex_t bindex;
28086+ struct path h_path;
28087+ struct reinit_br_wh *a = arg;
28088+ struct au_wbr *wbr;
28089+ struct inode *dir;
28090+ struct dentry *h_root;
28091+ struct au_hinode *hdir;
28092+
28093+ err = 0;
28094+ wbr = a->br->br_wbr;
28095+ /* big aufs lock */
28096+ si_noflush_write_lock(a->sb);
28097+ if (!au_br_writable(a->br->br_perm))
28098+ goto out;
28099+ bindex = au_br_index(a->sb, a->br->br_id);
28100+ if (unlikely(bindex < 0))
28101+ goto out;
28102+
1308ab2a 28103+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 28104+ dir = a->sb->s_root->d_inode;
1facf9fc 28105+ hdir = au_hi(dir, bindex);
28106+ h_root = au_h_dptr(a->sb->s_root, bindex);
28107+
4a4d8108 28108+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 28109+ wbr_wh_write_lock(wbr);
28110+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
28111+ h_root, a->br);
28112+ if (!err) {
28113+ err = mnt_want_write(a->br->br_mnt);
28114+ if (!err) {
28115+ h_path.dentry = wbr->wbr_whbase;
28116+ h_path.mnt = a->br->br_mnt;
28117+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
28118+ mnt_drop_write(a->br->br_mnt);
28119+ }
28120+ } else {
4a4d8108
AM
28121+ pr_warning("%.*s is moved, ignored\n",
28122+ AuDLNPair(wbr->wbr_whbase));
1facf9fc 28123+ err = 0;
28124+ }
28125+ dput(wbr->wbr_whbase);
28126+ wbr->wbr_whbase = NULL;
28127+ if (!err)
28128+ err = au_wh_init(h_root, a->br, a->sb);
28129+ wbr_wh_write_unlock(wbr);
4a4d8108 28130+ au_hn_imtx_unlock(hdir);
1308ab2a 28131+ di_read_unlock(a->sb->s_root, AuLock_IR);
1facf9fc 28132+
4f0767ce 28133+out:
1facf9fc 28134+ if (wbr)
28135+ atomic_dec(&wbr->wbr_wh_running);
28136+ atomic_dec(&a->br->br_count);
1facf9fc 28137+ si_write_unlock(a->sb);
027c5e7a 28138+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 28139+ kfree(arg);
28140+ if (unlikely(err))
28141+ AuIOErr("err %d\n", err);
28142+}
28143+
28144+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
28145+{
28146+ int do_dec, wkq_err;
28147+ struct reinit_br_wh *arg;
28148+
28149+ do_dec = 1;
28150+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
28151+ goto out;
28152+
28153+ /* ignore ENOMEM */
28154+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
28155+ if (arg) {
28156+ /*
28157+ * dec(wh_running), kfree(arg) and dec(br_count)
28158+ * in reinit function
28159+ */
28160+ arg->sb = sb;
28161+ arg->br = br;
28162+ atomic_inc(&br->br_count);
53392da6 28163+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 28164+ if (unlikely(wkq_err)) {
28165+ atomic_dec(&br->br_wbr->wbr_wh_running);
28166+ atomic_dec(&br->br_count);
28167+ kfree(arg);
28168+ }
28169+ do_dec = 0;
28170+ }
28171+
4f0767ce 28172+out:
1facf9fc 28173+ if (do_dec)
28174+ atomic_dec(&br->br_wbr->wbr_wh_running);
28175+}
28176+
28177+/* ---------------------------------------------------------------------- */
28178+
28179+/*
28180+ * create the whiteout @wh.
28181+ */
28182+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
28183+ struct dentry *wh)
28184+{
28185+ int err;
28186+ struct path h_path = {
28187+ .dentry = wh
28188+ };
28189+ struct au_branch *br;
28190+ struct au_wbr *wbr;
28191+ struct dentry *h_parent;
28192+ struct inode *h_dir;
28193+
28194+ h_parent = wh->d_parent; /* dir inode is locked */
28195+ h_dir = h_parent->d_inode;
28196+ IMustLock(h_dir);
28197+
28198+ br = au_sbr(sb, bindex);
28199+ h_path.mnt = br->br_mnt;
28200+ wbr = br->br_wbr;
28201+ wbr_wh_read_lock(wbr);
28202+ if (wbr->wbr_whbase) {
28203+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
28204+ if (!err || err != -EMLINK)
28205+ goto out;
28206+
28207+ /* link count full. re-initialize br_whbase. */
28208+ kick_reinit_br_wh(sb, br);
28209+ }
28210+
28211+ /* return this error in this context */
28212+ err = vfsub_create(h_dir, &h_path, WH_MASK);
28213+
4f0767ce 28214+out:
1facf9fc 28215+ wbr_wh_read_unlock(wbr);
28216+ return err;
28217+}
28218+
28219+/* ---------------------------------------------------------------------- */
28220+
28221+/*
28222+ * create or remove the diropq.
28223+ */
28224+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
28225+ unsigned int flags)
28226+{
28227+ struct dentry *opq_dentry, *h_dentry;
28228+ struct super_block *sb;
28229+ struct au_branch *br;
28230+ int err;
28231+
28232+ sb = dentry->d_sb;
28233+ br = au_sbr(sb, bindex);
28234+ h_dentry = au_h_dptr(dentry, bindex);
28235+ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
28236+ if (IS_ERR(opq_dentry))
28237+ goto out;
28238+
28239+ if (au_ftest_diropq(flags, CREATE)) {
28240+ err = link_or_create_wh(sb, bindex, opq_dentry);
28241+ if (!err) {
28242+ au_set_dbdiropq(dentry, bindex);
28243+ goto out; /* success */
28244+ }
28245+ } else {
28246+ struct path tmp = {
28247+ .dentry = opq_dentry,
28248+ .mnt = br->br_mnt
28249+ };
28250+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
28251+ if (!err)
28252+ au_set_dbdiropq(dentry, -1);
28253+ }
28254+ dput(opq_dentry);
28255+ opq_dentry = ERR_PTR(err);
28256+
4f0767ce 28257+out:
1facf9fc 28258+ return opq_dentry;
28259+}
28260+
28261+struct do_diropq_args {
28262+ struct dentry **errp;
28263+ struct dentry *dentry;
28264+ aufs_bindex_t bindex;
28265+ unsigned int flags;
28266+};
28267+
28268+static void call_do_diropq(void *args)
28269+{
28270+ struct do_diropq_args *a = args;
28271+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
28272+}
28273+
28274+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28275+ unsigned int flags)
28276+{
28277+ struct dentry *diropq, *h_dentry;
28278+
28279+ h_dentry = au_h_dptr(dentry, bindex);
28280+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
28281+ diropq = do_diropq(dentry, bindex, flags);
28282+ else {
28283+ int wkq_err;
28284+ struct do_diropq_args args = {
28285+ .errp = &diropq,
28286+ .dentry = dentry,
28287+ .bindex = bindex,
28288+ .flags = flags
28289+ };
28290+
28291+ wkq_err = au_wkq_wait(call_do_diropq, &args);
28292+ if (unlikely(wkq_err))
28293+ diropq = ERR_PTR(wkq_err);
28294+ }
28295+
28296+ return diropq;
28297+}
28298+
28299+/* ---------------------------------------------------------------------- */
28300+
28301+/*
28302+ * lookup whiteout dentry.
28303+ * @h_parent: lower parent dentry which must exist and be locked
28304+ * @base_name: name of dentry which will be whiteouted
28305+ * returns dentry for whiteout.
28306+ */
28307+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28308+ struct au_branch *br)
28309+{
28310+ int err;
28311+ struct qstr wh_name;
28312+ struct dentry *wh_dentry;
28313+
28314+ err = au_wh_name_alloc(&wh_name, base_name);
28315+ wh_dentry = ERR_PTR(err);
28316+ if (!err) {
28317+ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
28318+ kfree(wh_name.name);
28319+ }
28320+ return wh_dentry;
28321+}
28322+
28323+/*
28324+ * link/create a whiteout for @dentry on @bindex.
28325+ */
28326+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28327+ struct dentry *h_parent)
28328+{
28329+ struct dentry *wh_dentry;
28330+ struct super_block *sb;
28331+ int err;
28332+
28333+ sb = dentry->d_sb;
28334+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
28335+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
28336+ err = link_or_create_wh(sb, bindex, wh_dentry);
28337+ if (!err)
28338+ au_set_dbwh(dentry, bindex);
28339+ else {
28340+ dput(wh_dentry);
28341+ wh_dentry = ERR_PTR(err);
28342+ }
28343+ }
28344+
28345+ return wh_dentry;
28346+}
28347+
28348+/* ---------------------------------------------------------------------- */
28349+
28350+/* Delete all whiteouts in this directory on branch bindex. */
28351+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
28352+ aufs_bindex_t bindex, struct au_branch *br)
28353+{
28354+ int err;
28355+ unsigned long ul, n;
28356+ struct qstr wh_name;
28357+ char *p;
28358+ struct hlist_head *head;
28359+ struct au_vdir_wh *tpos;
28360+ struct hlist_node *pos;
28361+ struct au_vdir_destr *str;
28362+
28363+ err = -ENOMEM;
4a4d8108 28364+ p = __getname_gfp(GFP_NOFS);
1facf9fc 28365+ wh_name.name = p;
28366+ if (unlikely(!wh_name.name))
28367+ goto out;
28368+
28369+ err = 0;
28370+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
28371+ p += AUFS_WH_PFX_LEN;
28372+ n = whlist->nh_num;
28373+ head = whlist->nh_head;
28374+ for (ul = 0; !err && ul < n; ul++, head++) {
28375+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
28376+ if (tpos->wh_bindex != bindex)
28377+ continue;
28378+
28379+ str = &tpos->wh_str;
28380+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
28381+ memcpy(p, str->name, str->len);
28382+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
28383+ err = unlink_wh_name(h_dentry, &wh_name, br);
28384+ if (!err)
28385+ continue;
28386+ break;
28387+ }
28388+ AuIOErr("whiteout name too long %.*s\n",
28389+ str->len, str->name);
28390+ err = -EIO;
28391+ break;
28392+ }
28393+ }
28394+ __putname(wh_name.name);
28395+
4f0767ce 28396+out:
1facf9fc 28397+ return err;
28398+}
28399+
28400+struct del_wh_children_args {
28401+ int *errp;
28402+ struct dentry *h_dentry;
1308ab2a 28403+ struct au_nhash *whlist;
1facf9fc 28404+ aufs_bindex_t bindex;
28405+ struct au_branch *br;
28406+};
28407+
28408+static void call_del_wh_children(void *args)
28409+{
28410+ struct del_wh_children_args *a = args;
1308ab2a 28411+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 28412+}
28413+
28414+/* ---------------------------------------------------------------------- */
28415+
28416+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
28417+{
28418+ struct au_whtmp_rmdir *whtmp;
dece6358 28419+ int err;
1308ab2a 28420+ unsigned int rdhash;
dece6358
AM
28421+
28422+ SiMustAnyLock(sb);
1facf9fc 28423+
28424+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
28425+ if (unlikely(!whtmp)) {
28426+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 28427+ goto out;
dece6358 28428+ }
1facf9fc 28429+
28430+ whtmp->dir = NULL;
027c5e7a 28431+ whtmp->br = NULL;
1facf9fc 28432+ whtmp->wh_dentry = NULL;
1308ab2a 28433+ /* no estimation for dir size */
28434+ rdhash = au_sbi(sb)->si_rdhash;
28435+ if (!rdhash)
28436+ rdhash = AUFS_RDHASH_DEF;
28437+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
28438+ if (unlikely(err)) {
28439+ kfree(whtmp);
28440+ whtmp = ERR_PTR(err);
28441+ }
dece6358 28442+
4f0767ce 28443+out:
dece6358 28444+ return whtmp;
1facf9fc 28445+}
28446+
28447+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
28448+{
027c5e7a
AM
28449+ if (whtmp->br)
28450+ atomic_dec(&whtmp->br->br_count);
1facf9fc 28451+ dput(whtmp->wh_dentry);
28452+ iput(whtmp->dir);
dece6358 28453+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 28454+ kfree(whtmp);
28455+}
28456+
28457+/*
28458+ * rmdir the whiteouted temporary named dir @h_dentry.
28459+ * @whlist: whiteouted children.
28460+ */
28461+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28462+ struct dentry *wh_dentry, struct au_nhash *whlist)
28463+{
28464+ int err;
28465+ struct path h_tmp;
28466+ struct inode *wh_inode, *h_dir;
28467+ struct au_branch *br;
28468+
28469+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
28470+ IMustLock(h_dir);
28471+
28472+ br = au_sbr(dir->i_sb, bindex);
28473+ wh_inode = wh_dentry->d_inode;
28474+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
28475+
28476+ /*
28477+ * someone else might change some whiteouts while we were sleeping.
28478+ * it means this whlist may have an obsoleted entry.
28479+ */
28480+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
28481+ err = del_wh_children(wh_dentry, whlist, bindex, br);
28482+ else {
28483+ int wkq_err;
28484+ struct del_wh_children_args args = {
28485+ .errp = &err,
28486+ .h_dentry = wh_dentry,
1308ab2a 28487+ .whlist = whlist,
1facf9fc 28488+ .bindex = bindex,
28489+ .br = br
28490+ };
28491+
28492+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
28493+ if (unlikely(wkq_err))
28494+ err = wkq_err;
28495+ }
28496+ mutex_unlock(&wh_inode->i_mutex);
28497+
28498+ if (!err) {
28499+ h_tmp.dentry = wh_dentry;
28500+ h_tmp.mnt = br->br_mnt;
28501+ err = vfsub_rmdir(h_dir, &h_tmp);
1facf9fc 28502+ }
28503+
28504+ if (!err) {
28505+ if (au_ibstart(dir) == bindex) {
7f207e10 28506+ /* todo: dir->i_mutex is necessary */
1facf9fc 28507+ au_cpup_attr_timesizes(dir);
7f207e10 28508+ vfsub_drop_nlink(dir);
1facf9fc 28509+ }
28510+ return 0; /* success */
28511+ }
28512+
4a4d8108
AM
28513+ pr_warning("failed removing %.*s(%d), ignored\n",
28514+ AuDLNPair(wh_dentry), err);
1facf9fc 28515+ return err;
28516+}
28517+
28518+static void call_rmdir_whtmp(void *args)
28519+{
28520+ int err;
e49829fe 28521+ aufs_bindex_t bindex;
1facf9fc 28522+ struct au_whtmp_rmdir *a = args;
28523+ struct super_block *sb;
28524+ struct dentry *h_parent;
28525+ struct inode *h_dir;
1facf9fc 28526+ struct au_hinode *hdir;
28527+
28528+ /* rmdir by nfsd may cause deadlock with this i_mutex */
28529+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 28530+ err = -EROFS;
1facf9fc 28531+ sb = a->dir->i_sb;
e49829fe
JR
28532+ si_read_lock(sb, !AuLock_FLUSH);
28533+ if (!au_br_writable(a->br->br_perm))
28534+ goto out;
28535+ bindex = au_br_index(sb, a->br->br_id);
28536+ if (unlikely(bindex < 0))
1facf9fc 28537+ goto out;
28538+
28539+ err = -EIO;
1facf9fc 28540+ ii_write_lock_parent(a->dir);
28541+ h_parent = dget_parent(a->wh_dentry);
28542+ h_dir = h_parent->d_inode;
e49829fe 28543+ hdir = au_hi(a->dir, bindex);
4a4d8108 28544+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
28545+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
28546+ a->br);
1facf9fc 28547+ if (!err) {
e49829fe 28548+ err = mnt_want_write(a->br->br_mnt);
1facf9fc 28549+ if (!err) {
e49829fe 28550+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
dece6358 28551+ &a->whlist);
e49829fe 28552+ mnt_drop_write(a->br->br_mnt);
1facf9fc 28553+ }
28554+ }
4a4d8108 28555+ au_hn_imtx_unlock(hdir);
1facf9fc 28556+ dput(h_parent);
28557+ ii_write_unlock(a->dir);
28558+
4f0767ce 28559+out:
1facf9fc 28560+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 28561+ au_whtmp_rmdir_free(a);
027c5e7a
AM
28562+ si_read_unlock(sb);
28563+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28564+ if (unlikely(err))
28565+ AuIOErr("err %d\n", err);
28566+}
28567+
28568+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28569+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
28570+{
28571+ int wkq_err;
e49829fe 28572+ struct super_block *sb;
1facf9fc 28573+
28574+ IMustLock(dir);
28575+
28576+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 28577+ sb = dir->i_sb;
1facf9fc 28578+ args->dir = au_igrab(dir);
e49829fe
JR
28579+ args->br = au_sbr(sb, bindex);
28580+ atomic_inc(&args->br->br_count);
1facf9fc 28581+ args->wh_dentry = dget(wh_dentry);
53392da6 28582+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 28583+ if (unlikely(wkq_err)) {
4a4d8108
AM
28584+ pr_warning("rmdir error %.*s (%d), ignored\n",
28585+ AuDLNPair(wh_dentry), wkq_err);
1facf9fc 28586+ au_whtmp_rmdir_free(args);
28587+ }
28588+}
7f207e10
AM
28589diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
28590--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
28591+++ linux/fs/aufs/whout.h 2012-02-13 21:54:56.973105100 +0100
28592@@ -0,0 +1,88 @@
1facf9fc 28593+/*
f6c5ef8b 28594+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 28595+ *
28596+ * This program, aufs is free software; you can redistribute it and/or modify
28597+ * it under the terms of the GNU General Public License as published by
28598+ * the Free Software Foundation; either version 2 of the License, or
28599+ * (at your option) any later version.
dece6358
AM
28600+ *
28601+ * This program is distributed in the hope that it will be useful,
28602+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28603+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28604+ * GNU General Public License for more details.
28605+ *
28606+ * You should have received a copy of the GNU General Public License
28607+ * along with this program; if not, write to the Free Software
28608+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28609+ */
28610+
28611+/*
28612+ * whiteout for logical deletion and opaque directory
28613+ */
28614+
28615+#ifndef __AUFS_WHOUT_H__
28616+#define __AUFS_WHOUT_H__
28617+
28618+#ifdef __KERNEL__
28619+
1facf9fc 28620+#include "dir.h"
28621+
28622+/* whout.c */
28623+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
28624+struct au_branch;
28625+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
28626+ struct au_branch *br, int try_sio);
28627+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
28628+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
28629+ struct qstr *prefix);
28630+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
28631+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
28632+ struct dentry *dentry);
28633+int au_wh_init(struct dentry *h_parent, struct au_branch *br,
28634+ struct super_block *sb);
28635+
28636+/* diropq flags */
28637+#define AuDiropq_CREATE 1
28638+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
28639+#define au_fset_diropq(flags, name) \
28640+ do { (flags) |= AuDiropq_##name; } while (0)
28641+#define au_fclr_diropq(flags, name) \
28642+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 28643+
28644+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28645+ unsigned int flags);
28646+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28647+ struct au_branch *br);
28648+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28649+ struct dentry *h_parent);
28650+
28651+/* real rmdir for the whiteout-ed dir */
28652+struct au_whtmp_rmdir {
28653+ struct inode *dir;
e49829fe 28654+ struct au_branch *br;
1facf9fc 28655+ struct dentry *wh_dentry;
dece6358 28656+ struct au_nhash whlist;
1facf9fc 28657+};
28658+
28659+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
28660+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
28661+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28662+ struct dentry *wh_dentry, struct au_nhash *whlist);
28663+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28664+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
28665+
28666+/* ---------------------------------------------------------------------- */
28667+
28668+static inline struct dentry *au_diropq_create(struct dentry *dentry,
28669+ aufs_bindex_t bindex)
28670+{
28671+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
28672+}
28673+
28674+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
28675+{
28676+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
28677+}
28678+
28679+#endif /* __KERNEL__ */
28680+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
28681diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
28682--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b 28683+++ linux/fs/aufs/wkq.c 2012-02-13 21:54:56.973105100 +0100
9dbd164d 28684@@ -0,0 +1,214 @@
1facf9fc 28685+/*
f6c5ef8b 28686+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 28687+ *
28688+ * This program, aufs is free software; you can redistribute it and/or modify
28689+ * it under the terms of the GNU General Public License as published by
28690+ * the Free Software Foundation; either version 2 of the License, or
28691+ * (at your option) any later version.
dece6358
AM
28692+ *
28693+ * This program is distributed in the hope that it will be useful,
28694+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28695+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28696+ * GNU General Public License for more details.
28697+ *
28698+ * You should have received a copy of the GNU General Public License
28699+ * along with this program; if not, write to the Free Software
28700+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28701+ */
28702+
28703+/*
28704+ * workqueue for asynchronous/super-io operations
28705+ * todo: try new credential scheme
28706+ */
28707+
dece6358 28708+#include <linux/module.h>
1facf9fc 28709+#include "aufs.h"
28710+
9dbd164d 28711+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 28712+
9dbd164d 28713+static struct workqueue_struct *au_wkq;
1facf9fc 28714+
28715+struct au_wkinfo {
28716+ struct work_struct wk;
7f207e10 28717+ struct kobject *kobj;
1facf9fc 28718+
28719+ unsigned int flags; /* see wkq.h */
28720+
28721+ au_wkq_func_t func;
28722+ void *args;
28723+
1facf9fc 28724+ struct completion *comp;
28725+};
28726+
28727+/* ---------------------------------------------------------------------- */
28728+
1facf9fc 28729+static void wkq_func(struct work_struct *wk)
28730+{
28731+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
28732+
7f207e10
AM
28733+ AuDebugOn(current_fsuid());
28734+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
28735+
1facf9fc 28736+ wkinfo->func(wkinfo->args);
1facf9fc 28737+ if (au_ftest_wkq(wkinfo->flags, WAIT))
28738+ complete(wkinfo->comp);
28739+ else {
7f207e10 28740+ kobject_put(wkinfo->kobj);
9dbd164d 28741+ module_put(THIS_MODULE); /* todo: ?? */
1facf9fc 28742+ kfree(wkinfo);
28743+ }
28744+}
28745+
28746+/*
28747+ * Since struct completion is large, try allocating it dynamically.
28748+ */
28749+#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
28750+#define AuWkqCompDeclare(name) struct completion *comp = NULL
28751+
28752+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28753+{
28754+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
28755+ if (*comp) {
28756+ init_completion(*comp);
28757+ wkinfo->comp = *comp;
28758+ return 0;
28759+ }
28760+ return -ENOMEM;
28761+}
28762+
28763+static void au_wkq_comp_free(struct completion *comp)
28764+{
28765+ kfree(comp);
28766+}
28767+
28768+#else
28769+
28770+/* no braces */
28771+#define AuWkqCompDeclare(name) \
28772+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
28773+ struct completion *comp = &_ ## name
28774+
28775+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28776+{
28777+ wkinfo->comp = *comp;
28778+ return 0;
28779+}
28780+
28781+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
28782+{
28783+ /* empty */
28784+}
28785+#endif /* 4KSTACKS */
28786+
53392da6 28787+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 28788+{
53392da6
AM
28789+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
28790+ if (au_wkq_test()) {
28791+ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n");
28792+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
28793+ }
28794+ } else
28795+ au_dbg_verify_kthread();
28796+
28797+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 28798+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 28799+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
28800+ } else {
28801+ INIT_WORK(&wkinfo->wk, wkq_func);
28802+ schedule_work(&wkinfo->wk);
28803+ }
1facf9fc 28804+}
28805+
7f207e10
AM
28806+/*
28807+ * Be careful. It is easy to make deadlock happen.
28808+ * processA: lock, wkq and wait
28809+ * processB: wkq and wait, lock in wkq
28810+ * --> deadlock
28811+ */
b752ccd1 28812+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 28813+{
28814+ int err;
28815+ AuWkqCompDeclare(comp);
28816+ struct au_wkinfo wkinfo = {
b752ccd1 28817+ .flags = flags,
1facf9fc 28818+ .func = func,
28819+ .args = args
28820+ };
28821+
28822+ err = au_wkq_comp_alloc(&wkinfo, &comp);
28823+ if (!err) {
53392da6 28824+ au_wkq_run(&wkinfo);
1facf9fc 28825+ /* no timeout, no interrupt */
28826+ wait_for_completion(wkinfo.comp);
28827+ au_wkq_comp_free(comp);
4a4d8108 28828+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 28829+ }
28830+
28831+ return err;
28832+
28833+}
28834+
027c5e7a
AM
28835+/*
28836+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
28837+ * problem in a concurrent umounting.
28838+ */
53392da6
AM
28839+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28840+ unsigned int flags)
1facf9fc 28841+{
28842+ int err;
28843+ struct au_wkinfo *wkinfo;
28844+
28845+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
28846+
28847+ /*
28848+ * wkq_func() must free this wkinfo.
28849+ * it highly depends upon the implementation of workqueue.
28850+ */
28851+ err = 0;
28852+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
28853+ if (wkinfo) {
7f207e10 28854+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 28855+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 28856+ wkinfo->func = func;
28857+ wkinfo->args = args;
28858+ wkinfo->comp = NULL;
7f207e10 28859+ kobject_get(wkinfo->kobj);
9dbd164d 28860+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 28861+
53392da6 28862+ au_wkq_run(wkinfo);
1facf9fc 28863+ } else {
28864+ err = -ENOMEM;
e49829fe 28865+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28866+ }
28867+
28868+ return err;
28869+}
28870+
28871+/* ---------------------------------------------------------------------- */
28872+
28873+void au_nwt_init(struct au_nowait_tasks *nwt)
28874+{
28875+ atomic_set(&nwt->nw_len, 0);
4a4d8108 28876+ /* smp_mb(); */ /* atomic_set */
1facf9fc 28877+ init_waitqueue_head(&nwt->nw_wq);
28878+}
28879+
28880+void au_wkq_fin(void)
28881+{
9dbd164d 28882+ destroy_workqueue(au_wkq);
1facf9fc 28883+}
28884+
28885+int __init au_wkq_init(void)
28886+{
9dbd164d 28887+ int err;
b752ccd1
AM
28888+
28889+ err = 0;
9dbd164d
AM
28890+ BUILD_BUG_ON(!WQ_RESCUER);
28891+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, !WQ_RESCUER, WQ_DFL_ACTIVE);
28892+ if (IS_ERR(au_wkq))
28893+ err = PTR_ERR(au_wkq);
28894+ else if (!au_wkq)
28895+ err = -ENOMEM;
b752ccd1
AM
28896+
28897+ return err;
1facf9fc 28898+}
7f207e10
AM
28899diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
28900--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
28901+++ linux/fs/aufs/wkq.h 2012-02-13 21:54:56.973105100 +0100
28902@@ -0,0 +1,92 @@
1facf9fc 28903+/*
f6c5ef8b 28904+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 28905+ *
28906+ * This program, aufs is free software; you can redistribute it and/or modify
28907+ * it under the terms of the GNU General Public License as published by
28908+ * the Free Software Foundation; either version 2 of the License, or
28909+ * (at your option) any later version.
dece6358
AM
28910+ *
28911+ * This program is distributed in the hope that it will be useful,
28912+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28913+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28914+ * GNU General Public License for more details.
28915+ *
28916+ * You should have received a copy of the GNU General Public License
28917+ * along with this program; if not, write to the Free Software
28918+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28919+ */
28920+
28921+/*
28922+ * workqueue for asynchronous/super-io operations
28923+ * todo: try new credentials management scheme
28924+ */
28925+
28926+#ifndef __AUFS_WKQ_H__
28927+#define __AUFS_WKQ_H__
28928+
28929+#ifdef __KERNEL__
28930+
dece6358
AM
28931+struct super_block;
28932+
1facf9fc 28933+/* ---------------------------------------------------------------------- */
28934+
28935+/*
28936+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
28937+ */
28938+struct au_nowait_tasks {
28939+ atomic_t nw_len;
28940+ wait_queue_head_t nw_wq;
28941+};
28942+
28943+/* ---------------------------------------------------------------------- */
28944+
28945+typedef void (*au_wkq_func_t)(void *args);
28946+
28947+/* wkq flags */
28948+#define AuWkq_WAIT 1
9dbd164d 28949+#define AuWkq_NEST (1 << 1)
1facf9fc 28950+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
28951+#define au_fset_wkq(flags, name) \
28952+ do { (flags) |= AuWkq_##name; } while (0)
28953+#define au_fclr_wkq(flags, name) \
28954+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 28955+
9dbd164d
AM
28956+#ifndef CONFIG_AUFS_HNOTIFY
28957+#undef AuWkq_NEST
28958+#define AuWkq_NEST 0
28959+#endif
28960+
1facf9fc 28961+/* wkq.c */
b752ccd1 28962+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
28963+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28964+ unsigned int flags);
1facf9fc 28965+void au_nwt_init(struct au_nowait_tasks *nwt);
28966+int __init au_wkq_init(void);
28967+void au_wkq_fin(void);
28968+
28969+/* ---------------------------------------------------------------------- */
28970+
53392da6
AM
28971+static inline int au_wkq_test(void)
28972+{
28973+ return current->flags & PF_WQ_WORKER;
28974+}
28975+
b752ccd1 28976+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 28977+{
b752ccd1 28978+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 28979+}
28980+
28981+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
28982+{
e49829fe 28983+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 28984+ wake_up_all(&nwt->nw_wq);
28985+}
28986+
28987+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
28988+{
28989+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
28990+ return 0;
28991+}
28992+
28993+#endif /* __KERNEL__ */
28994+#endif /* __AUFS_WKQ_H__ */
7f207e10
AM
28995diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
28996--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
28997+++ linux/fs/aufs/xino.c 2012-02-13 21:54:56.973105100 +0100
28998@@ -0,0 +1,1264 @@
1facf9fc 28999+/*
f6c5ef8b 29000+ * Copyright (C) 2005-2012 Junjiro R. Okajima
1facf9fc 29001+ *
29002+ * This program, aufs is free software; you can redistribute it and/or modify
29003+ * it under the terms of the GNU General Public License as published by
29004+ * the Free Software Foundation; either version 2 of the License, or
29005+ * (at your option) any later version.
dece6358
AM
29006+ *
29007+ * This program is distributed in the hope that it will be useful,
29008+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29009+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29010+ * GNU General Public License for more details.
29011+ *
29012+ * You should have received a copy of the GNU General Public License
29013+ * along with this program; if not, write to the Free Software
29014+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 29015+ */
29016+
29017+/*
29018+ * external inode number translation table and bitmap
29019+ */
29020+
29021+#include <linux/seq_file.h>
1facf9fc 29022+#include "aufs.h"
29023+
9dbd164d 29024+/* todo: unnecessary to support mmap_sem since kernel-space? */
b752ccd1 29025+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 29026+ loff_t *pos)
29027+{
29028+ ssize_t err;
29029+ mm_segment_t oldfs;
b752ccd1
AM
29030+ union {
29031+ void *k;
29032+ char __user *u;
29033+ } buf;
1facf9fc 29034+
b752ccd1 29035+ buf.k = kbuf;
1facf9fc 29036+ oldfs = get_fs();
29037+ set_fs(KERNEL_DS);
29038+ do {
29039+ /* todo: signal_pending? */
b752ccd1 29040+ err = func(file, buf.u, size, pos);
1facf9fc 29041+ } while (err == -EAGAIN || err == -EINTR);
29042+ set_fs(oldfs);
29043+
29044+#if 0 /* reserved for future use */
29045+ if (err > 0)
29046+ fsnotify_access(file->f_dentry);
29047+#endif
29048+
29049+ return err;
29050+}
29051+
29052+/* ---------------------------------------------------------------------- */
29053+
b752ccd1 29054+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf,
1facf9fc 29055+ size_t size, loff_t *pos)
29056+{
29057+ ssize_t err;
29058+ mm_segment_t oldfs;
b752ccd1
AM
29059+ union {
29060+ void *k;
29061+ const char __user *u;
29062+ } buf;
1facf9fc 29063+
b752ccd1 29064+ buf.k = kbuf;
1facf9fc 29065+ oldfs = get_fs();
29066+ set_fs(KERNEL_DS);
1facf9fc 29067+ do {
29068+ /* todo: signal_pending? */
b752ccd1 29069+ err = func(file, buf.u, size, pos);
1facf9fc 29070+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 29071+ set_fs(oldfs);
29072+
29073+#if 0 /* reserved for future use */
29074+ if (err > 0)
29075+ fsnotify_modify(file->f_dentry);
29076+#endif
29077+
29078+ return err;
29079+}
29080+
29081+struct do_xino_fwrite_args {
29082+ ssize_t *errp;
29083+ au_writef_t func;
29084+ struct file *file;
29085+ void *buf;
29086+ size_t size;
29087+ loff_t *pos;
29088+};
29089+
29090+static void call_do_xino_fwrite(void *args)
29091+{
29092+ struct do_xino_fwrite_args *a = args;
29093+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
29094+}
29095+
29096+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
29097+ loff_t *pos)
29098+{
29099+ ssize_t err;
29100+
29101+ /* todo: signal block and no wkq? */
b752ccd1
AM
29102+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
29103+ lockdep_off();
29104+ err = do_xino_fwrite(func, file, buf, size, pos);
29105+ lockdep_on();
29106+ } else {
29107+ /*
29108+ * it breaks RLIMIT_FSIZE and normal user's limit,
29109+ * users should care about quota and real 'filesystem full.'
29110+ */
1facf9fc 29111+ int wkq_err;
29112+ struct do_xino_fwrite_args args = {
29113+ .errp = &err,
29114+ .func = func,
29115+ .file = file,
29116+ .buf = buf,
29117+ .size = size,
29118+ .pos = pos
29119+ };
29120+
29121+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
29122+ if (unlikely(wkq_err))
29123+ err = wkq_err;
b752ccd1 29124+ }
1facf9fc 29125+
29126+ return err;
29127+}
29128+
29129+/* ---------------------------------------------------------------------- */
29130+
29131+/*
29132+ * create a new xinofile at the same place/path as @base_file.
29133+ */
29134+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
29135+{
29136+ struct file *file;
4a4d8108 29137+ struct dentry *base, *parent;
1facf9fc 29138+ struct inode *dir;
29139+ struct qstr *name;
1308ab2a 29140+ struct path path;
4a4d8108 29141+ int err;
1facf9fc 29142+
29143+ base = base_file->f_dentry;
29144+ parent = base->d_parent; /* dir inode is locked */
29145+ dir = parent->d_inode;
29146+ IMustLock(dir);
29147+
29148+ file = ERR_PTR(-EINVAL);
29149+ name = &base->d_name;
4a4d8108
AM
29150+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
29151+ if (IS_ERR(path.dentry)) {
29152+ file = (void *)path.dentry;
29153+ pr_err("%.*s lookup err %ld\n",
29154+ AuLNPair(name), PTR_ERR(path.dentry));
1facf9fc 29155+ goto out;
29156+ }
29157+
29158+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 29159+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 29160+ if (unlikely(err)) {
29161+ file = ERR_PTR(err);
4a4d8108 29162+ pr_err("%.*s create err %d\n", AuLNPair(name), err);
1facf9fc 29163+ goto out_dput;
29164+ }
29165+
1308ab2a 29166+ path.mnt = base_file->f_vfsmnt;
4a4d8108 29167+ file = vfsub_dentry_open(&path,
7f207e10 29168+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29169+ /* | __FMODE_NONOTIFY */);
1facf9fc 29170+ if (IS_ERR(file)) {
4a4d8108 29171+ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
1facf9fc 29172+ goto out_dput;
29173+ }
29174+
29175+ err = vfsub_unlink(dir, &file->f_path, /*force*/0);
29176+ if (unlikely(err)) {
4a4d8108 29177+ pr_err("%.*s unlink err %d\n", AuLNPair(name), err);
1facf9fc 29178+ goto out_fput;
29179+ }
29180+
29181+ if (copy_src) {
29182+ /* no one can touch copy_src xino */
29183+ err = au_copy_file(file, copy_src,
29184+ i_size_read(copy_src->f_dentry->d_inode));
29185+ if (unlikely(err)) {
4a4d8108 29186+ pr_err("%.*s copy err %d\n", AuLNPair(name), err);
1facf9fc 29187+ goto out_fput;
29188+ }
29189+ }
29190+ goto out_dput; /* success */
29191+
4f0767ce 29192+out_fput:
1facf9fc 29193+ fput(file);
29194+ file = ERR_PTR(err);
4f0767ce 29195+out_dput:
4a4d8108 29196+ dput(path.dentry);
4f0767ce 29197+out:
1facf9fc 29198+ return file;
29199+}
29200+
29201+struct au_xino_lock_dir {
29202+ struct au_hinode *hdir;
29203+ struct dentry *parent;
29204+ struct mutex *mtx;
29205+};
29206+
29207+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
29208+ struct au_xino_lock_dir *ldir)
29209+{
29210+ aufs_bindex_t brid, bindex;
29211+
29212+ ldir->hdir = NULL;
29213+ bindex = -1;
29214+ brid = au_xino_brid(sb);
29215+ if (brid >= 0)
29216+ bindex = au_br_index(sb, brid);
29217+ if (bindex >= 0) {
29218+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 29219+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 29220+ } else {
29221+ ldir->parent = dget_parent(xino->f_dentry);
29222+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
29223+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
29224+ }
29225+}
29226+
29227+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
29228+{
29229+ if (ldir->hdir)
4a4d8108 29230+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 29231+ else {
29232+ mutex_unlock(ldir->mtx);
29233+ dput(ldir->parent);
29234+ }
29235+}
29236+
29237+/* ---------------------------------------------------------------------- */
29238+
29239+/* truncate xino files asynchronously */
29240+
29241+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
29242+{
29243+ int err;
29244+ aufs_bindex_t bi, bend;
29245+ struct au_branch *br;
29246+ struct file *new_xino, *file;
29247+ struct super_block *h_sb;
29248+ struct au_xino_lock_dir ldir;
29249+
29250+ err = -EINVAL;
29251+ bend = au_sbend(sb);
29252+ if (unlikely(bindex < 0 || bend < bindex))
29253+ goto out;
29254+ br = au_sbr(sb, bindex);
29255+ file = br->br_xino.xi_file;
29256+ if (!file)
29257+ goto out;
29258+
29259+ au_xino_lock_dir(sb, file, &ldir);
29260+ /* mnt_want_write() is unnecessary here */
29261+ new_xino = au_xino_create2(file, file);
29262+ au_xino_unlock_dir(&ldir);
29263+ err = PTR_ERR(new_xino);
29264+ if (IS_ERR(new_xino))
29265+ goto out;
29266+ err = 0;
29267+ fput(file);
29268+ br->br_xino.xi_file = new_xino;
29269+
29270+ h_sb = br->br_mnt->mnt_sb;
29271+ for (bi = 0; bi <= bend; bi++) {
29272+ if (unlikely(bi == bindex))
29273+ continue;
29274+ br = au_sbr(sb, bi);
29275+ if (br->br_mnt->mnt_sb != h_sb)
29276+ continue;
29277+
29278+ fput(br->br_xino.xi_file);
29279+ br->br_xino.xi_file = new_xino;
29280+ get_file(new_xino);
29281+ }
29282+
4f0767ce 29283+out:
1facf9fc 29284+ return err;
29285+}
29286+
29287+struct xino_do_trunc_args {
29288+ struct super_block *sb;
29289+ struct au_branch *br;
29290+};
29291+
29292+static void xino_do_trunc(void *_args)
29293+{
29294+ struct xino_do_trunc_args *args = _args;
29295+ struct super_block *sb;
29296+ struct au_branch *br;
29297+ struct inode *dir;
29298+ int err;
29299+ aufs_bindex_t bindex;
29300+
29301+ err = 0;
29302+ sb = args->sb;
29303+ dir = sb->s_root->d_inode;
29304+ br = args->br;
29305+
29306+ si_noflush_write_lock(sb);
29307+ ii_read_lock_parent(dir);
29308+ bindex = au_br_index(sb, br->br_id);
29309+ err = au_xino_trunc(sb, bindex);
dece6358
AM
29310+ if (!err
29311+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
1facf9fc 29312+ >= br->br_xino_upper)
29313+ br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
29314+
1facf9fc 29315+ ii_read_unlock(dir);
29316+ if (unlikely(err))
4a4d8108 29317+ pr_warning("err b%d, (%d)\n", bindex, err);
1facf9fc 29318+ atomic_dec(&br->br_xino_running);
29319+ atomic_dec(&br->br_count);
1facf9fc 29320+ si_write_unlock(sb);
027c5e7a 29321+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 29322+ kfree(args);
29323+}
29324+
29325+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
29326+{
29327+ struct xino_do_trunc_args *args;
29328+ int wkq_err;
29329+
29330+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
29331+ < br->br_xino_upper)
29332+ return;
29333+
29334+ if (atomic_inc_return(&br->br_xino_running) > 1)
29335+ goto out;
29336+
29337+ /* lock and kfree() will be called in xino_do_trunc() */
29338+ args = kmalloc(sizeof(*args), GFP_NOFS);
29339+ if (unlikely(!args)) {
29340+ AuErr1("no memory\n");
29341+ goto out_args;
29342+ }
29343+
e49829fe 29344+ atomic_inc(&br->br_count);
1facf9fc 29345+ args->sb = sb;
29346+ args->br = br;
53392da6 29347+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 29348+ if (!wkq_err)
29349+ return; /* success */
29350+
4a4d8108 29351+ pr_err("wkq %d\n", wkq_err);
e49829fe 29352+ atomic_dec(&br->br_count);
1facf9fc 29353+
4f0767ce 29354+out_args:
1facf9fc 29355+ kfree(args);
4f0767ce 29356+out:
e49829fe 29357+ atomic_dec(&br->br_xino_running);
1facf9fc 29358+}
29359+
29360+/* ---------------------------------------------------------------------- */
29361+
29362+static int au_xino_do_write(au_writef_t write, struct file *file,
29363+ ino_t h_ino, ino_t ino)
29364+{
29365+ loff_t pos;
29366+ ssize_t sz;
29367+
29368+ pos = h_ino;
29369+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
29370+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29371+ return -EFBIG;
29372+ }
29373+ pos *= sizeof(ino);
29374+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
29375+ if (sz == sizeof(ino))
29376+ return 0; /* success */
29377+
29378+ AuIOErr("write failed (%zd)\n", sz);
29379+ return -EIO;
29380+}
29381+
29382+/*
29383+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
29384+ * at the position of @h_ino.
29385+ * even if @ino is zero, it is written to the xinofile and means no entry.
29386+ * if the size of the xino file on a specific filesystem exceeds the watermark,
29387+ * try truncating it.
29388+ */
29389+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29390+ ino_t ino)
29391+{
29392+ int err;
29393+ unsigned int mnt_flags;
29394+ struct au_branch *br;
29395+
29396+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
29397+ || ((loff_t)-1) > 0);
dece6358 29398+ SiMustAnyLock(sb);
1facf9fc 29399+
29400+ mnt_flags = au_mntflags(sb);
29401+ if (!au_opt_test(mnt_flags, XINO))
29402+ return 0;
29403+
29404+ br = au_sbr(sb, bindex);
29405+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29406+ h_ino, ino);
29407+ if (!err) {
29408+ if (au_opt_test(mnt_flags, TRUNC_XINO)
29409+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29410+ xino_try_trunc(sb, br);
29411+ return 0; /* success */
29412+ }
29413+
29414+ AuIOErr("write failed (%d)\n", err);
29415+ return -EIO;
29416+}
29417+
29418+/* ---------------------------------------------------------------------- */
29419+
29420+/* aufs inode number bitmap */
29421+
29422+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
29423+static ino_t xib_calc_ino(unsigned long pindex, int bit)
29424+{
29425+ ino_t ino;
29426+
29427+ AuDebugOn(bit < 0 || page_bits <= bit);
29428+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
29429+ return ino;
29430+}
29431+
29432+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
29433+{
29434+ AuDebugOn(ino < AUFS_FIRST_INO);
29435+ ino -= AUFS_FIRST_INO;
29436+ *pindex = ino / page_bits;
29437+ *bit = ino % page_bits;
29438+}
29439+
29440+static int xib_pindex(struct super_block *sb, unsigned long pindex)
29441+{
29442+ int err;
29443+ loff_t pos;
29444+ ssize_t sz;
29445+ struct au_sbinfo *sbinfo;
29446+ struct file *xib;
29447+ unsigned long *p;
29448+
29449+ sbinfo = au_sbi(sb);
29450+ MtxMustLock(&sbinfo->si_xib_mtx);
29451+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
29452+ || !au_opt_test(sbinfo->si_mntflags, XINO));
29453+
29454+ if (pindex == sbinfo->si_xib_last_pindex)
29455+ return 0;
29456+
29457+ xib = sbinfo->si_xib;
29458+ p = sbinfo->si_xib_buf;
29459+ pos = sbinfo->si_xib_last_pindex;
29460+ pos *= PAGE_SIZE;
29461+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29462+ if (unlikely(sz != PAGE_SIZE))
29463+ goto out;
29464+
29465+ pos = pindex;
29466+ pos *= PAGE_SIZE;
29467+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
29468+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
29469+ else {
29470+ memset(p, 0, PAGE_SIZE);
29471+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29472+ }
29473+ if (sz == PAGE_SIZE) {
29474+ sbinfo->si_xib_last_pindex = pindex;
29475+ return 0; /* success */
29476+ }
29477+
4f0767ce 29478+out:
b752ccd1
AM
29479+ AuIOErr1("write failed (%zd)\n", sz);
29480+ err = sz;
29481+ if (sz >= 0)
29482+ err = -EIO;
29483+ return err;
29484+}
29485+
29486+/* ---------------------------------------------------------------------- */
29487+
29488+static void au_xib_clear_bit(struct inode *inode)
29489+{
29490+ int err, bit;
29491+ unsigned long pindex;
29492+ struct super_block *sb;
29493+ struct au_sbinfo *sbinfo;
29494+
29495+ AuDebugOn(inode->i_nlink);
29496+
29497+ sb = inode->i_sb;
29498+ xib_calc_bit(inode->i_ino, &pindex, &bit);
29499+ AuDebugOn(page_bits <= bit);
29500+ sbinfo = au_sbi(sb);
29501+ mutex_lock(&sbinfo->si_xib_mtx);
29502+ err = xib_pindex(sb, pindex);
29503+ if (!err) {
29504+ clear_bit(bit, sbinfo->si_xib_buf);
29505+ sbinfo->si_xib_next_bit = bit;
29506+ }
29507+ mutex_unlock(&sbinfo->si_xib_mtx);
29508+}
29509+
29510+/* for s_op->delete_inode() */
29511+void au_xino_delete_inode(struct inode *inode, const int unlinked)
29512+{
29513+ int err;
29514+ unsigned int mnt_flags;
29515+ aufs_bindex_t bindex, bend, bi;
29516+ unsigned char try_trunc;
29517+ struct au_iinfo *iinfo;
29518+ struct super_block *sb;
29519+ struct au_hinode *hi;
29520+ struct inode *h_inode;
29521+ struct au_branch *br;
29522+ au_writef_t xwrite;
29523+
29524+ sb = inode->i_sb;
29525+ mnt_flags = au_mntflags(sb);
29526+ if (!au_opt_test(mnt_flags, XINO)
29527+ || inode->i_ino == AUFS_ROOT_INO)
29528+ return;
29529+
29530+ if (unlinked) {
29531+ au_xigen_inc(inode);
29532+ au_xib_clear_bit(inode);
29533+ }
29534+
29535+ iinfo = au_ii(inode);
29536+ if (!iinfo)
29537+ return;
1facf9fc 29538+
b752ccd1
AM
29539+ bindex = iinfo->ii_bstart;
29540+ if (bindex < 0)
29541+ return;
1facf9fc 29542+
b752ccd1
AM
29543+ xwrite = au_sbi(sb)->si_xwrite;
29544+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
29545+ hi = iinfo->ii_hinode + bindex;
29546+ bend = iinfo->ii_bend;
29547+ for (; bindex <= bend; bindex++, hi++) {
29548+ h_inode = hi->hi_inode;
29549+ if (!h_inode
29550+ || (!unlinked && h_inode->i_nlink))
29551+ continue;
1facf9fc 29552+
b752ccd1
AM
29553+ /* inode may not be revalidated */
29554+ bi = au_br_index(sb, hi->hi_id);
29555+ if (bi < 0)
29556+ continue;
1facf9fc 29557+
b752ccd1
AM
29558+ br = au_sbr(sb, bi);
29559+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
29560+ h_inode->i_ino, /*ino*/0);
29561+ if (!err && try_trunc
29562+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29563+ xino_try_trunc(sb, br);
1facf9fc 29564+ }
1facf9fc 29565+}
29566+
29567+/* get an unused inode number from bitmap */
29568+ino_t au_xino_new_ino(struct super_block *sb)
29569+{
29570+ ino_t ino;
29571+ unsigned long *p, pindex, ul, pend;
29572+ struct au_sbinfo *sbinfo;
29573+ struct file *file;
29574+ int free_bit, err;
29575+
29576+ if (!au_opt_test(au_mntflags(sb), XINO))
29577+ return iunique(sb, AUFS_FIRST_INO);
29578+
29579+ sbinfo = au_sbi(sb);
29580+ mutex_lock(&sbinfo->si_xib_mtx);
29581+ p = sbinfo->si_xib_buf;
29582+ free_bit = sbinfo->si_xib_next_bit;
29583+ if (free_bit < page_bits && !test_bit(free_bit, p))
29584+ goto out; /* success */
29585+ free_bit = find_first_zero_bit(p, page_bits);
29586+ if (free_bit < page_bits)
29587+ goto out; /* success */
29588+
29589+ pindex = sbinfo->si_xib_last_pindex;
29590+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
29591+ err = xib_pindex(sb, ul);
29592+ if (unlikely(err))
29593+ goto out_err;
29594+ free_bit = find_first_zero_bit(p, page_bits);
29595+ if (free_bit < page_bits)
29596+ goto out; /* success */
29597+ }
29598+
29599+ file = sbinfo->si_xib;
29600+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
29601+ for (ul = pindex + 1; ul <= pend; ul++) {
29602+ err = xib_pindex(sb, ul);
29603+ if (unlikely(err))
29604+ goto out_err;
29605+ free_bit = find_first_zero_bit(p, page_bits);
29606+ if (free_bit < page_bits)
29607+ goto out; /* success */
29608+ }
29609+ BUG();
29610+
4f0767ce 29611+out:
1facf9fc 29612+ set_bit(free_bit, p);
7f207e10 29613+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 29614+ pindex = sbinfo->si_xib_last_pindex;
29615+ mutex_unlock(&sbinfo->si_xib_mtx);
29616+ ino = xib_calc_ino(pindex, free_bit);
29617+ AuDbg("i%lu\n", (unsigned long)ino);
29618+ return ino;
4f0767ce 29619+out_err:
1facf9fc 29620+ mutex_unlock(&sbinfo->si_xib_mtx);
29621+ AuDbg("i0\n");
29622+ return 0;
29623+}
29624+
29625+/*
29626+ * read @ino from xinofile for the specified branch{@sb, @bindex}
29627+ * at the position of @h_ino.
29628+ * if there is no entry for @h_ino, @ino is set to zero and 0 is returned.
29629+ */
29630+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29631+ ino_t *ino)
29632+{
29633+ int err;
29634+ ssize_t sz;
29635+ loff_t pos;
29636+ struct file *file;
29637+ struct au_sbinfo *sbinfo;
29638+
29639+ *ino = 0;
29640+ if (!au_opt_test(au_mntflags(sb), XINO))
29641+ return 0; /* no xino */
29642+
29643+ err = 0;
29644+ sbinfo = au_sbi(sb);
29645+ pos = h_ino;
29646+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
29647+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29648+ return -EFBIG;
29649+ }
29650+ pos *= sizeof(*ino);
29651+
29652+ file = au_sbr(sb, bindex)->br_xino.xi_file;
29653+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
29654+ return 0; /* no ino */
29655+
29656+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
29657+ if (sz == sizeof(*ino))
29658+ return 0; /* success */
29659+
29660+ err = sz;
29661+ if (unlikely(sz >= 0)) {
29662+ err = -EIO;
29663+ AuIOErr("xino read error (%zd)\n", sz);
29664+ }
29665+
29666+ return err;
29667+}
29668+
29669+/* ---------------------------------------------------------------------- */
29670+
29671+/* create and set a new xino file */
29672+
29673+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
29674+{
29675+ struct file *file;
29676+ struct dentry *h_parent, *d;
29677+ struct inode *h_dir;
29678+ int err;
29679+
29680+ /*
29681+ * at mount-time, and the xino file is the default path,
4a4d8108 29682+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 29683+ * when a user specified the xino, we cannot get au_hdir to be ignored.
29684+ */
7f207e10 29685+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29686+ /* | __FMODE_NONOTIFY */,
1facf9fc 29687+ S_IRUGO | S_IWUGO);
29688+ if (IS_ERR(file)) {
29689+ if (!silent)
4a4d8108 29690+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 29691+ return file;
29692+ }
29693+
29694+ /* keep file count */
29695+ h_parent = dget_parent(file->f_dentry);
29696+ h_dir = h_parent->d_inode;
29697+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
29698+ /* mnt_want_write() is unnecessary here */
29699+ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
29700+ mutex_unlock(&h_dir->i_mutex);
29701+ dput(h_parent);
29702+ if (unlikely(err)) {
29703+ if (!silent)
4a4d8108 29704+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 29705+ goto out;
29706+ }
29707+
29708+ err = -EINVAL;
29709+ d = file->f_dentry;
29710+ if (unlikely(sb == d->d_sb)) {
29711+ if (!silent)
4a4d8108 29712+ pr_err("%s must be outside\n", fname);
1facf9fc 29713+ goto out;
29714+ }
29715+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
29716+ if (!silent)
4a4d8108
AM
29717+ pr_err("xino doesn't support %s(%s)\n",
29718+ fname, au_sbtype(d->d_sb));
1facf9fc 29719+ goto out;
29720+ }
29721+ return file; /* success */
29722+
4f0767ce 29723+out:
1facf9fc 29724+ fput(file);
29725+ file = ERR_PTR(err);
29726+ return file;
29727+}
29728+
29729+/*
29730+ * find another branch which is on the same filesystem as the specified
29731+ * branch{@btgt}. search until @bend.
29732+ */
29733+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
29734+ aufs_bindex_t bend)
29735+{
29736+ aufs_bindex_t bindex;
29737+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
29738+
29739+ for (bindex = 0; bindex < btgt; bindex++)
29740+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29741+ return bindex;
29742+ for (bindex++; bindex <= bend; bindex++)
29743+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29744+ return bindex;
29745+ return -1;
29746+}
29747+
29748+/* ---------------------------------------------------------------------- */
29749+
29750+/*
29751+ * initialize the xinofile for the specified branch @br
29752+ * at the place/path where @base_file indicates.
29753+ * test whether another branch is on the same filesystem or not,
29754+ * if @do_test is true.
29755+ */
29756+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
29757+ struct file *base_file, int do_test)
29758+{
29759+ int err;
29760+ ino_t ino;
29761+ aufs_bindex_t bend, bindex;
29762+ struct au_branch *shared_br, *b;
29763+ struct file *file;
29764+ struct super_block *tgt_sb;
29765+
29766+ shared_br = NULL;
29767+ bend = au_sbend(sb);
29768+ if (do_test) {
29769+ tgt_sb = br->br_mnt->mnt_sb;
29770+ for (bindex = 0; bindex <= bend; bindex++) {
29771+ b = au_sbr(sb, bindex);
29772+ if (tgt_sb == b->br_mnt->mnt_sb) {
29773+ shared_br = b;
29774+ break;
29775+ }
29776+ }
29777+ }
29778+
29779+ if (!shared_br || !shared_br->br_xino.xi_file) {
29780+ struct au_xino_lock_dir ldir;
29781+
29782+ au_xino_lock_dir(sb, base_file, &ldir);
29783+ /* mnt_want_write() is unnecessary here */
29784+ file = au_xino_create2(base_file, NULL);
29785+ au_xino_unlock_dir(&ldir);
29786+ err = PTR_ERR(file);
29787+ if (IS_ERR(file))
29788+ goto out;
29789+ br->br_xino.xi_file = file;
29790+ } else {
29791+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
29792+ get_file(br->br_xino.xi_file);
29793+ }
29794+
29795+ ino = AUFS_ROOT_INO;
29796+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29797+ h_ino, ino);
b752ccd1
AM
29798+ if (unlikely(err)) {
29799+ fput(br->br_xino.xi_file);
29800+ br->br_xino.xi_file = NULL;
29801+ }
1facf9fc 29802+
4f0767ce 29803+out:
1facf9fc 29804+ return err;
29805+}
29806+
29807+/* ---------------------------------------------------------------------- */
29808+
29809+/* truncate a xino bitmap file */
29810+
29811+/* todo: slow */
29812+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
29813+{
29814+ int err, bit;
29815+ ssize_t sz;
29816+ unsigned long pindex;
29817+ loff_t pos, pend;
29818+ struct au_sbinfo *sbinfo;
29819+ au_readf_t func;
29820+ ino_t *ino;
29821+ unsigned long *p;
29822+
29823+ err = 0;
29824+ sbinfo = au_sbi(sb);
dece6358 29825+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 29826+ p = sbinfo->si_xib_buf;
29827+ func = sbinfo->si_xread;
29828+ pend = i_size_read(file->f_dentry->d_inode);
29829+ pos = 0;
29830+ while (pos < pend) {
29831+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
29832+ err = sz;
29833+ if (unlikely(sz <= 0))
29834+ goto out;
29835+
29836+ err = 0;
29837+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
29838+ if (unlikely(*ino < AUFS_FIRST_INO))
29839+ continue;
29840+
29841+ xib_calc_bit(*ino, &pindex, &bit);
29842+ AuDebugOn(page_bits <= bit);
29843+ err = xib_pindex(sb, pindex);
29844+ if (!err)
29845+ set_bit(bit, p);
29846+ else
29847+ goto out;
29848+ }
29849+ }
29850+
4f0767ce 29851+out:
1facf9fc 29852+ return err;
29853+}
29854+
29855+static int xib_restore(struct super_block *sb)
29856+{
29857+ int err;
29858+ aufs_bindex_t bindex, bend;
29859+ void *page;
29860+
29861+ err = -ENOMEM;
29862+ page = (void *)__get_free_page(GFP_NOFS);
29863+ if (unlikely(!page))
29864+ goto out;
29865+
29866+ err = 0;
29867+ bend = au_sbend(sb);
29868+ for (bindex = 0; !err && bindex <= bend; bindex++)
29869+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
29870+ err = do_xib_restore
29871+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
29872+ else
29873+ AuDbg("b%d\n", bindex);
29874+ free_page((unsigned long)page);
29875+
4f0767ce 29876+out:
1facf9fc 29877+ return err;
29878+}
29879+
29880+int au_xib_trunc(struct super_block *sb)
29881+{
29882+ int err;
29883+ ssize_t sz;
29884+ loff_t pos;
29885+ struct au_xino_lock_dir ldir;
29886+ struct au_sbinfo *sbinfo;
29887+ unsigned long *p;
29888+ struct file *file;
29889+
dece6358
AM
29890+ SiMustWriteLock(sb);
29891+
1facf9fc 29892+ err = 0;
29893+ sbinfo = au_sbi(sb);
29894+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
29895+ goto out;
29896+
29897+ file = sbinfo->si_xib;
29898+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
29899+ goto out;
29900+
29901+ au_xino_lock_dir(sb, file, &ldir);
29902+ /* mnt_want_write() is unnecessary here */
29903+ file = au_xino_create2(sbinfo->si_xib, NULL);
29904+ au_xino_unlock_dir(&ldir);
29905+ err = PTR_ERR(file);
29906+ if (IS_ERR(file))
29907+ goto out;
29908+ fput(sbinfo->si_xib);
29909+ sbinfo->si_xib = file;
29910+
29911+ p = sbinfo->si_xib_buf;
29912+ memset(p, 0, PAGE_SIZE);
29913+ pos = 0;
29914+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
29915+ if (unlikely(sz != PAGE_SIZE)) {
29916+ err = sz;
29917+ AuIOErr("err %d\n", err);
29918+ if (sz >= 0)
29919+ err = -EIO;
29920+ goto out;
29921+ }
29922+
29923+ mutex_lock(&sbinfo->si_xib_mtx);
29924+ /* mnt_want_write() is unnecessary here */
29925+ err = xib_restore(sb);
29926+ mutex_unlock(&sbinfo->si_xib_mtx);
29927+
29928+out:
29929+ return err;
29930+}
29931+
29932+/* ---------------------------------------------------------------------- */
29933+
29934+/*
29935+ * xino mount option handlers
29936+ */
29937+static au_readf_t find_readf(struct file *h_file)
29938+{
29939+ const struct file_operations *fop = h_file->f_op;
29940+
29941+ if (fop) {
29942+ if (fop->read)
29943+ return fop->read;
29944+ if (fop->aio_read)
29945+ return do_sync_read;
29946+ }
29947+ return ERR_PTR(-ENOSYS);
29948+}
29949+
29950+static au_writef_t find_writef(struct file *h_file)
29951+{
29952+ const struct file_operations *fop = h_file->f_op;
29953+
29954+ if (fop) {
29955+ if (fop->write)
29956+ return fop->write;
29957+ if (fop->aio_write)
29958+ return do_sync_write;
29959+ }
29960+ return ERR_PTR(-ENOSYS);
29961+}
29962+
29963+/* xino bitmap */
29964+static void xino_clear_xib(struct super_block *sb)
29965+{
29966+ struct au_sbinfo *sbinfo;
29967+
dece6358
AM
29968+ SiMustWriteLock(sb);
29969+
1facf9fc 29970+ sbinfo = au_sbi(sb);
29971+ sbinfo->si_xread = NULL;
29972+ sbinfo->si_xwrite = NULL;
29973+ if (sbinfo->si_xib)
29974+ fput(sbinfo->si_xib);
29975+ sbinfo->si_xib = NULL;
29976+ free_page((unsigned long)sbinfo->si_xib_buf);
29977+ sbinfo->si_xib_buf = NULL;
29978+}
29979+
29980+static int au_xino_set_xib(struct super_block *sb, struct file *base)
29981+{
29982+ int err;
29983+ loff_t pos;
29984+ struct au_sbinfo *sbinfo;
29985+ struct file *file;
29986+
dece6358
AM
29987+ SiMustWriteLock(sb);
29988+
1facf9fc 29989+ sbinfo = au_sbi(sb);
29990+ file = au_xino_create2(base, sbinfo->si_xib);
29991+ err = PTR_ERR(file);
29992+ if (IS_ERR(file))
29993+ goto out;
29994+ if (sbinfo->si_xib)
29995+ fput(sbinfo->si_xib);
29996+ sbinfo->si_xib = file;
29997+ sbinfo->si_xread = find_readf(file);
29998+ sbinfo->si_xwrite = find_writef(file);
29999+
30000+ err = -ENOMEM;
30001+ if (!sbinfo->si_xib_buf)
30002+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
30003+ if (unlikely(!sbinfo->si_xib_buf))
30004+ goto out_unset;
30005+
30006+ sbinfo->si_xib_last_pindex = 0;
30007+ sbinfo->si_xib_next_bit = 0;
30008+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
30009+ pos = 0;
30010+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
30011+ PAGE_SIZE, &pos);
30012+ if (unlikely(err != PAGE_SIZE))
30013+ goto out_free;
30014+ }
30015+ err = 0;
30016+ goto out; /* success */
30017+
4f0767ce 30018+out_free:
1facf9fc 30019+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
30020+ sbinfo->si_xib_buf = NULL;
30021+ if (err >= 0)
30022+ err = -EIO;
4f0767ce 30023+out_unset:
b752ccd1
AM
30024+ fput(sbinfo->si_xib);
30025+ sbinfo->si_xib = NULL;
30026+ sbinfo->si_xread = NULL;
30027+ sbinfo->si_xwrite = NULL;
4f0767ce 30028+out:
b752ccd1 30029+ return err;
1facf9fc 30030+}
30031+
b752ccd1
AM
30032+/* xino for each branch */
30033+static void xino_clear_br(struct super_block *sb)
30034+{
30035+ aufs_bindex_t bindex, bend;
30036+ struct au_branch *br;
1facf9fc 30037+
b752ccd1
AM
30038+ bend = au_sbend(sb);
30039+ for (bindex = 0; bindex <= bend; bindex++) {
30040+ br = au_sbr(sb, bindex);
30041+ if (!br || !br->br_xino.xi_file)
30042+ continue;
30043+
30044+ fput(br->br_xino.xi_file);
30045+ br->br_xino.xi_file = NULL;
30046+ }
30047+}
30048+
30049+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 30050+{
30051+ int err;
b752ccd1
AM
30052+ ino_t ino;
30053+ aufs_bindex_t bindex, bend, bshared;
30054+ struct {
30055+ struct file *old, *new;
30056+ } *fpair, *p;
30057+ struct au_branch *br;
30058+ struct inode *inode;
30059+ au_writef_t writef;
1facf9fc 30060+
b752ccd1
AM
30061+ SiMustWriteLock(sb);
30062+
30063+ err = -ENOMEM;
30064+ bend = au_sbend(sb);
30065+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
30066+ if (unlikely(!fpair))
1facf9fc 30067+ goto out;
30068+
b752ccd1
AM
30069+ inode = sb->s_root->d_inode;
30070+ ino = AUFS_ROOT_INO;
30071+ writef = au_sbi(sb)->si_xwrite;
30072+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
30073+ br = au_sbr(sb, bindex);
30074+ bshared = is_sb_shared(sb, bindex, bindex - 1);
30075+ if (bshared >= 0) {
30076+ /* shared xino */
30077+ *p = fpair[bshared];
30078+ get_file(p->new);
30079+ }
30080+
30081+ if (!p->new) {
30082+ /* new xino */
30083+ p->old = br->br_xino.xi_file;
30084+ p->new = au_xino_create2(base, br->br_xino.xi_file);
30085+ err = PTR_ERR(p->new);
30086+ if (IS_ERR(p->new)) {
30087+ p->new = NULL;
30088+ goto out_pair;
30089+ }
30090+ }
30091+
30092+ err = au_xino_do_write(writef, p->new,
30093+ au_h_iptr(inode, bindex)->i_ino, ino);
30094+ if (unlikely(err))
30095+ goto out_pair;
30096+ }
30097+
30098+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
30099+ br = au_sbr(sb, bindex);
30100+ if (br->br_xino.xi_file)
30101+ fput(br->br_xino.xi_file);
30102+ get_file(p->new);
30103+ br->br_xino.xi_file = p->new;
30104+ }
1facf9fc 30105+
4f0767ce 30106+out_pair:
b752ccd1
AM
30107+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
30108+ if (p->new)
30109+ fput(p->new);
30110+ else
30111+ break;
30112+ kfree(fpair);
4f0767ce 30113+out:
1facf9fc 30114+ return err;
30115+}
b752ccd1
AM
30116+
30117+void au_xino_clr(struct super_block *sb)
30118+{
30119+ struct au_sbinfo *sbinfo;
30120+
30121+ au_xigen_clr(sb);
30122+ xino_clear_xib(sb);
30123+ xino_clear_br(sb);
30124+ sbinfo = au_sbi(sb);
30125+ /* lvalue, do not call au_mntflags() */
30126+ au_opt_clr(sbinfo->si_mntflags, XINO);
30127+}
30128+
30129+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
30130+{
30131+ int err, skip;
30132+ struct dentry *parent, *cur_parent;
30133+ struct qstr *dname, *cur_name;
30134+ struct file *cur_xino;
30135+ struct inode *dir;
30136+ struct au_sbinfo *sbinfo;
30137+
30138+ SiMustWriteLock(sb);
30139+
30140+ err = 0;
30141+ sbinfo = au_sbi(sb);
30142+ parent = dget_parent(xino->file->f_dentry);
30143+ if (remount) {
30144+ skip = 0;
30145+ dname = &xino->file->f_dentry->d_name;
30146+ cur_xino = sbinfo->si_xib;
30147+ if (cur_xino) {
30148+ cur_parent = dget_parent(cur_xino->f_dentry);
30149+ cur_name = &cur_xino->f_dentry->d_name;
30150+ skip = (cur_parent == parent
30151+ && dname->len == cur_name->len
30152+ && !memcmp(dname->name, cur_name->name,
30153+ dname->len));
30154+ dput(cur_parent);
30155+ }
30156+ if (skip)
30157+ goto out;
30158+ }
30159+
30160+ au_opt_set(sbinfo->si_mntflags, XINO);
30161+ dir = parent->d_inode;
30162+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
30163+ /* mnt_want_write() is unnecessary here */
30164+ err = au_xino_set_xib(sb, xino->file);
30165+ if (!err)
30166+ err = au_xigen_set(sb, xino->file);
30167+ if (!err)
30168+ err = au_xino_set_br(sb, xino->file);
30169+ mutex_unlock(&dir->i_mutex);
30170+ if (!err)
30171+ goto out; /* success */
30172+
30173+ /* reset all */
30174+ AuIOErr("failed creating xino(%d).\n", err);
30175+
4f0767ce 30176+out:
b752ccd1
AM
30177+ dput(parent);
30178+ return err;
30179+}
30180+
30181+/* ---------------------------------------------------------------------- */
30182+
30183+/*
30184+ * create a xinofile at the default place/path.
30185+ */
30186+struct file *au_xino_def(struct super_block *sb)
30187+{
30188+ struct file *file;
30189+ char *page, *p;
30190+ struct au_branch *br;
30191+ struct super_block *h_sb;
30192+ struct path path;
30193+ aufs_bindex_t bend, bindex, bwr;
30194+
30195+ br = NULL;
30196+ bend = au_sbend(sb);
30197+ bwr = -1;
30198+ for (bindex = 0; bindex <= bend; bindex++) {
30199+ br = au_sbr(sb, bindex);
30200+ if (au_br_writable(br->br_perm)
30201+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
30202+ bwr = bindex;
30203+ break;
30204+ }
30205+ }
30206+
7f207e10
AM
30207+ if (bwr >= 0) {
30208+ file = ERR_PTR(-ENOMEM);
30209+ page = __getname_gfp(GFP_NOFS);
30210+ if (unlikely(!page))
30211+ goto out;
30212+ path.mnt = br->br_mnt;
30213+ path.dentry = au_h_dptr(sb->s_root, bwr);
30214+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
30215+ file = (void *)p;
30216+ if (!IS_ERR(p)) {
30217+ strcat(p, "/" AUFS_XINO_FNAME);
30218+ AuDbg("%s\n", p);
30219+ file = au_xino_create(sb, p, /*silent*/0);
30220+ if (!IS_ERR(file))
30221+ au_xino_brid_set(sb, br->br_id);
30222+ }
30223+ __putname(page);
30224+ } else {
30225+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
30226+ if (IS_ERR(file))
30227+ goto out;
30228+ h_sb = file->f_dentry->d_sb;
30229+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
30230+ pr_err("xino doesn't support %s(%s)\n",
30231+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
30232+ fput(file);
30233+ file = ERR_PTR(-EINVAL);
30234+ }
30235+ if (!IS_ERR(file))
30236+ au_xino_brid_set(sb, -1);
30237+ }
0c5527e5 30238+
7f207e10
AM
30239+out:
30240+ return file;
30241+}
30242+
30243+/* ---------------------------------------------------------------------- */
30244+
30245+int au_xino_path(struct seq_file *seq, struct file *file)
30246+{
30247+ int err;
30248+
30249+ err = au_seq_path(seq, &file->f_path);
30250+ if (unlikely(err < 0))
30251+ goto out;
30252+
30253+ err = 0;
30254+#define Deleted "\\040(deleted)"
30255+ seq->count -= sizeof(Deleted) - 1;
30256+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
30257+ sizeof(Deleted) - 1));
30258+#undef Deleted
30259+
30260+out:
30261+ return err;
30262+}
30263diff -urN /usr/share/empty/include/linux/aufs_type.h linux/include/linux/aufs_type.h
30264--- /usr/share/empty/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
f6c5ef8b
AM
30265+++ linux/include/linux/aufs_type.h 2012-02-13 21:54:56.973105100 +0100
30266@@ -0,0 +1,233 @@
7f207e10 30267+/*
f6c5ef8b 30268+ * Copyright (C) 2005-2012 Junjiro R. Okajima
7f207e10
AM
30269+ *
30270+ * This program, aufs is free software; you can redistribute it and/or modify
30271+ * it under the terms of the GNU General Public License as published by
30272+ * the Free Software Foundation; either version 2 of the License, or
30273+ * (at your option) any later version.
30274+ *
30275+ * This program is distributed in the hope that it will be useful,
30276+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30277+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30278+ * GNU General Public License for more details.
30279+ *
30280+ * You should have received a copy of the GNU General Public License
30281+ * along with this program; if not, write to the Free Software
30282+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30283+ */
30284+
30285+#ifndef __AUFS_TYPE_H__
30286+#define __AUFS_TYPE_H__
30287+
f6c5ef8b
AM
30288+#define AUFS_NAME "aufs"
30289+
9dbd164d 30290+#ifdef __KERNEL__
f6c5ef8b
AM
30291+/*
30292+ * define it before including all other headers.
30293+ * sched.h may use pr_* macros before defining "current", so define the
30294+ * no-current version first, and re-define later.
30295+ */
30296+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
30297+#include <linux/sched.h>
30298+#undef pr_fmt
30299+#define pr_fmt(fmt) AUFS_NAME " %s:%d:%s[%d]: " fmt, \
30300+ __func__, __LINE__, current->comm, current->pid
9dbd164d
AM
30301+#else
30302+#include <stdint.h>
30303+#include <sys/types.h>
f6c5ef8b 30304+#endif /* __KERNEL__ */
7f207e10 30305+
f6c5ef8b
AM
30306+#include <linux/limits.h>
30307+
30308+#define AUFS_VERSION "3.2-20120109"
7f207e10
AM
30309+
30310+/* todo? move this to linux-2.6.19/include/magic.h */
30311+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
30312+
30313+/* ---------------------------------------------------------------------- */
30314+
30315+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 30316+typedef int8_t aufs_bindex_t;
7f207e10
AM
30317+#define AUFS_BRANCH_MAX 127
30318+#else
9dbd164d 30319+typedef int16_t aufs_bindex_t;
7f207e10
AM
30320+#ifdef CONFIG_AUFS_BRANCH_MAX_511
30321+#define AUFS_BRANCH_MAX 511
30322+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
30323+#define AUFS_BRANCH_MAX 1023
30324+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
30325+#define AUFS_BRANCH_MAX 32767
30326+#endif
30327+#endif
30328+
30329+#ifdef __KERNEL__
30330+#ifndef AUFS_BRANCH_MAX
30331+#error unknown CONFIG_AUFS_BRANCH_MAX value
30332+#endif
30333+#endif /* __KERNEL__ */
30334+
30335+/* ---------------------------------------------------------------------- */
30336+
7f207e10
AM
30337+#define AUFS_FSTYPE AUFS_NAME
30338+
30339+#define AUFS_ROOT_INO 2
30340+#define AUFS_FIRST_INO 11
30341+
30342+#define AUFS_WH_PFX ".wh."
30343+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
30344+#define AUFS_WH_TMP_LEN 4
30345+/* a limit for rmdir/rename a dir */
30346+#define AUFS_MAX_NAMELEN (NAME_MAX \
30347+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
30348+ - 1 /* dot */\
30349+ - AUFS_WH_TMP_LEN) /* hex */
30350+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
30351+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
30352+#define AUFS_XINO_TRUNC_INIT 64 /* blocks */
30353+#define AUFS_XINO_TRUNC_STEP 4 /* blocks */
30354+#define AUFS_DIRWH_DEF 3
30355+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 30356+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
30357+#define AUFS_RDBLK_DEF 512 /* bytes */
30358+#define AUFS_RDHASH_DEF 32
30359+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
30360+#define AUFS_MFS_DEF_SEC 30 /* seconds */
30361+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
7f207e10
AM
30362+#define AUFS_PLINK_WARN 100 /* number of plinks */
30363+
30364+/* pseudo-link maintenance under /proc */
30365+#define AUFS_PLINK_MAINT_NAME "plink_maint"
30366+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
30367+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
30368+
30369+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
30370+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
30371+
30372+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
30373+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
30374+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
30375+
30376+/* doubly whiteouted */
30377+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
30378+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
30379+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
30380+
1e00d052 30381+/* branch permissions and attributes */
7f207e10
AM
30382+#define AUFS_BRPERM_RW "rw"
30383+#define AUFS_BRPERM_RO "ro"
30384+#define AUFS_BRPERM_RR "rr"
1e00d052
AM
30385+#define AUFS_BRRATTR_WH "wh"
30386+#define AUFS_BRWATTR_NLWH "nolwh"
7f207e10
AM
30387+
30388+/* ---------------------------------------------------------------------- */
30389+
30390+/* ioctl */
30391+enum {
30392+ /* readdir in userspace */
30393+ AuCtl_RDU,
30394+ AuCtl_RDU_INO,
30395+
30396+ /* pathconf wrapper */
027c5e7a
AM
30397+ AuCtl_WBR_FD,
30398+
30399+ /* busy inode */
30400+ AuCtl_IBUSY
7f207e10
AM
30401+};
30402+
30403+/* borrowed from linux/include/linux/kernel.h */
30404+#ifndef ALIGN
30405+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
30406+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
30407+#endif
30408+
30409+/* borrowed from linux/include/linux/compiler-gcc3.h */
30410+#ifndef __aligned
30411+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
30412+#endif
30413+
30414+#ifdef __KERNEL__
30415+#ifndef __packed
7f207e10
AM
30416+#define __packed __attribute__((packed))
30417+#endif
53392da6 30418+#endif
7f207e10
AM
30419+
30420+struct au_rdu_cookie {
9dbd164d
AM
30421+ uint64_t h_pos;
30422+ int16_t bindex;
30423+ uint8_t flags;
30424+ uint8_t pad;
30425+ uint32_t generation;
7f207e10
AM
30426+} __aligned(8);
30427+
30428+struct au_rdu_ent {
9dbd164d
AM
30429+ uint64_t ino;
30430+ int16_t bindex;
30431+ uint8_t type;
30432+ uint8_t nlen;
30433+ uint8_t wh;
7f207e10
AM
30434+ char name[0];
30435+} __aligned(8);
30436+
30437+static inline int au_rdu_len(int nlen)
30438+{
30439+ /* include the terminating NUL */
30440+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 30441+ sizeof(uint64_t));
7f207e10
AM
30442+}
30443+
30444+union au_rdu_ent_ul {
30445+ struct au_rdu_ent __user *e;
9dbd164d 30446+ uint64_t ul;
7f207e10
AM
30447+};
30448+
30449+enum {
30450+ AufsCtlRduV_SZ,
30451+ AufsCtlRduV_End
30452+};
30453+
30454+struct aufs_rdu {
30455+ /* input */
30456+ union {
9dbd164d
AM
30457+ uint64_t sz; /* AuCtl_RDU */
30458+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
30459+ };
30460+ union au_rdu_ent_ul ent;
9dbd164d 30461+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
30462+
30463+ /* input/output */
9dbd164d 30464+ uint32_t blk;
7f207e10
AM
30465+
30466+ /* output */
30467+ union au_rdu_ent_ul tail;
30468+ /* number of entries which were added in a single call */
9dbd164d
AM
30469+ uint64_t rent;
30470+ uint8_t full;
30471+ uint8_t shwh;
7f207e10
AM
30472+
30473+ struct au_rdu_cookie cookie;
30474+} __aligned(8);
30475+
1e00d052
AM
30476+/* ---------------------------------------------------------------------- */
30477+
30478+struct aufs_wbr_fd {
9dbd164d
AM
30479+ uint32_t oflags;
30480+ int16_t brid;
1e00d052
AM
30481+} __aligned(8);
30482+
30483+/* ---------------------------------------------------------------------- */
30484+
027c5e7a 30485+struct aufs_ibusy {
9dbd164d
AM
30486+ uint64_t ino, h_ino;
30487+ int16_t bindex;
027c5e7a
AM
30488+} __aligned(8);
30489+
1e00d052
AM
30490+/* ---------------------------------------------------------------------- */
30491+
7f207e10
AM
30492+#define AuCtlType 'A'
30493+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
30494+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
30495+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
30496+ struct aufs_wbr_fd)
027c5e7a 30497+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
7f207e10
AM
30498+
30499+#endif /* __AUFS_TYPE_H__ */
9dbd164d 30500
This page took 4.4268 seconds and 4 git commands to generate.