]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs3.patch
- fix --with rescuecd
[packages/kernel.git] / kernel-aufs3.patch
CommitLineData
53392da6 1aufs3.0 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
2cbb1c4b 4index 19891aa..b660b64 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
2cbb1c4b 7@@ -208,6 +208,7 @@ source "fs/pstore/Kconfig"
7f207e10
AM
8 source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
2cbb1c4b 16index fb68c2b..c031a85 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
2cbb1c4b 19@@ -124,3 +124,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
7f207e10
AM
20 obj-$(CONFIG_EXOFS_FS) += exofs/
21 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 22 obj-$(CONFIG_PSTORE) += pstore/
2cbb1c4b 23+obj-$(CONFIG_AUFS_FS) += aufs/
7f207e10 24diff --git a/include/linux/Kbuild b/include/linux/Kbuild
2cbb1c4b 25index 01f6362..8b3b9f1 100644
7f207e10
AM
26--- a/include/linux/Kbuild
27+++ b/include/linux/Kbuild
2cbb1c4b 28@@ -65,6 +65,7 @@ header-y += atmppp.h
7f207e10
AM
29 header-y += atmsap.h
30 header-y += atmsvc.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs.h
34 header-y += auto_fs4.h
35 header-y += auxvec.h
53392da6 36aufs3.0 base patch
7f207e10
AM
37
38diff --git a/fs/namei.c b/fs/namei.c
53392da6 39index 14ab8d3..eb4aef1 100644
7f207e10
AM
40--- a/fs/namei.c
41+++ b/fs/namei.c
53392da6 42@@ -1697,7 +1697,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
7f207e10
AM
43 * needs parent already locked. Doesn't follow mounts.
44 * SMP-safe.
45 */
46-static struct dentry *lookup_hash(struct nameidata *nd)
47+struct dentry *lookup_hash(struct nameidata *nd)
48 {
7f207e10
AM
49 return __lookup_hash(&nd->last, nd->path.dentry, nd);
50 }
7f207e10 51diff --git a/fs/splice.c b/fs/splice.c
2cbb1c4b 52index aa866d3..19afec6 100644
7f207e10
AM
53--- a/fs/splice.c
54+++ b/fs/splice.c
2cbb1c4b 55@@ -1085,8 +1085,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
56 /*
57 * Attempt to initiate a splice from pipe to file.
58 */
59-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
60- loff_t *ppos, size_t len, unsigned int flags)
61+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
62+ loff_t *ppos, size_t len, unsigned int flags)
63 {
64 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
65 loff_t *, size_t, unsigned int);
2cbb1c4b 66@@ -1113,9 +1113,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
67 /*
68 * Attempt to initiate a splice from a file to a pipe.
69 */
70-static long do_splice_to(struct file *in, loff_t *ppos,
71- struct pipe_inode_info *pipe, size_t len,
72- unsigned int flags)
73+long do_splice_to(struct file *in, loff_t *ppos,
74+ struct pipe_inode_info *pipe, size_t len,
75+ unsigned int flags)
76 {
77 ssize_t (*splice_read)(struct file *, loff_t *,
78 struct pipe_inode_info *, size_t, unsigned int);
79diff --git a/include/linux/namei.h b/include/linux/namei.h
2cbb1c4b 80index eba45ea..21ed6c9 100644
7f207e10
AM
81--- a/include/linux/namei.h
82+++ b/include/linux/namei.h
2cbb1c4b 83@@ -82,6 +82,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
7f207e10
AM
84 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
85 int (*open)(struct inode *, struct file *));
86
87+extern struct dentry *lookup_hash(struct nameidata *nd);
7f207e10
AM
88 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
89
027c5e7a 90 extern int follow_down_one(struct path *);
7f207e10
AM
91diff --git a/include/linux/splice.h b/include/linux/splice.h
92index 997c3b4..be9a153 100644
93--- a/include/linux/splice.h
94+++ b/include/linux/splice.h
95@@ -89,4 +89,10 @@ extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
96 extern void splice_shrink_spd(struct pipe_inode_info *,
97 struct splice_pipe_desc *);
98
99+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
100+ loff_t *ppos, size_t len, unsigned int flags);
101+extern long do_splice_to(struct file *in, loff_t *ppos,
102+ struct pipe_inode_info *pipe, size_t len,
103+ unsigned int flags);
1facf9fc 104+
7f207e10 105 #endif
53392da6 106aufs3.0 standalone patch
7f207e10
AM
107
108diff --git a/fs/file_table.c b/fs/file_table.c
2cbb1c4b 109index 01e4c1e..0e800e2 100644
7f207e10
AM
110--- a/fs/file_table.c
111+++ b/fs/file_table.c
2cbb1c4b 112@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file)
7f207e10
AM
113 }
114 }
115
116+EXPORT_SYMBOL(file_sb_list_del);
1facf9fc 117+
7f207e10
AM
118 #ifdef CONFIG_SMP
119
120 /*
121diff --git a/fs/inode.c b/fs/inode.c
2cbb1c4b 122index 43566d1..4291eae 100644
7f207e10
AM
123--- a/fs/inode.c
124+++ b/fs/inode.c
2cbb1c4b
JR
125@@ -69,6 +69,7 @@ static DEFINE_SPINLOCK(inode_lru_lock);
126
127 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
128 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
129+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
130
131 /*
2cbb1c4b 132 * iprune_sem provides exclusion between the icache shrinking and the
7f207e10 133diff --git a/fs/namei.c b/fs/namei.c
53392da6 134index eb4aef1..66d04c6 100644
7f207e10
AM
135--- a/fs/namei.c
136+++ b/fs/namei.c
2cbb1c4b 137@@ -365,6 +365,7 @@ int deny_write_access(struct file * file)
7f207e10
AM
138
139 return 0;
140 }
141+EXPORT_SYMBOL(deny_write_access);
142
143 /**
144 * path_get - get a reference to a path
53392da6 145@@ -1701,6 +1702,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
027c5e7a 146 {
7f207e10
AM
147 return __lookup_hash(&nd->last, nd->path.dentry, nd);
148 }
149+EXPORT_SYMBOL(lookup_hash);
150
7f207e10
AM
151 /**
152 * lookup_one_len - filesystem helper to lookup single pathname component
153diff --git a/fs/namespace.c b/fs/namespace.c
2cbb1c4b 154index fe59bd1..7d3843f 100644
7f207e10
AM
155--- a/fs/namespace.c
156+++ b/fs/namespace.c
2cbb1c4b 157@@ -1508,6 +1508,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
158 }
159 return 0;
160 }
161+EXPORT_SYMBOL(iterate_mounts);
162
163 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
164 {
165diff --git a/fs/notify/group.c b/fs/notify/group.c
166index d309f38..f0e9568 100644
167--- a/fs/notify/group.c
168+++ b/fs/notify/group.c
169@@ -22,6 +22,7 @@
170 #include <linux/srcu.h>
171 #include <linux/rculist.h>
172 #include <linux/wait.h>
173+#include <linux/module.h>
174
175 #include <linux/fsnotify_backend.h>
176 #include "fsnotify.h"
177@@ -70,6 +71,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
178 if (atomic_dec_and_test(&group->refcnt))
179 fsnotify_destroy_group(group);
180 }
181+EXPORT_SYMBOL(fsnotify_put_group);
182
183 /*
184 * Create a new fsnotify_group and hold a reference for the group returned.
185@@ -102,3 +104,4 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
186
187 return group;
188 }
189+EXPORT_SYMBOL(fsnotify_alloc_group);
190diff --git a/fs/notify/mark.c b/fs/notify/mark.c
2cbb1c4b 191index 252ab1f..2199b9b 100644
7f207e10
AM
192--- a/fs/notify/mark.c
193+++ b/fs/notify/mark.c
2cbb1c4b 194@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10
AM
195 if (atomic_dec_and_test(&mark->refcnt))
196 mark->free_mark(mark);
197 }
198+EXPORT_SYMBOL(fsnotify_put_mark);
199
200 /*
201 * Any time a mark is getting freed we end up here.
2cbb1c4b 202@@ -189,6 +190,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
7f207e10
AM
203 if (unlikely(atomic_dec_and_test(&group->num_marks)))
204 fsnotify_final_destroy_group(group);
205 }
206+EXPORT_SYMBOL(fsnotify_destroy_mark);
207
208 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
209 {
2cbb1c4b 210@@ -276,6 +278,7 @@ err:
7f207e10
AM
211
212 return ret;
213 }
214+EXPORT_SYMBOL(fsnotify_add_mark);
215
216 /*
217 * clear any marks in a group in which mark->flags & flags is true
2cbb1c4b 218@@ -331,6 +334,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
219 atomic_set(&mark->refcnt, 1);
220 mark->free_mark = free_mark;
221 }
222+EXPORT_SYMBOL(fsnotify_init_mark);
223
224 static int fsnotify_mark_destroy(void *ignored)
225 {
226diff --git a/fs/open.c b/fs/open.c
2cbb1c4b 227index b52cf01..c1b341c 100644
7f207e10
AM
228--- a/fs/open.c
229+++ b/fs/open.c
230@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
231 mutex_unlock(&dentry->d_inode->i_mutex);
232 return ret;
233 }
234+EXPORT_SYMBOL(do_truncate);
235
236 static long do_sys_truncate(const char __user *pathname, loff_t length)
237 {
238diff --git a/fs/splice.c b/fs/splice.c
2cbb1c4b 239index 19afec6..11f07f8 100644
7f207e10
AM
240--- a/fs/splice.c
241+++ b/fs/splice.c
2cbb1c4b 242@@ -1109,6 +1109,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
243
244 return splice_write(pipe, out, ppos, len, flags);
245 }
246+EXPORT_SYMBOL(do_splice_from);
247
248 /*
249 * Attempt to initiate a splice from a file to a pipe.
2cbb1c4b 250@@ -1135,6 +1136,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
251
252 return splice_read(in, ppos, pipe, len, flags);
253 }
254+EXPORT_SYMBOL(do_splice_to);
255
256 /**
257 * splice_direct_to_actor - splices data directly between two non-pipes
258diff --git a/security/commoncap.c b/security/commoncap.c
2cbb1c4b 259index a93b3b7..024282c 100644
7f207e10
AM
260--- a/security/commoncap.c
261+++ b/security/commoncap.c
53fd41f5 262@@ -978,3 +978,4 @@ int cap_file_mmap(struct file *file, uns
94337f0d 263 }
7f207e10
AM
264 return ret;
265 }
266+EXPORT_SYMBOL(cap_file_mmap);
267diff --git a/security/device_cgroup.c b/security/device_cgroup.c
2cbb1c4b 268index 1be6826..215278c 100644
7f207e10
AM
269--- a/security/device_cgroup.c
270+++ b/security/device_cgroup.c
2cbb1c4b 271@@ -508,6 +508,7 @@ found:
7f207e10
AM
272
273 return -EPERM;
274 }
2cbb1c4b 275+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
276
277 int devcgroup_inode_mknod(int mode, dev_t dev)
278 {
279diff --git a/security/security.c b/security/security.c
2cbb1c4b 280index 4ba6d4c..9f64bb8 100644
7f207e10
AM
281--- a/security/security.c
282+++ b/security/security.c
2cbb1c4b 283@@ -373,6 +373,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
284 return 0;
285 return security_ops->path_rmdir(dir, dentry);
286 }
287+EXPORT_SYMBOL(security_path_rmdir);
288
289 int security_path_unlink(struct path *dir, struct dentry *dentry)
290 {
2cbb1c4b 291@@ -389,6 +390,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
292 return 0;
293 return security_ops->path_symlink(dir, dentry, old_name);
294 }
295+EXPORT_SYMBOL(security_path_symlink);
296
297 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
298 struct dentry *new_dentry)
2cbb1c4b 299@@ -397,6 +399,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
300 return 0;
301 return security_ops->path_link(old_dentry, new_dir, new_dentry);
302 }
303+EXPORT_SYMBOL(security_path_link);
304
305 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
306 struct path *new_dir, struct dentry *new_dentry)
2cbb1c4b 307@@ -415,6 +418,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
308 return 0;
309 return security_ops->path_truncate(path);
310 }
311+EXPORT_SYMBOL(security_path_truncate);
312
313 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
314 mode_t mode)
2cbb1c4b 315@@ -423,6 +427,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
7f207e10
AM
316 return 0;
317 return security_ops->path_chmod(dentry, mnt, mode);
318 }
319+EXPORT_SYMBOL(security_path_chmod);
320
321 int security_path_chown(struct path *path, uid_t uid, gid_t gid)
322 {
2cbb1c4b 323@@ -430,6 +435,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
7f207e10
AM
324 return 0;
325 return security_ops->path_chown(path, uid, gid);
326 }
327+EXPORT_SYMBOL(security_path_chown);
328
329 int security_path_chroot(struct path *path)
330 {
2cbb1c4b 331@@ -506,6 +512,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
332 return 0;
333 return security_ops->inode_readlink(dentry);
334 }
335+EXPORT_SYMBOL(security_inode_readlink);
336
337 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
338 {
2cbb1c4b 339@@ -520,6 +527,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 340 return 0;
2cbb1c4b 341 return security_ops->inode_permission(inode, mask, 0);
7f207e10
AM
342 }
343+EXPORT_SYMBOL(security_inode_permission);
344
027c5e7a 345 int security_inode_exec_permission(struct inode *inode, unsigned int flags)
7f207e10 346 {
2cbb1c4b 347@@ -626,6 +634,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
348
349 return fsnotify_perm(file, mask);
350 }
351+EXPORT_SYMBOL(security_file_permission);
352
353 int security_file_alloc(struct file *file)
354 {
2cbb1c4b 355@@ -653,6 +662,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
7f207e10
AM
356 return ret;
357 return ima_file_mmap(file, prot);
358 }
359+EXPORT_SYMBOL(security_file_mmap);
360
361 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
362 unsigned long prot)
363diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
364--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 365+++ linux/Documentation/ABI/testing/debugfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
366@@ -0,0 +1,37 @@
367+What: /debug/aufs/si_<id>/
368+Date: March 2009
369+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
370+Description:
371+ Under /debug/aufs, a directory named si_<id> is created
372+ per aufs mount, where <id> is a unique id generated
373+ internally.
1facf9fc 374+
7f207e10
AM
375+What: /debug/aufs/si_<id>/xib
376+Date: March 2009
377+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
378+Description:
379+ It shows the consumed blocks by xib (External Inode Number
380+ Bitmap), its block size and file size.
381+ When the aufs mount option 'noxino' is specified, it
382+ will be empty. About XINO files, see the aufs manual.
383+
384+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
385+Date: March 2009
386+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
387+Description:
388+ It shows the consumed blocks by xino (External Inode Number
389+ Translation Table), its link count, block size and file
390+ size.
391+ When the aufs mount option 'noxino' is specified, it
392+ will be empty. About XINO files, see the aufs manual.
393+
394+What: /debug/aufs/si_<id>/xigen
395+Date: March 2009
396+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
397+Description:
398+ It shows the consumed blocks by xigen (External Inode
399+ Generation Table), its block size and file size.
400+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
401+ be created.
402+ When the aufs mount option 'noxino' is specified, it
403+ will be empty. About XINO files, see the aufs manual.
404diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
405--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 406+++ linux/Documentation/ABI/testing/sysfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
407@@ -0,0 +1,24 @@
408+What: /sys/fs/aufs/si_<id>/
409+Date: March 2009
410+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
411+Description:
412+ Under /sys/fs/aufs, a directory named si_<id> is created
413+ per aufs mount, where <id> is a unique id generated
414+ internally.
415+
416+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
417+Date: March 2009
418+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
419+Description:
420+ It shows the absolute path of a member directory (which
421+ is called branch) in aufs, and its permission.
422+
423+What: /sys/fs/aufs/si_<id>/xi_path
424+Date: March 2009
425+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
426+Description:
427+ It shows the absolute path of XINO (External Inode Number
428+ Bitmap, Translation Table and Generation Table) file
429+ even if it is the default path.
430+ When the aufs mount option 'noxino' is specified, it
431+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
432diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
433--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
434+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2011-08-24 13:30:24.727980364 +0200
435@@ -0,0 +1,162 @@
436+
437+# Copyright (C) 2005-2011 Junjiro R. Okajima
438+#
439+# This program is free software; you can redistribute it and/or modify
440+# it under the terms of the GNU General Public License as published by
441+# the Free Software Foundation; either version 2 of the License, or
442+# (at your option) any later version.
443+#
444+# This program is distributed in the hope that it will be useful,
445+# but WITHOUT ANY WARRANTY; without even the implied warranty of
446+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
447+# GNU General Public License for more details.
448+#
449+# You should have received a copy of the GNU General Public License
450+# along with this program; if not, write to the Free Software
451+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
452+
453+Introduction
454+----------------------------------------
455+
456+aufs [ei ju: ef es] | [a u f s]
457+1. abbrev. for "advanced multi-layered unification filesystem".
458+2. abbrev. for "another unionfs".
459+3. abbrev. for "auf das" in German which means "on the" in English.
460+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
461+ But "Filesystem aufs Filesystem" is hard to understand.
462+
463+AUFS is a filesystem with features:
464+- multi layered stackable unification filesystem, the member directory
465+ is called as a branch.
466+- branch permission and attribute, 'readonly', 'real-readonly',
467+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
468+ combination.
469+- internal "file copy-on-write".
470+- logical deletion, whiteout.
471+- dynamic branch manipulation, adding, deleting and changing permission.
472+- allow bypassing aufs, user's direct branch access.
473+- external inode number translation table and bitmap which maintains the
474+ persistent aufs inode number.
475+- seekable directory, including NFS readdir.
476+- file mapping, mmap and sharing pages.
477+- pseudo-link, hardlink over branches.
478+- loopback mounted filesystem as a branch.
479+- several policies to select one among multiple writable branches.
480+- revert a single systemcall when an error occurs in aufs.
481+- and more...
482+
483+
484+Multi Layered Stackable Unification Filesystem
485+----------------------------------------------------------------------
486+Most people already know what it is.
487+It is a filesystem which unifies several directories and provides a
488+merged single directory. When users access a file, the access will be
489+passed/re-directed/converted (sorry, I am not sure which English word is
490+correct) to the real file on the member filesystem. The member
491+filesystem is called 'lower filesystem' or 'branch' and has a mode
492+'readonly' and 'readwrite.' And the deletion for a file on the lower
493+readonly branch is handled by creating 'whiteout' on the upper writable
494+branch.
495+
496+On LKML, there have been discussions about UnionMount (Jan Blunck,
497+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
498+different approaches to implement the merged-view.
499+The former tries putting it into VFS, and the latter implements as a
500+separate filesystem.
501+(If I misunderstand about these implementations, please let me know and
502+I shall correct it. Because it is a long time ago when I read their
503+source files last time).
504+
505+UnionMount's approach can be small, but it may be hard to share
506+branches between several UnionMount since the whiteout in it is
507+implemented in the inode on branch filesystem and always
508+shared. According to Bharata's post, readdir does not seem to be
509+finished yet.
510+There are several missing features known in this implementation, such as
511+- for users, the inode number may change silently. eg. copy-up.
512+- link(2) may break by copy-up.
513+- read(2) may get an obsoleted filedata (fstat(2) too).
514+- fcntl(F_SETLK) may be broken by copy-up.
515+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
516+ open(O_RDWR).
517+
518+Unionfs has a longer history. When I started implementing a stacking filesystem
519+(Aug 2005), it already existed. It has virtual super_block, inode,
520+dentry and file objects and they have an array pointing lower same kind
521+objects. After contributing many patches for Unionfs, I re-started my
522+project AUFS (Jun 2006).
523+
524+In AUFS, the structure of the filesystem resembles Unionfs, but I
525+implemented my own ideas, approaches and enhancements and it became
526+a totally different one.
527+
528+Comparing DM snapshot and fs based implementation
529+- the number of bytes to be copied between devices is much smaller.
530+- the type of filesystem must be one and only.
531+- the fs must be writable, no readonly fs, even for the lower original
532+ device. so the compression fs will not be usable. but if we use
533+ loopback mount, we may address this issue.
534+ for instance,
535+ mount /cdrom/squashfs.img /sq
536+ losetup /sq/ext2.img
537+ losetup /somewhere/cow
538+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
539+- it will be difficult (or needs more operations) to extract the
540+ difference between the original device and COW.
541+- DM snapshot-merge may help a lot when users try merging. in the
542+ fs-layer union, users will use rsync(1).
543+
544+
545+Several characters/aspects of aufs
546+----------------------------------------------------------------------
547+
548+Aufs has several characters or aspects.
549+1. a filesystem, callee of VFS helper
550+2. sub-VFS, caller of VFS helper for branches
551+3. a virtual filesystem which maintains persistent inode number
552+4. reader/writer of files on branches, just like an application
553+
554+1. Callee of VFS Helper
555+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
556+unlink(2) from an application reaches sys_unlink() kernel function and
557+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
558+calls filesystem specific unlink operation. Actually aufs implements the
559+unlink operation but it behaves like a redirector.
560+
561+2. Caller of VFS Helper for Branches
562+aufs_unlink() passes the unlink request to the branch filesystem as if
563+it were called from VFS. So the called unlink operation of the branch
564+filesystem acts as usual. As a caller of VFS helper, aufs should handle
565+every necessary pre/post operation for the branch filesystem.
566+- acquire the lock for the parent dir on a branch
567+- lookup in a branch
568+- revalidate dentry on a branch
569+- mnt_want_write() for a branch
570+- vfs_unlink() for a branch
571+- mnt_drop_write() for a branch
572+- release the lock on a branch
573+
574+3. Persistent Inode Number
575+One of the most important issues for a filesystem is to maintain inode
576+numbers. This is particularly important to support exporting a
577+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
578+backend block device for its own. But some storage is necessary to
579+maintain inode number. It may be a large space and may not suit to keep
580+in memory. Aufs rents some space from its first writable branch
581+filesystem (by default) and creates file(s) on it. These files are
582+created by aufs internally and removed soon (currently) keeping opened.
583+Note: Because these files are removed, they are totally gone after
584+ unmounting aufs. It means the inode numbers are not persistent
585+ across unmount or reboot. I have a plan to make them really
586+ persistent which will be important for aufs on NFS server.
587+
588+4. Read/Write Files Internally (copy-on-write)
589+Because a branch can be readonly, when you write a file on it, aufs will
590+"copy-up" it to the upper writable branch internally. And then write the
591+originally requested thing to the file. Generally kernel doesn't
592+open/read/write file actively. In aufs, even a single write may cause an
593+internal "file copy". This behaviour is very similar to cp(1) command.
594+
595+Some people may think it is better to pass such work to user space
596+helper, instead of doing in kernel space. Actually I am still thinking
597+about it. But currently I have implemented it in kernel space.
598diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
599--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
600+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2011-08-24 13:30:24.727980364 +0200
601@@ -0,0 +1,226 @@
602+
603+# Copyright (C) 2005-2011 Junjiro R. Okajima
604+#
605+# This program is free software; you can redistribute it and/or modify
606+# it under the terms of the GNU General Public License as published by
607+# the Free Software Foundation; either version 2 of the License, or
608+# (at your option) any later version.
609+#
610+# This program is distributed in the hope that it will be useful,
611+# but WITHOUT ANY WARRANTY; without even the implied warranty of
612+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
613+# GNU General Public License for more details.
614+#
615+# You should have received a copy of the GNU General Public License
616+# along with this program; if not, write to the Free Software
617+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
618+
619+Basic Aufs Internal Structure
620+
621+Superblock/Inode/Dentry/File Objects
622+----------------------------------------------------------------------
623+Like an ordinary filesystem, aufs has its own
624+superblock/inode/dentry/file objects. All these objects have a
625+dynamically allocated array and store the same kind of pointers to the
626+lower filesystem, branch.
627+For example, when you build a union with one readwrite branch and one
628+readonly, mounted /au, /rw and /ro respectively.
629+- /au = /rw + /ro
630+- /ro/fileA exists but /rw/fileA does not
631+
632+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
633+pointers are stored in an aufs dentry. The array in aufs dentry will be,
634+- [0] = NULL
635+- [1] = /ro/fileA
636+
637+This style of array is essentially the same in the aufs
638+superblock/inode/dentry/file objects.
639+
640+Because aufs supports manipulating branches, ie. add/delete/change
641+dynamically, these objects have their own generation. When branches are
642+changed, the generation in aufs superblock is incremented. And the
643+generation in another object is compared when it is accessed.
644+When the generation in another object is obsolete, aufs refreshes the
645+internal array.
646+
647+
648+Superblock
649+----------------------------------------------------------------------
650+Additionally aufs superblock has some data for policies to select one
651+among multiple writable branches, XIB files, pseudo-links and kobject.
652+See below in detail.
653+About the policies which support copy-down a directory, see policy.txt
654+too.
655+
656+
657+Branch and XINO(External Inode Number Translation Table)
658+----------------------------------------------------------------------
659+Every branch has its own xino (external inode number translation table)
660+file. The xino file is created and unlinked by aufs internally. When two
661+members of a union exist on the same filesystem, they share the single
662+xino file.
663+The struct of a xino file is simple, just a sequence of aufs inode
664+numbers which is indexed by the lower inode number.
665+In the above sample, assume the inode number of /ro/fileA is i111 and
666+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
667+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
668+
669+When the inode numbers are not contiguous, the xino file will be sparse
670+which has a hole in it and doesn't consume as much disk space as it
671+might appear. If your branch filesystem consumes disk space for such
672+holes, then you should specify 'xino=' option at mounting aufs.
673+
674+Also a writable branch has three kinds of "whiteout bases". All these
675+are existed when the branch is joined to aufs and the names are
676+whiteout-ed doubly, so that users will never see their names in aufs
677+hierarchy.
678+1. a regular file which will be linked to all whiteouts.
679+2. a directory to store a pseudo-link.
680+3. a directory to store an "orphan-ed" file temporarily.
681+
682+1. Whiteout Base
683+ When you remove a file on a readonly branch, aufs handles it as a
684+ logical deletion and creates a whiteout on the upper writable branch
685+ as a hardlink of this file in order not to consume inode on the
686+ writable branch.
687+2. Pseudo-link Dir
688+ See below, Pseudo-link.
689+3. Step-Parent Dir
690+ When "fileC" exists on the lower readonly branch only and it is
691+ opened and removed with its parent dir, and then user writes
692+ something into it, then aufs copies-up fileC to this
693+ directory. Because there is no other dir to store fileC. After
694+ creating a file under this dir, the file is unlinked.
695+
696+Because aufs supports manipulating branches, ie. add/delete/change
697+dynamically, a branch has its own id. When the branch order changes, aufs
698+finds the new index by searching the branch id.
699+
700+
701+Pseudo-link
702+----------------------------------------------------------------------
703+Assume "fileA" exists on the lower readonly branch only and it is
704+hardlinked to "fileB" on the branch. When you write something to fileA,
705+aufs copies-up it to the upper writable branch. Additionally aufs
706+creates a hardlink under the Pseudo-link Directory of the writable
707+branch. The inode of a pseudo-link is kept in aufs super_block as a
708+simple list. If fileB is read after unlinking fileA, aufs returns
709+filedata from the pseudo-link instead of the lower readonly
710+branch. Because the pseudo-link is based upon the inode, to keep the
711+inode number by xino (see above) is important.
712+
713+All the hardlinks under the Pseudo-link Directory of the writable branch
714+should be restored in a proper location later. Aufs provides a utility
715+to do this. The userspace helpers executed at remounting and unmounting
716+to do this. The userspace helpers are executed at remounting and unmounting
717+aufs by default.
718+maintenance mode. In this mode, only the process which began the
719+maintenance mode (and its child processes) is allowed to operate in
720+aufs. Some other processes which are not related to the pseudo-link will
721+be allowed to run too, but the rest have to return an error or wait
722+until the maintenance mode ends. If a process already acquires an inode
723+mutex (in VFS), it has to return an error.
724+
725+
726+XIB(external inode number bitmap)
727+----------------------------------------------------------------------
728+In addition to the xino file per branch, aufs has an external inode number
729+bitmap in a superblock object. It is also a file, like a xino file.
730+It is a simple bitmap to mark whether the aufs inode number is in-use or
731+not.
732+To reduce the file I/O, aufs prepares a single memory page to cache xib.
733+
734+Aufs implements a feature to truncate/refresh both of xino and xib to
735+reduce the number of consumed disk blocks for these files.
736+
737+
738+Virtual or Vertical Dir, and Readdir in Userspace
739+----------------------------------------------------------------------
740+In order to support multiple layers (branches), aufs readdir operation
741+constructs a virtual dir block on memory. For readdir, aufs calls
742+vfs_readdir() internally for each dir on branches, merges their entries
743+with eliminating the whiteout-ed ones, and sets it to file (dir)
744+object. So the file object has its entry list until it is closed. The
745+entry list will be updated when the file position is zero and becomes
746+old. This decision is made in aufs automatically.
747+
748+The dynamically allocated memory block for the name of entries has a
749+unit of 512 bytes (by default) and stores the names contiguously (no
750+padding). Another block for each entry is handled by kmem_cache too.
751+While building dir blocks, aufs creates a hash list and judges whether
752+the entry is whiteouted by its upper branch or already listed.
753+The merged result is cached in the corresponding inode object and
754+maintained by a customizable life-time option.
755+
756+Some people may call it can be a security hole or invite DoS attack
757+since the opened and once readdir-ed dir (file object) holds its entry
758+list and becomes a pressure for system memory. But I'd say it is similar
759+to files under /proc or /sys. The virtual files in them also holds a
760+memory page (generally) while they are opened. When an idea to reduce
761+memory for them is introduced, it will be applied to aufs too.
762+For those who really hate this situation, I've developed readdir(3)
763+library which operates this merging in userspace. You just need to set
764+LD_PRELOAD environment variable, and aufs will not consume any memory in
765+kernel space for readdir(3).
766+
767+
768+Workqueue
769+----------------------------------------------------------------------
770+Aufs sometimes requires privilege access to a branch. For instance,
771+in copy-up/down operation. When a user process is going to make changes
772+to a file which exists in the lower readonly branch only, and the mode
773+of one of ancestor directories may not be writable by a user
774+process. Here aufs copy-up the file with its ancestors and they may
775+require privilege to set its owner/group/mode/etc.
776+This is a typical case of an application character of aufs (see
777+Introduction).
778+
779+Aufs uses workqueue synchronously for this case. It creates its own
780+workqueue. The workqueue is a kernel thread and has privilege. Aufs
781+passes the request to call mkdir or write (for example), and wait for
782+its completion. This approach solves a problem of a signal handler
783+simply.
784+If aufs didn't adopt the workqueue and changed the privilege of the
785+process, and if the mkdir/write call raises SIGXFSZ or another signal,
786+then the user process might gain a privilege or the generated core file
787+was owned by a superuser.
788+
789+Also aufs uses the system global workqueue ("events" kernel thread) too
790+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
791+whiteout base and etc. This is unrelated to a privilege.
792+Most of aufs operation tries acquiring a rw_semaphore for aufs
793+superblock at the beginning, at the same time waits for the completion
794+of all queued asynchronous tasks.
795+
796+
797+Whiteout
798+----------------------------------------------------------------------
799+The whiteout in aufs is very similar to Unionfs's. That is represented
800+by its filename. UnionMount takes an approach of a file mode, but I am
801+afraid several utilities (find(1) or something) will have to support it.
802+
803+Basically the whiteout represents "logical deletion" which stops aufs to
804+lookup further, but also it represents "dir is opaque" which also stop
805+lookup.
806+
807+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
808+In order to make several functions in a single systemcall to be
809+revertible, aufs adopts an approach to rename a directory to a temporary
810+unique whiteouted name.
811+For example, in rename(2) dir where the target dir already existed, aufs
812+renames the target dir to a temporary unique whiteouted name before the
813+actual rename on a branch and then handles other actions (make it opaque,
814+update the attributes, etc). If an error happens in these actions, aufs
815+simply renames the whiteouted name back and returns an error. If all are
816+succeeded, aufs registers a function to remove the whiteouted unique
817+temporary name completely and asynchronously to the system global
818+workqueue.
819+
820+
821+Copy-up
822+----------------------------------------------------------------------
823+It is a well-known feature or concept.
824+When a user modifies a file on a readonly branch, aufs operates "copy-up"
825+internally and makes change to the new file on the upper writable branch.
826+When the trigger systemcall does not update the timestamps of the parent
827+dir, aufs reverts it after copy-up.
828diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
829--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
830+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2011-08-24 13:30:24.727980364 +0200
831@@ -0,0 +1,106 @@
832+
833+# Copyright (C) 2005-2011 Junjiro R. Okajima
834+#
835+# This program is free software; you can redistribute it and/or modify
836+# it under the terms of the GNU General Public License as published by
837+# the Free Software Foundation; either version 2 of the License, or
838+# (at your option) any later version.
839+#
840+# This program is distributed in the hope that it will be useful,
841+# but WITHOUT ANY WARRANTY; without even the implied warranty of
842+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
843+# GNU General Public License for more details.
844+#
845+# You should have received a copy of the GNU General Public License
846+# along with this program; if not, write to the Free Software
847+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
848+
849+Lookup in a Branch
850+----------------------------------------------------------------------
851+Since aufs has a character of sub-VFS (see Introduction), it operates
852+lookup for branches as VFS does. It may be a heavy work. Generally
853+speaking struct nameidata is a bigger structure and includes many
854+information. But almost all lookup operation in aufs is the simplest
855+case, ie. lookup only an entry directly connected to its parent. Digging
856+down the directory hierarchy is unnecessary.
857+
858+VFS has a function lookup_one_len() for that use, but it is not usable
859+for a branch filesystem which requires struct nameidata. So aufs
860+implements a simple lookup wrapper function. When a branch filesystem
861+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
862+a simplest nameidata and calls lookup_hash().
863+Here aufs applies "a principle in NFSD", ie. if the filesystem supports
864+NFS-export, then it has to support NULL as a nameidata parameter for
865+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
866+aufs tests if ->s_export_op in the branch is NULL or not.
867+
868+When a branch is a remote filesystem, aufs basically trusts its
869+->d_revalidate(), also aufs forces the hardest revalidate tests for
870+them.
871+For d_revalidate, aufs implements three levels of revalidate tests. See
872+"Revalidate Dentry and UDBA" in detail.
873+
874+
875+Loopback Mount
876+----------------------------------------------------------------------
877+Basically aufs supports any type of filesystem and block device for a
878+branch (actually there are some exceptions). But it is prohibited to add
879+a loopback mounted one whose backend file exists in a filesystem which is
880+already added to aufs. The reason is to protect aufs from a recursive
881+lookup. If it was allowed, the aufs lookup operation might re-enter a
882+lookup for the loopback mounted branch in the same context, and will
883+cause a deadlock.
884+
885+
886+Revalidate Dentry and UDBA (User's Direct Branch Access)
887+----------------------------------------------------------------------
888+Generally VFS helpers re-validate a dentry as a part of lookup.
889+0. digging down the directory hierarchy.
890+1. lock the parent dir by its i_mutex.
891+2. lookup the final (child) entry.
892+3. revalidate it.
893+4. call the actual operation (create, unlink, etc.)
894+5. unlock the parent dir
895+
896+If the filesystem implements its ->d_revalidate() (step 3), then it is
897+called. Actually aufs implements it and checks the dentry on a branch is
898+still valid.
899+But it is not enough. Because aufs has to release the lock for the
900+parent dir on a branch at the end of ->lookup() (step 2) and
901+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
902+held by VFS.
903+If the file on a branch is changed directly, eg. bypassing aufs, after
904+aufs released the lock, then the subsequent operation may cause
905+something unpleasant result.
906+
907+This situation is a result of VFS architecture, ->lookup() and
908+->d_revalidate() is separated. But I never say it is wrong. It is a good
909+design from VFS's point of view. It is just not suitable for sub-VFS
910+character in aufs.
911+
912+Aufs supports such a case by three levels of revalidation which are
913+selectable by the user.
914+1. Simple Revalidate
915+ In addition to the native flow in VFS, confirm the child-parent
916+ relationship on the branch just after locking the parent dir on the
917+ branch in the "actual operation" (step 4). When this validation
918+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
919+ checks the validation of the dentry on branches.
920+2. Monitor Changes Internally by Inotify/Fsnotify
921+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
922+ the dentry on the branch, and returns EBUSY if it finds different
923+ dentry.
924+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
925+ during it is in cache. When the event is notified, aufs registers a
926+ function to kernel 'events' thread by schedule_work(). And the
927+ function sets some special status to the cached aufs dentry and inode
928+ private data. If they are not cached, then aufs has nothing to
929+ do. When the same file is accessed through aufs (step 0-3) later,
930+ aufs will detect the status and refresh all necessary data.
931+ In this mode, aufs has to ignore the event which is fired by aufs
932+ itself.
933+3. No Extra Validation
934+ This is the simplest test and doesn't add any additional revalidation
935+ test, and skips the revalidation in step 4. It is useful and improves
936+ aufs performance when system surely hide the aufs branches from user,
937+ by over-mounting something (or another method).
938diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
939--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
940+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2011-08-24 13:30:24.727980364 +0200
941@@ -0,0 +1,76 @@
942+
943+# Copyright (C) 2005-2011 Junjiro R. Okajima
944+#
945+# This program is free software; you can redistribute it and/or modify
946+# it under the terms of the GNU General Public License as published by
947+# the Free Software Foundation; either version 2 of the License, or
948+# (at your option) any later version.
949+#
950+# This program is distributed in the hope that it will be useful,
951+# but WITHOUT ANY WARRANTY; without even the implied warranty of
952+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
953+# GNU General Public License for more details.
954+#
955+# You should have received a copy of the GNU General Public License
956+# along with this program; if not, write to the Free Software
957+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
958+
959+Branch Manipulation
960+
961+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
962+and changing its permission/attribute, there are a lot of works to do.
963+
964+
965+Add a Branch
966+----------------------------------------------------------------------
967+o Confirm the adding dir exists outside of aufs, including loopback
968+ mount.
969+- and other various attributes...
970+o Initialize the xino file and whiteout bases if necessary.
971+ See struct.txt.
972+
973+o Check the owner/group/mode of the directory
974+ When the owner/group/mode of the adding directory differs from the
975+ existing branch, aufs issues a warning because it may impose a
976+ security risk.
977+ For example, when a upper writable branch has a world writable empty
978+ top directory, a malicious user can create any files on the writable
979+ branch directly, like copy-up and modify manually. If something like
980+ /etc/{passwd,shadow} exists on the lower readonly branch but not on the upper
981+ writable branch, and the writable branch is world-writable, then a
982+ malicious guy may create /etc/passwd on the writable branch directly
983+ and the infected file will be valid in aufs.
984+ I am afraid it can be a security issue, but nothing to do except
985+ producing a warning.
986+
987+
988+Delete a Branch
989+----------------------------------------------------------------------
990+o Confirm the deleting branch is not busy
991+ To be general, there is one merit to adopt "remount" interface to
992+ manipulate branches. It is to discard caches. At deleting a branch,
993+ aufs checks the still cached (and connected) dentries and inodes. If
994+ there are any, then they are all in-use. An inode without its
995+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
996+
997+ For the cached one, aufs checks whether the same named entry exists on
998+ other branches.
999+ If the cached one is a directory, because aufs provides a merged view
1000+ to users, as long as one dir is left on any branch aufs can show the
1001+ dir to users. In this case, the branch can be removed from aufs.
1002+ Otherwise aufs rejects deleting the branch.
1003+
1004+ If any file on the deleting branch is opened by aufs, then aufs
1005+ rejects deleting.
1006+
1007+
1008+Modify the Permission of a Branch
1009+----------------------------------------------------------------------
1010+o Re-initialize or remove the xino file and whiteout bases if necessary.
1011+ See struct.txt.
1012+
1013+o rw --> ro: Confirm the modifying branch is not busy
1014+ Aufs rejects the request if any of these conditions are true.
1015+ - a file on the branch is mmap-ed.
1016+ - a regular file on the branch is opened for write and there is no
1017+ same named entry on the upper branch.
1018diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1019--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1020+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2011-08-24 13:30:24.727980364 +0200
1021@@ -0,0 +1,65 @@
1022+
1023+# Copyright (C) 2005-2011 Junjiro R. Okajima
1024+#
1025+# This program is free software; you can redistribute it and/or modify
1026+# it under the terms of the GNU General Public License as published by
1027+# the Free Software Foundation; either version 2 of the License, or
1028+# (at your option) any later version.
1029+#
1030+# This program is distributed in the hope that it will be useful,
1031+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1032+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1033+# GNU General Public License for more details.
1034+#
1035+# You should have received a copy of the GNU General Public License
1036+# along with this program; if not, write to the Free Software
1037+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1038+
1039+Policies to Select One among Multiple Writable Branches
1040+----------------------------------------------------------------------
1041+When the number of writable branch is more than one, aufs has to decide
1042+the target branch for file creation or copy-up. By default, the highest
1043+writable branch which has the parent (or ancestor) dir of the target
1044+file is chosen (top-down-parent policy).
1045+By user's request, aufs implements some other policies to select the
1046+writable branch, for file creation two policies, round-robin and
1047+most-free-space policies. For copy-up three policies, top-down-parent,
1048+bottom-up-parent and bottom-up policies.
1049+
1050+As expected, the round-robin policy selects the branch in circular. When
1051+you have two writable branches and creates 10 new files, 5 files will be
1052+created for each branch. mkdir(2) systemcall is an exception. When you
1053+create 10 new directories, all will be created on the same branch.
1054+And the most-free-space policy selects the one which has most free
1055+space among the writable branches. The amount of free space will be
1056+checked by aufs internally, and users can specify its time interval.
1057+
1058+The policies for copy-up is more simple,
1059+top-down-parent is equivalent to the same named one in create policy,
1060+bottom-up-parent selects the writable branch where the parent dir
1061+exists and the nearest upper one from the copyup-source,
1062+bottom-up selects the nearest upper writable branch from the
1063+copyup-source, regardless the existence of the parent dir.
1064+
1065+There are some rules or exceptions to apply these policies.
1066+- If there is a readonly branch above the policy-selected branch and
1067+ the parent dir is marked as opaque (a variation of whiteout), or the
1068+ target (creating) file is whiteout-ed on the upper readonly branch,
1069+ then the result of the policy is ignored and the target file will be
1070+ created on the nearest upper writable branch than the readonly branch.
1071+- If there is a writable branch above the policy-selected branch and
1072+ the parent dir is marked as opaque or the target file is whiteouted
1073+ on the branch, then the result of the policy is ignored and the target
1074+ file will be created on the highest one among the upper writable
1075+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1076+ it as usual.
1077+- link(2) and rename(2) systemcalls are exceptions in every policy.
1078+ They try selecting the branch where the source exists as possible
1079+ since copyup a large file will take long time. If it can't be,
1080+ ie. the branch where the source exists is readonly, then they will
1081+ follow the copyup policy.
1082+- There is an exception for rename(2) when the target exists.
1083+ If the rename target exists, aufs compares the index of the branches
1084+ where the source and the target exists and selects the higher
1085+ one. If the selected branch is readonly, then aufs follows the
1086+ copyup policy.
1087diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1088--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
1089+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2011-08-24 13:30:24.727980364 +0200
1090@@ -0,0 +1,47 @@
1091+
1092+# Copyright (C) 2005-2011 Junjiro R. Okajima
1093+#
1094+# This program is free software; you can redistribute it and/or modify
1095+# it under the terms of the GNU General Public License as published by
1096+# the Free Software Foundation; either version 2 of the License, or
1097+# (at your option) any later version.
1098+#
1099+# This program is distributed in the hope that it will be useful,
1100+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1101+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1102+# GNU General Public License for more details.
1103+#
1104+# You should have received a copy of the GNU General Public License
1105+# along with this program; if not, write to the Free Software
1106+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1107+
1108+mmap(2) -- File Memory Mapping
1109+----------------------------------------------------------------------
1110+In aufs, the file-mapped pages are handled by a branch fs directly, no
1111+interaction with aufs. It means aufs_mmap() calls the branch fs's
1112+->mmap().
1113+This approach is simple and good, but there is one problem.
1114+Under /proc, several entries show the mmap-ped files by its path (with
1115+device and inode number), and the printed path will be the path on the
1116+branch fs's instead of virtual aufs's.
1117+This is not a problem in most cases, but some utilities such as lsof(1)
1118+(and their users) may expect the path on aufs.
1119+
1120+To address this issue, aufs adds a new member called vm_prfile in struct
1121+vm_area_struct (and struct vm_region). The original vm_file points to
1122+the file on the branch fs in order to handle everything correctly as
1123+usual. The new vm_prfile points to a virtual file in aufs, and the
1124+show-functions in procfs refers to vm_prfile if it is set.
1125+Also we need to maintain several other places where touching vm_file
1126+such like
1127+- fork()/clone() copies vma and the reference count of vm_file is
1128+ incremented.
1129+- merging vma maintains the ref count too.
1130+
1131+This is not a good approach. It is just faking the printed path. But it
1132+leaves all behaviour around f_mapping unchanged. This is surely an
1133+advantage.
1134+Actually aufs had adopted another complicated approach which calls
1135+generic_file_mmap() and handles struct vm_operations_struct. In this
1136+approach, aufs met a hard problem and I could not solve it without
1137+switching the approach.
1138diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
1139--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
1140+++ linux/Documentation/filesystems/aufs/design/07export.txt 2011-08-24 13:30:24.727980364 +0200
1141@@ -0,0 +1,59 @@
1142+
1143+# Copyright (C) 2005-2011 Junjiro R. Okajima
1144+#
1145+# This program is free software; you can redistribute it and/or modify
1146+# it under the terms of the GNU General Public License as published by
1147+# the Free Software Foundation; either version 2 of the License, or
1148+# (at your option) any later version.
1149+#
1150+# This program is distributed in the hope that it will be useful,
1151+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1152+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1153+# GNU General Public License for more details.
1154+#
1155+# You should have received a copy of the GNU General Public License
1156+# along with this program; if not, write to the Free Software
1157+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1158+
1159+Export Aufs via NFS
1160+----------------------------------------------------------------------
1161+Here is an approach.
1162+- like xino/xib, add a new file 'xigen' which stores aufs inode
1163+ generation.
1164+- iget_locked(): initialize aufs inode generation for a new inode, and
1165+ store it in xigen file.
1166+- destroy_inode(): increment aufs inode generation and store it in xigen
1167+ file. it is necessary even if it is not unlinked, because any data of
1168+ inode may be changed by UDBA.
1169+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1170+ build file handle by
1171+ + branch id (4 bytes)
1172+ + superblock generation (4 bytes)
1173+ + inode number (4 or 8 bytes)
1174+ + parent dir inode number (4 or 8 bytes)
1175+ + inode generation (4 bytes)
1176+ + return value of exportfs_encode_fh() for the parent on a branch (4
1177+ bytes)
1178+ + file handle for a branch (by exportfs_encode_fh())
1179+- fh_to_dentry():
1180+ + find the index of a branch from its id in handle, and check that it
1181+ still exists in aufs.
1182+ + 1st level: get the inode number from handle and search it in cache.
1183+ + 2nd level: if not found, get the parent inode number from handle and
1184+ search it in cache. and then open the parent dir, find the matching
1185+ inode number by vfs_readdir() and get its name, and call
1186+ lookup_one_len() for the target dentry.
1187+ + 3rd level: if the parent dir is not cached, call
1188+ exportfs_decode_fh() for a branch and get the parent on a branch,
1189+ build a pathname of it, convert it into a pathname in aufs, call
1190+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1191+ the 2nd level.
1192+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1193+ for every branch, but not itself. to get this, (currently) aufs
1194+ searches in current->nsproxy->mnt_ns list. it may not be a good
1195+ idea, but I didn't get other approach.
1196+ + test the generation of the gotten inode.
1197+- every inode operation: they may get EBUSY due to UDBA. in this case,
1198+ convert it into ESTALE for NFSD.
1199+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1200+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
1201diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
1202--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
1203+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2011-08-24 13:30:24.727980364 +0200
1204@@ -0,0 +1,53 @@
1205+
1206+# Copyright (C) 2005-2011 Junjiro R. Okajima
1207+#
1208+# This program is free software; you can redistribute it and/or modify
1209+# it under the terms of the GNU General Public License as published by
1210+# the Free Software Foundation; either version 2 of the License, or
1211+# (at your option) any later version.
1212+#
1213+# This program is distributed in the hope that it will be useful,
1214+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1215+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1216+# GNU General Public License for more details.
1217+#
1218+# You should have received a copy of the GNU General Public License
1219+# along with this program; if not, write to the Free Software
1220+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1221+
1222+Show Whiteout Mode (shwh)
1223+----------------------------------------------------------------------
1224+Generally aufs hides the name of whiteouts. But in some cases, to show
1225+them is very useful for users. For instance, creating a new middle layer
1226+(branch) by merging existing layers.
1227+
1228+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1229+When you have three branches,
1230+- Bottom: 'system', squashfs (underlying base system), read-only
1231+- Middle: 'mods', squashfs, read-only
1232+- Top: 'overlay', ram (tmpfs), read-write
1233+
1234+The top layer is loaded at boot time and saved at shutdown, to preserve
1235+the changes made to the system during the session.
1236+When larger changes have been made, or smaller changes have accumulated,
1237+the size of the saved top layer data grows. At this point, it would be
1238+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1239+and rewrite the 'mods' squashfs, clearing the top layer and thus
1240+restoring save and load speed.
1241+
1242+This merging is simplified by the use of another aufs mount, of just the
1243+two overlay branches using the 'shwh' option.
1244+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1245+ aufs /livesys/merge_union
1246+
1247+A merged view of these two branches is then available at
1248+/livesys/merge_union, and the new feature is that the whiteouts are
1249+visible!
1250+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1251+writing to all branches. Also the default mode for all branches is 'ro'.
1252+It is now possible to save the combined contents of the two overlay
1253+branches to a new squashfs, e.g.:
1254+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1255+
1256+This new squashfs archive can be stored on the boot device and the
1257+initramfs will use it to replace the old one at the next boot.
1258diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
1259--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
1260+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2011-08-24 13:30:24.727980364 +0200
1261@@ -0,0 +1,47 @@
1262+
1263+# Copyright (C) 2010-2011 Junjiro R. Okajima
1264+#
1265+# This program is free software; you can redistribute it and/or modify
1266+# it under the terms of the GNU General Public License as published by
1267+# the Free Software Foundation; either version 2 of the License, or
1268+# (at your option) any later version.
1269+#
1270+# This program is distributed in the hope that it will be useful,
1271+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1272+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1273+# GNU General Public License for more details.
1274+#
1275+# You should have received a copy of the GNU General Public License
1276+# along with this program; if not, write to the Free Software
1277+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1278+
1279+Dynamically customizable FS operations
1280+----------------------------------------------------------------------
1281+Generally FS operations (struct inode_operations, struct
1282+address_space_operations, struct file_operations, etc.) are defined as
1283+"static const", but it never means that FS have only one set of
1284+operation. Some FS have multiple sets of them. For instance, ext2 has
1285+three sets, one for XIP, for NOBH, and for normal.
1286+Since aufs overrides and redirects these operations, sometimes aufs has
1287+to change its behaviour according to the branch FS type. More importantly
1288+VFS acts differently if a function (member in the struct) is set or
1289+not. It means aufs should have several sets of operations and select one
1290+among them according to the branch FS definition.
1291+
1292+In order to solve this problem and not to affect the behaviour of VFS,
1293+aufs defines these operations dynamically. For instance, aufs defines
1294+aio_read function for struct file_operations, but it may not be set to
1295+the file_operations. When the branch FS doesn't have it, aufs doesn't
1296+set it to its file_operations while the function definition itself is
1297+still alive. So the behaviour of io_submit(2) will not change, and it
1298+will return an error when aio_read is not defined.
1299+
1300+The lifetime of these dynamically generated operation object is
1301+maintained by aufs branch object. When the branch is removed from aufs,
1302+the reference counter of the object is decremented. When it reaches
1303+zero, the dynamically generated operation object will be freed.
1304+
1305+This approach is designed to support AIO (io_submit), Direct I/O and
1306+XIP mainly.
1307+Currently this approach is applied to file_operations and
1308+vm_operations_struct for regular files only.
1309diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt linux/Documentation/filesystems/aufs/design/99plan.txt
1310--- /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
1311+++ linux/Documentation/filesystems/aufs/design/99plan.txt 2011-08-24 13:30:24.727980364 +0200
1312@@ -0,0 +1,96 @@
1313+
1314+# Copyright (C) 2005-2011 Junjiro R. Okajima
1315+#
1316+# This program is free software; you can redistribute it and/or modify
1317+# it under the terms of the GNU General Public License as published by
1318+# the Free Software Foundation; either version 2 of the License, or
1319+# (at your option) any later version.
1320+#
1321+# This program is distributed in the hope that it will be useful,
1322+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1323+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1324+# GNU General Public License for more details.
1325+#
1326+# You should have received a copy of the GNU General Public License
1327+# along with this program; if not, write to the Free Software
1328+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1329+
1330+Plan
1331+
1332+Restoring some features which were implemented in aufs1.
1333+They were dropped in aufs2 in order to make source files simpler and
1334+easier to be reviewed.
1335+
1336+
1337+Test Only the Highest One for the Directory Permission (dirperm1 option)
1338+----------------------------------------------------------------------
1339+Let's try case study.
1340+- aufs has two branches, upper readwrite and lower readonly.
1341+ /au = /rw + /ro
1342+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1343+- user invoked "chmod a+rx /au/dirA"
1344+- then "dirA" becomes world readable?
1345+
1346+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1347+or it may be a natively readonly filesystem. If aufs respects the lower
1348+branch, it should not respond readdir request from other users. But user
1349+allowed it by chmod. Should aufs really reject showing the entries
1350+under /ro/dirA?
1351+
1352+To be honest, I don't have a best solution for this case. So I
1353+implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to
1354+users.
1355+When dirperm1 is specified, aufs checks only the highest one for the
1356+directory permission, and shows the entries. Otherwise, as usual, checks
1357+every dir existing on all branches and rejects the request.
1358+
1359+As a side effect, dirperm1 option improves the performance of aufs
1360+because the number of permission check is reduced.
1361+
1362+
1363+Being Another Aufs's Readonly Branch (robr)
1364+----------------------------------------------------------------------
1365+Aufs1 allows aufs to be another aufs's readonly branch.
1366+This feature was developed by a user's request. But it may not be used
1367+currently.
1368+
1369+
1370+Copy-up on Open (coo=)
1371+----------------------------------------------------------------------
1372+By default the internal copy-up is executed when it is really necessary.
1373+It is not done when a file is opened for writing, but when write(2) is
1374+done. Users who have many (over 100) branches want to know and analyse
1375+when and what file is copied-up. To insert a new upper branch which
1376+contains such files only may improve the performance of aufs.
1377+
1378+Aufs1 implemented "coo=none | leaf | all" option.
1379+
1380+
1381+Refresh the Opened File (refrof)
1382+----------------------------------------------------------------------
1383+This option is implemented in aufs1 but incomplete.
1384+
1385+When user reads from a file, he expects to get its latest filedata
1386+generally. If the file is removed and a new same named file is created,
1387+the content he gets is unchanged, ie. the unlinked filedata.
1388+
1389+Let's try case study again.
1390+- aufs has two branches.
1391+ /au = /rw + /ro
1392+- "fileA" exists under /ro, but not under /rw.
1393+- user opened "/au/fileA".
1394+- he or someone else inserts a branch (/new) between /rw and /ro.
1395+ /au = /rw + /new + /ro
1396+- the new branch has "fileA".
1397+- user reads from the opened "fileA"
1398+- which filedata should aufs return, from /ro or /new?
1399+
1400+Some people say it has to be "from /ro" and it is a semantics of Unix.
1401+The others say it should be "from /new" because the file is not removed
1402+and it is equivalent to the case of someone else modifies the file.
1403+
1404+Here again I don't have a best and final answer. I got an idea to
1405+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
1406+Opened File) is specified (by default), aufs returns the filedata from
1407+/new.
1408+Otherwise from /ro.
1409diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
1410--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
1411+++ linux/Documentation/filesystems/aufs/README 2011-08-24 13:30:24.727980364 +0200
1412@@ -0,0 +1,290 @@
1413+
1414+Aufs3 -- advanced multi layered unification filesystem version 3.x
1415+http://aufs.sf.net
1416+Junjiro R. Okajima
1417+
1418+
1419+0. Introduction
1420+----------------------------------------
1421+In the early days, aufs was entirely re-designed and re-implemented
1422+Unionfs Version 1.x series. After many original ideas, approaches,
1423+improvements and implementations, it becomes totally different from
1424+Unionfs while keeping the basic features.
1425+Recently, Unionfs Version 2.x series begin taking some of the same
1426+approaches to aufs1's.
1427+Unionfs is being developed by Professor Erez Zadok at Stony Brook
1428+University and his team.
1429+
1430+Aufs3 supports linux-3.0 and later.
1431+If you want older kernel version support, try aufs2-2.6.git or
1432+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
1433+
1434+Note: it becomes clear that "Aufs was rejected. Let's give it up."
1435+According to Christoph Hellwig, linux rejects all union-type filesystems
1436+but UnionMount.
1437+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
1438+
1439+
1440+1. Features
1441+----------------------------------------
1442+- unite several directories into a single virtual filesystem. The member
1443+ directory is called as a branch.
1444+- you can specify the permission flags to the branch, which are 'readonly',
1445+ 'readwrite' and 'whiteout-able.'
1446+- by upper writable branch, internal copyup and whiteout, files/dirs on
1447+ readonly branch are modifiable logically.
1448+- dynamic branch manipulation, add, del.
1449+- etc...
1450+
1451+Also there are many enhancements in aufs1, such as:
1452+- readdir(3) in userspace.
1453+- keep inode number by external inode number table
1454+- keep the timestamps of file/dir in internal copyup operation
1455+- seekable directory, supporting NFS readdir.
1456+- whiteout is hardlinked in order to reduce the consumption of inodes
1457+ on branch
1458+- do not copyup, nor create a whiteout when it is unnecessary
1459+- revert a single systemcall when an error occurs in aufs
1460+- remount interface instead of ioctl
1461+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
1462+- loopback mounted filesystem as a branch
1463+- kernel thread for removing the dir who has a plenty of whiteouts
1464+- support copyup sparse file (a file which has a 'hole' in it)
1465+- default permission flags for branches
1466+- selectable permission flags for ro branch, whether whiteout can
1467+ exist or not
1468+- export via NFS.
1469+- support <sysfs>/fs/aufs and <debugfs>/aufs.
1470+- support multiple writable branches, some policies to select one
1471+ among multiple writable branches.
1472+- a new semantics for link(2) and rename(2) to support multiple
1473+ writable branches.
1474+- no glibc changes are required.
1475+- pseudo hardlink (hardlink over branches)
1476+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
1477+ including NFS or remote filesystem branch.
1478+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
1479+- and more...
1480+
1481+Currently these features are dropped temporarily from aufs3.
1482+See design/99plan.txt for details.
1483+- test only the highest one for the directory permission (dirperm1)
1484+- copyup on open (coo=)
1485+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
1486+ (robr)
1487+- statistics of aufs thread (/sys/fs/aufs/stat)
1488+- delegation mode (dlgt)
1489+ a delegation of the internal branch access to support task I/O
1490+ accounting, which also supports Linux Security Modules (LSM) mainly
1491+ for Suse AppArmor.
1492+- intent.open/create (file open in a single lookup)
1493+
1494+Features or just an idea in the future (see also design/*.txt),
1495+- reorder the branch index without del/re-add.
1496+- permanent xino files for NFSD
1497+- an option for refreshing the opened files after add/del branches
1498+- 'move' policy for copy-up between two writable branches, after
1499+ checking free space.
1500+- light version, without branch manipulation. (unnecessary?)
1501+- copyup in userspace
1502+- inotify in userspace
1503+- readv/writev
1504+- xattr, acl
1505+
1506+
1507+2. Download
1508+----------------------------------------
1509+There were three GIT trees for aufs2, but for aufs3 two GIT trees,
1510+aufs3-standalone and aufs-util. Note that there is no "3" in "aufs-util."
1511+The aufs3-standalone tree has only aufs source files
1512+and necessary patches, and you can select CONFIG_AUFS_FS=m.
1513+
1514+You will find GIT branches whose name is in form of "aufs3.x" where "x"
1515+represents the linux kernel version, "linux-3.x". For instance,
1516+"aufs3.0" is for linux-3.0.
1517+
1518+o aufs3-standalone tree
1519+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \
1520+ aufs3-standalone.git
1521+$ cd aufs3-standalone.git
1522+$ git checkout origin/aufs3.0
1523+
1524+o aufs-util tree
1525+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \
1526+ aufs-util.git
1527+$ cd aufs-util.git
1528+$ git checkout origin/aufs3.0
1529+
1530+You may not be able to find the GIT branch in aufs-util for your
1531+version. In this case, you should git-checkout the branch for the
1532+nearest lower number.
1533+If you are using linux-3.10 and aufs3.10 (which are not released yet),
1534+but the "aufs3.10" branch doesn't exist in this repository, then
1535+"aufs3.9", "aufs3.8", ... or something is the branch for you.
1536+Also you can view all branches by
1537+ $ git branch -a
1538+
1539+
1540+3. Configuration and Compilation
1541+----------------------------------------
1542+Make sure you have git-checkout'ed the correct branch.
1543+
1544+For aufs3-standalone tree,
1545+There are several ways to build.
1546+
1547+1.
1548+- apply ./aufs3-kbuild.patch to your kernel source files.
1549+- apply ./aufs3-base.patch too.
1550+- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and
1551+ others including lsof(1)) show the file path on aufs instead of the
1552+ path on the branch fs.
1553+- apply ./aufs3-standalone.patch too, if you have a plan to set
1554+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch.
1555+- copy ./{Documentation,fs,include/linux/aufs_type.h} files to your
1556+ kernel source tree. Never copy ./include/linux/Kbuild.
1557+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
1558+ =m or =y.
1559+- and build your kernel as usual.
1560+- install the built kernel.
1561+- install the header files too by "make headers_install".
1562+- and reboot your system.
1563+
1564+2.
1565+- module only (CONFIG_AUFS_FS=m).
1566+- apply ./aufs3-base.patch to your kernel source files.
1567+- apply ./aufs3-proc_map.patch too to your kernel source files,
1568+ if you want to make /proc/PID/maps (and others including lsof(1)) show
1569+ the file path on aufs instead of the path on the branch fs.
1570+- apply ./aufs3-standalone.patch too.
1571+- build your kernel, don't forget "make headers_install", and reboot.
1572+- edit ./config.mk and set other aufs configurations if necessary.
1573+ Note: You should read ./fs/aufs/Kconfig carefully which describes
1574+ every aufs configurations.
1575+- build the module by simple "make".
1576+- you can specify ${KDIR} make variable which points to your kernel
1577+ source tree.
1578+- install the files
1579+ + run "make install" to install the aufs module, or copy the built
1580+ ./aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
1581+ + run "make headers_install" to install the aufs header file (you can
1582+ specify DESTDIR), or copy ./usr/include/linux/aufs_type.h to
1583+ /usr/include/linux or wherever you like.
1584+- no need to apply aufs3-kbuild.patch, nor copying source files to your
1585+ kernel source tree.
1586+
1587+Note: The header file aufs_type.h is necessary to build aufs-util
1588+ as well as "make headers_install" in the kernel source tree.
1589+ headers_install is subject to be forgotten, but it is essentially
1590+ necessary, not only for building aufs-util.
1591+ You may not meet problems without headers_install in some older
1592+ version though.
1593+
1594+And then,
1595+- read README in aufs-util, build and install it
1596+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
1597+ then run "make install_ulib" too. And refer to the aufs manual in
1598+ detail.
1599+
1600+
1601+4. Usage
1602+----------------------------------------
1603+At first, make sure aufs-util are installed, and please read the aufs
1604+manual, aufs.5 in aufs-util.git tree.
1605+$ man -l aufs.5
1606+
1607+And then,
1608+$ mkdir /tmp/rw /tmp/aufs
1609+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
1610+
1611+Here is another example. The result is equivalent.
1612+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
1613+ Or
1614+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
1615+# mount -o remount,append:${HOME} /tmp/aufs
1616+
1617+Then, you can see whole tree of your home dir through /tmp/aufs. If
1618+you modify a file under /tmp/aufs, the one on your home directory is
1619+not affected, instead the same named file will be newly created under
1620+/tmp/rw. And all of your modification to a file will be applied to
1621+the one under /tmp/rw. This is called the file based Copy on Write
1622+(COW) method.
1623+Aufs mount options are described in aufs.5.
1624+If you run chroot or something and make your aufs as a root directory,
1625+then you need to customize the shutdown script. See the aufs manual in
1626+detail.
1627+
1628+Additionally, there are some sample usages of aufs which are a
1629+diskless system with network booting, and LiveCD over NFS.
1630+See sample dir in CVS tree on SourceForge.
1631+
1632+
1633+5. Contact
1634+----------------------------------------
1635+When you have any problems or strange behaviour in aufs, please let me
1636+know with:
1637+- /proc/mounts (instead of the output of mount(8))
1638+- /sys/module/aufs/*
1639+- /sys/fs/aufs/* (if you have them)
1640+- /debug/aufs/* (if you have them)
1641+- linux kernel version
1642+ if your kernel is not plain, for example modified by distributor,
1643+ the url where i can download its source is necessary too.
1644+- aufs version which was printed at loading the module or booting the
1645+ system, instead of the date you downloaded.
1646+- configuration (define/undefine CONFIG_AUFS_xxx)
1647+- kernel configuration or /proc/config.gz (if you have it)
1648+- behaviour which you think to be incorrect
1649+- actual operation, reproducible one is better
1650+- mailto: aufs-users at lists.sourceforge.net
1651+
1652+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
1653+and Feature Requests) on SourceForge. Please join and write to
1654+aufs-users ML.
1655+
1656+
1657+6. Acknowledgements
1658+----------------------------------------
1659+Thanks to everyone who have tried and are using aufs, whoever
1660+have reported a bug or any feedback.
1661+
1662+Especially donators:
1663+Tomas Matejicek(slax.org) made a donation (much more than once).
1664+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
1665+ scripts) is making "doubling" donations.
1666+ Unfortunately I cannot list all of the donators, but I really
1667+ appreciate.
1668+ It ends Aug 2010, but the ordinary donation URL is still available.
1669+ <http://sourceforge.net/donate/index.php?group_id=167503>
1670+Dai Itasaka made a donation (2007/8).
1671+Chuck Smith made a donation (2008/4, 10 and 12).
1672+Henk Schoneveld made a donation (2008/9).
1673+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
1674+Francois Dupoux made a donation (2008/11).
1675+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
1676+ aufs2 GIT tree (2009/2).
1677+William Grant made a donation (2009/3).
1678+Patrick Lane made a donation (2009/4).
1679+The Mail Archive (mail-archive.com) made donations (2009/5).
1680+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
1681+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
1682+Pavel Pronskiy made a donation (2011/2).
1683+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
1684+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
1685+Max Lekomcev (DOM-TV project) made a donation (2011/7).
1686+
1687+Thank you very much.
1688+Donations are always, including future donations, very important and
1689+helpful for me to keep on developing aufs.
1690+
1691+
1692+7.
1693+----------------------------------------
1694+If you are an experienced user, no explanation is needed. Aufs is
1695+just a linux filesystem.
1696+
1697+
1698+Enjoy!
1699+
1700+# Local variables: ;
1701+# mode: text;
1702+# End: ;
7f207e10
AM
1703diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
1704--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 1705+++ linux/fs/aufs/aufs.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 1706@@ -0,0 +1,60 @@
7f207e10 1707+/*
027c5e7a 1708+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1709+ *
1710+ * This program, aufs is free software; you can redistribute it and/or modify
1711+ * it under the terms of the GNU General Public License as published by
1712+ * the Free Software Foundation; either version 2 of the License, or
1713+ * (at your option) any later version.
1714+ *
1715+ * This program is distributed in the hope that it will be useful,
1716+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1717+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1718+ * GNU General Public License for more details.
1719+ *
1720+ * You should have received a copy of the GNU General Public License
1721+ * along with this program; if not, write to the Free Software
1722+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1723+ */
1724+
1725+/*
1726+ * all header files
1727+ */
1728+
1729+#ifndef __AUFS_H__
1730+#define __AUFS_H__
1731+
1732+#ifdef __KERNEL__
1733+
1734+#define AuStub(type, name, body, ...) \
1735+ static inline type name(__VA_ARGS__) { body; }
1736+
1737+#define AuStubVoid(name, ...) \
1738+ AuStub(void, name, , __VA_ARGS__)
1739+#define AuStubInt0(name, ...) \
1740+ AuStub(int, name, return 0, __VA_ARGS__)
1741+
1742+#include "debug.h"
1743+
1744+#include "branch.h"
1745+#include "cpup.h"
1746+#include "dcsub.h"
1747+#include "dbgaufs.h"
1748+#include "dentry.h"
1749+#include "dir.h"
1750+#include "dynop.h"
1751+#include "file.h"
1752+#include "fstype.h"
1753+#include "inode.h"
1754+#include "loop.h"
1755+#include "module.h"
7f207e10
AM
1756+#include "opts.h"
1757+#include "rwsem.h"
1758+#include "spl.h"
1759+#include "super.h"
1760+#include "sysaufs.h"
1761+#include "vfsub.h"
1762+#include "whout.h"
1763+#include "wkq.h"
1764+
1765+#endif /* __KERNEL__ */
1766+#endif /* __AUFS_H__ */
1767diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
1768--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
1769+++ linux/fs/aufs/branch.c 2011-08-24 13:30:24.731313534 +0200
1770@@ -0,0 +1,1170 @@
7f207e10 1771+/*
027c5e7a 1772+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1773+ *
1774+ * This program, aufs is free software; you can redistribute it and/or modify
1775+ * it under the terms of the GNU General Public License as published by
1776+ * the Free Software Foundation; either version 2 of the License, or
1777+ * (at your option) any later version.
1778+ *
1779+ * This program is distributed in the hope that it will be useful,
1780+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1781+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1782+ * GNU General Public License for more details.
1783+ *
1784+ * You should have received a copy of the GNU General Public License
1785+ * along with this program; if not, write to the Free Software
1786+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1787+ */
1788+
1789+/*
1790+ * branch management
1791+ */
1792+
027c5e7a 1793+#include <linux/compat.h>
7f207e10
AM
1794+#include <linux/file.h>
1795+#include <linux/statfs.h>
1796+#include "aufs.h"
1797+
1798+/*
1799+ * free a single branch
1facf9fc 1800+ */
1801+static void au_br_do_free(struct au_branch *br)
1802+{
1803+ int i;
1804+ struct au_wbr *wbr;
4a4d8108 1805+ struct au_dykey **key;
1facf9fc 1806+
027c5e7a
AM
1807+ au_hnotify_fin_br(br);
1808+
1facf9fc 1809+ if (br->br_xino.xi_file)
1810+ fput(br->br_xino.xi_file);
1811+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
1812+
1813+ AuDebugOn(atomic_read(&br->br_count));
1814+
1815+ wbr = br->br_wbr;
1816+ if (wbr) {
1817+ for (i = 0; i < AuBrWh_Last; i++)
1818+ dput(wbr->wbr_wh[i]);
1819+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 1820+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 1821+ }
1822+
4a4d8108
AM
1823+ key = br->br_dykey;
1824+ for (i = 0; i < AuBrDynOp; i++, key++)
1825+ if (*key)
1826+ au_dy_put(*key);
1827+ else
1828+ break;
1829+
1facf9fc 1830+ mntput(br->br_mnt);
1facf9fc 1831+ kfree(wbr);
1832+ kfree(br);
1833+}
1834+
1835+/*
1836+ * frees all branches
1837+ */
1838+void au_br_free(struct au_sbinfo *sbinfo)
1839+{
1840+ aufs_bindex_t bmax;
1841+ struct au_branch **br;
1842+
dece6358
AM
1843+ AuRwMustWriteLock(&sbinfo->si_rwsem);
1844+
1facf9fc 1845+ bmax = sbinfo->si_bend + 1;
1846+ br = sbinfo->si_branch;
1847+ while (bmax--)
1848+ au_br_do_free(*br++);
1849+}
1850+
1851+/*
1852+ * find the index of a branch which is specified by @br_id.
1853+ */
1854+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1855+{
1856+ aufs_bindex_t bindex, bend;
1857+
1858+ bend = au_sbend(sb);
1859+ for (bindex = 0; bindex <= bend; bindex++)
1860+ if (au_sbr_id(sb, bindex) == br_id)
1861+ return bindex;
1862+ return -1;
1863+}
1864+
1865+/* ---------------------------------------------------------------------- */
1866+
1867+/*
1868+ * add a branch
1869+ */
1870+
b752ccd1
AM
1871+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
1872+ struct dentry *h_root)
1facf9fc 1873+{
b752ccd1
AM
1874+ if (unlikely(h_adding == h_root
1875+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 1876+ return 1;
b752ccd1
AM
1877+ if (h_adding->d_sb != h_root->d_sb)
1878+ return 0;
1879+ return au_test_subdir(h_adding, h_root)
1880+ || au_test_subdir(h_root, h_adding);
1facf9fc 1881+}
1882+
1883+/*
1884+ * returns a newly allocated branch. @new_nbranch is a number of branches
1885+ * after adding a branch.
1886+ */
1887+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1888+ int perm)
1889+{
1890+ struct au_branch *add_branch;
1891+ struct dentry *root;
4a4d8108 1892+ int err;
1facf9fc 1893+
4a4d8108 1894+ err = -ENOMEM;
1facf9fc 1895+ root = sb->s_root;
1896+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1897+ if (unlikely(!add_branch))
1898+ goto out;
1899+
027c5e7a
AM
1900+ err = au_hnotify_init_br(add_branch, perm);
1901+ if (unlikely(err))
1902+ goto out_br;
1903+ err = -ENOMEM; /* err is 0 here; keep ERR_PTR() non-NULL if kmalloc below fails */
1facf9fc 1904+ add_branch->br_wbr = NULL;
1905+ if (au_br_writable(perm)) {
1906+ /* may be freed separately at changing the branch permission */
1907+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1908+ GFP_NOFS);
1909+ if (unlikely(!add_branch->br_wbr))
027c5e7a 1910+ goto out_hnotify;
1facf9fc 1911+ }
1912+
4a4d8108
AM
1913+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
1914+ if (!err)
1915+ err = au_di_realloc(au_di(root), new_nbranch);
1916+ if (!err)
1917+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch);
1918+ if (!err)
1919+ return add_branch; /* success */
1facf9fc 1920+
1facf9fc 1921+ kfree(add_branch->br_wbr);
4a4d8108 1922+
027c5e7a
AM
1923+out_hnotify:
1924+ au_hnotify_fin_br(add_branch);
4f0767ce 1925+out_br:
1facf9fc 1926+ kfree(add_branch);
4f0767ce 1927+out:
4a4d8108 1928+ return ERR_PTR(err);
1facf9fc 1929+}
1930+
1931+/*
1932+ * test if the branch permission is legal or not.
1933+ */
1934+static int test_br(struct inode *inode, int brperm, char *path)
1935+{
1936+ int err;
1937+
4a4d8108
AM
1938+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
1939+ if (!err)
1940+ goto out;
1facf9fc 1941+
4a4d8108
AM
1942+ err = -EINVAL;
1943+ pr_err("write permission for readonly mount or inode, %s\n", path);
1944+
4f0767ce 1945+out:
1facf9fc 1946+ return err;
1947+}
1948+
1949+/*
1950+ * returns:
1951+ * 0: success, the caller will add it
1952+ * plus: success, it is already unified, the caller should ignore it
1953+ * minus: error
1954+ */
1955+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1956+{
1957+ int err;
1958+ aufs_bindex_t bend, bindex;
1959+ struct dentry *root;
1960+ struct inode *inode, *h_inode;
1961+
1962+ root = sb->s_root;
1963+ bend = au_sbend(sb);
1964+ if (unlikely(bend >= 0
1965+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
1966+ err = 1;
1967+ if (!remount) {
1968+ err = -EINVAL;
4a4d8108 1969+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 1970+ }
1971+ goto out;
1972+ }
1973+
1974+ err = -ENOSPC; /* -E2BIG; */
1975+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
1976+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 1977+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 1978+ goto out;
1979+ }
1980+
1981+ err = -EDOM;
1982+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 1983+ pr_err("bad index %d\n", add->bindex);
1facf9fc 1984+ goto out;
1985+ }
1986+
1987+ inode = add->path.dentry->d_inode;
1988+ err = -ENOENT;
1989+ if (unlikely(!inode->i_nlink)) {
4a4d8108 1990+ pr_err("no existence %s\n", add->pathname);
1facf9fc 1991+ goto out;
1992+ }
1993+
1994+ err = -EINVAL;
1995+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 1996+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 1997+ goto out;
1998+ }
1999+
2000+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2001+ pr_err("unsupported filesystem, %s (%s)\n",
2002+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2003+ goto out;
2004+ }
2005+
2006+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
2007+ if (unlikely(err))
2008+ goto out;
2009+
2010+ if (bend < 0)
2011+ return 0; /* success */
2012+
2013+ err = -EINVAL;
2014+ for (bindex = 0; bindex <= bend; bindex++)
2015+ if (unlikely(test_overlap(sb, add->path.dentry,
2016+ au_h_dptr(root, bindex)))) {
4a4d8108 2017+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 2018+ goto out;
2019+ }
2020+
2021+ err = 0;
2022+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
2023+ h_inode = au_h_dptr(root, 0)->d_inode;
2024+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
2025+ || h_inode->i_uid != inode->i_uid
2026+ || h_inode->i_gid != inode->i_gid)
4a4d8108
AM
2027+ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2028+ add->pathname,
2029+ inode->i_uid, inode->i_gid,
2030+ (inode->i_mode & S_IALLUGO),
2031+ h_inode->i_uid, h_inode->i_gid,
2032+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 2033+ }
2034+
4f0767ce 2035+out:
1facf9fc 2036+ return err;
2037+}
2038+
2039+/*
2040+ * initialize or clean the whiteouts for an adding branch
2041+ */
2042+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
2043+ int new_perm, struct dentry *h_root)
2044+{
2045+ int err, old_perm;
2046+ aufs_bindex_t bindex;
2047+ struct mutex *h_mtx;
2048+ struct au_wbr *wbr;
2049+ struct au_hinode *hdir;
2050+
2051+ wbr = br->br_wbr;
2052+ old_perm = br->br_perm;
2053+ br->br_perm = new_perm;
2054+ hdir = NULL;
2055+ h_mtx = NULL;
2056+ bindex = au_br_index(sb, br->br_id);
2057+ if (0 <= bindex) {
2058+ hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 2059+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 2060+ } else {
2061+ h_mtx = &h_root->d_inode->i_mutex;
2062+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
2063+ }
2064+ if (!wbr)
2065+ err = au_wh_init(h_root, br, sb);
2066+ else {
2067+ wbr_wh_write_lock(wbr);
2068+ err = au_wh_init(h_root, br, sb);
2069+ wbr_wh_write_unlock(wbr);
2070+ }
2071+ if (hdir)
4a4d8108 2072+ au_hn_imtx_unlock(hdir);
1facf9fc 2073+ else
2074+ mutex_unlock(h_mtx);
2075+ br->br_perm = old_perm;
2076+
2077+ if (!err && wbr && !au_br_writable(new_perm)) {
2078+ kfree(wbr);
2079+ br->br_wbr = NULL;
2080+ }
2081+
2082+ return err;
2083+}
2084+
2085+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
2086+ int perm, struct path *path)
2087+{
2088+ int err;
4a4d8108 2089+ struct kstatfs kst;
1facf9fc 2090+ struct au_wbr *wbr;
4a4d8108 2091+ struct dentry *h_dentry;
1facf9fc 2092+
2093+ wbr = br->br_wbr;
dece6358 2094+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 2095+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
2096+ atomic_set(&wbr->wbr_wh_running, 0);
2097+ wbr->wbr_bytes = 0;
2098+
4a4d8108
AM
2099+ /*
2100+ * a limit for rmdir/rename a dir
2101+ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
2102+ */
7f207e10 2103+ err = vfs_statfs(path, &kst);
4a4d8108
AM
2104+ if (unlikely(err))
2105+ goto out;
2106+ err = -EINVAL;
7f207e10 2107+ h_dentry = path->dentry;
4a4d8108
AM
2108+ if (kst.f_namelen >= NAME_MAX)
2109+ err = au_br_init_wh(sb, br, perm, h_dentry);
2110+ else
2111+ pr_err("%.*s(%s), unsupported namelen %ld\n",
2112+ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
2113+ kst.f_namelen);
1facf9fc 2114+
4f0767ce 2115+out:
1facf9fc 2116+ return err;
2117+}
2118+
2119+/* initialize a new branch */
2120+static int au_br_init(struct au_branch *br, struct super_block *sb,
2121+ struct au_opt_add *add)
2122+{
2123+ int err;
2124+
2125+ err = 0;
2126+ memset(&br->br_xino, 0, sizeof(br->br_xino));
2127+ mutex_init(&br->br_xino.xi_nondir_mtx);
2128+ br->br_perm = add->perm;
2129+ br->br_mnt = add->path.mnt; /* set first, mntget() later */
4a4d8108
AM
2130+ spin_lock_init(&br->br_dykey_lock);
2131+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 2132+ atomic_set(&br->br_count, 0);
2133+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
2134+ atomic_set(&br->br_xino_running, 0);
2135+ br->br_id = au_new_br_id(sb);
7f207e10 2136+ AuDebugOn(br->br_id < 0);
1facf9fc 2137+
2138+ if (au_br_writable(add->perm)) {
2139+ err = au_wbr_init(br, sb, add->perm, &add->path);
2140+ if (unlikely(err))
b752ccd1 2141+ goto out_err;
1facf9fc 2142+ }
2143+
2144+ if (au_opt_test(au_mntflags(sb), XINO)) {
2145+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
2146+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2147+ if (unlikely(err)) {
2148+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 2149+ goto out_err;
1facf9fc 2150+ }
2151+ }
2152+
2153+ sysaufs_br_init(br);
2154+ mntget(add->path.mnt);
b752ccd1 2155+ goto out; /* success */
1facf9fc 2156+
4f0767ce 2157+out_err:
b752ccd1 2158+ br->br_mnt = NULL;
4f0767ce 2159+out:
1facf9fc 2160+ return err;
2161+}
2162+
2163+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
2164+ struct au_branch *br, aufs_bindex_t bend,
2165+ aufs_bindex_t amount)
2166+{
2167+ struct au_branch **brp;
2168+
dece6358
AM
2169+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2170+
1facf9fc 2171+ brp = sbinfo->si_branch + bindex;
2172+ memmove(brp + 1, brp, sizeof(*brp) * amount);
2173+ *brp = br;
2174+ sbinfo->si_bend++;
2175+ if (unlikely(bend < 0))
2176+ sbinfo->si_bend = 0;
2177+}
2178+
2179+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
2180+ aufs_bindex_t bend, aufs_bindex_t amount)
2181+{
2182+ struct au_hdentry *hdp;
2183+
1308ab2a 2184+ AuRwMustWriteLock(&dinfo->di_rwsem);
2185+
1facf9fc 2186+ hdp = dinfo->di_hdentry + bindex;
2187+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
2188+ au_h_dentry_init(hdp);
2189+ dinfo->di_bend++;
2190+ if (unlikely(bend < 0))
2191+ dinfo->di_bstart = 0;
2192+}
2193+
2194+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
2195+ aufs_bindex_t bend, aufs_bindex_t amount)
2196+{
2197+ struct au_hinode *hip;
2198+
1308ab2a 2199+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2200+
1facf9fc 2201+ hip = iinfo->ii_hinode + bindex;
2202+ memmove(hip + 1, hip, sizeof(*hip) * amount);
2203+ hip->hi_inode = NULL;
4a4d8108 2204+ au_hn_init(hip);
1facf9fc 2205+ iinfo->ii_bend++;
2206+ if (unlikely(bend < 0))
2207+ iinfo->ii_bstart = 0;
2208+}
2209+
2210+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2211+ struct au_branch *br, aufs_bindex_t bindex)
2212+{
2213+ struct dentry *root;
2214+ struct inode *root_inode;
2215+ aufs_bindex_t bend, amount;
2216+
2217+ root = sb->s_root;
2218+ root_inode = root->d_inode;
1facf9fc 2219+ bend = au_sbend(sb);
2220+ amount = bend + 1 - bindex;
53392da6 2221+ au_sbilist_lock();
1facf9fc 2222+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2223+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2224+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2225+ au_set_h_dptr(root, bindex, dget(h_dentry));
2226+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2227+ /*flags*/0);
53392da6 2228+ au_sbilist_unlock();
1facf9fc 2229+}
2230+
2231+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2232+{
2233+ int err;
1facf9fc 2234+ aufs_bindex_t bend, add_bindex;
2235+ struct dentry *root, *h_dentry;
2236+ struct inode *root_inode;
2237+ struct au_branch *add_branch;
2238+
2239+ root = sb->s_root;
2240+ root_inode = root->d_inode;
2241+ IMustLock(root_inode);
2242+ err = test_add(sb, add, remount);
2243+ if (unlikely(err < 0))
2244+ goto out;
2245+ if (err) {
2246+ err = 0;
2247+ goto out; /* success */
2248+ }
2249+
2250+ bend = au_sbend(sb);
2251+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
2252+ err = PTR_ERR(add_branch);
2253+ if (IS_ERR(add_branch))
2254+ goto out;
2255+
2256+ err = au_br_init(add_branch, sb, add);
2257+ if (unlikely(err)) {
2258+ au_br_do_free(add_branch);
2259+ goto out;
2260+ }
2261+
2262+ add_bindex = add->bindex;
2263+ h_dentry = add->path.dentry;
2264+ if (!remount)
2265+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2266+ else {
2267+ sysaufs_brs_del(sb, add_bindex);
2268+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2269+ sysaufs_brs_add(sb, add_bindex);
2270+ }
2271+
1308ab2a 2272+ if (!add_bindex) {
1facf9fc 2273+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2274+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2275+ } else
1facf9fc 2276+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 2277+
2278+ /*
4a4d8108 2279+ * this test/set prevents aufs from handling unnecessary notify events
027c5e7a 2280+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 2281+ * once detached from aufs.
2282+ */
2283+ if (au_xino_brid(sb) < 0
2284+ && au_br_writable(add_branch->br_perm)
2285+ && !au_test_fs_bad_xino(h_dentry->d_sb)
2286+ && add_branch->br_xino.xi_file
2287+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2288+ au_xino_brid_set(sb, add_branch->br_id);
2289+
4f0767ce 2290+out:
1facf9fc 2291+ return err;
2292+}
2293+
2294+/* ---------------------------------------------------------------------- */
2295+
2296+/*
2297+ * delete a branch
2298+ */
2299+
2300+/* to show the line number, do not make it an inline function */
4a4d8108 2301+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 2302+ if (do_info) \
4a4d8108 2303+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 2304+} while (0)
2305+
027c5e7a
AM
2306+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
2307+ aufs_bindex_t bend)
2308+{
2309+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
2310+}
2311+
2312+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
2313+ aufs_bindex_t bend)
2314+{
2315+ return au_test_ibusy(dentry->d_inode, bstart, bend);
2316+}
2317+
1facf9fc 2318+/*
2319+ * test if the branch is deletable or not.
2320+ */
2321+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 2322+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2323+{
2324+ int err, i, j, ndentry;
2325+ aufs_bindex_t bstart, bend;
1facf9fc 2326+ struct au_dcsub_pages dpages;
2327+ struct au_dpage *dpage;
2328+ struct dentry *d;
1facf9fc 2329+
2330+ err = au_dpages_init(&dpages, GFP_NOFS);
2331+ if (unlikely(err))
2332+ goto out;
2333+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
2334+ if (unlikely(err))
2335+ goto out_dpages;
2336+
1facf9fc 2337+ for (i = 0; !err && i < dpages.ndpage; i++) {
2338+ dpage = dpages.dpages + i;
2339+ ndentry = dpage->ndentry;
2340+ for (j = 0; !err && j < ndentry; j++) {
2341+ d = dpage->dentries[j];
027c5e7a
AM
2342+ AuDebugOn(!d->d_count);
2343+ if (!au_digen_test(d, sigen)) {
1facf9fc 2344+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
2345+ if (unlikely(au_dbrange_test(d))) {
2346+ di_read_unlock(d, AuLock_IR);
2347+ continue;
2348+ }
2349+ } else {
1facf9fc 2350+ di_write_lock_child(d);
027c5e7a
AM
2351+ if (unlikely(au_dbrange_test(d))) {
2352+ di_write_unlock(d);
2353+ continue;
2354+ }
1facf9fc 2355+ err = au_reval_dpath(d, sigen);
2356+ if (!err)
2357+ di_downgrade_lock(d, AuLock_IR);
2358+ else {
2359+ di_write_unlock(d);
2360+ break;
2361+ }
2362+ }
2363+
027c5e7a 2364+ /* AuDbgDentry(d); */
1facf9fc 2365+ bstart = au_dbstart(d);
2366+ bend = au_dbend(d);
2367+ if (bstart <= bindex
2368+ && bindex <= bend
2369+ && au_h_dptr(d, bindex)
027c5e7a 2370+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 2371+ err = -EBUSY;
2372+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
027c5e7a 2373+ AuDbgDentry(d);
1facf9fc 2374+ }
2375+ di_read_unlock(d, AuLock_IR);
2376+ }
2377+ }
2378+
4f0767ce 2379+out_dpages:
1facf9fc 2380+ au_dpages_free(&dpages);
4f0767ce 2381+out:
1facf9fc 2382+ return err;
2383+}
2384+
2385+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 2386+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2387+{
2388+ int err;
7f207e10
AM
2389+ unsigned long long max, ull;
2390+ struct inode *i, **array;
1facf9fc 2391+ aufs_bindex_t bstart, bend;
1facf9fc 2392+
7f207e10
AM
2393+ array = au_iarray_alloc(sb, &max);
2394+ err = PTR_ERR(array);
2395+ if (IS_ERR(array))
2396+ goto out;
2397+
1facf9fc 2398+ err = 0;
7f207e10
AM
2399+ AuDbg("b%d\n", bindex);
2400+ for (ull = 0; !err && ull < max; ull++) {
2401+ i = array[ull];
2402+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 2403+ continue;
2404+
7f207e10 2405+ /* AuDbgInode(i); */
1facf9fc 2406+ if (au_iigen(i) == sigen)
2407+ ii_read_lock_child(i);
2408+ else {
2409+ ii_write_lock_child(i);
027c5e7a
AM
2410+ err = au_refresh_hinode_self(i);
2411+ au_iigen_dec(i);
1facf9fc 2412+ if (!err)
2413+ ii_downgrade_lock(i);
2414+ else {
2415+ ii_write_unlock(i);
2416+ break;
2417+ }
2418+ }
2419+
2420+ bstart = au_ibstart(i);
2421+ bend = au_ibend(i);
2422+ if (bstart <= bindex
2423+ && bindex <= bend
2424+ && au_h_iptr(i, bindex)
027c5e7a 2425+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 2426+ err = -EBUSY;
2427+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 2428+ AuDbgInode(i);
1facf9fc 2429+ }
2430+ ii_read_unlock(i);
2431+ }
7f207e10 2432+ au_iarray_free(array, max);
1facf9fc 2433+
7f207e10 2434+out:
1facf9fc 2435+ return err;
2436+}
2437+
b752ccd1
AM
2438+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
2439+ const unsigned int verbose)
1facf9fc 2440+{
2441+ int err;
2442+ unsigned int sigen;
2443+
2444+ sigen = au_sigen(root->d_sb);
2445+ DiMustNoWaiters(root);
2446+ IiMustNoWaiters(root->d_inode);
2447+ di_write_unlock(root);
b752ccd1 2448+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 2449+ if (!err)
b752ccd1 2450+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 2451+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2452+
2453+ return err;
2454+}
2455+
2456+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2457+ const aufs_bindex_t bindex,
2458+ const aufs_bindex_t bend)
2459+{
2460+ struct au_branch **brp, **p;
2461+
dece6358
AM
2462+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2463+
1facf9fc 2464+ brp = sbinfo->si_branch + bindex;
2465+ if (bindex < bend)
2466+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2467+ sbinfo->si_branch[0 + bend] = NULL;
2468+ sbinfo->si_bend--;
2469+
53392da6 2470+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2471+ if (p)
2472+ sbinfo->si_branch = p;
4a4d8108 2473+ /* harmless error */
1facf9fc 2474+}
2475+
2476+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2477+ const aufs_bindex_t bend)
2478+{
2479+ struct au_hdentry *hdp, *p;
2480+
1308ab2a 2481+ AuRwMustWriteLock(&dinfo->di_rwsem);
2482+
4a4d8108 2483+ hdp = dinfo->di_hdentry;
1facf9fc 2484+ if (bindex < bend)
4a4d8108
AM
2485+ memmove(hdp + bindex, hdp + bindex + 1,
2486+ sizeof(*hdp) * (bend - bindex));
2487+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 2488+ dinfo->di_bend--;
2489+
53392da6 2490+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2491+ if (p)
2492+ dinfo->di_hdentry = p;
4a4d8108 2493+ /* harmless error */
1facf9fc 2494+}
2495+
2496+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2497+ const aufs_bindex_t bend)
2498+{
2499+ struct au_hinode *hip, *p;
2500+
1308ab2a 2501+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2502+
1facf9fc 2503+ hip = iinfo->ii_hinode + bindex;
2504+ if (bindex < bend)
2505+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2506+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 2507+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 2508+ iinfo->ii_bend--;
2509+
53392da6 2510+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2511+ if (p)
2512+ iinfo->ii_hinode = p;
4a4d8108 2513+ /* harmless error */
1facf9fc 2514+}
2515+
2516+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2517+ struct au_branch *br)
2518+{
2519+ aufs_bindex_t bend;
2520+ struct au_sbinfo *sbinfo;
53392da6
AM
2521+ struct dentry *root, *h_root;
2522+ struct inode *inode, *h_inode;
2523+ struct au_hinode *hinode;
1facf9fc 2524+
dece6358
AM
2525+ SiMustWriteLock(sb);
2526+
1facf9fc 2527+ root = sb->s_root;
2528+ inode = root->d_inode;
1facf9fc 2529+ sbinfo = au_sbi(sb);
2530+ bend = sbinfo->si_bend;
2531+
53392da6
AM
2532+ h_root = au_h_dptr(root, bindex);
2533+ hinode = au_hi(inode, bindex);
2534+ h_inode = au_igrab(hinode->hi_inode);
2535+ au_hiput(hinode);
1facf9fc 2536+
53392da6 2537+ au_sbilist_lock();
1facf9fc 2538+ au_br_do_del_brp(sbinfo, bindex, bend);
2539+ au_br_do_del_hdp(au_di(root), bindex, bend);
2540+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
2541+ au_sbilist_unlock();
2542+
2543+ dput(h_root);
2544+ iput(h_inode);
2545+ au_br_do_free(br);
1facf9fc 2546+}
2547+
2548+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2549+{
2550+ int err, rerr, i;
2551+ unsigned int mnt_flags;
2552+ aufs_bindex_t bindex, bend, br_id;
2553+ unsigned char do_wh, verbose;
2554+ struct au_branch *br;
2555+ struct au_wbr *wbr;
2556+
2557+ err = 0;
2558+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2559+ if (bindex < 0) {
2560+ if (remount)
2561+ goto out; /* success */
2562+ err = -ENOENT;
4a4d8108 2563+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 2564+ goto out;
2565+ }
2566+ AuDbg("bindex b%d\n", bindex);
2567+
2568+ err = -EBUSY;
2569+ mnt_flags = au_mntflags(sb);
2570+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2571+ bend = au_sbend(sb);
2572+ if (unlikely(!bend)) {
2573+ AuVerbose(verbose, "no more branches left\n");
2574+ goto out;
2575+ }
2576+ br = au_sbr(sb, bindex);
2577+ i = atomic_read(&br->br_count);
2578+ if (unlikely(i)) {
2579+ AuVerbose(verbose, "%d file(s) opened\n", i);
e49829fe 2580+ goto out;
1facf9fc 2581+ }
2582+
2583+ wbr = br->br_wbr;
2584+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2585+ if (do_wh) {
1308ab2a 2586+ /* instead of WbrWhMustWriteLock(wbr) */
2587+ SiMustWriteLock(sb);
1facf9fc 2588+ for (i = 0; i < AuBrWh_Last; i++) {
2589+ dput(wbr->wbr_wh[i]);
2590+ wbr->wbr_wh[i] = NULL;
2591+ }
2592+ }
2593+
b752ccd1 2594+ err = test_children_busy(sb->s_root, bindex, verbose);
1facf9fc 2595+ if (unlikely(err)) {
2596+ if (do_wh)
2597+ goto out_wh;
2598+ goto out;
2599+ }
2600+
2601+ err = 0;
2602+ br_id = br->br_id;
2603+ if (!remount)
2604+ au_br_do_del(sb, bindex, br);
2605+ else {
2606+ sysaufs_brs_del(sb, bindex);
2607+ au_br_do_del(sb, bindex, br);
2608+ sysaufs_brs_add(sb, bindex);
2609+ }
2610+
1308ab2a 2611+ if (!bindex) {
1facf9fc 2612+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
1308ab2a 2613+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2614+ } else
1facf9fc 2615+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2616+ if (au_opt_test(mnt_flags, PLINK))
2617+ au_plink_half_refresh(sb, br_id);
2618+
b752ccd1 2619+ if (au_xino_brid(sb) == br_id)
1facf9fc 2620+ au_xino_brid_set(sb, -1);
2621+ goto out; /* success */
2622+
4f0767ce 2623+out_wh:
1facf9fc 2624+ /* revert */
2625+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2626+ if (rerr)
4a4d8108
AM
2627+ pr_warning("failed re-creating base whiteout, %s. (%d)\n",
2628+ del->pathname, rerr);
4f0767ce 2629+out:
1facf9fc 2630+ return err;
2631+}
2632+
2633+/* ---------------------------------------------------------------------- */
2634+
027c5e7a
AM
2635+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
2636+{
2637+ int err;
2638+ aufs_bindex_t bstart, bend;
2639+ struct aufs_ibusy ibusy;
2640+ struct inode *inode, *h_inode;
2641+
2642+ err = -EPERM;
2643+ if (unlikely(!capable(CAP_SYS_ADMIN)))
2644+ goto out;
2645+
2646+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
2647+ if (!err)
2648+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
2649+ if (unlikely(err)) {
2650+ err = -EFAULT;
2651+ AuTraceErr(err);
2652+ goto out;
2653+ }
2654+
2655+ err = -EINVAL;
2656+ si_read_lock(sb, AuLock_FLUSH);
2657+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
2658+ goto out_unlock;
2659+
2660+ err = 0;
2661+ ibusy.h_ino = 0; /* invalid */
2662+ inode = ilookup(sb, ibusy.ino);
2663+ if (!inode
2664+ || inode->i_ino == AUFS_ROOT_INO
2665+ || is_bad_inode(inode))
2666+ goto out_unlock;
2667+
2668+ ii_read_lock_child(inode);
2669+ bstart = au_ibstart(inode);
2670+ bend = au_ibend(inode);
2671+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
2672+ h_inode = au_h_iptr(inode, ibusy.bindex);
2673+ if (h_inode && au_test_ibusy(inode, bstart, bend))
2674+ ibusy.h_ino = h_inode->i_ino;
2675+ }
2676+ ii_read_unlock(inode);
2677+ iput(inode);
2678+
2679+out_unlock:
2680+ si_read_unlock(sb);
2681+ if (!err) {
2682+ err = __put_user(ibusy.h_ino, &arg->h_ino);
2683+ if (unlikely(err)) {
2684+ err = -EFAULT;
2685+ AuTraceErr(err);
2686+ }
2687+ }
2688+out:
2689+ return err;
2690+}
2691+
2692+long au_ibusy_ioctl(struct file *file, unsigned long arg)
2693+{
2694+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg);
2695+}
2696+
2697+#ifdef CONFIG_COMPAT
2698+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
2699+{
2700+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg));
2701+}
2702+#endif
2703+
2704+/* ---------------------------------------------------------------------- */
2705+
1facf9fc 2706+/*
2707+ * change a branch permission
2708+ */
2709+
dece6358
AM
2710+static void au_warn_ima(void)
2711+{
2712+#ifdef CONFIG_IMA
1308ab2a 2713+ /* since it doesn't support mark_files_ro() */
027c5e7a 2714+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
2715+#endif
2716+}
2717+
1facf9fc 2718+static int do_need_sigen_inc(int a, int b)
2719+{
2720+ return au_br_whable(a) && !au_br_whable(b);
2721+}
2722+
2723+static int need_sigen_inc(int old, int new)
2724+{
2725+ return do_need_sigen_inc(old, new)
2726+ || do_need_sigen_inc(new, old);
2727+}
2728+
7f207e10
AM
2729+static unsigned long long au_farray_cb(void *a,
2730+ unsigned long long max __maybe_unused,
2731+ void *arg)
2732+{
2733+ unsigned long long n;
2734+ struct file **p, *f;
2735+ struct super_block *sb = arg;
2736+
2737+ n = 0;
2738+ p = a;
2739+ lg_global_lock(files_lglock);
2740+ do_file_list_for_each_entry(sb, f) {
2741+ if (au_fi(f)
027c5e7a 2742+ && file_count(f)
7f207e10
AM
2743+ && !special_file(f->f_dentry->d_inode->i_mode)) {
2744+ get_file(f);
2745+ *p++ = f;
2746+ n++;
2747+ AuDebugOn(n > max);
2748+ }
2749+ } while_file_list_for_each_entry;
2750+ lg_global_unlock(files_lglock);
2751+
2752+ return n;
2753+}
2754+
2755+static struct file **au_farray_alloc(struct super_block *sb,
2756+ unsigned long long *max)
2757+{
2758+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
2759+ return au_array_alloc(max, au_farray_cb, sb);
2760+}
2761+
2762+static void au_farray_free(struct file **a, unsigned long long max)
2763+{
2764+ unsigned long long ull;
2765+
2766+ for (ull = 0; ull < max; ull++)
2767+ if (a[ull])
2768+ fput(a[ull]);
2769+ au_array_free(a);
2770+}
2771+
1facf9fc 2772+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2773+{
7f207e10 2774+ int err, do_warn;
027c5e7a 2775+ unsigned int mnt_flags;
7f207e10 2776+ unsigned long long ull, max;
e49829fe 2777+ aufs_bindex_t br_id;
027c5e7a 2778+ unsigned char verbose;
7f207e10 2779+ struct file *file, *hf, **array;
e49829fe
JR
2780+ struct inode *inode;
2781+ struct au_hfile *hfile;
1facf9fc 2782+
027c5e7a
AM
2783+ mnt_flags = au_mntflags(sb);
2784+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2785+
7f207e10
AM
2786+ array = au_farray_alloc(sb, &max);
2787+ err = PTR_ERR(array);
2788+ if (IS_ERR(array))
1facf9fc 2789+ goto out;
2790+
7f207e10 2791+ do_warn = 0;
e49829fe 2792+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
2793+ for (ull = 0; ull < max; ull++) {
2794+ file = array[ull];
1facf9fc 2795+
7f207e10 2796+ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */
1facf9fc 2797+ fi_read_lock(file);
2798+ if (unlikely(au_test_mmapped(file))) {
2799+ err = -EBUSY;
027c5e7a
AM
2800+ AuVerbose(verbose, "mmapped %.*s\n",
2801+ AuDLNPair(file->f_dentry));
7f207e10 2802+ AuDbgFile(file);
1facf9fc 2803+ FiMustNoWaiters(file);
2804+ fi_read_unlock(file);
7f207e10 2805+ goto out_array;
1facf9fc 2806+ }
2807+
027c5e7a 2808+ inode = file->f_dentry->d_inode;
e49829fe
JR
2809+ hfile = &au_fi(file)->fi_htop;
2810+ hf = hfile->hf_file;
2811+ if (!S_ISREG(inode->i_mode)
1facf9fc 2812+ || !(file->f_mode & FMODE_WRITE)
e49829fe 2813+ || hfile->hf_br->br_id != br_id
7f207e10
AM
2814+ || !(hf->f_mode & FMODE_WRITE))
2815+ array[ull] = NULL;
2816+ else {
2817+ do_warn = 1;
2818+ get_file(file);
1facf9fc 2819+ }
2820+
1facf9fc 2821+ FiMustNoWaiters(file);
2822+ fi_read_unlock(file);
7f207e10
AM
2823+ fput(file);
2824+ }
1facf9fc 2825+
2826+ err = 0;
7f207e10 2827+ if (do_warn)
dece6358 2828+ au_warn_ima();
7f207e10
AM
2829+
2830+ for (ull = 0; ull < max; ull++) {
2831+ file = array[ull];
2832+ if (!file)
2833+ continue;
2834+
1facf9fc 2835+ /* todo: already flushed? */
2836+ /* cf. fs/super.c:mark_files_ro() */
7f207e10
AM
2837+ /* fi_read_lock(file); */
2838+ hfile = &au_fi(file)->fi_htop;
2839+ hf = hfile->hf_file;
2840+ /* fi_read_unlock(file); */
027c5e7a 2841+ spin_lock(&hf->f_lock);
1facf9fc 2842+ hf->f_mode &= ~FMODE_WRITE;
027c5e7a 2843+ spin_unlock(&hf->f_lock);
1facf9fc 2844+ if (!file_check_writeable(hf)) {
2845+ file_release_write(hf);
2846+ mnt_drop_write(hf->f_vfsmnt);
2847+ }
2848+ }
2849+
7f207e10
AM
2850+out_array:
2851+ au_farray_free(array, max);
4f0767ce 2852+out:
7f207e10 2853+ AuTraceErr(err);
1facf9fc 2854+ return err;
2855+}
2856+
2857+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 2858+ int *do_refresh)
1facf9fc 2859+{
2860+ int err, rerr;
2861+ aufs_bindex_t bindex;
1308ab2a 2862+ struct path path;
1facf9fc 2863+ struct dentry *root;
2864+ struct au_branch *br;
2865+
2866+ root = sb->s_root;
1facf9fc 2867+ bindex = au_find_dbindex(root, mod->h_root);
2868+ if (bindex < 0) {
2869+ if (remount)
2870+ return 0; /* success */
2871+ err = -ENOENT;
4a4d8108 2872+ pr_err("%s no such branch\n", mod->path);
1facf9fc 2873+ goto out;
2874+ }
2875+ AuDbg("bindex b%d\n", bindex);
2876+
2877+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2878+ if (unlikely(err))
2879+ goto out;
2880+
2881+ br = au_sbr(sb, bindex);
2882+ if (br->br_perm == mod->perm)
2883+ return 0; /* success */
2884+
2885+ if (au_br_writable(br->br_perm)) {
2886+ /* remove whiteout base */
2887+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2888+ if (unlikely(err))
2889+ goto out;
2890+
2891+ if (!au_br_writable(mod->perm)) {
2892+ /* rw --> ro, file might be mmapped */
2893+ DiMustNoWaiters(root);
2894+ IiMustNoWaiters(root->d_inode);
2895+ di_write_unlock(root);
2896+ err = au_br_mod_files_ro(sb, bindex);
2897+ /* aufs_write_lock() calls ..._child() */
2898+ di_write_lock_child(root);
2899+
2900+ if (unlikely(err)) {
2901+ rerr = -ENOMEM;
2902+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2903+ GFP_NOFS);
1308ab2a 2904+ if (br->br_wbr) {
2905+ path.mnt = br->br_mnt;
2906+ path.dentry = mod->h_root;
2907+ rerr = au_wbr_init(br, sb, br->br_perm,
2908+ &path);
2909+ }
1facf9fc 2910+ if (unlikely(rerr)) {
2911+ AuIOErr("nested error %d (%d)\n",
2912+ rerr, err);
2913+ br->br_perm = mod->perm;
2914+ }
2915+ }
2916+ }
2917+ } else if (au_br_writable(mod->perm)) {
2918+ /* ro --> rw */
2919+ err = -ENOMEM;
2920+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2921+ if (br->br_wbr) {
1308ab2a 2922+ path.mnt = br->br_mnt;
2923+ path.dentry = mod->h_root;
1facf9fc 2924+ err = au_wbr_init(br, sb, mod->perm, &path);
2925+ if (unlikely(err)) {
2926+ kfree(br->br_wbr);
2927+ br->br_wbr = NULL;
2928+ }
2929+ }
2930+ }
2931+
2932+ if (!err) {
7f207e10 2933+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
1facf9fc 2934+ br->br_perm = mod->perm;
2935+ }
2936+
4f0767ce 2937+out:
7f207e10 2938+ AuTraceErr(err);
1facf9fc 2939+ return err;
2940+}
7f207e10
AM
2941diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
2942--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
53392da6 2943+++ linux/fs/aufs/branch.h 2011-08-24 13:30:24.731313534 +0200
027c5e7a 2944@@ -0,0 +1,233 @@
1facf9fc 2945+/*
027c5e7a 2946+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 2947+ *
2948+ * This program, aufs is free software; you can redistribute it and/or modify
2949+ * it under the terms of the GNU General Public License as published by
2950+ * the Free Software Foundation; either version 2 of the License, or
2951+ * (at your option) any later version.
dece6358
AM
2952+ *
2953+ * This program is distributed in the hope that it will be useful,
2954+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2955+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2956+ * GNU General Public License for more details.
2957+ *
2958+ * You should have received a copy of the GNU General Public License
2959+ * along with this program; if not, write to the Free Software
2960+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2961+ */
2962+
2963+/*
2964+ * branch filesystems and xino for them
2965+ */
2966+
2967+#ifndef __AUFS_BRANCH_H__
2968+#define __AUFS_BRANCH_H__
2969+
2970+#ifdef __KERNEL__
2971+
2972+#include <linux/fs.h>
2973+#include <linux/mount.h>
1facf9fc 2974+#include <linux/aufs_type.h>
4a4d8108 2975+#include "dynop.h"
1facf9fc 2976+#include "rwsem.h"
2977+#include "super.h"
2978+
2979+/* ---------------------------------------------------------------------- */
2980+
2981+/* a xino file */
2982+struct au_xino_file {
2983+ struct file *xi_file;
2984+ struct mutex xi_nondir_mtx;
2985+
2986+ /* todo: make xino files an array to support huge inode numbers */
2987+
2988+#ifdef CONFIG_DEBUG_FS
2989+ struct dentry *xi_dbgaufs;
2990+#endif
2991+};
2992+
2993+/* members for writable branch only */
2994+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
2995+struct au_wbr {
dece6358 2996+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 2997+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 2998+ atomic_t wbr_wh_running;
1facf9fc 2999+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
3000+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
3001+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
3002+
3003+ /* mfs mode */
3004+ unsigned long long wbr_bytes;
3005+};
3006+
4a4d8108
AM
3007+/* ext2 has 3 types of operations at least, ext3 has 4 */
3008+#define AuBrDynOp (AuDyLast * 4)
3009+
1facf9fc 3010+/* protected by superblock rwsem */
3011+struct au_branch {
3012+ struct au_xino_file br_xino;
3013+
3014+ aufs_bindex_t br_id;
3015+
3016+ int br_perm;
3017+ struct vfsmount *br_mnt;
4a4d8108
AM
3018+ spinlock_t br_dykey_lock;
3019+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 3020+ atomic_t br_count;
3021+
3022+ struct au_wbr *br_wbr;
3023+
3024+ /* xino truncation */
3025+ blkcnt_t br_xino_upper; /* watermark in blocks */
3026+ atomic_t br_xino_running;
3027+
027c5e7a
AM
3028+#ifdef CONFIG_AUFS_HFSNOTIFY
3029+ struct fsnotify_group *br_hfsn_group;
3030+ struct fsnotify_ops br_hfsn_ops;
3031+#endif
3032+
1facf9fc 3033+#ifdef CONFIG_SYSFS
3034+ /* an entry under sysfs per mount-point */
3035+ char br_name[8];
3036+ struct attribute br_attr;
3037+#endif
3038+};
3039+
3040+/* ---------------------------------------------------------------------- */
3041+
3042+/* branch permission and attribute */
3043+enum {
3044+ AuBrPerm_RW, /* writable, linkable wh */
3045+ AuBrPerm_RO, /* readonly, no wh */
3046+ AuBrPerm_RR, /* natively readonly, no wh */
3047+
3048+ AuBrPerm_RWNoLinkWH, /* un-linkable whiteouts */
3049+
3050+ AuBrPerm_ROWH, /* whiteout-able */
3051+ AuBrPerm_RRWH, /* whiteout-able */
3052+
3053+ AuBrPerm_Last
3054+};
3055+
3056+static inline int au_br_writable(int brperm)
3057+{
3058+ return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
3059+}
3060+
3061+static inline int au_br_whable(int brperm)
3062+{
3063+ return brperm == AuBrPerm_RW
3064+ || brperm == AuBrPerm_ROWH
3065+ || brperm == AuBrPerm_RRWH;
3066+}
3067+
3068+static inline int au_br_rdonly(struct au_branch *br)
3069+{
3070+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
3071+ || !au_br_writable(br->br_perm))
3072+ ? -EROFS : 0;
3073+}
3074+
4a4d8108 3075+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 3076+{
4a4d8108 3077+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 3078+ return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH;
3079+#else
3080+ return 0;
3081+#endif
3082+}
3083+
3084+/* ---------------------------------------------------------------------- */
3085+
3086+/* branch.c */
3087+struct au_sbinfo;
3088+void au_br_free(struct au_sbinfo *sinfo);
3089+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
3090+struct au_opt_add;
3091+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
3092+struct au_opt_del;
3093+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
3094+long au_ibusy_ioctl(struct file *file, unsigned long arg);
3095+#ifdef CONFIG_COMPAT
3096+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
3097+#endif
1facf9fc 3098+struct au_opt_mod;
3099+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 3100+ int *do_refresh);
1facf9fc 3101+
3102+/* xino.c */
3103+static const loff_t au_loff_max = LLONG_MAX;
3104+
3105+int au_xib_trunc(struct super_block *sb);
3106+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
3107+ loff_t *pos);
3108+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
3109+ loff_t *pos);
3110+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
3111+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
3112+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 3113+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 3114+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3115+ ino_t ino);
3116+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3117+ ino_t *ino);
3118+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
3119+ struct file *base_file, int do_test);
3120+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
3121+
3122+struct au_opt_xino;
3123+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
3124+void au_xino_clr(struct super_block *sb);
3125+struct file *au_xino_def(struct super_block *sb);
3126+int au_xino_path(struct seq_file *seq, struct file *file);
3127+
3128+/* ---------------------------------------------------------------------- */
3129+
3130+/* Superblock to branch */
3131+static inline
3132+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
3133+{
3134+ return au_sbr(sb, bindex)->br_id;
3135+}
3136+
3137+static inline
3138+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
3139+{
3140+ return au_sbr(sb, bindex)->br_mnt;
3141+}
3142+
3143+static inline
3144+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
3145+{
3146+ return au_sbr_mnt(sb, bindex)->mnt_sb;
3147+}
3148+
3149+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
3150+{
e49829fe 3151+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 3152+}
3153+
3154+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
3155+{
3156+ return au_sbr(sb, bindex)->br_perm;
3157+}
3158+
3159+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
3160+{
3161+ return au_br_whable(au_sbr_perm(sb, bindex));
3162+}
3163+
3164+/* ---------------------------------------------------------------------- */
3165+
3166+/*
3167+ * wbr_wh_read_lock, wbr_wh_write_lock
3168+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
3169+ */
3170+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
3171+
dece6358
AM
3172+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
3173+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
3174+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
3175+
1facf9fc 3176+#endif /* __KERNEL__ */
3177+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
3178diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
3179--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 3180+++ linux/fs/aufs/conf.mk 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 3181@@ -0,0 +1,38 @@
4a4d8108
AM
3182+# AuConfStr accumulates "NAME=value" words, starting with CONFIG_AUFS_FS
3183+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
3184+# AuConf: appends "${1}=<its value>" to AuConfStr when the variable named ${1} is defined
3185+define AuConf
3186+ifdef ${1}
3187+AuConfStr += ${1}=${${1}}
3188+endif
3189+endef
3190+# option suffixes; the foreach below evaluates AuConf for CONFIG_AUFS_<suffix> of each
b752ccd1 3191+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 3192+ SBILIST \
7f207e10 3193+ HNOTIFY HFSNOTIFY \
4a4d8108
AM
3194+ EXPORT INO_T_64 \
3195+ RDU \
2cbb1c4b 3196+ PROC_MAP \
4a4d8108
AM
3197+ SP_IATTR \
3198+ SHWH \
3199+ BR_RAMFS \
3200+ BR_FUSE POLL \
3201+ BR_HFSPLUS \
3202+ BDEV_LOOP \
b752ccd1
AM
3203+ DEBUG MAGIC_SYSRQ
3204+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
3205+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
3206+# conf.str.tmp is rewritten on every run (FORCE); conf.str is replaced only when diff reports a difference
3207+AuConfName = ${obj}/conf.str
3208+${AuConfName}.tmp: FORCE
3209+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
3210+${AuConfName}: ${AuConfName}.tmp
3211+ @diff -q $< $@ > /dev/null 2>&1 || { \
3212+ echo ' GEN ' $@; \
3213+ cp -p $< $@; \
3214+ }
3215+FORCE:
3216+clean-files += ${AuConfName} ${AuConfName}.tmp
3217+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
3218+# the leading '-' makes a missing conf_priv.mk a non-error
3219+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
3220diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
3221--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
3222+++ linux/fs/aufs/cpup.c 2011-08-24 13:30:24.731313534 +0200
3223@@ -0,0 +1,1080 @@
1facf9fc 3224+/*
027c5e7a 3225+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 3226+ *
3227+ * This program, aufs is free software; you can redistribute it and/or modify
3228+ * it under the terms of the GNU General Public License as published by
3229+ * the Free Software Foundation; either version 2 of the License, or
3230+ * (at your option) any later version.
dece6358
AM
3231+ *
3232+ * This program is distributed in the hope that it will be useful,
3233+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3234+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3235+ * GNU General Public License for more details.
3236+ *
3237+ * You should have received a copy of the GNU General Public License
3238+ * along with this program; if not, write to the Free Software
3239+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3240+ */
3241+
3242+/*
3243+ * copy-up functions, see wbr_policy.c for copy-down
3244+ */
3245+
dece6358 3246+#include <linux/file.h>
1facf9fc 3247+#include <linux/fs_stack.h>
dece6358 3248+#include <linux/mm.h>
1facf9fc 3249+#include <linux/uaccess.h>
3250+#include "aufs.h"
3251+/*
+ * ORs into @dst->i_flags every bit set in @src->i_flags except the bits in
+ * the local mask; S_NOATIME | S_NOCMTIME are then set too when
+ * au_test_fs_notime() is true for @dst->i_sb.
+ * bits already set in @dst are never cleared here.
+ */
3252+void au_cpup_attr_flags(struct inode *dst, struct inode *src)
3253+{
3254+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
3255+ | S_NOATIME | S_NOCMTIME;
3256+
3257+ dst->i_flags |= src->i_flags & ~mask;
3258+ if (au_test_fs_notime(dst->i_sb))
3259+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
3260+}
3261+/*
+ * copies the times and the size of the lower inode found at
+ * au_ibstart(@inode) to @inode, using the fsstack_copy_*() helpers.
+ */
3262+void au_cpup_attr_timesizes(struct inode *inode)
3263+{
3264+ struct inode *h_inode;
3265+
3266+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3267+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 3268+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 3269+}
3270+/*
+ * sets @inode->i_nlink from the lower inode at au_ibstart(@inode).
+ * for a directory, au_add_nlink() is additionally applied for every lower
+ * inode that exists on the following branches up to au_ibend(@inode).
+ * unless @force is set, nothing is done for a non-directory when the PLINK
+ * mount option is on and au_plink_test(@inode) is true.
+ */
3271+void au_cpup_attr_nlink(struct inode *inode, int force)
3272+{
3273+ struct inode *h_inode;
3274+ struct super_block *sb;
3275+ aufs_bindex_t bindex, bend;
3276+
3277+ sb = inode->i_sb;
3278+ bindex = au_ibstart(inode);
3279+ h_inode = au_h_iptr(inode, bindex);
3280+ if (!force
3281+ && !S_ISDIR(h_inode->i_mode)
3282+ && au_opt_test(au_mntflags(sb), PLINK)
3283+ && au_plink_test(inode))
3284+ return;
3285+
3286+ inode->i_nlink = h_inode->i_nlink;
3287+
3288+ /*
3289+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
3290+ * it may include the whplink directory.
3291+ */
3292+ if (S_ISDIR(h_inode->i_mode)) {
3293+ bend = au_ibend(inode);
3294+ for (bindex++; bindex <= bend; bindex++) {
3295+ h_inode = au_h_iptr(inode, bindex);
3296+ if (h_inode)
3297+ au_add_nlink(inode, h_inode);
3298+ }
3299+ }
3300+}
3301+/*
+ * copies i_mode, i_uid and i_gid from the lower inode at au_ibstart(@inode),
+ * then refreshes times/size and flags through au_cpup_attr_timesizes() and
+ * au_cpup_attr_flags().
+ */
3302+void au_cpup_attr_changeable(struct inode *inode)
3303+{
3304+ struct inode *h_inode;
3305+
3306+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3307+ inode->i_mode = h_inode->i_mode;
3308+ inode->i_uid = h_inode->i_uid;
3309+ inode->i_gid = h_inode->i_gid;
3310+ au_cpup_attr_timesizes(inode);
3311+ au_cpup_attr_flags(inode, h_inode);
3312+}
3313+/*
+ * records @h_inode's i_generation and i_sb in the au_iinfo of @inode
+ * (ii_higen and ii_hsb1). IiMustWriteLock() is invoked on @inode first.
+ */
3314+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
3315+{
3316+ struct au_iinfo *iinfo = au_ii(inode);
3317+
1308ab2a 3318+ IiMustWriteLock(inode);
3319+
1facf9fc 3320+ iinfo->ii_higen = h_inode->i_generation;
3321+ iinfo->ii_hsb1 = h_inode->i_sb;
3322+}
3323+/*
+ * refreshes @inode from the lower inode at au_ibstart(@inode): the
+ * changeable attributes, i_rdev, i_blkbits and the generation info.
+ * the link count is refreshed (with @force passed through) only while
+ * @inode->i_nlink is still positive.
+ */
3324+void au_cpup_attr_all(struct inode *inode, int force)
3325+{
3326+ struct inode *h_inode;
3327+
3328+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3329+ au_cpup_attr_changeable(inode);
3330+ if (inode->i_nlink > 0)
3331+ au_cpup_attr_nlink(inode, force);
3332+ inode->i_rdev = h_inode->i_rdev;
3333+ inode->i_blkbits = h_inode->i_blkbits;
3334+ au_cpup_igen(inode, h_inode);
3335+}
3336+
3337+/* ---------------------------------------------------------------------- */
3338+
3339+/* Note: dt_dentry and dt_h_path are stored as-is; they are not dget/dput-ed */
3340+
3341+/*
+ * keep the timestamps of the parent dir when cpup:
+ * saves @dentry, a copy of @h_path, and the atime/mtime of the inode behind
+ * @h_path->dentry into @dt, to be written back by au_dtime_revert().
+ */
3342+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3343+ struct path *h_path)
3344+{
3345+ struct inode *h_inode;
3346+
3347+ dt->dt_dentry = dentry;
3348+ dt->dt_h_path = *h_path;
3349+ h_inode = h_path->dentry->d_inode;
3350+ dt->dt_atime = h_inode->i_atime;
3351+ dt->dt_mtime = h_inode->i_mtime;
3352+ /* smp_mb(); */
3353+}
3354+/*
+ * writes the atime/mtime saved in @dt back to @dt->dt_h_path through
+ * vfsub_notify_change(). a failure is only reported by a warning; nothing
+ * is returned to the caller.
+ */
3355+void au_dtime_revert(struct au_dtime *dt)
3356+{
3357+ struct iattr attr;
3358+ int err;
3359+
3360+ attr.ia_atime = dt->dt_atime;
3361+ attr.ia_mtime = dt->dt_mtime;
3362+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
3363+ | ATTR_ATIME | ATTR_ATIME_SET;
3364+
3365+ err = vfsub_notify_change(&dt->dt_h_path, &attr);
3366+ if (unlikely(err))
4a4d8108 3367+ pr_warning("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 3368+}
3369+
3370+/* ---------------------------------------------------------------------- */
3371+/*
+ * applies uid, gid, atime and mtime of @h_src's inode to the lower dentry of
+ * @dst on branch @bindex via vfsub_notify_change(). the mode is included when
+ * it differs and the destination is not a symlink. the inode flags are
+ * merged with au_cpup_attr_flags() before the change is issued.
+ * when the source mode has S_ISUID or S_ISGID and the destination is on nfs,
+ * the mode is set once more by a second vfsub_notify_change().
+ * returns the result of the last vfsub_notify_change() issued.
+ */
3372+static noinline_for_stack
3373+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
3374+{
3375+ int err, sbits;
3376+ struct iattr ia;
3377+ struct path h_path;
1308ab2a 3378+ struct inode *h_isrc, *h_idst;
1facf9fc 3379+
3380+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 3381+ h_idst = h_path.dentry->d_inode;
1facf9fc 3382+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
3383+ h_isrc = h_src->d_inode;
1308ab2a 3384+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 3385+ | ATTR_ATIME | ATTR_MTIME
3386+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
1facf9fc 3387+ ia.ia_uid = h_isrc->i_uid;
3388+ ia.ia_gid = h_isrc->i_gid;
3389+ ia.ia_atime = h_isrc->i_atime;
3390+ ia.ia_mtime = h_isrc->i_mtime;
1308ab2a 3391+ if (h_idst->i_mode != h_isrc->i_mode
3392+ && !S_ISLNK(h_idst->i_mode)) {
3393+ ia.ia_valid |= ATTR_MODE;
3394+ ia.ia_mode = h_isrc->i_mode;
3395+ }
3396+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
3397+ au_cpup_attr_flags(h_idst, h_isrc);
1facf9fc 3398+ err = vfsub_notify_change(&h_path, &ia);
3399+
3400+ /* is this nfs only? */
3401+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
3402+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
3403+ ia.ia_mode = h_isrc->i_mode;
3404+ err = vfsub_notify_change(&h_path, &ia);
3405+ }
3406+
3407+ return err;
3408+}
3409+
3410+/* ---------------------------------------------------------------------- */
3411+/*
+ * copies @len bytes from @src to @dst through @buf, at most @blksize bytes
+ * per step, using the f_pos of both files.
+ * a step that read exactly @blksize bytes which all compare equal to the
+ * zero page is not written; the position of @dst is advanced by
+ * vfsub_llseek() instead, leaving a hole.
+ * when the last step was such a hole, the size of @dst is set to its f_pos
+ * through vfsub_notify_change() (on nfs a single NUL byte is written first
+ * and f_pos stepped back by one).
+ * reads and writes are repeated on -EAGAIN and -EINTR.
+ * returns the last error code, 0 when everything succeeded.
+ */
3412+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
3413+ char *buf, unsigned long blksize)
3414+{
3415+ int err;
3416+ size_t sz, rbytes, wbytes;
3417+ unsigned char all_zero;
3418+ char *p, *zp;
3419+ struct mutex *h_mtx;
3420+ /* reduce stack usage */
3421+ struct iattr *ia;
3422+
3423+ zp = page_address(ZERO_PAGE(0));
3424+ if (unlikely(!zp))
3425+ return -ENOMEM; /* possible? */
3426+
3427+ err = 0;
3428+ all_zero = 0;
3429+ while (len) {
3430+ AuDbg("len %lld\n", len);
3431+ sz = blksize;
3432+ if (len < blksize)
3433+ sz = len;
3434+
3435+ rbytes = 0;
3436+ /*
+ * todo: signal_pending?
+ * NOTE(review): the loop repeats while the read returns 0, so a
+ * source that cannot deliver @len bytes would spin here -- confirm
+ * the callers guarantee @len <= the readable size.
+ */
3437+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
3438+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3439+ err = rbytes;
3440+ }
3441+ if (unlikely(err < 0))
3442+ break;
3443+
3444+ all_zero = 0;
3445+ if (len >= rbytes && rbytes == blksize)
3446+ all_zero = !memcmp(buf, zp, rbytes);
3447+ if (!all_zero) {
3448+ wbytes = rbytes;
3449+ p = buf;
3450+ while (wbytes) {
3451+ size_t b;
3452+
3453+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3454+ err = b;
3455+ /* todo: signal_pending? */
3456+ if (unlikely(err == -EAGAIN || err == -EINTR))
3457+ continue;
3458+ if (unlikely(err < 0))
3459+ break;
3460+ wbytes -= b;
3461+ p += b;
3462+ }
3463+ } else {
3464+ loff_t res;
3465+
3466+ AuLabel(hole);
3467+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3468+ err = res;
3469+ if (unlikely(res < 0))
3470+ break;
3471+ }
3472+ len -= rbytes;
3473+ err = 0;
3474+ }
3475+
3476+ /* the last block may be a hole */
3477+ if (!err && all_zero) {
3478+ AuLabel(last hole);
3479+
3480+ err = 1;
3481+ if (au_test_nfs(dst->f_dentry->d_sb)) {
3482+ /* nfs requires this step to make last hole */
3483+ /* is this only nfs? */
3484+ do {
3485+ /* todo: signal_pending? */
3486+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3487+ } while (err == -EAGAIN || err == -EINTR);
3488+ if (err == 1)
3489+ dst->f_pos--;
3490+ }
3491+
3492+ if (err == 1) {
3493+ ia = (void *)buf; /* the copy buffer is reused as the struct iattr */
3494+ ia->ia_size = dst->f_pos;
3495+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3496+ ia->ia_file = dst;
3497+ h_mtx = &dst->f_dentry->d_inode->i_mutex;
3498+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3499+ err = vfsub_notify_change(&dst->f_path, ia);
3500+ mutex_unlock(h_mtx);
3501+ }
3502+ }
3503+
3504+ return err;
3505+}
3506+/*
+ * copies @len bytes from the start of @src to the start of @dst (f_pos of
+ * both files is reset to 0) through a temporary buffer.
+ * the copy unit is the block size of @dst's super_block, replaced by
+ * PAGE_SIZE when it is 0 or larger than a page.
+ * au_do_copy_file() reuses the buffer as a struct iattr, so kmalloc(blksize)
+ * is used only when a whole struct iattr fits into it; otherwise a full page
+ * is taken.
+ * returns -ENOMEM when the buffer cannot be allocated, otherwise the value
+ * of au_do_copy_file().
+ */
3507+int au_copy_file(struct file *dst, struct file *src, loff_t len)
3508+{
3509+ int err;
3510+ unsigned long blksize;
3511+ unsigned char do_kfree;
3512+ char *buf;
3513+
3514+ err = -ENOMEM;
3515+ blksize = dst->f_dentry->d_sb->s_blocksize;
3516+ if (!blksize || PAGE_SIZE < blksize)
3517+ blksize = PAGE_SIZE;
3518+ AuDbg("blksize %lu\n", blksize);
3519+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr));
3520+ if (do_kfree)
3521+ buf = kmalloc(blksize, GFP_NOFS);
3522+ else
3523+ buf = (void *)__get_free_page(GFP_NOFS);
3524+ if (unlikely(!buf))
3525+ goto out;
3526+
3527+ if (len > (1 << 22))
3528+ AuDbg("copying a large file %lld\n", (long long)len);
3529+
3530+ src->f_pos = 0;
3531+ dst->f_pos = 0;
3532+ err = au_do_copy_file(dst, src, len, buf, blksize);
3533+ if (do_kfree)
3534+ kfree(buf);
3535+ else
3536+ free_page((unsigned long)buf);
3537+
4f0767ce 3538+out:
1facf9fc 3539+ return err;
3540+}
3541+
3542+/*
3543+ * to support a sparse file which is opened with O_APPEND,
3544+ * we need to close the file.
+ *
+ * opens the lower files of @dentry on @bsrc (O_RDONLY) and on @bdst
+ * (O_WRONLY) with au_h_open(), copies @len bytes with au_copy_file(), then
+ * fput()s both files and calls au_sbr_put() for both branches.
+ * the label addresses kept in file[] select the unwind point when opening
+ * one of the files fails (.label) or the opened file has no f_op
+ * (.label_file, -EINVAL).
3545+ */
3546+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
4a4d8108 3547+ aufs_bindex_t bsrc, loff_t len)
1facf9fc 3548+{
3549+ int err, i;
3550+ enum { SRC, DST };
3551+ struct {
3552+ aufs_bindex_t bindex;
3553+ unsigned int flags;
3554+ struct dentry *dentry;
3555+ struct file *file;
3556+ void *label, *label_file;
3557+ } *f, file[] = {
3558+ {
3559+ .bindex = bsrc,
3560+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3561+ .file = NULL,
3562+ .label = &&out,
3563+ .label_file = &&out_src
3564+ },
3565+ {
3566+ .bindex = bdst,
3567+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3568+ .file = NULL,
3569+ .label = &&out_src,
3570+ .label_file = &&out_dst
3571+ }
3572+ };
3573+ struct super_block *sb;
3574+
3575+ /* bsrc branch can be ro/rw. */
3576+ sb = dentry->d_sb;
3577+ f = file;
3578+ for (i = 0; i < 2; i++, f++) {
3579+ f->dentry = au_h_dptr(dentry, f->bindex);
3580+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3581+ err = PTR_ERR(f->file);
3582+ if (IS_ERR(f->file))
3583+ goto *f->label;
3584+ err = -EINVAL;
3585+ if (unlikely(!f->file->f_op))
3586+ goto *f->label_file;
3587+ }
3588+
3589+ /* try stopping to update while we copyup */
3590+ IMustLock(file[SRC].dentry->d_inode);
3591+ err = au_copy_file(file[DST].file, file[SRC].file, len);
3592+
4f0767ce 3593+out_dst:
1facf9fc 3594+ fput(file[DST].file);
3595+ au_sbr_put(sb, file[DST].bindex);
4f0767ce 3596+out_src:
1facf9fc 3597+ fput(file[SRC].file);
3598+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 3599+out:
1facf9fc 3600+ return err;
3601+}
3602+/*
+ * copies the contents of a regular file from @bsrc to @bdst.
+ * @len is clamped to the size of the lower inode on @bsrc; -1 means the
+ * whole file. nothing is copied when the resulting length is 0.
+ * when the copy fails, @h_path is unlinked from @h_dir; if that unlink fails
+ * too, the error becomes -EIO.
+ */
3603+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3604+ aufs_bindex_t bsrc, loff_t len,
3605+ struct inode *h_dir, struct path *h_path)
3606+{
3607+ int err, rerr;
3608+ loff_t l;
3609+
3610+ err = 0;
3611+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3612+ if (len == -1 || l < len)
3613+ len = l;
3614+ if (len)
3615+ err = au_cp_regular(dentry, bdst, bsrc, len);
3616+ if (!err)
3617+ goto out; /* success */
3618+
3619+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3620+ if (rerr) {
3621+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3622+ AuDLNPair(h_path->dentry), err, rerr);
3623+ err = -EIO;
3624+ }
3625+
4f0767ce 3626+out:
1facf9fc 3627+ return err;
3628+}
3629+/*
+ * reads the target of the symlink @h_src into a __getname_gfp() buffer via
+ * the inode's ->readlink() (called under KERNEL_DS since the buffer is a
+ * kernel one) and creates the same symlink at @h_path in @h_dir.
+ * returns -ENOSYS when the inode has no ->readlink(), -ENOMEM when the
+ * buffer cannot be allocated, the ->readlink() result when it is <= 0,
+ * otherwise the result of vfsub_symlink().
+ * NOTE(review): ->readlink() is given PATH_MAX as the size and the
+ * terminator is stored at sym.k[symlen]; confirm symlen can never equal
+ * PATH_MAX for the buffer __getname_gfp() hands out.
+ */
3630+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3631+ struct inode *h_dir)
3632+{
3633+ int err, symlen;
3634+ mm_segment_t old_fs;
b752ccd1
AM
3635+ union {
3636+ char *k;
3637+ char __user *u;
3638+ } sym;
1facf9fc 3639+
3640+ err = -ENOSYS;
3641+ if (unlikely(!h_src->d_inode->i_op->readlink))
3642+ goto out;
3643+
3644+ err = -ENOMEM;
b752ccd1
AM
3645+ sym.k = __getname_gfp(GFP_NOFS);
3646+ if (unlikely(!sym.k))
1facf9fc 3647+ goto out;
3648+
3649+ old_fs = get_fs();
3650+ set_fs(KERNEL_DS);
b752ccd1 3651+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 3652+ err = symlen;
3653+ set_fs(old_fs);
3654+
3655+ if (symlen > 0) {
b752ccd1
AM
3656+ sym.k[symlen] = 0;
3657+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 3658+ }
b752ccd1 3659+ __putname(sym.k);
1facf9fc 3660+
4f0767ce 3661+out:
1facf9fc 3662+ return err;
3663+}
3664+
3665+/*
+ * creates the lower entry for @dentry on @bdst, according to the type of the
+ * lower inode on @bsrc: regular file (create, then copy @len bytes), dir,
+ * symlink, or device/fifo/socket node. any other type gives -EIO.
+ * with AuCpup_DTIME in @flags, the timestamps of the lower parent dir are
+ * saved before and written back after the creation.
+ * unless AuCpup_KEEPLINO is set (and the other conditions tested below
+ * hold), the xino entry of the source inode on @bsrc is reset to 0.
+ *
+ * original note: "return with the lower dst inode is locked".
+ * NOTE(review): no lock on the new lower inode is taken inside this
+ * function; the note may be stale -- confirm against the caller.
+ */
3666+static noinline_for_stack
3667+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3668+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3669+ struct dentry *dst_parent)
3670+{
3671+ int err;
3672+ umode_t mode;
3673+ unsigned int mnt_flags;
3674+ unsigned char isdir;
3675+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3676+ struct au_dtime dt;
3677+ struct path h_path;
3678+ struct dentry *h_src, *h_dst, *h_parent;
3679+ struct inode *h_inode, *h_dir;
3680+ struct super_block *sb;
3681+
3682+ /* bsrc branch can be ro/rw. */
3683+ h_src = au_h_dptr(dentry, bsrc);
3684+ h_inode = h_src->d_inode;
3685+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3686+
3687+ /* try stopping to be referenced while we are creating */
3688+ h_dst = au_h_dptr(dentry, bdst);
3689+ h_parent = h_dst->d_parent; /* dir inode is locked */
3690+ h_dir = h_parent->d_inode;
3691+ IMustLock(h_dir);
3692+ AuDebugOn(h_parent != h_dst->d_parent);
3693+
3694+ sb = dentry->d_sb;
3695+ h_path.mnt = au_sbr_mnt(sb, bdst);
3696+ if (do_dt) {
3697+ h_path.dentry = h_parent;
3698+ au_dtime_store(&dt, dst_parent, &h_path);
3699+ }
3700+ h_path.dentry = h_dst;
3701+
3702+ isdir = 0;
3703+ mode = h_inode->i_mode;
3704+ switch (mode & S_IFMT) {
3705+ case S_IFREG:
3706+ /* try stopping to update while we are referencing */
3707+ IMustLock(h_inode);
3708+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3709+ if (!err)
3710+ err = au_do_cpup_regular
3711+ (dentry, bdst, bsrc, len,
3712+ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3713+ break;
3714+ case S_IFDIR:
3715+ isdir = 1;
3716+ err = vfsub_mkdir(h_dir, &h_path, mode);
3717+ if (!err) {
3718+ /*
3719+ * strange behaviour from the users view,
3720+ * particularly the setattr case
3721+ */
3722+ if (au_ibstart(dst_parent->d_inode) == bdst)
3723+ au_cpup_attr_nlink(dst_parent->d_inode,
3724+ /*force*/1);
3725+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3726+ }
3727+ break;
3728+ case S_IFLNK:
3729+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3730+ break;
3731+ case S_IFCHR:
3732+ case S_IFBLK:
3733+ AuDebugOn(!capable(CAP_MKNOD));
3734+ /*FALLTHROUGH*/
3735+ case S_IFIFO:
3736+ case S_IFSOCK:
3737+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3738+ break;
3739+ default:
3740+ AuIOErr("Unknown inode type 0%o\n", mode);
3741+ err = -EIO;
3742+ }
3743+
3744+ mnt_flags = au_mntflags(sb);
3745+ if (!au_opt_test(mnt_flags, UDBA_NONE)
3746+ && !isdir
3747+ && au_opt_test(mnt_flags, XINO)
3748+ && h_inode->i_nlink == 1
3749+ /* todo: unnecessary? */
3750+ /* && dentry->d_inode->i_nlink == 1 */
3751+ && bdst < bsrc
3752+ && !au_ftest_cpup(flags, KEEPLINO))
1308ab2a 3753+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 3754+ /* ignore this error */
3755+
3756+ if (do_dt)
3757+ au_dtime_revert(&dt);
3758+ return err;
3759+}
3760+
3761+/*
3762+ * copyup the @dentry from @bsrc to @bdst.
3763+ * the caller must set both of the lower dentries.
3764+ * @len is for truncating; when it is -1, copy up the entire file.
3765+ * in link/rename cases, @dst_parent may be different from the real one.
+ * when @dst_parent is NULL, dget_parent(@dentry) is used instead.
+ *
+ * when a lower inode already exists on @bdst, -EIO is returned unless the
+ * PLINK option is on; with a positive link count the new entry is made by
+ * vfsub_link() to the dentry au_plink_lkup() returns, and no data is copied.
+ * otherwise the entry is created by cpup_entry() and its attributes are set
+ * by cpup_iattr() under the new lower inode's i_mutex.
+ * when a step after the creation fails, the new entry is removed again
+ * (unlink or rmdir); a failing removal turns the error into -EIO.
3766+ */
3767+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3768+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3769+ struct dentry *dst_parent)
3770+{
3771+ int err, rerr;
3772+ aufs_bindex_t old_ibstart;
3773+ unsigned char isdir, plink;
3774+ struct au_dtime dt;
3775+ struct path h_path;
3776+ struct dentry *h_src, *h_dst, *h_parent;
3777+ struct inode *dst_inode, *h_dir, *inode;
3778+ struct super_block *sb;
3779+
3780+ AuDebugOn(bsrc <= bdst);
3781+
3782+ sb = dentry->d_sb;
3783+ h_path.mnt = au_sbr_mnt(sb, bdst);
3784+ h_dst = au_h_dptr(dentry, bdst);
3785+ h_parent = h_dst->d_parent; /* dir inode is locked */
3786+ h_dir = h_parent->d_inode;
3787+ IMustLock(h_dir);
3788+
3789+ h_src = au_h_dptr(dentry, bsrc);
3790+ inode = dentry->d_inode;
3791+
3792+ if (!dst_parent)
3793+ dst_parent = dget_parent(dentry);
3794+ else
3795+ dget(dst_parent);
3796+
3797+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
3798+ dst_inode = au_h_iptr(inode, bdst);
3799+ if (dst_inode) {
3800+ if (unlikely(!plink)) {
3801+ err = -EIO;
027c5e7a
AM
3802+ AuIOErr("hi%lu(i%lu) exists on b%d "
3803+ "but plink is disabled\n",
3804+ dst_inode->i_ino, inode->i_ino, bdst);
1facf9fc 3805+ goto out;
3806+ }
3807+
3808+ if (dst_inode->i_nlink) {
3809+ const int do_dt = au_ftest_cpup(flags, DTIME);
3810+
3811+ h_src = au_plink_lkup(inode, bdst);
3812+ err = PTR_ERR(h_src);
3813+ if (IS_ERR(h_src))
3814+ goto out;
3815+ if (unlikely(!h_src->d_inode)) {
3816+ err = -EIO;
3817+ AuIOErr("i%lu exists on a upper branch "
027c5e7a
AM
3818+ "but not pseudo-linked\n",
3819+ inode->i_ino);
1facf9fc 3820+ dput(h_src);
3821+ goto out;
3822+ }
3823+
3824+ if (do_dt) {
3825+ h_path.dentry = h_parent;
3826+ au_dtime_store(&dt, dst_parent, &h_path);
3827+ }
3828+ h_path.dentry = h_dst;
3829+ err = vfsub_link(h_src, h_dir, &h_path);
3830+ if (do_dt)
3831+ au_dtime_revert(&dt);
3832+ dput(h_src);
3833+ goto out;
3834+ } else
3835+ /* todo: cpup_wh_file? */
3836+ /* udba work */
4a4d8108 3837+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 3838+ }
3839+
3840+ old_ibstart = au_ibstart(inode);
3841+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3842+ if (unlikely(err))
3843+ goto out;
3844+ dst_inode = h_dst->d_inode;
3845+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3846+
3847+ err = cpup_iattr(dentry, bdst, h_src);
3848+ isdir = S_ISDIR(dst_inode->i_mode);
3849+ if (!err) {
4a4d8108
AM
3850+ if (bdst < old_ibstart) {
3851+ if (S_ISREG(inode->i_mode)) {
3852+ err = au_dy_iaop(inode, bdst, dst_inode);
3853+ if (unlikely(err))
3854+ goto out_rev;
3855+ }
1facf9fc 3856+ au_set_ibstart(inode, bdst);
4a4d8108 3857+ }
1facf9fc 3858+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3859+ au_hi_flags(inode, isdir));
3860+ mutex_unlock(&dst_inode->i_mutex);
3861+ if (!isdir
3862+ && h_src->d_inode->i_nlink > 1
3863+ && plink)
3864+ au_plink_append(inode, bdst, h_dst);
3865+ goto out; /* success */
3866+ }
3867+
3868+ /* revert */
4a4d8108 3869+out_rev:
1facf9fc 3870+ h_path.dentry = h_parent;
3871+ mutex_unlock(&dst_inode->i_mutex);
3872+ au_dtime_store(&dt, dst_parent, &h_path);
3873+ h_path.dentry = h_dst;
3874+ if (!isdir)
3875+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3876+ else
3877+ rerr = vfsub_rmdir(h_dir, &h_path);
3878+ au_dtime_revert(&dt);
3879+ if (rerr) {
3880+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3881+ err = -EIO;
3882+ }
3883+
4f0767ce 3884+out:
1facf9fc 3885+ dput(dst_parent);
3886+ return err;
3887+}
3888+/* argument pack for au_call_cpup_single(); the result is stored through errp */
3889+struct au_cpup_single_args {
3890+ int *errp;
3891+ struct dentry *dentry;
3892+ aufs_bindex_t bdst, bsrc;
3893+ loff_t len;
3894+ unsigned int flags;
3895+ struct dentry *dst_parent;
3896+};
3897+/* void * trampoline: unpacks @args and stores au_cpup_single()'s result in *errp */
3898+static void au_call_cpup_single(void *args)
3899+{
3900+ struct au_cpup_single_args *a = args;
3901+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3902+ a->flags, a->dst_parent);
3903+}
3904+
53392da6
AM
3905+/*
3906+ * prevent SIGXFSZ in copy-up.
3907+ * testing CAP_MKNOD is for generic fs,
3908+ * but CAP_FSETID is for xfs only, currently.
+ *
+ * returns non-zero when the callers in this file should hand the copy-up
+ * to au_wkq_wait() instead of running it directly:
+ * - always for a regular file,
+ * - for a char/block device when CAP_MKNOD is missing,
+ * - otherwise when @mode has S_ISUID or S_ISGID and CAP_FSETID is missing.
+ * 0 is returned without these tests when au_wkq_test() is true, or when
+ * si_plink_maint_pid is set and au_plink_maint() returns 0.
3909+ */
3910+static int au_cpup_sio_test(struct super_block *sb, umode_t mode)
3911+{
3912+ int do_sio;
3913+
3914+ do_sio = 0;
3915+ if (!au_wkq_test()
3916+ && (!au_sbi(sb)->si_plink_maint_pid
3917+ || au_plink_maint(sb, AuLock_NOPLM))) {
3918+ switch (mode & S_IFMT) {
3919+ case S_IFREG:
3920+ /* no condition about RLIMIT_FSIZE and the file size */
3921+ do_sio = 1;
3922+ break;
3923+ case S_IFCHR:
3924+ case S_IFBLK:
3925+ do_sio = !capable(CAP_MKNOD);
3926+ break;
3927+ }
3928+ if (!do_sio)
3929+ do_sio = ((mode & (S_ISUID | S_ISGID))
3930+ && !capable(CAP_FSETID));
3931+ }
3932+
3933+ return do_sio;
3934+}
3935+/*
+ * runs au_cpup_single() directly, or through au_wkq_wait() when
+ * au_cpup_sio_test() says so for the mode of the lower inode on @bsrc.
+ * a non-zero au_wkq_wait() result replaces the copy-up result.
+ */
1facf9fc 3936+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3937+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3938+ struct dentry *dst_parent)
3939+{
3940+ int err, wkq_err;
1facf9fc 3941+ struct dentry *h_dentry;
3942+
3943+ h_dentry = au_h_dptr(dentry, bsrc);
53392da6 3944+ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode))
1facf9fc 3945+ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3946+ dst_parent);
3947+ else {
3948+ struct au_cpup_single_args args = {
3949+ .errp = &err,
3950+ .dentry = dentry,
3951+ .bdst = bdst,
3952+ .bsrc = bsrc,
3953+ .len = len,
3954+ .flags = flags,
3955+ .dst_parent = dst_parent
3956+ };
3957+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3958+ if (unlikely(wkq_err))
3959+ err = wkq_err;
3960+ }
3961+
3962+ return err;
3963+}
3964+
3965+/*
3966+ * copyup the @dentry from the first active lower branch to @bdst,
3967+ * using au_cpup_single().
+ * "first active" is the first branch index above @bdst, up to
+ * au_dbend(@dentry), on which @dentry has a lower dentry.
+ * a negative lower dentry is looked up on @bdst first (au_lkup_neg()).
+ * when au_cpup_single() fails, the lower dentry on @bdst is cleared again
+ * and dbstart is set back to the source branch.
3968+ */
3969+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3970+ unsigned int flags)
3971+{
3972+ int err;
3973+ aufs_bindex_t bsrc, bend;
3974+
3975+ bend = au_dbend(dentry);
3976+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
3977+ if (au_h_dptr(dentry, bsrc))
3978+ break;
3979+
3980+ err = au_lkup_neg(dentry, bdst);
3981+ if (!err) {
3982+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3983+ if (!err)
3984+ return 0; /* success */
3985+
3986+ /* revert */
3987+ au_set_h_dptr(dentry, bdst, NULL);
3988+ au_set_dbstart(dentry, bsrc);
3989+ }
3990+
3991+ return err;
3992+}
3993+/* argument pack for au_call_cpup_simple(); the result is stored through errp */
3994+struct au_cpup_simple_args {
3995+ int *errp;
3996+ struct dentry *dentry;
3997+ aufs_bindex_t bdst;
3998+ loff_t len;
3999+ unsigned int flags;
4000+};
4001+/* void * trampoline: unpacks @args and stores au_cpup_simple()'s result in *errp */
4002+static void au_call_cpup_simple(void *args)
4003+{
4004+ struct au_cpup_simple_args *a = args;
4005+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
4006+}
4007+/*
+ * runs au_cpup_simple() directly when both au_test_h_perm_sio() on the lower
+ * parent dir of @bdst (MAY_EXEC | MAY_WRITE) and au_cpup_sio_test() on the
+ * mode of @dentry's inode return 0; otherwise it is run through
+ * au_wkq_wait(), whose non-zero result replaces the copy-up result.
+ */
4008+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4009+ unsigned int flags)
4010+{
4011+ int err, wkq_err;
1facf9fc 4012+ struct dentry *parent;
4013+ struct inode *h_dir;
4014+
4015+ parent = dget_parent(dentry);
4016+ h_dir = au_h_iptr(parent->d_inode, bdst);
53392da6
AM
4017+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
4018+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4019+ err = au_cpup_simple(dentry, bdst, len, flags);
4020+ else {
4021+ struct au_cpup_simple_args args = {
4022+ .errp = &err,
4023+ .dentry = dentry,
4024+ .bdst = bdst,
4025+ .len = len,
4026+ .flags = flags
4027+ };
4028+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
4029+ if (unlikely(wkq_err))
4030+ err = wkq_err;
4031+ }
4032+
4033+ dput(parent);
4034+ return err;
4035+}
4036+
4037+/* ---------------------------------------------------------------------- */
4038+
4039+/*
4040+ * copyup the deleted file for writing.
+ * the dentry info is patched temporarily: di_bstart becomes @bdst and the
+ * lower dentry on @bdst becomes @wh_dentry; when @file is given, the lower
+ * dentry on the old bstart becomes the dentry of au_hf_top(@file).
+ * au_cpup_single() is then called without AuCpup_DTIME and, on success with
+ * @file given, au_reopen_nondir(@file). all saved values are restored
+ * before returning. the caller must hold di_rwsem for writing.
4041+ */
4042+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
4043+ struct dentry *wh_dentry, struct file *file,
4044+ loff_t len)
4045+{
4046+ int err;
4047+ aufs_bindex_t bstart;
4048+ struct au_dinfo *dinfo;
4049+ struct dentry *h_d_dst, *h_d_start;
4a4d8108 4050+ struct au_hdentry *hdp;
1facf9fc 4051+
4052+ dinfo = au_di(dentry);
1308ab2a 4053+ AuRwMustWriteLock(&dinfo->di_rwsem);
4054+
1facf9fc 4055+ bstart = dinfo->di_bstart;
4a4d8108
AM
4056+ hdp = dinfo->di_hdentry;
4057+ h_d_dst = hdp[0 + bdst].hd_dentry;
1facf9fc 4058+ dinfo->di_bstart = bdst;
4a4d8108 4059+ hdp[0 + bdst].hd_dentry = wh_dentry;
027c5e7a
AM
4060+ if (file) {
4061+ h_d_start = hdp[0 + bstart].hd_dentry;
4a4d8108 4062+ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry;
027c5e7a 4063+ }
1facf9fc 4064+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
4065+ /*h_parent*/NULL);
027c5e7a
AM
4066+ if (file) {
4067+ if (!err)
4068+ err = au_reopen_nondir(file);
4a4d8108 4069+ hdp[0 + bstart].hd_dentry = h_d_start;
1facf9fc 4070+ }
4a4d8108 4071+ hdp[0 + bdst].hd_dentry = h_d_dst;
1facf9fc 4072+ dinfo->di_bstart = bstart;
4073+
4074+ return err;
4075+}
4076+/*
+ * copies @dentry up to @bdst under a temporary name: au_whtmp_lkup() gives
+ * the temporary dentry in the lower parent dir, au_do_cpup_wh() copies into
+ * it, then the temporary entry is removed again (unlink, or rmdir for a
+ * dir) and handed to au_set_hi_wh().
+ * the timestamps of the lower parent dir are saved before and restored
+ * after the removal. a failing removal yields -EIO.
+ */
4077+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4078+ struct file *file)
4079+{
4080+ int err;
4081+ struct au_dtime dt;
4082+ struct dentry *parent, *h_parent, *wh_dentry;
4083+ struct au_branch *br;
4084+ struct path h_path;
4085+
4086+ br = au_sbr(dentry->d_sb, bdst);
4087+ parent = dget_parent(dentry);
4088+ h_parent = au_h_dptr(parent, bdst);
4089+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
4090+ err = PTR_ERR(wh_dentry);
4091+ if (IS_ERR(wh_dentry))
4092+ goto out;
4093+
4094+ h_path.dentry = h_parent;
4095+ h_path.mnt = br->br_mnt;
4096+ au_dtime_store(&dt, parent, &h_path);
4097+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
4098+ if (unlikely(err))
4099+ goto out_wh;
4100+
4101+ dget(wh_dentry);
4102+ h_path.dentry = wh_dentry;
4a4d8108
AM
4103+ if (!S_ISDIR(wh_dentry->d_inode->i_mode))
4104+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
4105+ else
4106+ err = vfsub_rmdir(h_parent->d_inode, &h_path);
1facf9fc 4107+ if (unlikely(err)) {
4108+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
4109+ AuDLNPair(wh_dentry), err);
4110+ err = -EIO;
4111+ }
4112+ au_dtime_revert(&dt);
4113+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
4114+
4f0767ce 4115+out_wh:
1facf9fc 4116+ dput(wh_dentry);
4f0767ce 4117+out:
1facf9fc 4118+ dput(parent);
4119+ return err;
4120+}
4121+/* argument pack for au_call_cpup_wh(); the result is stored through errp */
4122+struct au_cpup_wh_args {
4123+ int *errp;
4124+ struct dentry *dentry;
4125+ aufs_bindex_t bdst;
4126+ loff_t len;
4127+ struct file *file;
4128+};
4129+/* void * trampoline: unpacks @args and stores au_cpup_wh()'s result in *errp */
4130+static void au_call_cpup_wh(void *args)
4131+{
4132+ struct au_cpup_wh_args *a = args;
4133+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
4134+}
4135+/*
+ * runs au_cpup_wh() directly, or through au_wkq_wait() when
+ * au_test_h_perm_sio() on the working dir or au_cpup_sio_test() asks for it.
+ * when the lower parent dir on @bdst has i_nlink == 0, the branch's wbr_orph
+ * dir temporarily takes its place as the lower dentry/inode of the parent;
+ * the lower inode of the file is unlocked and re-locked around taking the
+ * i_mutex of that dir. the original lower parent is put back afterwards.
+ * a non-zero au_wkq_wait() result replaces the copy-up result.
+ */
4136+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4137+ struct file *file)
4138+{
4139+ int err, wkq_err;
4140+ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
4141+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
4142+ struct au_wbr *wbr;
4143+
4144+ parent = dget_parent(dentry);
4145+ dir = parent->d_inode;
4146+ h_orph = NULL;
4147+ h_parent = NULL;
4148+ h_dir = au_igrab(au_h_iptr(dir, bdst));
4149+ h_tmpdir = h_dir;
4150+ if (!h_dir->i_nlink) {
4151+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
4152+ h_orph = wbr->wbr_orph;
4153+
4154+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 4155+ au_set_h_dptr(parent, bdst, dget(h_orph));
4156+ h_tmpdir = h_orph->d_inode;
1facf9fc 4157+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
4158+
4159+ /* this temporary unlock is safe */
4160+ if (file)
4a4d8108 4161+ h_dentry = au_hf_top(file)->f_dentry;
1facf9fc 4162+ else
4163+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
4164+ h_inode = h_dentry->d_inode;
4165+ IMustLock(h_inode);
4166+ mutex_unlock(&h_inode->i_mutex);
dece6358 4167+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
1facf9fc 4168+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108 4169+ /* todo: au_h_open_pre()? */
1facf9fc 4170+ }
4171+
53392da6
AM
4172+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
4173+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4174+ err = au_cpup_wh(dentry, bdst, len, file);
4175+ else {
4176+ struct au_cpup_wh_args args = {
4177+ .errp = &err,
4178+ .dentry = dentry,
4179+ .bdst = bdst,
4180+ .len = len,
4181+ .file = file
4182+ };
4183+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
4184+ if (unlikely(wkq_err))
4185+ err = wkq_err;
4186+ }
4187+
4188+ if (h_orph) {
4189+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 4190+ /* todo: au_h_open_post()? */
1facf9fc 4191+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 4192+ au_set_h_dptr(parent, bdst, h_parent);
4193+ }
4194+ iput(h_dir);
4195+ dput(parent);
4196+
4197+ return err;
4198+}
4199+
4200+/* ---------------------------------------------------------------------- */
4201+
4202+/*
4203+ * generic routine for both of copy-up and copy-down.
+ * makes sure the parent of @dentry has a lower dentry on @bdst by calling
+ * @cp(dir, @bdst, h_parent, @arg) on ancestor dirs, topmost missing one
+ * first: each round walks up from the parent until an ancestor with a lower
+ * dentry on @bdst is found, then handles the child directly below it (pinned
+ * with au_do_pin() for the duration of @cp).
+ * returns 0 at once when the parent is the root; otherwise 0 when the parent
+ * exists on @bdst, or the first error from au_do_pin() or @cp.
4204+ */
4205+/* cf. revalidate function in file.c */
4206+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4207+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4208+ struct dentry *h_parent, void *arg),
4209+ void *arg)
4210+{
4211+ int err;
4212+ struct au_pin pin;
4213+ struct dentry *d, *parent, *h_parent, *real_parent;
4214+
4215+ err = 0;
4216+ parent = dget_parent(dentry);
4217+ if (IS_ROOT(parent))
4218+ goto out;
4219+
4220+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
4221+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
4222+
4223+ /* do not use au_dpage */
4224+ real_parent = parent;
4225+ while (1) {
4226+ dput(parent);
4227+ parent = dget_parent(dentry);
4228+ h_parent = au_h_dptr(parent, bdst);
4229+ if (h_parent)
4230+ goto out; /* success */
4231+
4232+ /* find top dir which is necessary to cpup */
4233+ do {
4234+ d = parent;
4235+ dput(parent);
4236+ parent = dget_parent(d);
4237+ di_read_lock_parent3(parent, !AuLock_IR);
4238+ h_parent = au_h_dptr(parent, bdst);
4239+ di_read_unlock(parent, !AuLock_IR);
4240+ } while (!h_parent);
4241+
4242+ if (d != real_parent)
4243+ di_write_lock_child3(d);
4244+
4245+ /* somebody else might create while we were sleeping */
4246+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
4247+ if (au_h_dptr(d, bdst))
4248+ au_update_dbstart(d);
4249+
4250+ au_pin_set_dentry(&pin, d);
4251+ err = au_do_pin(&pin);
4252+ if (!err) {
4253+ err = cp(d, bdst, h_parent, arg);
4254+ au_unpin(&pin);
4255+ }
4256+ }
4257+
4258+ if (d != real_parent)
4259+ di_write_unlock(d);
4260+ if (unlikely(err))
4261+ break;
4262+ }
4263+
4f0767ce 4264+out:
1facf9fc 4265+ dput(parent);
4266+ return err;
4267+}
4268+/*
+ * au_cp_dirs() callback: copies one dir up with au_sio_cpup_simple(), whole
+ * length (-1) and AuCpup_DTIME; @h_parent and @arg are not used.
+ */
4269+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
4270+ struct dentry *h_parent __maybe_unused ,
4271+ void *arg __maybe_unused)
4272+{
4273+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
4274+}
4275+/* runs au_cp_dirs() for @dentry and @bdst with au_cpup_dir() as the callback */
4276+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4277+{
4278+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
4279+}
4280+/*
+ * calls au_cpup_dirs() only when the parent dir of @dentry has no lower
+ * inode on @bdst. for that, the read lock on the parent's dinfo is released
+ * and a write lock taken (so the condition is tested again), and the lock is
+ * downgraded afterwards.
+ * NOTE(review): the unlock/downgrade pair implies the caller enters holding
+ * the parent's read lock with AuLock_IR -- confirm at the call sites.
+ */
4281+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4282+{
4283+ int err;
4284+ struct dentry *parent;
4285+ struct inode *dir;
4286+
4287+ parent = dget_parent(dentry);
4288+ dir = parent->d_inode;
4289+ err = 0;
4290+ if (au_h_iptr(dir, bdst))
4291+ goto out;
4292+
4293+ di_read_unlock(parent, AuLock_IR);
4294+ di_write_lock_parent(parent);
4295+ /* someone else might change our inode while we were sleeping */
4296+ if (!au_h_iptr(dir, bdst))
4297+ err = au_cpup_dirs(dentry, bdst);
4298+ di_downgrade_lock(parent, AuLock_IR);
4299+
4f0767ce 4300+out:
1facf9fc 4301+ dput(parent);
4302+ return err;
4303+}
7f207e10
AM
4304diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
4305--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4306+++ linux/fs/aufs/cpup.h 2011-08-24 13:30:24.731313534 +0200
7f207e10 4307@@ -0,0 +1,83 @@
1facf9fc 4308+/*
027c5e7a 4309+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4310+ *
4311+ * This program, aufs is free software; you can redistribute it and/or modify
4312+ * it under the terms of the GNU General Public License as published by
4313+ * the Free Software Foundation; either version 2 of the License, or
4314+ * (at your option) any later version.
dece6358
AM
4315+ *
4316+ * This program is distributed in the hope that it will be useful,
4317+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4318+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4319+ * GNU General Public License for more details.
4320+ *
4321+ * You should have received a copy of the GNU General Public License
4322+ * along with this program; if not, write to the Free Software
4323+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4324+ */
4325+
4326+/*
4327+ * copy-up/down functions
4328+ */
4329+
4330+#ifndef __AUFS_CPUP_H__
4331+#define __AUFS_CPUP_H__
4332+
4333+#ifdef __KERNEL__
4334+
dece6358
AM
4335+#include <linux/path.h>
4336+#include <linux/time.h>
1facf9fc 4337+#include <linux/aufs_type.h>
4338+
dece6358
AM
4339+struct inode;
4340+struct file;
4341+
1facf9fc 4342+void au_cpup_attr_flags(struct inode *dst, struct inode *src);
4343+void au_cpup_attr_timesizes(struct inode *inode);
4344+void au_cpup_attr_nlink(struct inode *inode, int force);
4345+void au_cpup_attr_changeable(struct inode *inode);
4346+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
4347+void au_cpup_attr_all(struct inode *inode, int force);
4348+
4349+/* ---------------------------------------------------------------------- */
4350+
4351+/* cpup flags */
4352+#define AuCpup_DTIME 1 /* do dtime_store/revert */
4353+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
4354+ for link(2) */
4355+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
4356+#define au_fset_cpup(flags, name) \
4357+ do { (flags) |= AuCpup_##name; } while (0)
4358+#define au_fclr_cpup(flags, name) \
4359+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 4360+
4361+int au_copy_file(struct file *dst, struct file *src, loff_t len);
4362+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
4363+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
4364+ struct dentry *dst_parent);
4365+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4366+ unsigned int flags);
4367+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4368+ struct file *file);
4369+
4370+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4371+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4372+ struct dentry *h_parent, void *arg),
4373+ void *arg);
4374+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4375+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4376+
4377+/* ---------------------------------------------------------------------- */
4378+
4379+/* keep timestamps when copyup */
4380+struct au_dtime {
4381+ struct dentry *dt_dentry;
4382+ struct path dt_h_path;
4383+ struct timespec dt_atime, dt_mtime;
4384+};
4385+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4386+ struct path *h_path);
4387+void au_dtime_revert(struct au_dtime *dt);
4388+
4389+#endif /* __KERNEL__ */
4390+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
4391diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
4392--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4393+++ linux/fs/aufs/dbgaufs.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4394@@ -0,0 +1,334 @@
1facf9fc 4395+/*
027c5e7a 4396+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4397+ *
4398+ * This program, aufs is free software; you can redistribute it and/or modify
4399+ * it under the terms of the GNU General Public License as published by
4400+ * the Free Software Foundation; either version 2 of the License, or
4401+ * (at your option) any later version.
dece6358
AM
4402+ *
4403+ * This program is distributed in the hope that it will be useful,
4404+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4405+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4406+ * GNU General Public License for more details.
4407+ *
4408+ * You should have received a copy of the GNU General Public License
4409+ * along with this program; if not, write to the Free Software
4410+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4411+ */
4412+
4413+/*
4414+ * debugfs interface
4415+ */
4416+
4417+#include <linux/debugfs.h>
4418+#include "aufs.h"
4419+
4420+#ifndef CONFIG_SYSFS
4421+#error DEBUG_FS depends upon SYSFS
4422+#endif
4423+
4424+static struct dentry *dbgaufs;
4425+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
4426+
4427+/* 20 is the maximum number of decimal digits in a 64-bit unsigned long */
4428+struct dbgaufs_arg {
4429+ int n;
4430+ char a[20 * 4];
4431+};
4432+
4433+/*
4434+ * common function for all XINO files
4435+ */
4436+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
4437+ struct file *file)
4438+{
4439+ kfree(file->private_data);
4440+ return 0;
4441+}
4442+
4443+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
4444+{
4445+ int err;
4446+ struct kstat st;
4447+ struct dbgaufs_arg *p;
4448+
4449+ err = -ENOMEM;
4450+ p = kmalloc(sizeof(*p), GFP_NOFS);
4451+ if (unlikely(!p))
4452+ goto out;
4453+
4454+ err = 0;
4455+ p->n = 0;
4456+ file->private_data = p;
4457+ if (!xf)
4458+ goto out;
4459+
4460+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4461+ if (!err) {
4462+ if (do_fcnt)
4463+ p->n = snprintf
4464+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4465+ (long)file_count(xf), st.blocks, st.blksize,
4466+ (long long)st.size);
4467+ else
4468+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4469+ st.blocks, st.blksize,
4470+ (long long)st.size);
4471+ AuDebugOn(p->n >= sizeof(p->a));
4472+ } else {
4473+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4474+ err = 0;
4475+ }
4476+
4f0767ce 4477+out:
1facf9fc 4478+ return err;
4479+
4480+}
4481+
4482+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4483+ size_t count, loff_t *ppos)
4484+{
4485+ struct dbgaufs_arg *p;
4486+
4487+ p = file->private_data;
4488+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4489+}
4490+
4491+/* ---------------------------------------------------------------------- */
4492+
4493+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4494+{
4495+ int err;
4496+ struct au_sbinfo *sbinfo;
4497+ struct super_block *sb;
4498+
4499+ sbinfo = inode->i_private;
4500+ sb = sbinfo->si_sb;
4501+ si_noflush_read_lock(sb);
4502+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4503+ si_read_unlock(sb);
4504+ return err;
4505+}
4506+
4507+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 4508+ .owner = THIS_MODULE,
1facf9fc 4509+ .open = dbgaufs_xib_open,
4510+ .release = dbgaufs_xi_release,
4511+ .read = dbgaufs_xi_read
4512+};
4513+
4514+/* ---------------------------------------------------------------------- */
4515+
4516+#define DbgaufsXi_PREFIX "xi"
4517+
4518+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4519+{
4520+ int err;
4521+ long l;
4522+ struct au_sbinfo *sbinfo;
4523+ struct super_block *sb;
4524+ struct file *xf;
4525+ struct qstr *name;
4526+
4527+ err = -ENOENT;
4528+ xf = NULL;
4529+ name = &file->f_dentry->d_name;
4530+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4531+ || memcmp(name->name, DbgaufsXi_PREFIX,
4532+ sizeof(DbgaufsXi_PREFIX) - 1)))
4533+ goto out;
4534+ err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4535+ if (unlikely(err))
4536+ goto out;
4537+
4538+ sbinfo = inode->i_private;
4539+ sb = sbinfo->si_sb;
4540+ si_noflush_read_lock(sb);
4541+ if (l <= au_sbend(sb)) {
4542+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4543+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4544+ } else
4545+ err = -ENOENT;
4546+ si_read_unlock(sb);
4547+
4f0767ce 4548+out:
1facf9fc 4549+ return err;
4550+}
4551+
4552+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 4553+ .owner = THIS_MODULE,
1facf9fc 4554+ .open = dbgaufs_xino_open,
4555+ .release = dbgaufs_xi_release,
4556+ .read = dbgaufs_xi_read
4557+};
4558+
4559+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4560+{
4561+ aufs_bindex_t bend;
4562+ struct au_branch *br;
4563+ struct au_xino_file *xi;
4564+
4565+ if (!au_sbi(sb)->si_dbgaufs)
4566+ return;
4567+
4568+ bend = au_sbend(sb);
4569+ for (; bindex <= bend; bindex++) {
4570+ br = au_sbr(sb, bindex);
4571+ xi = &br->br_xino;
4572+ if (xi->xi_dbgaufs) {
4573+ debugfs_remove(xi->xi_dbgaufs);
4574+ xi->xi_dbgaufs = NULL;
4575+ }
4576+ }
4577+}
4578+
4579+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4580+{
4581+ struct au_sbinfo *sbinfo;
4582+ struct dentry *parent;
4583+ struct au_branch *br;
4584+ struct au_xino_file *xi;
4585+ aufs_bindex_t bend;
4586+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
4587+
4588+ sbinfo = au_sbi(sb);
4589+ parent = sbinfo->si_dbgaufs;
4590+ if (!parent)
4591+ return;
4592+
4593+ bend = au_sbend(sb);
4594+ for (; bindex <= bend; bindex++) {
4595+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4596+ br = au_sbr(sb, bindex);
4597+ xi = &br->br_xino;
4598+ AuDebugOn(xi->xi_dbgaufs);
4599+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4600+ sbinfo, &dbgaufs_xino_fop);
4601+ /* ignore an error */
4602+ if (unlikely(!xi->xi_dbgaufs))
4603+ AuWarn1("failed %s under debugfs\n", name);
4604+ }
4605+}
4606+
4607+/* ---------------------------------------------------------------------- */
4608+
4609+#ifdef CONFIG_AUFS_EXPORT
4610+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4611+{
4612+ int err;
4613+ struct au_sbinfo *sbinfo;
4614+ struct super_block *sb;
4615+
4616+ sbinfo = inode->i_private;
4617+ sb = sbinfo->si_sb;
4618+ si_noflush_read_lock(sb);
4619+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4620+ si_read_unlock(sb);
4621+ return err;
4622+}
4623+
4624+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 4625+ .owner = THIS_MODULE,
1facf9fc 4626+ .open = dbgaufs_xigen_open,
4627+ .release = dbgaufs_xi_release,
4628+ .read = dbgaufs_xi_read
4629+};
4630+
4631+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4632+{
4633+ int err;
4634+
dece6358
AM
4635+ /*
4636+ * This function is a dynamic '__init' function actually,
4637+ * so the tiny check for si_rwsem is unnecessary.
4638+ */
4639+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4640+
1facf9fc 4641+ err = -EIO;
4642+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
4643+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4644+ &dbgaufs_xigen_fop);
4645+ if (sbinfo->si_dbgaufs_xigen)
4646+ err = 0;
4647+
4648+ return err;
4649+}
4650+#else
4651+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4652+{
4653+ return 0;
4654+}
4655+#endif /* CONFIG_AUFS_EXPORT */
4656+
4657+/* ---------------------------------------------------------------------- */
4658+
4659+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4660+{
dece6358
AM
4661+ /*
4662+ * This function is a dynamic '__init' function actually,
4663+ * so the tiny check for si_rwsem is unnecessary.
4664+ */
4665+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4666+
1facf9fc 4667+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
4668+ sbinfo->si_dbgaufs = NULL;
4669+ kobject_put(&sbinfo->si_kobj);
4670+}
4671+
4672+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4673+{
4674+ int err;
4675+ char name[SysaufsSiNameLen];
4676+
dece6358
AM
4677+ /*
4678+ * This function is a dynamic '__init' function actually,
4679+ * so the tiny check for si_rwsem is unnecessary.
4680+ */
4681+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4682+
1facf9fc 4683+ err = -ENOENT;
4684+ if (!dbgaufs) {
4685+ AuErr1("/debug/aufs is uninitialized\n");
4686+ goto out;
4687+ }
4688+
4689+ err = -EIO;
4690+ sysaufs_name(sbinfo, name);
4691+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4692+ if (unlikely(!sbinfo->si_dbgaufs))
4693+ goto out;
4694+ kobject_get(&sbinfo->si_kobj);
4695+
4696+ sbinfo->si_dbgaufs_xib = debugfs_create_file
4697+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4698+ &dbgaufs_xib_fop);
4699+ if (unlikely(!sbinfo->si_dbgaufs_xib))
4700+ goto out_dir;
4701+
4702+ err = dbgaufs_xigen_init(sbinfo);
4703+ if (!err)
4704+ goto out; /* success */
4705+
4f0767ce 4706+out_dir:
1facf9fc 4707+ dbgaufs_si_fin(sbinfo);
4f0767ce 4708+out:
1facf9fc 4709+ return err;
4710+}
4711+
4712+/* ---------------------------------------------------------------------- */
4713+
4714+void dbgaufs_fin(void)
4715+{
4716+ debugfs_remove(dbgaufs);
4717+}
4718+
4719+int __init dbgaufs_init(void)
4720+{
4721+ int err;
4722+
4723+ err = -EIO;
4724+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4725+ if (dbgaufs)
4726+ err = 0;
4727+ return err;
4728+}
7f207e10
AM
4729diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
4730--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4731+++ linux/fs/aufs/dbgaufs.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4732@@ -0,0 +1,52 @@
1facf9fc 4733+/*
027c5e7a 4734+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4735+ *
4736+ * This program, aufs is free software; you can redistribute it and/or modify
4737+ * it under the terms of the GNU General Public License as published by
4738+ * the Free Software Foundation; either version 2 of the License, or
4739+ * (at your option) any later version.
dece6358
AM
4740+ *
4741+ * This program is distributed in the hope that it will be useful,
4742+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4743+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4744+ * GNU General Public License for more details.
4745+ *
4746+ * You should have received a copy of the GNU General Public License
4747+ * along with this program; if not, write to the Free Software
4748+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4749+ */
4750+
4751+/*
4752+ * debugfs interface
4753+ */
4754+
4755+#ifndef __DBGAUFS_H__
4756+#define __DBGAUFS_H__
4757+
4758+#ifdef __KERNEL__
4759+
dece6358 4760+#include <linux/init.h>
1facf9fc 4761+#include <linux/aufs_type.h>
4762+
dece6358 4763+struct super_block;
1facf9fc 4764+struct au_sbinfo;
dece6358 4765+
1facf9fc 4766+#ifdef CONFIG_DEBUG_FS
4767+/* dbgaufs.c */
4768+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4769+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4770+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4771+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4772+void dbgaufs_fin(void);
4773+int __init dbgaufs_init(void);
1facf9fc 4774+#else
4a4d8108
AM
4775+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
4776+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
4777+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
4778+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
4779+AuStubVoid(dbgaufs_fin, void)
4780+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 4781+#endif /* CONFIG_DEBUG_FS */
4782+
4783+#endif /* __KERNEL__ */
4784+#endif /* __DBGAUFS_H__ */
7f207e10
AM
4785diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
4786--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4787+++ linux/fs/aufs/dcsub.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 4788@@ -0,0 +1,243 @@
1facf9fc 4789+/*
027c5e7a 4790+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4791+ *
4792+ * This program, aufs is free software; you can redistribute it and/or modify
4793+ * it under the terms of the GNU General Public License as published by
4794+ * the Free Software Foundation; either version 2 of the License, or
4795+ * (at your option) any later version.
dece6358
AM
4796+ *
4797+ * This program is distributed in the hope that it will be useful,
4798+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4799+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4800+ * GNU General Public License for more details.
4801+ *
4802+ * You should have received a copy of the GNU General Public License
4803+ * along with this program; if not, write to the Free Software
4804+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4805+ */
4806+
4807+/*
4808+ * sub-routines for dentry cache
4809+ */
4810+
4811+#include "aufs.h"
4812+
4813+static void au_dpage_free(struct au_dpage *dpage)
4814+{
4815+ int i;
4816+ struct dentry **p;
4817+
4818+ p = dpage->dentries;
4819+ for (i = 0; i < dpage->ndentry; i++)
4820+ dput(*p++);
4821+ free_page((unsigned long)dpage->dentries);
4822+}
4823+
4824+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4825+{
4826+ int err;
4827+ void *p;
4828+
4829+ err = -ENOMEM;
4830+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4831+ if (unlikely(!dpages->dpages))
4832+ goto out;
4833+
4834+ p = (void *)__get_free_page(gfp);
4835+ if (unlikely(!p))
4836+ goto out_dpages;
4837+
4838+ dpages->dpages[0].ndentry = 0;
4839+ dpages->dpages[0].dentries = p;
4840+ dpages->ndpage = 1;
4841+ return 0; /* success */
4842+
4f0767ce 4843+out_dpages:
1facf9fc 4844+ kfree(dpages->dpages);
4f0767ce 4845+out:
1facf9fc 4846+ return err;
4847+}
4848+
4849+void au_dpages_free(struct au_dcsub_pages *dpages)
4850+{
4851+ int i;
4852+ struct au_dpage *p;
4853+
4854+ p = dpages->dpages;
4855+ for (i = 0; i < dpages->ndpage; i++)
4856+ au_dpage_free(p++);
4857+ kfree(dpages->dpages);
4858+}
4859+
4860+static int au_dpages_append(struct au_dcsub_pages *dpages,
4861+ struct dentry *dentry, gfp_t gfp)
4862+{
4863+ int err, sz;
4864+ struct au_dpage *dpage;
4865+ void *p;
4866+
4867+ dpage = dpages->dpages + dpages->ndpage - 1;
4868+ sz = PAGE_SIZE / sizeof(dentry);
4869+ if (unlikely(dpage->ndentry >= sz)) {
4870+ AuLabel(new dpage);
4871+ err = -ENOMEM;
4872+ sz = dpages->ndpage * sizeof(*dpages->dpages);
4873+ p = au_kzrealloc(dpages->dpages, sz,
4874+ sz + sizeof(*dpages->dpages), gfp);
4875+ if (unlikely(!p))
4876+ goto out;
4877+
4878+ dpages->dpages = p;
4879+ dpage = dpages->dpages + dpages->ndpage;
4880+ p = (void *)__get_free_page(gfp);
4881+ if (unlikely(!p))
4882+ goto out;
4883+
4884+ dpage->ndentry = 0;
4885+ dpage->dentries = p;
4886+ dpages->ndpage++;
4887+ }
4888+
027c5e7a
AM
4889+ AuDebugOn(!dentry->d_count);
4890+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 4891+ return 0; /* success */
4892+
4f0767ce 4893+out:
1facf9fc 4894+ return err;
4895+}
4896+
4897+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4898+ au_dpages_test test, void *arg)
4899+{
4900+ int err;
027c5e7a 4901+ struct dentry *this_parent;
1facf9fc 4902+ struct list_head *next;
4903+ struct super_block *sb = root->d_sb;
4904+
4905+ err = 0;
027c5e7a
AM
4906+ write_seqlock(&rename_lock);
4907+ this_parent = root;
4908+ spin_lock(&this_parent->d_lock);
4f0767ce 4909+repeat:
1facf9fc 4910+ next = this_parent->d_subdirs.next;
4f0767ce 4911+resume:
1facf9fc 4912+ if (this_parent->d_sb == sb
4913+ && !IS_ROOT(this_parent)
027c5e7a
AM
4914+ && au_di(this_parent)
4915+ && this_parent->d_count
1facf9fc 4916+ && (!test || test(this_parent, arg))) {
4917+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4918+ if (unlikely(err))
4919+ goto out;
4920+ }
4921+
4922+ while (next != &this_parent->d_subdirs) {
4923+ struct list_head *tmp = next;
4924+ struct dentry *dentry = list_entry(tmp, struct dentry,
4925+ d_u.d_child);
027c5e7a 4926+
1facf9fc 4927+ next = tmp->next;
027c5e7a
AM
4928+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
4929+ if (dentry->d_count) {
4930+ if (!list_empty(&dentry->d_subdirs)) {
4931+ spin_unlock(&this_parent->d_lock);
4932+ spin_release(&dentry->d_lock.dep_map, 1,
4933+ _RET_IP_);
4934+ this_parent = dentry;
4935+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1,
4936+ _RET_IP_);
4937+ goto repeat;
4938+ }
4939+ if (dentry->d_sb == sb
4940+ && au_di(dentry)
4941+ && (!test || test(dentry, arg)))
4942+ err = au_dpages_append(dpages, dentry,
4943+ GFP_ATOMIC);
1facf9fc 4944+ }
027c5e7a
AM
4945+ spin_unlock(&dentry->d_lock);
4946+ if (unlikely(err))
4947+ goto out;
1facf9fc 4948+ }
4949+
4950+ if (this_parent != root) {
027c5e7a
AM
4951+ struct dentry *tmp;
4952+ struct dentry *child;
4953+
4954+ tmp = this_parent->d_parent;
4955+ rcu_read_lock();
4956+ spin_unlock(&this_parent->d_lock);
4957+ child = this_parent;
4958+ this_parent = tmp;
4959+ spin_lock(&this_parent->d_lock);
4960+ rcu_read_unlock();
4961+ next = child->d_u.d_child.next;
1facf9fc 4962+ goto resume;
4963+ }
027c5e7a 4964+
4f0767ce 4965+out:
027c5e7a
AM
4966+ spin_unlock(&this_parent->d_lock);
4967+ write_sequnlock(&rename_lock);
1facf9fc 4968+ return err;
4969+}
4970+
4971+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4972+ int do_include, au_dpages_test test, void *arg)
4973+{
4974+ int err;
4975+
4976+ err = 0;
027c5e7a
AM
4977+ write_seqlock(&rename_lock);
4978+ spin_lock(&dentry->d_lock);
4979+ if (do_include
4980+ && dentry->d_count
4981+ && (!test || test(dentry, arg)))
1facf9fc 4982+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
4983+ spin_unlock(&dentry->d_lock);
4984+ if (unlikely(err))
4985+ goto out;
4986+
4987+ /*
4988+ * vfsmount_lock is unnecessary since this traversal stays within a
4989+ * single mount
4990+ */
1facf9fc 4991+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
4992+ dentry = dentry->d_parent; /* rename_lock is locked */
4993+ spin_lock(&dentry->d_lock);
4994+ if (dentry->d_count
4995+ && (!test || test(dentry, arg)))
1facf9fc 4996+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
4997+ spin_unlock(&dentry->d_lock);
4998+ if (unlikely(err))
4999+ break;
1facf9fc 5000+ }
5001+
4f0767ce 5002+out:
027c5e7a 5003+ write_sequnlock(&rename_lock);
1facf9fc 5004+ return err;
5005+}
5006+
027c5e7a
AM
5007+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
5008+{
5009+ return au_di(dentry) && dentry->d_sb == arg;
5010+}
5011+
5012+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5013+ struct dentry *dentry, int do_include)
5014+{
5015+ return au_dcsub_pages_rev(dpages, dentry, do_include,
5016+ au_dcsub_dpages_aufs, dentry->d_sb);
5017+}
5018+
4a4d8108 5019+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 5020+{
4a4d8108
AM
5021+ struct path path[2] = {
5022+ {
5023+ .dentry = d1
5024+ },
5025+ {
5026+ .dentry = d2
5027+ }
5028+ };
1facf9fc 5029+
4a4d8108 5030+ return path_is_under(path + 0, path + 1);
1facf9fc 5031+}
7f207e10
AM
5032diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
5033--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5034+++ linux/fs/aufs/dcsub.h 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5035@@ -0,0 +1,95 @@
1facf9fc 5036+/*
027c5e7a 5037+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5038+ *
5039+ * This program, aufs is free software; you can redistribute it and/or modify
5040+ * it under the terms of the GNU General Public License as published by
5041+ * the Free Software Foundation; either version 2 of the License, or
5042+ * (at your option) any later version.
dece6358
AM
5043+ *
5044+ * This program is distributed in the hope that it will be useful,
5045+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5046+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5047+ * GNU General Public License for more details.
5048+ *
5049+ * You should have received a copy of the GNU General Public License
5050+ * along with this program; if not, write to the Free Software
5051+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5052+ */
5053+
5054+/*
5055+ * sub-routines for dentry cache
5056+ */
5057+
5058+#ifndef __AUFS_DCSUB_H__
5059+#define __AUFS_DCSUB_H__
5060+
5061+#ifdef __KERNEL__
5062+
7f207e10 5063+#include <linux/dcache.h>
027c5e7a 5064+#include <linux/fs.h>
dece6358
AM
5065+#include <linux/types.h>
5066+
5067+struct dentry;
1facf9fc 5068+
5069+struct au_dpage {
5070+ int ndentry;
5071+ struct dentry **dentries;
5072+};
5073+
5074+struct au_dcsub_pages {
5075+ int ndpage;
5076+ struct au_dpage *dpages;
5077+};
5078+
5079+/* ---------------------------------------------------------------------- */
5080+
7f207e10 5081+/* dcsub.c */
1facf9fc 5082+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
5083+void au_dpages_free(struct au_dcsub_pages *dpages);
5084+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
5085+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
5086+ au_dpages_test test, void *arg);
5087+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
5088+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
5089+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5090+ struct dentry *dentry, int do_include);
4a4d8108 5091+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 5092+
7f207e10
AM
5093+/* ---------------------------------------------------------------------- */
5094+
027c5e7a
AM
5095+static inline int au_d_hashed_positive(struct dentry *d)
5096+{
5097+ int err;
5098+ struct inode *inode = d->d_inode;
5099+ err = 0;
5100+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink))
5101+ err = -ENOENT;
5102+ return err;
5103+}
5104+
5105+static inline int au_d_alive(struct dentry *d)
5106+{
5107+ int err;
5108+ struct inode *inode;
5109+ err = 0;
5110+ if (!IS_ROOT(d))
5111+ err = au_d_hashed_positive(d);
5112+ else {
5113+ inode = d->d_inode;
5114+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink))
5115+ err = -ENOENT;
5116+ }
5117+ return err;
5118+}
5119+
5120+static inline int au_alive_dir(struct dentry *d)
7f207e10 5121+{
027c5e7a
AM
5122+ int err;
5123+ err = au_d_alive(d);
5124+ if (unlikely(err || IS_DEADDIR(d->d_inode)))
5125+ err = -ENOENT;
5126+ return err;
7f207e10
AM
5127+}
5128+
1facf9fc 5129+#endif /* __KERNEL__ */
5130+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
5131diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
5132--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
5133+++ linux/fs/aufs/debug.c 2011-08-24 13:30:24.731313534 +0200
5134@@ -0,0 +1,486 @@
1facf9fc 5135+/*
027c5e7a 5136+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5137+ *
5138+ * This program, aufs is free software; you can redistribute it and/or modify
5139+ * it under the terms of the GNU General Public License as published by
5140+ * the Free Software Foundation; either version 2 of the License, or
5141+ * (at your option) any later version.
dece6358
AM
5142+ *
5143+ * This program is distributed in the hope that it will be useful,
5144+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5145+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5146+ * GNU General Public License for more details.
5147+ *
5148+ * You should have received a copy of the GNU General Public License
5149+ * along with this program; if not, write to the Free Software
5150+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5151+ */
5152+
5153+/*
5154+ * debug print functions
5155+ */
5156+
dece6358 5157+#include <linux/module.h>
7f207e10 5158+#include <linux/vt_kern.h>
1facf9fc 5159+#include "aufs.h"
5160+
5161+int aufs_debug;
5162+MODULE_PARM_DESC(debug, "debug print");
5163+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
5164+
5165+char *au_plevel = KERN_DEBUG;
e49829fe
JR
5166+#define dpri(fmt, ...) do { \
5167+ if ((au_plevel \
5168+ && strcmp(au_plevel, KERN_DEBUG)) \
5169+ || au_debug_test()) \
5170+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 5171+} while (0)
5172+
5173+/* ---------------------------------------------------------------------- */
5174+
5175+void au_dpri_whlist(struct au_nhash *whlist)
5176+{
5177+ unsigned long ul, n;
5178+ struct hlist_head *head;
5179+ struct au_vdir_wh *tpos;
5180+ struct hlist_node *pos;
5181+
5182+ n = whlist->nh_num;
5183+ head = whlist->nh_head;
5184+ for (ul = 0; ul < n; ul++) {
5185+ hlist_for_each_entry(tpos, pos, head, wh_hash)
5186+ dpri("b%d, %.*s, %d\n",
5187+ tpos->wh_bindex,
5188+ tpos->wh_str.len, tpos->wh_str.name,
5189+ tpos->wh_str.len);
5190+ head++;
5191+ }
5192+}
5193+
5194+void au_dpri_vdir(struct au_vdir *vdir)
5195+{
5196+ unsigned long ul;
5197+ union au_vdir_deblk_p p;
5198+ unsigned char *o;
5199+
5200+ if (!vdir || IS_ERR(vdir)) {
5201+ dpri("err %ld\n", PTR_ERR(vdir));
5202+ return;
5203+ }
5204+
5205+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
5206+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
5207+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
5208+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
5209+ p.deblk = vdir->vd_deblk[ul];
5210+ o = p.deblk;
5211+ dpri("[%lu]: %p\n", ul, o);
5212+ }
5213+}
5214+
53392da6 5215+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 5216+ struct dentry *wh)
5217+{
5218+ char *n = NULL;
5219+ int l = 0;
5220+
5221+ if (!inode || IS_ERR(inode)) {
5222+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
5223+ return -1;
5224+ }
5225+
5226+ /* the type of i_blocks depends upon CONFIG_LSF */
5227+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
5228+ && sizeof(inode->i_blocks) != sizeof(u64));
5229+ if (wh) {
5230+ n = (void *)wh->d_name.name;
5231+ l = wh->d_name.len;
5232+ }
5233+
53392da6
AM
5234+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
5235+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
5236+ bindex, inode,
1facf9fc 5237+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
5238+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
5239+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 5240+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 5241+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
5242+ inode->i_state, inode->i_flags, inode->i_version,
5243+ inode->i_generation,
1facf9fc 5244+ l ? ", wh " : "", l, n);
5245+ return 0;
5246+}
5247+
5248+void au_dpri_inode(struct inode *inode)
5249+{
5250+ struct au_iinfo *iinfo;
5251+ aufs_bindex_t bindex;
53392da6 5252+ int err, hn;
1facf9fc 5253+
53392da6 5254+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 5255+ if (err || !au_test_aufs(inode->i_sb))
5256+ return;
5257+
5258+ iinfo = au_ii(inode);
5259+ if (!iinfo)
5260+ return;
5261+ dpri("i-1: bstart %d, bend %d, gen %d\n",
5262+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
5263+ if (iinfo->ii_bstart < 0)
5264+ return;
53392da6
AM
5265+ hn = 0;
5266+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
5267+ hn = !!au_hn(iinfo->ii_hinode + bindex);
5268+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 5269+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 5270+ }
1facf9fc 5271+}
5272+/* Call au_dpri_dentry() on every dentry linked on @inode->i_dentry, holding inode->i_lock for the walk. */
2cbb1c4b
JR
5273+void au_dpri_dalias(struct inode *inode)
5274+{
5275+ struct dentry *d;
5276+
5277+ spin_lock(&inode->i_lock);
5278+ list_for_each_entry(d, &inode->i_dentry, d_alias)
5279+ au_dpri_dentry(d);
5280+ spin_unlock(&inode->i_lock);
5281+}
5282+/* Print one dentry labelled d<bindex> and then its inode; returns -1 for a NULL or ERR_PTR dentry, else 0. */
1facf9fc 5283+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
5284+{
5285+ struct dentry *wh = NULL;
53392da6 5286+ int hn;
1facf9fc 5287+
5288+ if (!dentry || IS_ERR(dentry)) {
5289+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
5290+ return -1;
5291+ }
5292+ /* do not call dget_parent() here */
027c5e7a 5293+ /* note: access d_xxx without d_lock */
1facf9fc 5294+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
5295+ bindex,
5296+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
5297+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
027c5e7a 5298+ dentry->d_count, dentry->d_flags);
53392da6 5299+ hn = -1; /* stays -1 unless the aufs inode info below is consulted */
1facf9fc 5300+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
5301+ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
53392da6
AM
5302+ if (iinfo) {
5303+ hn = !!au_hn(iinfo->ii_hinode + bindex); /* NOTE(review): bindex is not range-checked against ii_bstart/ii_bend here; confirm callers */
1facf9fc 5304+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 5305+ }
1facf9fc 5306+ }
53392da6 5307+ do_pri_inode(bindex, dentry->d_inode, hn, wh);
1facf9fc 5308+ return 0;
5309+}
5310+/* Dump @dentry, then (aufs dentries only) its dinfo summary and every lower dentry from di_bstart to di_bend. */
5311+void au_dpri_dentry(struct dentry *dentry)
5312+{
5313+ struct au_dinfo *dinfo;
5314+ aufs_bindex_t bindex;
5315+ int err;
4a4d8108 5316+ struct au_hdentry *hdp;
1facf9fc 5317+
5318+ err = do_pri_dentry(-1, dentry); /* index -1 labels the dentry passed in */
5319+ if (err || !au_test_aufs(dentry->d_sb))
5320+ return;
5321+
5322+ dinfo = au_di(dentry);
5323+ if (!dinfo)
5324+ return;
5325+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
5326+ dinfo->di_bstart, dinfo->di_bend,
5327+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
5328+ if (dinfo->di_bstart < 0)
5329+ return; /* negative bstart: no lower dentry to walk */
4a4d8108 5330+ hdp = dinfo->di_hdentry;
1facf9fc 5331+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 5332+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 5333+}
5334+/* Print one struct file labelled f<bindex> and then its dentry; returns -1 for a NULL or ERR_PTR file, else 0. */
5335+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
5336+{
5337+ char a[32];
5338+
5339+ if (!file || IS_ERR(file)) {
5340+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
5341+ return -1;
5342+ }
5343+ a[0] = 0; /* empty suffix unless the aufs-only fields are formatted below */
5344+ if (bindex < 0
5345+ && file->f_dentry
5346+ && au_test_aufs(file->f_dentry->d_sb)
5347+ && au_fi(file))
e49829fe 5348+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 5349+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 5350+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 5351+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 5352+ file->f_version, file->f_pos, a);
1facf9fc 5353+ if (file->f_dentry)
5354+ do_pri_dentry(bindex, file->f_dentry);
5355+ return 0;
5356+}
5357+/* Dump @file; for an aufs file also dump its lower file(s): fi_htop alone when fi_hdir is NULL, else fd_hfile[fi_btop..fd_bbot]. */
5358+void au_dpri_file(struct file *file)
5359+{
5360+ struct au_finfo *finfo;
4a4d8108
AM
5361+ struct au_fidir *fidir;
5362+ struct au_hfile *hfile;
1facf9fc 5363+ aufs_bindex_t bindex;
5364+ int err;
5365+
5366+ err = do_pri_file(-1, file);
5367+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
5368+ return;
5369+
5370+ finfo = au_fi(file);
5371+ if (!finfo)
5372+ return;
4a4d8108 5373+ if (finfo->fi_btop < 0)
1facf9fc 5374+ return;
4a4d8108
AM
5375+ fidir = finfo->fi_hdir;
5376+ if (!fidir)
5377+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
5378+ else
e49829fe
JR
5379+ for (bindex = finfo->fi_btop;
5380+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
5381+ bindex++) {
5382+ hfile = fidir->fd_hfile + bindex;
5383+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); /* NOTE(review): hfile comes from pointer arithmetic, so this NULL test looks redundant */
5384+ }
1facf9fc 5385+}
5386+/* Print one branch labelled s<bindex> with its superblock fields; returns -1 if br, its mount or its sb is NULL or ERR_PTR, else 0. */
5387+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
5388+{
5389+ struct vfsmount *mnt;
5390+ struct super_block *sb;
5391+
5392+ if (!br || IS_ERR(br))
5393+ goto out;
5394+ mnt = br->br_mnt;
5395+ if (!mnt || IS_ERR(mnt))
5396+ goto out;
5397+ sb = mnt->mnt_sb;
5398+ if (!sb || IS_ERR(sb))
5399+ goto out;
5400+
5401+ dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
b752ccd1 5402+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 5403+ "xino %d\n",
5404+ bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
5405+ au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 5406+ sb->s_flags, sb->s_count,
1facf9fc 5407+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
5408+ return 0;
5409+
4f0767ce 5410+out:
1facf9fc 5411+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); /* NOTE(review): prints br even when mnt or sb was the bad pointer */
5412+ return -1;
5413+}
5414+/* Dump @sb through a temporary fake branch, then (aufs superblocks only) the sbinfo counters and every branch 0..si_bend. */
5415+void au_dpri_sb(struct super_block *sb)
5416+{
5417+ struct au_sbinfo *sbinfo;
5418+ aufs_bindex_t bindex;
5419+ int err;
5420+ /* to reduce stack size */
5421+ struct {
5422+ struct vfsmount mnt;
5423+ struct au_branch fake;
5424+ } *a;
5425+
5426+ /* this function can be called from magic sysrq */
5427+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
5428+ if (unlikely(!a)) {
5429+ dpri("no memory\n");
5430+ return;
5431+ }
5432+
5433+ a->mnt.mnt_sb = sb; /* fake mount/branch pair so do_pri_br() can print @sb */
5434+ a->fake.br_perm = 0;
5435+ a->fake.br_mnt = &a->mnt;
5436+ a->fake.br_xino.xi_file = NULL;
5437+ atomic_set(&a->fake.br_count, 0);
5438+ smp_mb(); /* atomic_set */
5439+ err = do_pri_br(-1, &a->fake);
5440+ kfree(a); /* the fake pair is only needed for the call above */
5441+ dpri("dev 0x%x\n", sb->s_dev);
5442+ if (err || !au_test_aufs(sb))
5443+ return;
5444+
5445+ sbinfo = au_sbi(sb);
5446+ if (!sbinfo)
5447+ return;
5448+ dpri("nw %d, gen %u, kobj %d\n",
5449+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
5450+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5451+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
5452+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
5453+}
5454+
5455+/* ---------------------------------------------------------------------- */
5456+/* Sleep uninterruptibly, re-arming with whatever schedule_timeout_uninterruptible() returns until that value is 0. */
5457+void au_dbg_sleep_jiffy(int jiffy)
5458+{
5459+ while (jiffy)
5460+ jiffy = schedule_timeout_uninterruptible(jiffy);
5461+}
5462+/* Print, one per line, the name of every ATTR_* bit set in @ia->ia_valid, then the ia_file pointer. */
5463+void au_dbg_iattr(struct iattr *ia)
5464+{
5465+#define AuBit(name) do { if (ia->ia_valid & ATTR_ ## name) \
5466+ dpri(#name "\n"); } while (0)
5467+ AuBit(MODE);
5468+ AuBit(UID);
5469+ AuBit(GID);
5470+ AuBit(SIZE);
5471+ AuBit(ATIME);
5472+ AuBit(MTIME);
5473+ AuBit(CTIME);
5474+ AuBit(ATIME_SET);
5475+ AuBit(MTIME_SET);
5476+ AuBit(FORCE);
5477+ AuBit(ATTR_FLAG);
5478+ AuBit(KILL_SUID);
5479+ AuBit(KILL_SGID);
5480+ AuBit(FILE);
5481+ AuBit(KILL_PRIV);
5482+ AuBit(OPEN);
5483+ AuBit(TIMES_SET);
5484+#undef AuBit
5485+ dpri("ia_file %p\n", ia->ia_file);
5486+}
5487+
5488+/* ---------------------------------------------------------------------- */
5489+/* BUG() after a forced debug dump if a lower dentry's d_inode differs from the aufs inode's lower inode on the same branch; @func/@line identify the caller in the dump. */
027c5e7a
AM
5490+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
5491+{
5492+ struct inode *h_inode, *inode = dentry->d_inode;
5493+ struct dentry *h_dentry;
5494+ aufs_bindex_t bindex, bend, bi;
5495+
5496+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
5497+ return;
5498+
5499+ bend = au_dbend(dentry);
5500+ bi = au_ibend(inode);
5501+ if (bi < bend)
5502+ bend = bi; /* bend = smaller of the dentry end and the inode end */
5503+ bindex = au_dbstart(dentry);
5504+ bi = au_ibstart(inode);
5505+ if (bi > bindex)
5506+ bindex = bi; /* bindex = larger of the dentry start and the inode start */
5507+
5508+ for (; bindex <= bend; bindex++) {
5509+ h_dentry = au_h_dptr(dentry, bindex);
5510+ if (!h_dentry)
5511+ continue;
5512+ h_inode = au_h_iptr(inode, bindex);
5513+ if (unlikely(h_inode != h_dentry->d_inode)) {
5514+ int old = au_debug_test();
5515+ if (!old)
5516+ au_debug(1); /* switch debug output on just for the dump below */
5517+ AuDbg("b%d, %s:%d\n", bindex, func, line);
5518+ AuDbgDentry(dentry);
5519+ AuDbgInode(inode);
5520+ if (!old)
5521+ au_debug(0); /* restore the previous setting */
5522+ BUG();
5523+ }
5524+ }
5525+}
5526+/* Debug assertions: @dentry is a directory, is not IS_ROOT, and au_digen_test() of its parent against @sigen is zero. */
1facf9fc 5527+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
5528+{
5529+ struct dentry *parent;
5530+
5531+ parent = dget_parent(dentry);
027c5e7a
AM
5532+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); /* d_inode is dereferenced without a NULL test */
5533+ AuDebugOn(IS_ROOT(dentry));
5534+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5535+ dput(parent); /* drop the reference taken by dget_parent() */
5536+}
5537+/* Debug assertions: @dentry is negative or a non-directory, and au_digen_test() of its parent against @sigen is zero. */
5538+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
5539+{
5540+ struct dentry *parent;
027c5e7a 5541+ struct inode *inode;
1facf9fc 5542+
5543+ parent = dget_parent(dentry);
027c5e7a
AM
5544+ inode = dentry->d_inode;
5545+ AuDebugOn(inode && S_ISDIR(inode->i_mode)); /* test the snapshot that was NULL-checked, not a second read of d_inode */
5546+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5547+ dput(parent); /* drop the reference taken by dget_parent() */
5548+}
5549+/* Debug assertion over the dentries collected by au_dcsub_pages_rev_aufs() for @parent: au_digen_test() against @sigen must be zero for each. */
5550+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5551+{
5552+ int err, i, j;
5553+ struct au_dcsub_pages dpages;
5554+ struct au_dpage *dpage;
5555+ struct dentry **dentries;
5556+
5557+ err = au_dpages_init(&dpages, GFP_NOFS);
5558+ AuDebugOn(err);
027c5e7a 5559+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 5560+ AuDebugOn(err);
5561+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* pages and entries are visited from last to first */
5562+ dpage = dpages.dpages + i;
5563+ dentries = dpage->dentries;
5564+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 5565+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 5566+ }
5567+ au_dpages_free(&dpages);
5568+}
5569+/* When au_wkq_test() is non-zero, run au_dbg_blocked() and emit a WARN_ON(1) backtrace. */
1facf9fc 5570+void au_dbg_verify_kthread(void)
5571+{
53392da6 5572+ if (au_wkq_test()) {
1facf9fc 5573+ au_dbg_blocked();
7f207e10 5574+ WARN_ON(1);
1facf9fc 5575+ }
5576+}
5577+
5578+/* ---------------------------------------------------------------------- */
5579+/* Apply the compile-time AuForce* overrides to @sbinfo: clear PLINK/XINO/REFROF, select UDBA_HNOTIFY, zero si_rdblk and si_rdhash. */
5580+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
5581+{
5582+#ifdef AuForceNoPlink
5583+ au_opt_clr(sbinfo->si_mntflags, PLINK);
5584+#endif
5585+#ifdef AuForceNoXino
5586+ au_opt_clr(sbinfo->si_mntflags, XINO);
5587+#endif
5588+#ifdef AuForceNoRefrof
5589+ au_opt_clr(sbinfo->si_mntflags, REFROF);
5590+#endif
4a4d8108
AM
5591+#ifdef AuForceHnotify
5592+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY);
1facf9fc 5593+#endif
1308ab2a 5594+#ifdef AuForceRd0
5595+ sbinfo->si_rdblk = 0;
5596+ sbinfo->si_rdhash = 0;
5597+#endif
1facf9fc 5598+}
5599+/* Init-time sanity checks on type properties plus optional build-time overrides; always returns 0. */
5600+int __init au_debug_init(void)
5601+{
5602+ aufs_bindex_t bindex;
5603+ struct au_vdir_destr destr;
5604+
5605+ bindex = -1;
5606+ AuDebugOn(bindex >= 0); /* aufs_bindex_t must be able to hold a negative value */
5607+
5608+ destr.len = -1;
5609+ AuDebugOn(destr.len < NAME_MAX); /* -1 stored in destr.len must not compare below NAME_MAX */
5610+
5611+#ifdef CONFIG_4KSTACKS
4a4d8108 5612+ pr_warning("CONFIG_4KSTACKS is defined.\n");
1facf9fc 5613+#endif
5614+
5615+#ifdef AuForceNoBrs
5616+ sysaufs_brs = 0;
5617+#endif
5618+
5619+ return 0;
5620+}
7f207e10
AM
5621diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
5622--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5623+++ linux/fs/aufs/debug.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 5624@@ -0,0 +1,252 @@
1facf9fc 5625+/*
027c5e7a 5626+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5627+ *
5628+ * This program, aufs is free software; you can redistribute it and/or modify
5629+ * it under the terms of the GNU General Public License as published by
5630+ * the Free Software Foundation; either version 2 of the License, or
5631+ * (at your option) any later version.
dece6358
AM
5632+ *
5633+ * This program is distributed in the hope that it will be useful,
5634+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5635+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5636+ * GNU General Public License for more details.
5637+ *
5638+ * You should have received a copy of the GNU General Public License
5639+ * along with this program; if not, write to the Free Software
5640+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5641+ */
5642+
5643+/*
5644+ * debug print functions
5645+ */
5646+
5647+#ifndef __AUFS_DEBUG_H__
5648+#define __AUFS_DEBUG_H__
5649+
5650+#ifdef __KERNEL__
5651+
1308ab2a 5652+#include <asm/system.h>
dece6358 5653+#include <linux/bug.h>
7f207e10 5654+/* #include <linux/err.h> */
1308ab2a 5655+#include <linux/init.h>
4a4d8108
AM
5656+#include <linux/module.h>
5657+#include <linux/kallsyms.h>
7f207e10 5658+/* #include <linux/kernel.h> */
1facf9fc 5659+#include <linux/delay.h>
7f207e10 5660+/* #include <linux/kd.h> */
1facf9fc 5661+#include <linux/sysrq.h>
5662+#include <linux/aufs_type.h>
5663+
4a4d8108
AM
5664+#include <asm/system.h>
5665+/* aufs_debug switch: with CONFIG_AUFS_DEBUG, AuDebugOn is BUG_ON and au_debug()/au_debug_test() write/read aufs_debug; otherwise AuDebugOn expands to nothing and AuStub* entries replace the accessors. */
1facf9fc 5666+#ifdef CONFIG_AUFS_DEBUG
5667+#define AuDebugOn(a) BUG_ON(a)
5668+
5669+/* module parameter */
5670+extern int aufs_debug;
5671+static inline void au_debug(int n)
5672+{
5673+ aufs_debug = n;
5674+ smp_mb(); /* full barrier right after the store */
5675+}
5676+
5677+static inline int au_debug_test(void)
5678+{
5679+ return aufs_debug;
5680+}
5681+#else
5682+#define AuDebugOn(a) do {} while (0) /* the argument is not evaluated in this configuration */
4a4d8108
AM
5683+AuStubVoid(au_debug, int n)
5684+AuStubInt0(au_debug_test, void)
1facf9fc 5685+#endif /* CONFIG_AUFS_DEBUG */
5686+
5687+/* ---------------------------------------------------------------------- */
5688+
5689+/* debug print */
5690+/* AuDbg prints via pr_debug only while au_debug_test() is non-zero; AuLabel and AuIOErr wrap AuDbg and pr_err. AuWarn1 prints when its static unsigned char counter reads 0. NOTE(review): that counter wraps, so the message can repeat; confirm this is intended. */
4a4d8108 5691+#define AuDbg(fmt, ...) do { \
1facf9fc 5692+ if (au_debug_test()) \
4a4d8108 5693+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 5694+} while (0)
4a4d8108
AM
5695+#define AuLabel(l) AuDbg(#l "\n")
5696+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
5697+#define AuWarn1(fmt, ...) do { \
1facf9fc 5698+ static unsigned char _c; \
5699+ if (!_c++) \
4a4d8108 5700+ pr_warning(fmt, ##__VA_ARGS__); \
1facf9fc 5701+} while (0)
5702+/* AuErr1: pr_err guarded by its own static unsigned char counter. */
4a4d8108 5703+#define AuErr1(fmt, ...) do { \
1facf9fc 5704+ static unsigned char _c; \
5705+ if (!_c++) \
4a4d8108 5706+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 5707+} while (0)
5708+/* AuIOErr1: AuIOErr guarded by its own static unsigned char counter. */
4a4d8108 5709+#define AuIOErr1(fmt, ...) do { \
1facf9fc 5710+ static unsigned char _c; \
5711+ if (!_c++) \
4a4d8108 5712+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 5713+} while (0)
5714+/* AuUnsupport: log the fixed AuUnsupportMsg text followed by the caller's format, then dump the stack. */
5715+#define AuUnsupportMsg "This operation is not supported." \
5716+ " Please report this application to aufs-users ML."
4a4d8108
AM
5717+#define AuUnsupport(fmt, ...) do { \
5718+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 5719+ dump_stack(); \
5720+} while (0)
5721+/* AuTraceErr: AuDbg the value of (e) when it is negative; (e) is expanded twice. */
5722+#define AuTraceErr(e) do { \
5723+ if (unlikely((e) < 0)) \
5724+ AuDbg("err %d\n", (int)(e)); \
5725+} while (0)
5726+/* AuTraceErrPtr: AuDbg the PTR_ERR() value when p is an ERR_PTR; p is expanded twice. */
5727+#define AuTraceErrPtr(p) do { \
5728+ if (IS_ERR(p)) \
5729+ AuDbg("err %ld\n", PTR_ERR(p)); \
5730+} while (0)
5731+/* The two macros below each expand to TWO comma-separated arguments (length, then name). */
5732+/* dirty macros for debug print, use with "%.*s" and caution */
5733+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
5734+#define AuDLNPair(d) AuLNPair(&(d)->d_name)
5735+
5736+/* ---------------------------------------------------------------------- */
5737+/* Forward declarations, then (CONFIG_AUFS_DEBUG only) prototypes of the dump/verify helpers and the AuDbg* wrapper macros around them. */
5738+struct au_sbinfo;
5739+struct au_finfo;
dece6358 5740+struct dentry;
1facf9fc 5741+#ifdef CONFIG_AUFS_DEBUG
5742+extern char *au_plevel;
5743+struct au_nhash;
5744+void au_dpri_whlist(struct au_nhash *whlist);
5745+struct au_vdir;
5746+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 5747+struct inode;
1facf9fc 5748+void au_dpri_inode(struct inode *inode);
2cbb1c4b 5749+void au_dpri_dalias(struct inode *inode);
1facf9fc 5750+void au_dpri_dentry(struct dentry *dentry);
dece6358 5751+struct file;
1facf9fc 5752+void au_dpri_file(struct file *filp);
dece6358 5753+struct super_block;
1facf9fc 5754+void au_dpri_sb(struct super_block *sb);
5755+
5756+void au_dbg_sleep_jiffy(int jiffy);
dece6358 5757+struct iattr;
1facf9fc 5758+void au_dbg_iattr(struct iattr *ia);
5759+/* au_dbg_verify_dinode(d) passes the call site (__func__, __LINE__) to __au_dbg_verify_dinode(). */
027c5e7a
AM
5760+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
5761+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 5762+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5763+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5764+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 5765+void au_dbg_verify_kthread(void);
5766+
5767+int __init au_debug_init(void);
5768+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5769+#define AuDbgWhlist(w) do { \
5770+ AuDbg(#w "\n"); \
5771+ au_dpri_whlist(w); \
5772+} while (0)
5773+/* Each AuDbgXxx(arg) wrapper first AuDbg-prints the argument expression text, then calls the matching au_dpri_xxx(arg). */
5774+#define AuDbgVdir(v) do { \
5775+ AuDbg(#v "\n"); \
5776+ au_dpri_vdir(v); \
5777+} while (0)
5778+
5779+#define AuDbgInode(i) do { \
5780+ AuDbg(#i "\n"); \
5781+ au_dpri_inode(i); \
5782+} while (0)
5783+
2cbb1c4b
JR
5784+#define AuDbgDAlias(i) do { \
5785+ AuDbg(#i "\n"); \
5786+ au_dpri_dalias(i); \
5787+} while (0)
5788+
1facf9fc 5789+#define AuDbgDentry(d) do { \
5790+ AuDbg(#d "\n"); \
5791+ au_dpri_dentry(d); \
5792+} while (0)
5793+
5794+#define AuDbgFile(f) do { \
5795+ AuDbg(#f "\n"); \
5796+ au_dpri_file(f); \
5797+} while (0)
5798+
5799+#define AuDbgSb(sb) do { \
5800+ AuDbg(#sb "\n"); \
5801+ au_dpri_sb(sb); \
5802+} while (0)
5803+/* AuDbgSleep: AuDbg the duration, then ssleep(); the argument is expanded twice. */
5804+#define AuDbgSleep(sec) do { \
5805+ AuDbg("sleep %d sec\n", sec); \
5806+ ssleep(sec); \
5807+} while (0)
5808+/* AuDbgSleepJiffy: AuDbg the duration, then au_dbg_sleep_jiffy(); the argument is expanded twice. */
5809+#define AuDbgSleepJiffy(jiffy) do { \
5810+ AuDbg("sleep %d jiffies\n", jiffy); \
5811+ au_dbg_sleep_jiffy(jiffy); \
5812+} while (0)
5813+/* AuDbgIAttr: AuDbg the raw ia_valid mask, then au_dbg_iattr() for the per-bit names. */
5814+#define AuDbgIAttr(ia) do { \
5815+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5816+ au_dbg_iattr(ia); \
5817+} while (0)
4a4d8108
AM
5818+/* AuDbgSym: format addr with sprint_symbol() into a local buffer and AuDbg the result; addr is parenthesized so the cast covers the whole argument expression. */
5819+#define AuDbgSym(addr) do { \
5820+ char sym[KSYM_SYMBOL_LEN]; \
5821+ sprint_symbol(sym, (unsigned long)(addr)); \
5822+ AuDbg("%s\n", sym); \
5823+} while (0)
5824+/* AuInfoSym: format addr with sprint_symbol() into a local buffer and pass the result to AuInfo; addr is parenthesized so the cast covers the whole argument expression. */
5825+#define AuInfoSym(addr) do { \
5826+ char sym[KSYM_SYMBOL_LEN]; \
5827+ sprint_symbol(sym, (unsigned long)(addr)); \
5828+ AuInfo("%s\n", sym); \
5829+} while (0)
1facf9fc 5830+#else /* without CONFIG_AUFS_DEBUG: AuStub* entries for the functions, empty statements for the macros */
027c5e7a 5831+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
5832+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
5833+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
5834+ unsigned int sigen)
5835+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
5836+AuStubVoid(au_dbg_verify_kthread, void)
5837+AuStubInt0(__init au_debug_init, void)
5838+AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)
1facf9fc 5839+/* The macro arguments below are never evaluated in this configuration. */
1facf9fc 5840+#define AuDbgWhlist(w) do {} while (0)
5841+#define AuDbgVdir(v) do {} while (0)
5842+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 5843+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 5844+#define AuDbgDentry(d) do {} while (0)
5845+#define AuDbgFile(f) do {} while (0)
5846+#define AuDbgSb(sb) do {} while (0)
5847+#define AuDbgSleep(sec) do {} while (0)
5848+#define AuDbgSleepJiffy(jiffy) do {} while (0)
5849+#define AuDbgIAttr(ia) do {} while (0)
4a4d8108
AM
5850+#define AuDbgSym(addr) do {} while (0)
5851+#define AuInfoSym(addr) do {} while (0)
1facf9fc 5852+#endif /* CONFIG_AUFS_DEBUG */
5853+
5854+/* ---------------------------------------------------------------------- */
5855+/* Magic-sysrq hooks: real init/fin prototypes under CONFIG_AUFS_MAGIC_SYSRQ; au_dbg_blocked() warns and triggers sysrq w only when CONFIG_HW_CONSOLE is also set, and is an AuStubVoid entry otherwise. */
5856+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5857+int __init au_sysrq_init(void);
5858+void au_sysrq_fin(void);
5859+
5860+#ifdef CONFIG_HW_CONSOLE
5861+#define au_dbg_blocked() do { \
5862+ WARN_ON(1); \
0c5527e5 5863+ handle_sysrq('w'); \
1facf9fc 5864+} while (0)
5865+#else
4a4d8108 5866+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5867+#endif
5868+
5869+#else
4a4d8108
AM
5870+AuStubInt0(__init au_sysrq_init, void)
5871+AuStubVoid(au_sysrq_fin, void)
5872+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5873+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5874+
5875+#endif /* __KERNEL__ */
5876+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
5877diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
5878--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
53392da6 5879+++ linux/fs/aufs/dentry.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5880@@ -0,0 +1,1140 @@
1facf9fc 5881+/*
027c5e7a 5882+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5883+ *
5884+ * This program, aufs is free software; you can redistribute it and/or modify
5885+ * it under the terms of the GNU General Public License as published by
5886+ * the Free Software Foundation; either version 2 of the License, or
5887+ * (at your option) any later version.
dece6358
AM
5888+ *
5889+ * This program is distributed in the hope that it will be useful,
5890+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5891+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5892+ * GNU General Public License for more details.
5893+ *
5894+ * You should have received a copy of the GNU General Public License
5895+ * along with this program; if not, write to the Free Software
5896+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5897+ */
5898+
5899+/*
5900+ * lookup and dentry operations
5901+ */
5902+
dece6358 5903+#include <linux/namei.h>
1facf9fc 5904+#include "aufs.h"
5905+/* Prepare @h_nd for a lower lookup: copy @nd with LOOKUP_OPEN/CREATE/FOLLOW/EXCL cleared and the open-intent file reset, or zero-fill it when @nd is NULL. */
5906+static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5907+{
5908+ if (nd) {
5909+ *h_nd = *nd;
5910+
5911+ /*
5912+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5913+ * due to whiteout and branch permission.
5914+ */
5915+ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
b752ccd1 5916+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
1facf9fc 5917+ /* unnecessary? */
5918+ h_nd->intent.open.file = NULL;
5919+ } else
5920+ memset(h_nd, 0, sizeof(*h_nd));
5921+}
5922+/* Argument bundle for au_call_lkup_one(): the au_lkup_one() parameters plus @errp, where the resulting dentry or ERR_PTR is stored. */
5923+struct au_lkup_one_args {
5924+ struct dentry **errp;
5925+ struct qstr *name;
5926+ struct dentry *h_parent;
5927+ struct au_branch *br;
5928+ struct nameidata *nd;
5929+};
5930+/* Look up @name under @h_parent on branch @br; returns the dentry or an ERR_PTR. @nd may be NULL. */
5931+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5932+ struct au_branch *br, struct nameidata *nd)
5933+{
5934+ struct dentry *h_dentry;
5935+ int err;
5936+ struct nameidata h_nd;
5937+
5938+ if (au_test_fs_null_nd(h_parent->d_sb)) /* such filesystems take the lookup_one_len path, without a nameidata */
5939+ return vfsub_lookup_one_len(name->name, h_parent, name->len);
5940+
5941+ au_h_nd(&h_nd, nd);
5942+ h_nd.path.dentry = h_parent;
5943+ h_nd.path.mnt = br->br_mnt;
5944+
2cbb1c4b 5945+ err = vfsub_name_hash(name->name, &h_nd.last, name->len);
1facf9fc 5946+ h_dentry = ERR_PTR(err);
5947+ if (!err) {
5948+ path_get(&h_nd.path); /* hold the path for the duration of the lookup */
5949+ h_dentry = vfsub_lookup_hash(&h_nd);
5950+ path_put(&h_nd.path);
5951+ }
5952+
4a4d8108 5953+ AuTraceErrPtr(h_dentry);
1facf9fc 5954+ return h_dentry;
5955+}
5956+/* void-pointer callback adapter: unpack struct au_lkup_one_args, run au_lkup_one() and store the result through errp. */
5957+static void au_call_lkup_one(void *args)
5958+{
5959+ struct au_lkup_one_args *a = args;
5960+ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5961+}
5962+/* Lookup flag bit and helpers that test, set or clear a flag given its short name (the AuLkup_ prefix is pasted on). */
5963+#define AuLkup_ALLOW_NEG 1
5964+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
5965+#define au_fset_lkup(flags, name) \
5966+ do { (flags) |= AuLkup_##name; } while (0)
5967+#define au_fclr_lkup(flags, name) \
5968+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 5969+/* State handed to au_do_lookup(): AuLkup_* flags, the expected S_IFMT type (0 disables the type check) and the nameidata to forward. */
5970+struct au_do_lookup_args {
5971+ unsigned int flags;
5972+ mode_t type;
5973+ struct nameidata *nd;
5974+};
5975+/* Look up @dentry's name under @h_parent on branch @bindex, testing for whiteout @wh_name first when the branch allows whiteouts. */
5976+/*
5977+ * returns positive/negative dentry, NULL or an error.
5978+ * NULL means whiteout-ed or not-found.
5979+ */
5980+static struct dentry*
5981+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
5982+ aufs_bindex_t bindex, struct qstr *wh_name,
5983+ struct au_do_lookup_args *args)
5984+{
5985+ struct dentry *h_dentry;
5986+ struct inode *h_inode, *inode;
1facf9fc 5987+ struct au_branch *br;
5988+ int wh_found, opq;
5989+ unsigned char wh_able;
5990+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
5991+
1facf9fc 5992+ wh_found = 0;
5993+ br = au_sbr(dentry->d_sb, bindex);
5994+ wh_able = !!au_br_whable(br->br_perm);
5995+ if (wh_able)
5996+ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
5997+ h_dentry = ERR_PTR(wh_found); /* only returned as-is on the negative (error) path below */
5998+ if (!wh_found)
5999+ goto real_lookup;
6000+ if (unlikely(wh_found < 0))
6001+ goto out;
6002+
6003+ /* We found a whiteout */
6004+ /* au_set_dbend(dentry, bindex); */
6005+ au_set_dbwh(dentry, bindex);
6006+ if (!allow_neg)
6007+ return NULL; /* success */
6008+
4f0767ce 6009+real_lookup:
4a4d8108 6010+ h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd);
1facf9fc 6011+ if (IS_ERR(h_dentry))
6012+ goto out;
6013+
6014+ h_inode = h_dentry->d_inode;
6015+ if (!h_inode) {
6016+ if (!allow_neg)
6017+ goto out_neg;
6018+ } else if (wh_found
6019+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
6020+ goto out_neg; /* positive entry hidden by a whiteout, or of another file type */
6021+
6022+ if (au_dbend(dentry) <= bindex)
6023+ au_set_dbend(dentry, bindex);
6024+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
6025+ au_set_dbstart(dentry, bindex);
6026+ au_set_h_dptr(dentry, bindex, h_dentry);
6027+
6028+ inode = dentry->d_inode;
6029+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
6030+ || (inode && !S_ISDIR(inode->i_mode)))
6031+ goto out; /* success */
6032+
6033+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); /* the opaque test runs under the lower dir's i_mutex */
6034+ opq = au_diropq_test(h_dentry, br);
6035+ mutex_unlock(&h_inode->i_mutex);
6036+ if (opq > 0)
6037+ au_set_dbdiropq(dentry, bindex);
6038+ else if (unlikely(opq < 0)) {
6039+ au_set_h_dptr(dentry, bindex, NULL); /* undo the slot assignment made above */
6040+ h_dentry = ERR_PTR(opq);
6041+ }
6042+ goto out;
6043+
4f0767ce 6044+out_neg:
1facf9fc 6045+ dput(h_dentry);
6046+ h_dentry = NULL;
4f0767ce 6047+out:
1facf9fc 6048+ return h_dentry;
6049+}
6050+/* Return -EPERM when the SHWH mount option is off and @name begins with AUFS_WH_PFX; otherwise 0. */
dece6358
AM
6051+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
6052+{
6053+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
6054+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
6055+ return -EPERM;
6056+ return 0;
6057+}
6058+/* Look up @dentry on the branches from @bstart to the parent's au_dbtaildir(), filling in its lower dentries. */
1facf9fc 6059+/*
6060+ * returns the number of lower positive dentries,
6061+ * otherwise an error.
6062+ * can be called at unlinking with @type is zero.
6063+ */
6064+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6065+ struct nameidata *nd)
6066+{
6067+ int npositive, err;
6068+ aufs_bindex_t bindex, btail, bdiropq;
6069+ unsigned char isdir;
6070+ struct qstr whname;
6071+ struct au_do_lookup_args args = {
6072+ .flags = 0,
6073+ .type = type,
6074+ .nd = nd
6075+ };
6076+ const struct qstr *name = &dentry->d_name;
6077+ struct dentry *parent;
6078+ struct inode *inode;
6079+
dece6358
AM
6080+ err = au_test_shwh(dentry->d_sb, name);
6081+ if (unlikely(err))
1facf9fc 6082+ goto out;
6083+
6084+ err = au_wh_name_alloc(&whname, name);
6085+ if (unlikely(err))
6086+ goto out;
6087+
6088+ inode = dentry->d_inode;
6089+ isdir = !!(inode && S_ISDIR(inode->i_mode));
6090+ if (!type)
6091+ au_fset_lkup(args.flags, ALLOW_NEG); /* a zero @type permits a negative result for the first lookup */
6092+
6093+ npositive = 0;
4a4d8108 6094+ parent = dget_parent(dentry);
1facf9fc 6095+ btail = au_dbtaildir(parent);
6096+ for (bindex = bstart; bindex <= btail; bindex++) {
6097+ struct dentry *h_parent, *h_dentry;
6098+ struct inode *h_inode, *h_dir;
6099+
6100+ h_dentry = au_h_dptr(dentry, bindex);
6101+ if (h_dentry) { /* this branch already has a lower dentry: count it, do not look up again */
6102+ if (h_dentry->d_inode)
6103+ npositive++;
6104+ if (type != S_IFDIR)
6105+ break;
6106+ continue;
6107+ }
6108+ h_parent = au_h_dptr(parent, bindex);
6109+ if (!h_parent)
6110+ continue;
6111+ h_dir = h_parent->d_inode;
6112+ if (!h_dir || !S_ISDIR(h_dir->i_mode))
6113+ continue;
6114+
6115+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
6116+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
6117+ &args);
6118+ mutex_unlock(&h_dir->i_mutex);
6119+ err = PTR_ERR(h_dentry);
6120+ if (IS_ERR(h_dentry))
4a4d8108 6121+ goto out_parent;
1facf9fc 6122+ au_fclr_lkup(args.flags, ALLOW_NEG); /* only the first au_do_lookup() call may run with ALLOW_NEG */
6123+
6124+ if (au_dbwh(dentry) >= 0)
6125+ break;
6126+ if (!h_dentry)
6127+ continue;
6128+ h_inode = h_dentry->d_inode;
6129+ if (!h_inode)
6130+ continue;
6131+ npositive++;
6132+ if (!args.type)
6133+ args.type = h_inode->i_mode & S_IFMT; /* the first positive entry fixes the type for later branches */
6134+ if (args.type != S_IFDIR)
6135+ break;
6136+ else if (isdir) {
6137+ /* the type of lower may be different */
6138+ bdiropq = au_dbdiropq(dentry);
6139+ if (bdiropq >= 0 && bdiropq <= bindex)
6140+ break;
6141+ }
6142+ }
6143+
6144+ if (npositive) {
6145+ AuLabel(positive);
6146+ au_update_dbstart(dentry);
6147+ }
6148+ err = npositive;
6149+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
027c5e7a 6150+ && au_dbstart(dentry) < 0)) {
1facf9fc 6151+ err = -EIO;
027c5e7a
AM
6152+ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n",
6153+ AuDLNPair(dentry), err);
6154+ }
1facf9fc 6155+
4f0767ce 6156+out_parent:
4a4d8108 6157+ dput(parent);
1facf9fc 6158+ kfree(whname.name);
4f0767ce 6159+out:
1facf9fc 6160+ return err;
6161+}
6162+/* au_lkup_one() with a NULL nameidata: called directly when au_test_h_perm_sio() returns 0 for MAY_EXEC on @parent, otherwise run through au_wkq_wait(). Returns the dentry or an ERR_PTR. */
6163+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6164+ struct au_branch *br)
6165+{
6166+ struct dentry *dentry;
6167+ int wkq_err;
6168+
6169+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
6170+ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
6171+ else {
6172+ struct au_lkup_one_args args = {
6173+ .errp = &dentry,
6174+ .name = name,
6175+ .h_parent = parent,
6176+ .br = br,
6177+ .nd = NULL
6178+ };
6179+
6180+ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
6181+ if (unlikely(wkq_err))
6182+ dentry = ERR_PTR(wkq_err); /* a failed au_wkq_wait() overrides whatever the callback stored */
6183+ }
6184+
6185+ return dentry;
6186+}
6187+/* Returns 0 after recording the negative lower dentry in slot @bindex, -EIO if the lower entry turns out positive, or the lookup error. */
6188+/*
6189+ * lookup @dentry on @bindex which should be negative.
6190+ */
6191+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
6192+{
6193+ int err;
6194+ struct dentry *parent, *h_parent, *h_dentry;
1facf9fc 6195+
1facf9fc 6196+ parent = dget_parent(dentry);
6197+ h_parent = au_h_dptr(parent, bindex);
4a4d8108 6198+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent,
1facf9fc 6199+ au_sbr(dentry->d_sb, bindex));
6200+ err = PTR_ERR(h_dentry);
6201+ if (IS_ERR(h_dentry))
6202+ goto out;
6203+ if (unlikely(h_dentry->d_inode)) {
6204+ err = -EIO;
027c5e7a
AM
6205+ AuIOErr("%.*s should be negative on b%d.\n",
6206+ AuDLNPair(h_dentry), bindex);
1facf9fc 6207+ dput(h_dentry);
6208+ goto out;
6209+ }
6210+
4a4d8108 6211+ err = 0;
1facf9fc 6212+ if (bindex < au_dbstart(dentry)) /* NOTE(review): unlike au_do_lookup(), a negative current bstart is not handled here; confirm callers */
6213+ au_set_dbstart(dentry, bindex);
6214+ if (au_dbend(dentry) < bindex)
6215+ au_set_dbend(dentry, bindex);
6216+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 6217+
4f0767ce 6218+out:
1facf9fc 6219+ dput(parent);
6220+ return err;
6221+}
6222+
6223+/* ---------------------------------------------------------------------- */
6224+/* Snapshot of the inode fields that au_iattr_save() records and au_iattr_test() compares; the commented-out members are not tracked. */
6225+/* subset of struct inode */
6226+struct au_iattr {
6227+ unsigned long i_ino;
6228+ /* unsigned int i_nlink; */
6229+ uid_t i_uid;
6230+ gid_t i_gid;
6231+ u64 i_version;
6232+/*
6233+ loff_t i_size;
6234+ blkcnt_t i_blocks;
6235+*/
6236+ umode_t i_mode;
6237+};
6238+/* Copy the tracked fields of @h_inode into @ia. */
6239+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
6240+{
6241+ ia->i_ino = h_inode->i_ino;
6242+ /* ia->i_nlink = h_inode->i_nlink; */
6243+ ia->i_uid = h_inode->i_uid;
6244+ ia->i_gid = h_inode->i_gid;
6245+ ia->i_version = h_inode->i_version;
6246+/*
6247+ ia->i_size = h_inode->i_size;
6248+ ia->i_blocks = h_inode->i_blocks;
6249+*/
6250+ ia->i_mode = (h_inode->i_mode & S_IFMT); /* only the file-type bits are kept */
6251+}
6252+/* Return non-zero when any field saved in @ia differs from the current value in @h_inode, 0 when all match. */
6253+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
6254+{
6255+ return ia->i_ino != h_inode->i_ino
6256+ /* || ia->i_nlink != h_inode->i_nlink */
6257+ || ia->i_uid != h_inode->i_uid
6258+ || ia->i_gid != h_inode->i_gid
6259+ || ia->i_version != h_inode->i_version
6260+/*
6261+ || ia->i_size != h_inode->i_size
6262+ || ia->i_blocks != h_inode->i_blocks
6263+*/
6264+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
6265+}
6266+/* Look @h_dentry's name up again under @h_parent; returns 0 if nothing changed, au_busy_or_stale() if the dentry, its inode or the saved inode fields differ, or the lookup error. */
6267+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
6268+ struct au_branch *br)
6269+{
6270+ int err;
6271+ struct au_iattr ia;
6272+ struct inode *h_inode;
6273+ struct dentry *h_d;
6274+ struct super_block *h_sb;
6275+
6276+ err = 0;
6277+ memset(&ia, -1, sizeof(ia));
6278+ h_sb = h_dentry->d_sb;
6279+ h_inode = h_dentry->d_inode;
6280+ if (h_inode)
6281+ au_iattr_save(&ia, h_inode); /* snapshot taken before the second lookup */
6282+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
6283+ /* nfs d_revalidate may return 0 for negative dentry */
6284+ /* fuse d_revalidate always return 0 for negative dentry */
6285+ goto out;
6286+
6287+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
6288+ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
6289+ err = PTR_ERR(h_d);
6290+ if (IS_ERR(h_d))
6291+ goto out;
6292+
6293+ err = 0;
6294+ if (unlikely(h_d != h_dentry
6295+ || h_d->d_inode != h_inode
6296+ || (h_inode && au_iattr_test(&ia, h_inode))))
6297+ err = au_busy_or_stale();
6298+ dput(h_d); /* drop the reference returned by the second lookup */
6299+
4f0767ce 6300+out:
1facf9fc 6301+ AuTraceErr(err);
6302+ return err;
6303+}
6304+/* Verify lower dentry @h_dentry according to @udba; returns 0 when it passes or when @udba is AuOpt_UDBA_NONE. */
6305+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6306+ struct dentry *h_parent, struct au_branch *br)
6307+{
6308+ int err;
6309+
6310+ err = 0;
027c5e7a
AM
6311+ if (udba == AuOpt_UDBA_REVAL
6312+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 6313+ IMustLock(h_dir);
6314+ err = (h_dentry->d_parent->d_inode != h_dir); /* note: this branch yields 0 or 1, not a negative errno */
027c5e7a 6315+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 6316+ err = au_h_verify_dentry(h_dentry, h_parent, br);
6317+
6318+ return err;
6319+}
6320+
6321+/* ---------------------------------------------------------------------- */
6322+
027c5e7a 6323+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 6324+{
027c5e7a 6325+ int err;
1facf9fc 6326+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
6327+ struct au_hdentry tmp, *p, *q;
6328+ struct au_dinfo *dinfo;
6329+ struct super_block *sb;
1facf9fc 6330+
027c5e7a 6331+ DiMustWriteLock(dentry);
1308ab2a 6332+
027c5e7a
AM
6333+ sb = dentry->d_sb;
6334+ dinfo = au_di(dentry);
1facf9fc 6335+ bend = dinfo->di_bend;
6336+ bwh = dinfo->di_bwh;
6337+ bdiropq = dinfo->di_bdiropq;
027c5e7a 6338+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 6339+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 6340+ if (!p->hd_dentry)
1facf9fc 6341+ continue;
6342+
027c5e7a
AM
6343+ new_bindex = au_br_index(sb, p->hd_id);
6344+ if (new_bindex == bindex)
1facf9fc 6345+ continue;
1facf9fc 6346+
1facf9fc 6347+ if (dinfo->di_bwh == bindex)
6348+ bwh = new_bindex;
6349+ if (dinfo->di_bdiropq == bindex)
6350+ bdiropq = new_bindex;
6351+ if (new_bindex < 0) {
6352+ au_hdput(p);
6353+ p->hd_dentry = NULL;
6354+ continue;
6355+ }
6356+
6357+ /* swap two lower dentries, and loop again */
6358+ q = dinfo->di_hdentry + new_bindex;
6359+ tmp = *q;
6360+ *q = *p;
6361+ *p = tmp;
6362+ if (tmp.hd_dentry) {
6363+ bindex--;
6364+ p--;
6365+ }
6366+ }
6367+
1facf9fc 6368+ dinfo->di_bwh = -1;
6369+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
6370+ dinfo->di_bwh = bwh;
6371+
6372+ dinfo->di_bdiropq = -1;
6373+ if (bdiropq >= 0
6374+ && bdiropq <= au_sbend(sb)
6375+ && au_sbr_whable(sb, bdiropq))
6376+ dinfo->di_bdiropq = bdiropq;
6377+
027c5e7a
AM
6378+ err = -EIO;
6379+ dinfo->di_bstart = -1;
6380+ dinfo->di_bend = -1;
1facf9fc 6381+ bend = au_dbend(parent);
6382+ p = dinfo->di_hdentry;
6383+ for (bindex = 0; bindex <= bend; bindex++, p++)
6384+ if (p->hd_dentry) {
6385+ dinfo->di_bstart = bindex;
6386+ break;
6387+ }
6388+
027c5e7a
AM
6389+ if (dinfo->di_bstart >= 0) {
6390+ p = dinfo->di_hdentry + bend;
6391+ for (bindex = bend; bindex >= 0; bindex--, p--)
6392+ if (p->hd_dentry) {
6393+ dinfo->di_bend = bindex;
6394+ err = 0;
6395+ break;
6396+ }
6397+ }
6398+
6399+ return err;
1facf9fc 6400+}
6401+
027c5e7a 6402+static void au_do_hide(struct dentry *dentry)
1facf9fc 6403+{
027c5e7a 6404+ struct inode *inode;
1facf9fc 6405+
027c5e7a
AM
6406+ inode = dentry->d_inode;
6407+ if (inode) {
6408+ if (!S_ISDIR(inode->i_mode)) {
6409+ if (inode->i_nlink && !d_unhashed(dentry))
6410+ drop_nlink(inode);
6411+ } else {
6412+ clear_nlink(inode);
6413+ /* stop next lookup */
6414+ inode->i_flags |= S_DEAD;
6415+ }
6416+ smp_mb(); /* necessary? */
6417+ }
6418+ d_drop(dentry);
6419+}
1308ab2a 6420+
027c5e7a
AM
6421+static int au_hide_children(struct dentry *parent)
6422+{
6423+ int err, i, j, ndentry;
6424+ struct au_dcsub_pages dpages;
6425+ struct au_dpage *dpage;
6426+ struct dentry *dentry;
1facf9fc 6427+
027c5e7a 6428+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 6429+ if (unlikely(err))
6430+ goto out;
027c5e7a
AM
6431+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
6432+ if (unlikely(err))
6433+ goto out_dpages;
1facf9fc 6434+
027c5e7a
AM
6435+ /* in reverse order */
6436+ for (i = dpages.ndpage - 1; i >= 0; i--) {
6437+ dpage = dpages.dpages + i;
6438+ ndentry = dpage->ndentry;
6439+ for (j = ndentry - 1; j >= 0; j--) {
6440+ dentry = dpage->dentries[j];
6441+ if (dentry != parent)
6442+ au_do_hide(dentry);
6443+ }
6444+ }
1facf9fc 6445+
027c5e7a
AM
6446+out_dpages:
6447+ au_dpages_free(&dpages);
4f0767ce 6448+out:
027c5e7a 6449+ return err;
1facf9fc 6450+}
6451+
027c5e7a 6452+static void au_hide(struct dentry *dentry)
1facf9fc 6453+{
027c5e7a
AM
6454+ int err;
6455+ struct inode *inode;
1facf9fc 6456+
027c5e7a
AM
6457+ AuDbgDentry(dentry);
6458+ inode = dentry->d_inode;
6459+ if (inode && S_ISDIR(inode->i_mode)) {
6460+ /* shrink_dcache_parent(dentry); */
6461+ err = au_hide_children(dentry);
6462+ if (unlikely(err))
6463+ AuIOErr("%.*s, failed hiding children, ignored %d\n",
6464+ AuDLNPair(dentry), err);
6465+ }
6466+ au_do_hide(dentry);
6467+}
1facf9fc 6468+
027c5e7a
AM
6469+/*
6470+ * By adding a dirty branch, a cached dentry may be affected in various ways.
6471+ *
6472+ * a dirty branch is added
6473+ * - on the top of layers
6474+ * - in the middle of layers
6475+ * - to the bottom of layers
6476+ *
6477+ * on the added branch there exists
6478+ * - a whiteout
6479+ * - a diropq
6480+ * - a same named entry
6481+ * + exist
6482+ * * negative --> positive
6483+ * * positive --> positive
6484+ * - type is unchanged
6485+ * - type is changed
6486+ * + doesn't exist
6487+ * * negative --> negative
6488+ * * positive --> negative (rejected by au_br_del() for non-dir case)
6489+ * - none
6490+ */
6491+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
6492+ struct au_dinfo *tmp)
6493+{
6494+ int err;
6495+ aufs_bindex_t bindex, bend;
6496+ struct {
6497+ struct dentry *dentry;
6498+ struct inode *inode;
6499+ mode_t mode;
6500+ } orig_h, tmp_h;
6501+ struct au_hdentry *hd;
6502+ struct inode *inode, *h_inode;
6503+ struct dentry *h_dentry;
6504+
6505+ err = 0;
6506+ AuDebugOn(dinfo->di_bstart < 0);
6507+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
6508+ orig_h.inode = orig_h.dentry->d_inode;
6509+ orig_h.mode = 0;
6510+ if (orig_h.inode)
6511+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
6512+ memset(&tmp_h, 0, sizeof(tmp_h));
6513+ if (tmp->di_bstart >= 0) {
6514+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
6515+ tmp_h.inode = tmp_h.dentry->d_inode;
6516+ if (tmp_h.inode)
6517+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
6518+ }
6519+
6520+ inode = dentry->d_inode;
6521+ if (!orig_h.inode) {
6522+ AuDbg("nagative originally\n");
6523+ if (inode) {
6524+ au_hide(dentry);
6525+ goto out;
6526+ }
6527+ AuDebugOn(inode);
6528+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6529+ AuDebugOn(dinfo->di_bdiropq != -1);
6530+
6531+ if (!tmp_h.inode) {
6532+ AuDbg("negative --> negative\n");
6533+ /* should have only one negative lower */
6534+ if (tmp->di_bstart >= 0
6535+ && tmp->di_bstart < dinfo->di_bstart) {
6536+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
6537+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6538+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
6539+ au_di_cp(dinfo, tmp);
6540+ hd = tmp->di_hdentry + tmp->di_bstart;
6541+ au_set_h_dptr(dentry, tmp->di_bstart,
6542+ dget(hd->hd_dentry));
6543+ }
6544+ au_dbg_verify_dinode(dentry);
6545+ } else {
6546+ AuDbg("negative --> positive\n");
6547+ /*
6548+ * similar to the behaviour of creating while bypassing
6549+ * aufs.
6550+ * unhash it in order to force an error in the
6551+ * succeeding create operation.
6552+ * we should not set S_DEAD here.
6553+ */
6554+ d_drop(dentry);
6555+ /* au_di_swap(tmp, dinfo); */
6556+ au_dbg_verify_dinode(dentry);
6557+ }
6558+ } else {
6559+ AuDbg("positive originally\n");
6560+ /* inode may be NULL */
6561+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
6562+ if (!tmp_h.inode) {
6563+ AuDbg("positive --> negative\n");
6564+ /* or bypassing aufs */
6565+ au_hide(dentry);
6566+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
6567+ dinfo->di_bwh = tmp->di_bwh;
6568+ if (inode)
6569+ err = au_refresh_hinode_self(inode);
6570+ au_dbg_verify_dinode(dentry);
6571+ } else if (orig_h.mode == tmp_h.mode) {
6572+ AuDbg("positive --> positive, same type\n");
6573+ if (!S_ISDIR(orig_h.mode)
6574+ && dinfo->di_bstart > tmp->di_bstart) {
6575+ /*
6576+ * similar to the behaviour of removing and
6577+ * creating.
6578+ */
6579+ au_hide(dentry);
6580+ if (inode)
6581+ err = au_refresh_hinode_self(inode);
6582+ au_dbg_verify_dinode(dentry);
6583+ } else {
6584+ /* fill empty slots */
6585+ if (dinfo->di_bstart > tmp->di_bstart)
6586+ dinfo->di_bstart = tmp->di_bstart;
6587+ if (dinfo->di_bend < tmp->di_bend)
6588+ dinfo->di_bend = tmp->di_bend;
6589+ dinfo->di_bwh = tmp->di_bwh;
6590+ dinfo->di_bdiropq = tmp->di_bdiropq;
6591+ hd = tmp->di_hdentry;
6592+ bend = dinfo->di_bend;
6593+ for (bindex = tmp->di_bstart; bindex <= bend;
6594+ bindex++) {
6595+ if (au_h_dptr(dentry, bindex))
6596+ continue;
6597+ h_dentry = hd[bindex].hd_dentry;
6598+ if (!h_dentry)
6599+ continue;
6600+ h_inode = h_dentry->d_inode;
6601+ AuDebugOn(!h_inode);
6602+ AuDebugOn(orig_h.mode
6603+ != (h_inode->i_mode
6604+ & S_IFMT));
6605+ au_set_h_dptr(dentry, bindex,
6606+ dget(h_dentry));
6607+ }
6608+ err = au_refresh_hinode(inode, dentry);
6609+ au_dbg_verify_dinode(dentry);
6610+ }
6611+ } else {
6612+ AuDbg("positive --> positive, different type\n");
6613+ /* similar to the behaviour of removing and creating */
6614+ au_hide(dentry);
6615+ if (inode)
6616+ err = au_refresh_hinode_self(inode);
6617+ au_dbg_verify_dinode(dentry);
6618+ }
6619+ }
6620+
6621+out:
6622+ return err;
6623+}
6624+
6625+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
6626+{
6627+ int err, ebrange;
6628+ unsigned int sigen;
6629+ struct au_dinfo *dinfo, *tmp;
6630+ struct super_block *sb;
6631+ struct inode *inode;
6632+
6633+ DiMustWriteLock(dentry);
6634+ AuDebugOn(IS_ROOT(dentry));
6635+ AuDebugOn(!parent->d_inode);
6636+
6637+ sb = dentry->d_sb;
6638+ inode = dentry->d_inode;
6639+ sigen = au_sigen(sb);
6640+ err = au_digen_test(parent, sigen);
6641+ if (unlikely(err))
6642+ goto out;
6643+
6644+ dinfo = au_di(dentry);
6645+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
6646+ if (unlikely(err))
6647+ goto out;
6648+ ebrange = au_dbrange_test(dentry);
6649+ if (!ebrange)
6650+ ebrange = au_do_refresh_hdentry(dentry, parent);
6651+
6652+ if (d_unhashed(dentry) || ebrange) {
6653+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
6654+ if (inode)
6655+ err = au_refresh_hinode_self(inode);
6656+ au_dbg_verify_dinode(dentry);
6657+ if (!err)
6658+ goto out_dgen; /* success */
6659+ goto out;
6660+ }
6661+
6662+ /* temporary dinfo */
6663+ AuDbgDentry(dentry);
6664+ err = -ENOMEM;
6665+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
6666+ if (unlikely(!tmp))
6667+ goto out;
6668+ au_di_swap(tmp, dinfo);
6669+ /* returns the number of positive dentries */
6670+ /*
6671+ * if the current working dir has been removed, it returns an error,
6672+ * but the dentry is still legal.
6673+ */
6674+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL);
6675+ AuDbgDentry(dentry);
6676+ au_di_swap(tmp, dinfo);
6677+ if (err == -ENOENT)
6678+ err = 0;
6679+ if (err >= 0) {
6680+ /* compare/refresh by dinfo */
6681+ AuDbgDentry(dentry);
6682+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
6683+ au_dbg_verify_dinode(dentry);
6684+ AuTraceErr(err);
6685+ }
6686+ au_rw_write_unlock(&tmp->di_rwsem);
6687+ au_di_free(tmp);
6688+ if (unlikely(err))
6689+ goto out;
6690+
6691+out_dgen:
6692+ au_update_digen(dentry);
6693+out:
6694+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
6695+ AuIOErr("failed refreshing %.*s, %d\n",
6696+ AuDLNPair(dentry), err);
6697+ AuDbgDentry(dentry);
6698+ }
6699+ AuTraceErr(err);
6700+ return err;
6701+}
6702+
6703+static noinline_for_stack
6704+int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
6705+ struct dentry *dentry, aufs_bindex_t bindex)
6706+{
6707+ int err, valid;
6708+ int (*reval)(struct dentry *, struct nameidata *);
6709+
6710+ err = 0;
6711+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
6712+ goto out;
6713+ reval = h_dentry->d_op->d_revalidate;
6714+
6715+ AuDbg("b%d\n", bindex);
6716+ if (au_test_fs_null_nd(h_dentry->d_sb))
6717+ /* it may return tri-state */
6718+ valid = reval(h_dentry, NULL);
6719+ else {
6720+ struct nameidata h_nd;
6721+ int locked;
1facf9fc 6722+ struct dentry *parent;
6723+
6724+ au_h_nd(&h_nd, nd);
6725+ parent = nd->path.dentry;
6726+ locked = (nd && nd->path.dentry != dentry);
6727+ if (locked)
6728+ di_read_lock_parent(parent, AuLock_IR);
6729+ BUG_ON(bindex > au_dbend(parent));
6730+ h_nd.path.dentry = au_h_dptr(parent, bindex);
6731+ BUG_ON(!h_nd.path.dentry);
6732+ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
6733+ path_get(&h_nd.path);
6734+ valid = reval(h_dentry, &h_nd);
6735+ path_put(&h_nd.path);
6736+ if (locked)
6737+ di_read_unlock(parent, AuLock_IR);
6738+ }
6739+
6740+ if (unlikely(valid < 0))
6741+ err = valid;
6742+ else if (!valid)
6743+ err = -EINVAL;
6744+
4f0767ce 6745+out:
1facf9fc 6746+ AuTraceErr(err);
6747+ return err;
6748+}
6749+
6750+/* todo: remove this */
6751+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
6752+ struct nameidata *nd, int do_udba)
6753+{
6754+ int err;
6755+ umode_t mode, h_mode;
6756+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
6757+ unsigned char plus, unhashed, is_root, h_plus;
4a4d8108 6758+ struct inode *h_inode, *h_cached_inode;
1facf9fc 6759+ struct dentry *h_dentry;
6760+ struct qstr *name, *h_name;
6761+
6762+ err = 0;
6763+ plus = 0;
6764+ mode = 0;
1facf9fc 6765+ ibs = -1;
6766+ ibe = -1;
6767+ unhashed = !!d_unhashed(dentry);
6768+ is_root = !!IS_ROOT(dentry);
6769+ name = &dentry->d_name;
6770+
6771+ /*
7f207e10
AM
6772+ * Theoretically, REVAL test should be unnecessary in case of
6773+ * {FS,I}NOTIFY.
6774+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 6775+ * IN_ATTRIB for atime/nlink/pageio
6776+ * IN_DELETE for NFS dentry
6777+ * Let's do REVAL test too.
6778+ */
6779+ if (do_udba && inode) {
6780+ mode = (inode->i_mode & S_IFMT);
6781+ plus = (inode->i_nlink > 0);
1facf9fc 6782+ ibs = au_ibstart(inode);
6783+ ibe = au_ibend(inode);
6784+ }
6785+
6786+ bstart = au_dbstart(dentry);
6787+ btail = bstart;
6788+ if (inode && S_ISDIR(inode->i_mode))
6789+ btail = au_dbtaildir(dentry);
6790+ for (bindex = bstart; bindex <= btail; bindex++) {
6791+ h_dentry = au_h_dptr(dentry, bindex);
6792+ if (!h_dentry)
6793+ continue;
6794+
6795+ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
027c5e7a 6796+ spin_lock(&h_dentry->d_lock);
1facf9fc 6797+ h_name = &h_dentry->d_name;
6798+ if (unlikely(do_udba
6799+ && !is_root
6800+ && (unhashed != !!d_unhashed(h_dentry)
6801+ || name->len != h_name->len
6802+ || memcmp(name->name, h_name->name, name->len))
6803+ )) {
6804+ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6805+ unhashed, d_unhashed(h_dentry),
6806+ AuDLNPair(dentry), AuDLNPair(h_dentry));
027c5e7a 6807+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6808+ goto err;
6809+ }
027c5e7a 6810+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6811+
6812+ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6813+ if (unlikely(err))
6814+ /* do not goto err, to keep the errno */
6815+ break;
6816+
6817+ /* todo: plink too? */
6818+ if (!do_udba)
6819+ continue;
6820+
6821+ /* UDBA tests */
6822+ h_inode = h_dentry->d_inode;
6823+ if (unlikely(!!inode != !!h_inode))
6824+ goto err;
6825+
6826+ h_plus = plus;
6827+ h_mode = mode;
6828+ h_cached_inode = h_inode;
6829+ if (h_inode) {
6830+ h_mode = (h_inode->i_mode & S_IFMT);
6831+ h_plus = (h_inode->i_nlink > 0);
6832+ }
6833+ if (inode && ibs <= bindex && bindex <= ibe)
6834+ h_cached_inode = au_h_iptr(inode, bindex);
6835+
6836+ if (unlikely(plus != h_plus
6837+ || mode != h_mode
6838+ || h_cached_inode != h_inode))
6839+ goto err;
6840+ continue;
6841+
6842+ err:
6843+ err = -EINVAL;
6844+ break;
6845+ }
6846+
6847+ return err;
6848+}
6849+
027c5e7a 6850+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 6851+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6852+{
6853+ int err;
6854+ struct dentry *parent;
1facf9fc 6855+
027c5e7a 6856+ if (!au_digen_test(dentry, sigen))
1facf9fc 6857+ return 0;
6858+
6859+ parent = dget_parent(dentry);
6860+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6861+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 6862+ au_dbg_verify_gen(parent, sigen);
027c5e7a 6863+ err = au_refresh_dentry(dentry, parent);
1facf9fc 6864+ di_read_unlock(parent, AuLock_IR);
6865+ dput(parent);
027c5e7a 6866+ AuTraceErr(err);
1facf9fc 6867+ return err;
6868+}
6869+
6870+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6871+{
6872+ int err;
6873+ struct dentry *d, *parent;
6874+ struct inode *inode;
6875+
027c5e7a 6876+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 6877+ return simple_reval_dpath(dentry, sigen);
6878+
6879+ /* slow loop, keep it simple and stupid */
6880+ /* cf: au_cpup_dirs() */
6881+ err = 0;
6882+ parent = NULL;
027c5e7a 6883+ while (au_digen_test(dentry, sigen)) {
1facf9fc 6884+ d = dentry;
6885+ while (1) {
6886+ dput(parent);
6887+ parent = dget_parent(d);
027c5e7a 6888+ if (!au_digen_test(parent, sigen))
1facf9fc 6889+ break;
6890+ d = parent;
6891+ }
6892+
6893+ inode = d->d_inode;
6894+ if (d != dentry)
027c5e7a 6895+ di_write_lock_child2(d);
1facf9fc 6896+
6897+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
6898+ if (au_digen_test(d, sigen)) {
6899+ /*
6900+ * todo: consolidate with simple_reval_dpath(),
6901+ * do_refresh() and au_reval_for_attr().
6902+ */
1facf9fc 6903+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6904+ err = au_refresh_dentry(d, parent);
1facf9fc 6905+ di_read_unlock(parent, AuLock_IR);
6906+ }
6907+
6908+ if (d != dentry)
6909+ di_write_unlock(d);
6910+ dput(parent);
6911+ if (unlikely(err))
6912+ break;
6913+ }
6914+
6915+ return err;
6916+}
6917+
6918+/*
6919+ * returns 1 if valid, 0 if invalid, or a negative errno (e.g. -ECHILD in rcu-walk).
6920+ */
6921+static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6922+{
6923+ int valid, err;
6924+ unsigned int sigen;
6925+ unsigned char do_udba;
6926+ struct super_block *sb;
6927+ struct inode *inode;
6928+
027c5e7a
AM
6929+ /* todo: support rcu-walk? */
6930+ if (nd && (nd->flags & LOOKUP_RCU))
6931+ return -ECHILD;
6932+
6933+ valid = 0;
6934+ if (unlikely(!au_di(dentry)))
6935+ goto out;
6936+
6937+ inode = dentry->d_inode;
6938+ if (inode && is_bad_inode(inode))
6939+ goto out;
6940+
e49829fe 6941+ valid = 1;
1facf9fc 6942+ sb = dentry->d_sb;
e49829fe
JR
6943+ /*
6944+ * todo: very ugly
6945+ * i_mutex of parent dir may be held,
6946+ * but we should not return 'invalid' due to busy.
6947+ */
6948+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
6949+ if (unlikely(err)) {
6950+ valid = err;
027c5e7a 6951+ AuTraceErr(err);
e49829fe
JR
6952+ goto out;
6953+ }
027c5e7a
AM
6954+ if (unlikely(au_dbrange_test(dentry))) {
6955+ err = -EINVAL;
6956+ AuTraceErr(err);
6957+ goto out_dgrade;
1facf9fc 6958+ }
027c5e7a
AM
6959+
6960+ sigen = au_sigen(sb);
6961+ if (au_digen_test(dentry, sigen)) {
1facf9fc 6962+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
6963+ err = au_reval_dpath(dentry, sigen);
6964+ if (unlikely(err)) {
6965+ AuTraceErr(err);
1facf9fc 6966+ goto out_dgrade;
027c5e7a 6967+ }
1facf9fc 6968+ }
6969+ di_downgrade_lock(dentry, AuLock_IR);
6970+
1facf9fc 6971+ err = -EINVAL;
027c5e7a
AM
6972+ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink))
6973+ goto out_inval;
6974+
1facf9fc 6975+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6976+ if (do_udba && inode) {
6977+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 6978+ struct inode *h_inode;
1facf9fc 6979+
027c5e7a
AM
6980+ if (bstart >= 0) {
6981+ h_inode = au_h_iptr(inode, bstart);
6982+ if (h_inode && au_test_higen(inode, h_inode))
6983+ goto out_inval;
6984+ }
1facf9fc 6985+ }
6986+
6987+ err = h_d_revalidate(dentry, inode, nd, do_udba);
027c5e7a 6988+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 6989+ err = -EIO;
027c5e7a
AM
6990+ AuDbg("both of real entry and whiteout found, %.*s, err %d\n",
6991+ AuDLNPair(dentry), err);
6992+ }
e49829fe 6993+ goto out_inval;
1facf9fc 6994+
4f0767ce 6995+out_dgrade:
1facf9fc 6996+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 6997+out_inval:
1facf9fc 6998+ aufs_read_unlock(dentry, AuLock_IR);
6999+ AuTraceErr(err);
7000+ valid = !err;
e49829fe 7001+out:
027c5e7a 7002+ if (!valid) {
e49829fe 7003+ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid);
027c5e7a
AM
7004+ d_drop(dentry);
7005+ }
1facf9fc 7006+ return valid;
7007+}
7008+
7009+static void aufs_d_release(struct dentry *dentry)
7010+{
027c5e7a 7011+ if (au_di(dentry)) {
4a4d8108
AM
7012+ au_di_fin(dentry);
7013+ au_hn_di_reinit(dentry);
1facf9fc 7014+ }
1facf9fc 7015+}
7016+
4a4d8108 7017+const struct dentry_operations aufs_dop = {
1facf9fc 7018+ .d_revalidate = aufs_d_revalidate,
7019+ .d_release = aufs_d_release
7020+};
7f207e10
AM
7021diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
7022--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
53392da6 7023+++ linux/fs/aufs/dentry.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7024@@ -0,0 +1,238 @@
1facf9fc 7025+/*
027c5e7a 7026+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7027+ *
7028+ * This program, aufs is free software; you can redistribute it and/or modify
7029+ * it under the terms of the GNU General Public License as published by
7030+ * the Free Software Foundation; either version 2 of the License, or
7031+ * (at your option) any later version.
dece6358
AM
7032+ *
7033+ * This program is distributed in the hope that it will be useful,
7034+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7035+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7036+ * GNU General Public License for more details.
7037+ *
7038+ * You should have received a copy of the GNU General Public License
7039+ * along with this program; if not, write to the Free Software
7040+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7041+ */
7042+
7043+/*
7044+ * lookup and dentry operations
7045+ */
7046+
7047+#ifndef __AUFS_DENTRY_H__
7048+#define __AUFS_DENTRY_H__
7049+
7050+#ifdef __KERNEL__
7051+
dece6358 7052+#include <linux/dcache.h>
1facf9fc 7053+#include <linux/aufs_type.h>
7054+#include "rwsem.h"
7055+
1facf9fc 7056+struct au_hdentry {
7057+ struct dentry *hd_dentry;
027c5e7a 7058+ aufs_bindex_t hd_id;
1facf9fc 7059+};
7060+
7061+struct au_dinfo {
7062+ atomic_t di_generation;
7063+
dece6358 7064+ struct au_rwsem di_rwsem;
1facf9fc 7065+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
7066+ struct au_hdentry *di_hdentry;
4a4d8108 7067+} ____cacheline_aligned_in_smp;
1facf9fc 7068+
7069+/* ---------------------------------------------------------------------- */
7070+
7071+/* dentry.c */
4a4d8108 7072+extern const struct dentry_operations aufs_dop;
1facf9fc 7073+struct au_branch;
7074+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
7075+ struct au_branch *br, struct nameidata *nd);
7076+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
7077+ struct au_branch *br);
7078+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7079+ struct dentry *h_parent, struct au_branch *br);
7080+
7081+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
7082+ struct nameidata *nd);
7083+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
027c5e7a 7084+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 7085+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
7086+
7087+/* dinfo.c */
4a4d8108 7088+void au_di_init_once(void *_di);
027c5e7a
AM
7089+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
7090+void au_di_free(struct au_dinfo *dinfo);
7091+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
7092+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
7093+int au_di_init(struct dentry *dentry);
7094+void au_di_fin(struct dentry *dentry);
1facf9fc 7095+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
7096+
7097+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
7098+void di_read_unlock(struct dentry *d, int flags);
7099+void di_downgrade_lock(struct dentry *d, int flags);
7100+void di_write_lock(struct dentry *d, unsigned int lsc);
7101+void di_write_unlock(struct dentry *d);
7102+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
7103+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
7104+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
7105+
7106+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 7107+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 7108+aufs_bindex_t au_dbtail(struct dentry *dentry);
7109+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
7110+
7111+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7112+ struct dentry *h_dentry);
027c5e7a
AM
7113+int au_digen_test(struct dentry *dentry, unsigned int sigen);
7114+int au_dbrange_test(struct dentry *dentry);
1facf9fc 7115+void au_update_digen(struct dentry *dentry);
7116+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
7117+void au_update_dbstart(struct dentry *dentry);
7118+void au_update_dbend(struct dentry *dentry);
7119+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
7120+
7121+/* ---------------------------------------------------------------------- */
7122+
7123+static inline struct au_dinfo *au_di(struct dentry *dentry)
7124+{
7125+ return dentry->d_fsdata;
7126+}
7127+
7128+/* ---------------------------------------------------------------------- */
7129+
7130+/* lock subclass for dinfo */
7131+enum {
7132+ AuLsc_DI_CHILD, /* child first */
4a4d8108 7133+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 7134+ AuLsc_DI_CHILD3, /* copyup dirs */
7135+ AuLsc_DI_PARENT,
7136+ AuLsc_DI_PARENT2,
027c5e7a
AM
7137+ AuLsc_DI_PARENT3,
7138+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 7139+};
7140+
7141+/*
7142+ * di_read_lock_child, di_write_lock_child,
7143+ * di_read_lock_child2, di_write_lock_child2,
7144+ * di_read_lock_child3, di_write_lock_child3,
7145+ * di_read_lock_parent, di_write_lock_parent,
7146+ * di_read_lock_parent2, di_write_lock_parent2,
7147+ * di_read_lock_parent3, di_write_lock_parent3,
7148+ */
7149+#define AuReadLockFunc(name, lsc) \
7150+static inline void di_read_lock_##name(struct dentry *d, int flags) \
7151+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
7152+
7153+#define AuWriteLockFunc(name, lsc) \
7154+static inline void di_write_lock_##name(struct dentry *d) \
7155+{ di_write_lock(d, AuLsc_DI_##lsc); }
7156+
7157+#define AuRWLockFuncs(name, lsc) \
7158+ AuReadLockFunc(name, lsc) \
7159+ AuWriteLockFunc(name, lsc)
7160+
7161+AuRWLockFuncs(child, CHILD);
7162+AuRWLockFuncs(child2, CHILD2);
7163+AuRWLockFuncs(child3, CHILD3);
7164+AuRWLockFuncs(parent, PARENT);
7165+AuRWLockFuncs(parent2, PARENT2);
7166+AuRWLockFuncs(parent3, PARENT3);
7167+
7168+#undef AuReadLockFunc
7169+#undef AuWriteLockFunc
7170+#undef AuRWLockFuncs
7171+
7172+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
7173+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
7174+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 7175+
7176+/* ---------------------------------------------------------------------- */
7177+
7178+/* todo: memory barrier? */
7179+static inline unsigned int au_digen(struct dentry *d)
7180+{
7181+ return atomic_read(&au_di(d)->di_generation);
7182+}
7183+
7184+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
7185+{
7186+ hdentry->hd_dentry = NULL;
7187+}
7188+
7189+static inline void au_hdput(struct au_hdentry *hd)
7190+{
4a4d8108
AM
7191+ if (hd)
7192+ dput(hd->hd_dentry);
1facf9fc 7193+}
7194+
7195+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
7196+{
1308ab2a 7197+ DiMustAnyLock(dentry);
1facf9fc 7198+ return au_di(dentry)->di_bstart;
7199+}
7200+
7201+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
7202+{
1308ab2a 7203+ DiMustAnyLock(dentry);
1facf9fc 7204+ return au_di(dentry)->di_bend;
7205+}
7206+
7207+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
7208+{
1308ab2a 7209+ DiMustAnyLock(dentry);
1facf9fc 7210+ return au_di(dentry)->di_bwh;
7211+}
7212+
7213+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
7214+{
1308ab2a 7215+ DiMustAnyLock(dentry);
1facf9fc 7216+ return au_di(dentry)->di_bdiropq;
7217+}
7218+
7219+/* todo: hard/soft set? */
7220+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
7221+{
1308ab2a 7222+ DiMustWriteLock(dentry);
1facf9fc 7223+ au_di(dentry)->di_bstart = bindex;
7224+}
7225+
7226+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
7227+{
1308ab2a 7228+ DiMustWriteLock(dentry);
1facf9fc 7229+ au_di(dentry)->di_bend = bindex;
7230+}
7231+
7232+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
7233+{
1308ab2a 7234+ DiMustWriteLock(dentry);
1facf9fc 7235+ /* dbwh can be outside of bstart - bend range */
7236+ au_di(dentry)->di_bwh = bindex;
7237+}
7238+
7239+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
7240+{
1308ab2a 7241+ DiMustWriteLock(dentry);
1facf9fc 7242+ au_di(dentry)->di_bdiropq = bindex;
7243+}
7244+
7245+/* ---------------------------------------------------------------------- */
7246+
4a4d8108 7247+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 7248+static inline void au_digen_dec(struct dentry *d)
7249+{
e49829fe 7250+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 7251+}
7252+
4a4d8108 7253+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 7254+{
7255+ dentry->d_fsdata = NULL;
7256+}
7257+#else
4a4d8108
AM
7258+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
7259+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 7260+
7261+#endif /* __KERNEL__ */
7262+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
7263diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
7264--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 7265+++ linux/fs/aufs/dinfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7266@@ -0,0 +1,543 @@
1facf9fc 7267+/*
027c5e7a 7268+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7269+ *
7270+ * This program, aufs is free software; you can redistribute it and/or modify
7271+ * it under the terms of the GNU General Public License as published by
7272+ * the Free Software Foundation; either version 2 of the License, or
7273+ * (at your option) any later version.
dece6358
AM
7274+ *
7275+ * This program is distributed in the hope that it will be useful,
7276+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7277+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7278+ * GNU General Public License for more details.
7279+ *
7280+ * You should have received a copy of the GNU General Public License
7281+ * along with this program; if not, write to the Free Software
7282+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7283+ */
7284+
7285+/*
7286+ * dentry private data
7287+ */
7288+
7289+#include "aufs.h"
7290+
e49829fe 7291+void au_di_init_once(void *_dinfo)
4a4d8108 7292+{
e49829fe
JR
7293+ struct au_dinfo *dinfo = _dinfo;
7294+ static struct lock_class_key aufs_di;
4a4d8108 7295+
e49829fe
JR
7296+ au_rw_init(&dinfo->di_rwsem);
7297+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
7298+}
7299+
027c5e7a 7300+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 7301+{
7302+ struct au_dinfo *dinfo;
027c5e7a 7303+ int nbr, i;
1facf9fc 7304+
7305+ dinfo = au_cache_alloc_dinfo();
7306+ if (unlikely(!dinfo))
7307+ goto out;
7308+
1facf9fc 7309+ nbr = au_sbend(sb) + 1;
7310+ if (nbr <= 0)
7311+ nbr = 1;
7312+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
7313+ if (dinfo->di_hdentry) {
7314+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
7315+ dinfo->di_bstart = -1;
7316+ dinfo->di_bend = -1;
7317+ dinfo->di_bwh = -1;
7318+ dinfo->di_bdiropq = -1;
7319+ for (i = 0; i < nbr; i++)
7320+ dinfo->di_hdentry[i].hd_id = -1;
7321+ goto out;
7322+ }
1facf9fc 7323+
1facf9fc 7324+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
7325+ dinfo = NULL;
7326+
4f0767ce 7327+out:
027c5e7a 7328+ return dinfo;
1facf9fc 7329+}
7330+
027c5e7a 7331+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 7332+{
4a4d8108
AM
7333+ struct au_hdentry *p;
7334+ aufs_bindex_t bend, bindex;
7335+
7336+ /* dentry may not be revalidated */
027c5e7a 7337+ bindex = dinfo->di_bstart;
4a4d8108 7338+ if (bindex >= 0) {
027c5e7a
AM
7339+ bend = dinfo->di_bend;
7340+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
7341+ while (bindex++ <= bend)
7342+ au_hdput(p++);
7343+ }
027c5e7a
AM
7344+ kfree(dinfo->di_hdentry);
7345+ au_cache_free_dinfo(dinfo);
7346+}
7347+
7348+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
7349+{
7350+ struct au_hdentry *p;
7351+ aufs_bindex_t bi;
7352+
7353+ AuRwMustWriteLock(&a->di_rwsem);
7354+ AuRwMustWriteLock(&b->di_rwsem);
7355+
7356+#define DiSwap(v, name) \
7357+ do { \
7358+ v = a->di_##name; \
7359+ a->di_##name = b->di_##name; \
7360+ b->di_##name = v; \
7361+ } while (0)
7362+
7363+ DiSwap(p, hdentry);
7364+ DiSwap(bi, bstart);
7365+ DiSwap(bi, bend);
7366+ DiSwap(bi, bwh);
7367+ DiSwap(bi, bdiropq);
7368+ /* smp_mb(); */
7369+
7370+#undef DiSwap
7371+}
7372+
7373+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
7374+{
7375+ AuRwMustWriteLock(&dst->di_rwsem);
7376+ AuRwMustWriteLock(&src->di_rwsem);
7377+
7378+ dst->di_bstart = src->di_bstart;
7379+ dst->di_bend = src->di_bend;
7380+ dst->di_bwh = src->di_bwh;
7381+ dst->di_bdiropq = src->di_bdiropq;
7382+ /* smp_mb(); */
7383+}
7384+
7385+int au_di_init(struct dentry *dentry)
7386+{
7387+ int err;
7388+ struct super_block *sb;
7389+ struct au_dinfo *dinfo;
7390+
7391+ err = 0;
7392+ sb = dentry->d_sb;
7393+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
7394+ if (dinfo) {
7395+ atomic_set(&dinfo->di_generation, au_sigen(sb));
7396+ /* smp_mb(); */ /* atomic_set */
7397+ dentry->d_fsdata = dinfo;
7398+ } else
7399+ err = -ENOMEM;
7400+
7401+ return err;
7402+}
7403+
7404+void au_di_fin(struct dentry *dentry)
7405+{
7406+ struct au_dinfo *dinfo;
7407+
7408+ dinfo = au_di(dentry);
7409+ AuRwDestroy(&dinfo->di_rwsem);
7410+ au_di_free(dinfo);
4a4d8108
AM
7411+}
7412+
1facf9fc 7413+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
7414+{
7415+ int err, sz;
7416+ struct au_hdentry *hdp;
7417+
1308ab2a 7418+ AuRwMustWriteLock(&dinfo->di_rwsem);
7419+
1facf9fc 7420+ err = -ENOMEM;
7421+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
7422+ if (!sz)
7423+ sz = sizeof(*hdp);
7424+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
7425+ if (hdp) {
7426+ dinfo->di_hdentry = hdp;
7427+ err = 0;
7428+ }
7429+
7430+ return err;
7431+}
7432+
7433+/* ---------------------------------------------------------------------- */
7434+
7435+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
7436+{
7437+ switch (lsc) {
7438+ case AuLsc_DI_CHILD:
7439+ ii_write_lock_child(inode);
7440+ break;
7441+ case AuLsc_DI_CHILD2:
7442+ ii_write_lock_child2(inode);
7443+ break;
7444+ case AuLsc_DI_CHILD3:
7445+ ii_write_lock_child3(inode);
7446+ break;
7447+ case AuLsc_DI_PARENT:
7448+ ii_write_lock_parent(inode);
7449+ break;
7450+ case AuLsc_DI_PARENT2:
7451+ ii_write_lock_parent2(inode);
7452+ break;
7453+ case AuLsc_DI_PARENT3:
7454+ ii_write_lock_parent3(inode);
7455+ break;
7456+ default:
7457+ BUG();
7458+ }
7459+}
7460+
7461+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
7462+{
7463+ switch (lsc) {
7464+ case AuLsc_DI_CHILD:
7465+ ii_read_lock_child(inode);
7466+ break;
7467+ case AuLsc_DI_CHILD2:
7468+ ii_read_lock_child2(inode);
7469+ break;
7470+ case AuLsc_DI_CHILD3:
7471+ ii_read_lock_child3(inode);
7472+ break;
7473+ case AuLsc_DI_PARENT:
7474+ ii_read_lock_parent(inode);
7475+ break;
7476+ case AuLsc_DI_PARENT2:
7477+ ii_read_lock_parent2(inode);
7478+ break;
7479+ case AuLsc_DI_PARENT3:
7480+ ii_read_lock_parent3(inode);
7481+ break;
7482+ default:
7483+ BUG();
7484+ }
7485+}
7486+
7487+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
7488+{
dece6358 7489+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7490+ if (d->d_inode) {
7491+ if (au_ftest_lock(flags, IW))
7492+ do_ii_write_lock(d->d_inode, lsc);
7493+ else if (au_ftest_lock(flags, IR))
7494+ do_ii_read_lock(d->d_inode, lsc);
7495+ }
7496+}
7497+
7498+void di_read_unlock(struct dentry *d, int flags)
7499+{
7500+ if (d->d_inode) {
027c5e7a
AM
7501+ if (au_ftest_lock(flags, IW)) {
7502+ au_dbg_verify_dinode(d);
1facf9fc 7503+ ii_write_unlock(d->d_inode);
027c5e7a
AM
7504+ } else if (au_ftest_lock(flags, IR)) {
7505+ au_dbg_verify_dinode(d);
1facf9fc 7506+ ii_read_unlock(d->d_inode);
027c5e7a 7507+ }
1facf9fc 7508+ }
dece6358 7509+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 7510+}
7511+
7512+void di_downgrade_lock(struct dentry *d, int flags)
7513+{
1facf9fc 7514+ if (d->d_inode && au_ftest_lock(flags, IR))
7515+ ii_downgrade_lock(d->d_inode);
dece6358 7516+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 7517+}
7518+
7519+void di_write_lock(struct dentry *d, unsigned int lsc)
7520+{
dece6358 7521+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7522+ if (d->d_inode)
7523+ do_ii_write_lock(d->d_inode, lsc);
7524+}
7525+
7526+void di_write_unlock(struct dentry *d)
7527+{
027c5e7a 7528+ au_dbg_verify_dinode(d);
1facf9fc 7529+ if (d->d_inode)
7530+ ii_write_unlock(d->d_inode);
dece6358 7531+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 7532+}
7533+
7534+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
7535+{
7536+ AuDebugOn(d1 == d2
7537+ || d1->d_inode == d2->d_inode
7538+ || d1->d_sb != d2->d_sb);
7539+
7540+ if (isdir && au_test_subdir(d1, d2)) {
7541+ di_write_lock_child(d1);
7542+ di_write_lock_child2(d2);
7543+ } else {
7544+ /* there should be no races */
7545+ di_write_lock_child(d2);
7546+ di_write_lock_child2(d1);
7547+ }
7548+}
7549+
7550+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
7551+{
7552+ AuDebugOn(d1 == d2
7553+ || d1->d_inode == d2->d_inode
7554+ || d1->d_sb != d2->d_sb);
7555+
7556+ if (isdir && au_test_subdir(d1, d2)) {
7557+ di_write_lock_parent(d1);
7558+ di_write_lock_parent2(d2);
7559+ } else {
7560+ /* there should be no races */
7561+ di_write_lock_parent(d2);
7562+ di_write_lock_parent2(d1);
7563+ }
7564+}
7565+
7566+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
7567+{
7568+ di_write_unlock(d1);
7569+ if (d1->d_inode == d2->d_inode)
dece6358 7570+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 7571+ else
7572+ di_write_unlock(d2);
7573+}
7574+
7575+/* ---------------------------------------------------------------------- */
7576+
7577+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
7578+{
7579+ struct dentry *d;
7580+
1308ab2a 7581+ DiMustAnyLock(dentry);
7582+
1facf9fc 7583+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7584+ return NULL;
7585+ AuDebugOn(bindex < 0);
7586+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
027c5e7a 7587+ AuDebugOn(d && d->d_count <= 0);
1facf9fc 7588+ return d;
7589+}
7590+
2cbb1c4b
JR
7591+/*
7592+ * extended version of au_h_dptr().
7593+ * returns a hashed and positive h_dentry in bindex, NULL, or error.
7594+ */
7595+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
7596+{
7597+ struct dentry *h_dentry;
7598+ struct inode *inode, *h_inode;
7599+
7600+ inode = dentry->d_inode;
7601+ AuDebugOn(!inode);
7602+
7603+ h_dentry = NULL;
7604+ if (au_dbstart(dentry) <= bindex
7605+ && bindex <= au_dbend(dentry))
7606+ h_dentry = au_h_dptr(dentry, bindex);
7607+ if (h_dentry && !au_d_hashed_positive(h_dentry)) {
7608+ dget(h_dentry);
7609+ goto out; /* success */
7610+ }
7611+
7612+ AuDebugOn(bindex < au_ibstart(inode));
7613+ AuDebugOn(au_ibend(inode) < bindex);
7614+ h_inode = au_h_iptr(inode, bindex);
7615+ h_dentry = d_find_alias(h_inode);
7616+ if (h_dentry) {
7617+ if (!IS_ERR(h_dentry)) {
7618+ if (!au_d_hashed_positive(h_dentry))
7619+ goto out; /* success */
7620+ dput(h_dentry);
7621+ } else
7622+ goto out;
7623+ }
7624+
7625+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
7626+ h_dentry = au_plink_lkup(inode, bindex);
7627+ AuDebugOn(!h_dentry);
7628+ if (!IS_ERR(h_dentry)) {
7629+ if (!au_d_hashed_positive(h_dentry))
7630+ goto out; /* success */
7631+ dput(h_dentry);
7632+ h_dentry = NULL;
7633+ }
7634+ }
7635+
7636+out:
7637+ AuDbgDentry(h_dentry);
7638+ return h_dentry;
7639+}
7640+
1facf9fc 7641+aufs_bindex_t au_dbtail(struct dentry *dentry)
7642+{
7643+ aufs_bindex_t bend, bwh;
7644+
7645+ bend = au_dbend(dentry);
7646+ if (0 <= bend) {
7647+ bwh = au_dbwh(dentry);
7648+ if (!bwh)
7649+ return bwh;
7650+ if (0 < bwh && bwh < bend)
7651+ return bwh - 1;
7652+ }
7653+ return bend;
7654+}
7655+
7656+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
7657+{
7658+ aufs_bindex_t bend, bopq;
7659+
7660+ bend = au_dbtail(dentry);
7661+ if (0 <= bend) {
7662+ bopq = au_dbdiropq(dentry);
7663+ if (0 <= bopq && bopq < bend)
7664+ bend = bopq;
7665+ }
7666+ return bend;
7667+}
7668+
7669+/* ---------------------------------------------------------------------- */
7670+
7671+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7672+ struct dentry *h_dentry)
7673+{
7674+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 7675+ struct au_branch *br;
1facf9fc 7676+
1308ab2a 7677+ DiMustWriteLock(dentry);
7678+
4a4d8108 7679+ au_hdput(hd);
1facf9fc 7680+ hd->hd_dentry = h_dentry;
027c5e7a
AM
7681+ if (h_dentry) {
7682+ br = au_sbr(dentry->d_sb, bindex);
7683+ hd->hd_id = br->br_id;
7684+ }
7685+}
7686+
7687+int au_dbrange_test(struct dentry *dentry)
7688+{
7689+ int err;
7690+ aufs_bindex_t bstart, bend;
7691+
7692+ err = 0;
7693+ bstart = au_dbstart(dentry);
7694+ bend = au_dbend(dentry);
7695+ if (bstart >= 0)
7696+ AuDebugOn(bend < 0 && bstart > bend);
7697+ else {
7698+ err = -EIO;
7699+ AuDebugOn(bend >= 0);
7700+ }
7701+
7702+ return err;
7703+}
7704+
7705+int au_digen_test(struct dentry *dentry, unsigned int sigen)
7706+{
7707+ int err;
7708+
7709+ err = 0;
7710+ if (unlikely(au_digen(dentry) != sigen
7711+ || au_iigen_test(dentry->d_inode, sigen)))
7712+ err = -EIO;
7713+
7714+ return err;
1facf9fc 7715+}
7716+
7717+void au_update_digen(struct dentry *dentry)
7718+{
7719+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
7720+ /* smp_mb(); */ /* atomic_set */
7721+}
7722+
7723+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
7724+{
7725+ struct au_dinfo *dinfo;
7726+ struct dentry *h_d;
4a4d8108 7727+ struct au_hdentry *hdp;
1facf9fc 7728+
1308ab2a 7729+ DiMustWriteLock(dentry);
7730+
1facf9fc 7731+ dinfo = au_di(dentry);
7732+ if (!dinfo || dinfo->di_bstart < 0)
7733+ return;
7734+
4a4d8108 7735+ hdp = dinfo->di_hdentry;
1facf9fc 7736+ if (do_put_zero) {
7737+ aufs_bindex_t bindex, bend;
7738+
7739+ bend = dinfo->di_bend;
7740+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 7741+ h_d = hdp[0 + bindex].hd_dentry;
1facf9fc 7742+ if (h_d && !h_d->d_inode)
7743+ au_set_h_dptr(dentry, bindex, NULL);
7744+ }
7745+ }
7746+
7747+ dinfo->di_bstart = -1;
7748+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 7749+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 7750+ break;
7751+ if (dinfo->di_bstart > dinfo->di_bend) {
7752+ dinfo->di_bstart = -1;
7753+ dinfo->di_bend = -1;
7754+ return;
7755+ }
7756+
7757+ dinfo->di_bend++;
7758+ while (0 <= --dinfo->di_bend)
4a4d8108 7759+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 7760+ break;
7761+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
7762+}
7763+
7764+void au_update_dbstart(struct dentry *dentry)
7765+{
7766+ aufs_bindex_t bindex, bend;
7767+ struct dentry *h_dentry;
7768+
7769+ bend = au_dbend(dentry);
7770+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
7771+ h_dentry = au_h_dptr(dentry, bindex);
7772+ if (!h_dentry)
7773+ continue;
7774+ if (h_dentry->d_inode) {
7775+ au_set_dbstart(dentry, bindex);
7776+ return;
7777+ }
7778+ au_set_h_dptr(dentry, bindex, NULL);
7779+ }
7780+}
7781+
7782+void au_update_dbend(struct dentry *dentry)
7783+{
7784+ aufs_bindex_t bindex, bstart;
7785+ struct dentry *h_dentry;
7786+
7787+ bstart = au_dbstart(dentry);
7f207e10 7788+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 7789+ h_dentry = au_h_dptr(dentry, bindex);
7790+ if (!h_dentry)
7791+ continue;
7792+ if (h_dentry->d_inode) {
7793+ au_set_dbend(dentry, bindex);
7794+ return;
7795+ }
7796+ au_set_h_dptr(dentry, bindex, NULL);
7797+ }
7798+}
7799+
7800+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
7801+{
7802+ aufs_bindex_t bindex, bend;
7803+
7804+ bend = au_dbend(dentry);
7805+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
7806+ if (au_h_dptr(dentry, bindex) == h_dentry)
7807+ return bindex;
7808+ return -1;
7809+}
7f207e10
AM
7810diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
7811--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
7812+++ linux/fs/aufs/dir.c 2011-08-24 13:30:24.731313534 +0200
7813@@ -0,0 +1,624 @@
1facf9fc 7814+/*
027c5e7a 7815+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7816+ *
7817+ * This program, aufs is free software; you can redistribute it and/or modify
7818+ * it under the terms of the GNU General Public License as published by
7819+ * the Free Software Foundation; either version 2 of the License, or
7820+ * (at your option) any later version.
dece6358
AM
7821+ *
7822+ * This program is distributed in the hope that it will be useful,
7823+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7824+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7825+ * GNU General Public License for more details.
7826+ *
7827+ * You should have received a copy of the GNU General Public License
7828+ * along with this program; if not, write to the Free Software
7829+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7830+ */
7831+
7832+/*
7833+ * directory operations
7834+ */
7835+
dece6358 7836+#include <linux/file.h>
1facf9fc 7837+#include <linux/fs_stack.h>
7838+#include "aufs.h"
7839+
7840+void au_add_nlink(struct inode *dir, struct inode *h_dir)
7841+{
7842+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7843+
7844+ dir->i_nlink += h_dir->i_nlink - 2;
7845+ if (h_dir->i_nlink < 2)
7846+ dir->i_nlink += 2;
7847+}
7848+
7849+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
7850+{
7851+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7852+
7853+ dir->i_nlink -= h_dir->i_nlink - 2;
7854+ if (h_dir->i_nlink < 2)
7855+ dir->i_nlink -= 2;
7856+}
7857+
1308ab2a 7858+loff_t au_dir_size(struct file *file, struct dentry *dentry)
7859+{
7860+ loff_t sz;
7861+ aufs_bindex_t bindex, bend;
7862+ struct file *h_file;
7863+ struct dentry *h_dentry;
7864+
7865+ sz = 0;
7866+ if (file) {
7867+ AuDebugOn(!file->f_dentry);
7868+ AuDebugOn(!file->f_dentry->d_inode);
7869+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
7870+
4a4d8108 7871+ bend = au_fbend_dir(file);
1308ab2a 7872+ for (bindex = au_fbstart(file);
7873+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7874+ bindex++) {
4a4d8108 7875+ h_file = au_hf_dir(file, bindex);
1308ab2a 7876+ if (h_file
7877+ && h_file->f_dentry
7878+ && h_file->f_dentry->d_inode)
7879+ sz += i_size_read(h_file->f_dentry->d_inode);
7880+ }
7881+ } else {
7882+ AuDebugOn(!dentry);
7883+ AuDebugOn(!dentry->d_inode);
7884+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
7885+
7886+ bend = au_dbtaildir(dentry);
7887+ for (bindex = au_dbstart(dentry);
7888+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7889+ bindex++) {
7890+ h_dentry = au_h_dptr(dentry, bindex);
7891+ if (h_dentry && h_dentry->d_inode)
7892+ sz += i_size_read(h_dentry->d_inode);
7893+ }
7894+ }
7895+ if (sz < KMALLOC_MAX_SIZE)
7896+ sz = roundup_pow_of_two(sz);
7897+ if (sz > KMALLOC_MAX_SIZE)
7898+ sz = KMALLOC_MAX_SIZE;
7899+ else if (sz < NAME_MAX) {
7900+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
7901+ sz = AUFS_RDBLK_DEF;
7902+ }
7903+ return sz;
7904+}
7905+
1facf9fc 7906+/* ---------------------------------------------------------------------- */
7907+
7908+static int reopen_dir(struct file *file)
7909+{
7910+ int err;
7911+ unsigned int flags;
7912+ aufs_bindex_t bindex, btail, bstart;
7913+ struct dentry *dentry, *h_dentry;
7914+ struct file *h_file;
7915+
7916+ /* open all lower dirs */
7917+ dentry = file->f_dentry;
7918+ bstart = au_dbstart(dentry);
7919+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
7920+ au_set_h_fptr(file, bindex, NULL);
7921+ au_set_fbstart(file, bstart);
7922+
7923+ btail = au_dbtaildir(dentry);
4a4d8108 7924+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 7925+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 7926+ au_set_fbend_dir(file, btail);
1facf9fc 7927+
4a4d8108 7928+ flags = vfsub_file_flags(file);
1facf9fc 7929+ for (bindex = bstart; bindex <= btail; bindex++) {
7930+ h_dentry = au_h_dptr(dentry, bindex);
7931+ if (!h_dentry)
7932+ continue;
4a4d8108 7933+ h_file = au_hf_dir(file, bindex);
1facf9fc 7934+ if (h_file)
7935+ continue;
7936+
7937+ h_file = au_h_open(dentry, bindex, flags, file);
7938+ err = PTR_ERR(h_file);
7939+ if (IS_ERR(h_file))
7940+ goto out; /* close all? */
7941+ au_set_h_fptr(file, bindex, h_file);
7942+ }
7943+ au_update_figen(file);
7944+ /* todo: necessary? */
7945+ /* file->f_ra = h_file->f_ra; */
7946+ err = 0;
7947+
4f0767ce 7948+out:
1facf9fc 7949+ return err;
7950+}
7951+
7952+static int do_open_dir(struct file *file, int flags)
7953+{
7954+ int err;
7955+ aufs_bindex_t bindex, btail;
7956+ struct dentry *dentry, *h_dentry;
7957+ struct file *h_file;
7958+
1308ab2a 7959+ FiMustWriteLock(file);
7960+
1facf9fc 7961+ dentry = file->f_dentry;
027c5e7a
AM
7962+ err = au_alive_dir(dentry);
7963+ if (unlikely(err))
7964+ goto out;
7965+
1facf9fc 7966+ file->f_version = dentry->d_inode->i_version;
7967+ bindex = au_dbstart(dentry);
7968+ au_set_fbstart(file, bindex);
7969+ btail = au_dbtaildir(dentry);
4a4d8108 7970+ au_set_fbend_dir(file, btail);
1facf9fc 7971+ for (; !err && bindex <= btail; bindex++) {
7972+ h_dentry = au_h_dptr(dentry, bindex);
7973+ if (!h_dentry)
7974+ continue;
7975+
7976+ h_file = au_h_open(dentry, bindex, flags, file);
7977+ if (IS_ERR(h_file)) {
7978+ err = PTR_ERR(h_file);
7979+ break;
7980+ }
7981+ au_set_h_fptr(file, bindex, h_file);
7982+ }
7983+ au_update_figen(file);
7984+ /* todo: necessary? */
7985+ /* file->f_ra = h_file->f_ra; */
7986+ if (!err)
7987+ return 0; /* success */
7988+
7989+ /* close all */
7990+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
7991+ au_set_h_fptr(file, bindex, NULL);
7992+ au_set_fbstart(file, -1);
4a4d8108
AM
7993+ au_set_fbend_dir(file, -1);
7994+
027c5e7a 7995+out:
1facf9fc 7996+ return err;
7997+}
7998+
7999+static int aufs_open_dir(struct inode *inode __maybe_unused,
8000+ struct file *file)
8001+{
4a4d8108
AM
8002+ int err;
8003+ struct super_block *sb;
8004+ struct au_fidir *fidir;
8005+
8006+ err = -ENOMEM;
8007+ sb = file->f_dentry->d_sb;
8008+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 8009+ fidir = au_fidir_alloc(sb);
4a4d8108
AM
8010+ if (fidir) {
8011+ err = au_do_open(file, do_open_dir, fidir);
8012+ if (unlikely(err))
8013+ kfree(fidir);
8014+ }
8015+ si_read_unlock(sb);
8016+ return err;
1facf9fc 8017+}
8018+
8019+static int aufs_release_dir(struct inode *inode __maybe_unused,
8020+ struct file *file)
8021+{
8022+ struct au_vdir *vdir_cache;
4a4d8108
AM
8023+ struct au_finfo *finfo;
8024+ struct au_fidir *fidir;
8025+ aufs_bindex_t bindex, bend;
1facf9fc 8026+
4a4d8108
AM
8027+ finfo = au_fi(file);
8028+ fidir = finfo->fi_hdir;
8029+ if (fidir) {
0c5527e5
AM
8030+ /* remove me from sb->s_files */
8031+ file_sb_list_del(file);
8032+
4a4d8108
AM
8033+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
8034+ if (vdir_cache)
8035+ au_vdir_free(vdir_cache);
8036+
8037+ bindex = finfo->fi_btop;
8038+ if (bindex >= 0) {
8039+ /*
8040+ * calls fput() instead of filp_close(),
8041+ * since no dnotify or lock for the lower file.
8042+ */
8043+ bend = fidir->fd_bbot;
8044+ for (; bindex <= bend; bindex++)
8045+ au_set_h_fptr(file, bindex, NULL);
8046+ }
8047+ kfree(fidir);
8048+ finfo->fi_hdir = NULL;
1facf9fc 8049+ }
1facf9fc 8050+ au_finfo_fin(file);
1facf9fc 8051+ return 0;
8052+}
8053+
8054+/* ---------------------------------------------------------------------- */
8055+
4a4d8108
AM
8056+static int au_do_flush_dir(struct file *file, fl_owner_t id)
8057+{
8058+ int err;
8059+ aufs_bindex_t bindex, bend;
8060+ struct file *h_file;
8061+
8062+ err = 0;
8063+ bend = au_fbend_dir(file);
8064+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8065+ h_file = au_hf_dir(file, bindex);
8066+ if (h_file)
8067+ err = vfsub_flush(h_file, id);
8068+ }
8069+ return err;
8070+}
8071+
8072+static int aufs_flush_dir(struct file *file, fl_owner_t id)
8073+{
8074+ return au_do_flush(file, id, au_do_flush_dir);
8075+}
8076+
8077+/* ---------------------------------------------------------------------- */
8078+
1facf9fc 8079+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
8080+{
8081+ int err;
8082+ aufs_bindex_t bend, bindex;
8083+ struct inode *inode;
8084+ struct super_block *sb;
8085+
8086+ err = 0;
8087+ sb = dentry->d_sb;
8088+ inode = dentry->d_inode;
8089+ IMustLock(inode);
8090+ bend = au_dbend(dentry);
8091+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
8092+ struct path h_path;
1facf9fc 8093+
8094+ if (au_test_ro(sb, bindex, inode))
8095+ continue;
8096+ h_path.dentry = au_h_dptr(dentry, bindex);
8097+ if (!h_path.dentry)
8098+ continue;
1facf9fc 8099+
1facf9fc 8100+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 8101+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 8102+ }
8103+
8104+ return err;
8105+}
8106+
8107+static int au_do_fsync_dir(struct file *file, int datasync)
8108+{
8109+ int err;
8110+ aufs_bindex_t bend, bindex;
8111+ struct file *h_file;
8112+ struct super_block *sb;
8113+ struct inode *inode;
1facf9fc 8114+
8115+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8116+ if (unlikely(err))
8117+ goto out;
8118+
8119+ sb = file->f_dentry->d_sb;
8120+ inode = file->f_dentry->d_inode;
4a4d8108 8121+ bend = au_fbend_dir(file);
1facf9fc 8122+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 8123+ h_file = au_hf_dir(file, bindex);
1facf9fc 8124+ if (!h_file || au_test_ro(sb, bindex, inode))
8125+ continue;
8126+
53392da6 8127+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 8128+ }
8129+
4f0767ce 8130+out:
1facf9fc 8131+ return err;
8132+}
8133+
8134+/*
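8135+ * @file must not be NULL: it is dereferenced unconditionally to get the dentry, so the "if (file)" tests below are always true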
8136+ */
b752ccd1 8137+static int aufs_fsync_dir(struct file *file, int datasync)
1facf9fc 8138+{
8139+ int err;
b752ccd1 8140+ struct dentry *dentry;
1facf9fc 8141+ struct super_block *sb;
8142+
b752ccd1 8143+ dentry = file->f_dentry;
1facf9fc 8144+ IMustLock(dentry->d_inode);
8145+
8146+ err = 0;
8147+ sb = dentry->d_sb;
8148+ si_noflush_read_lock(sb);
8149+ if (file)
8150+ err = au_do_fsync_dir(file, datasync);
8151+ else {
8152+ di_write_lock_child(dentry);
8153+ err = au_do_fsync_dir_no_file(dentry, datasync);
8154+ }
8155+ au_cpup_attr_timesizes(dentry->d_inode);
8156+ di_write_unlock(dentry);
8157+ if (file)
8158+ fi_write_unlock(file);
8159+
8160+ si_read_unlock(sb);
8161+ return err;
8162+}
8163+
8164+/* ---------------------------------------------------------------------- */
8165+
8166+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
8167+{
8168+ int err;
8169+ struct dentry *dentry;
8170+ struct inode *inode;
8171+ struct super_block *sb;
8172+
8173+ dentry = file->f_dentry;
8174+ inode = dentry->d_inode;
8175+ IMustLock(inode);
8176+
8177+ sb = dentry->d_sb;
8178+ si_read_lock(sb, AuLock_FLUSH);
8179+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8180+ if (unlikely(err))
8181+ goto out;
027c5e7a
AM
8182+ err = au_alive_dir(dentry);
8183+ if (!err)
8184+ err = au_vdir_init(file);
1facf9fc 8185+ di_downgrade_lock(dentry, AuLock_IR);
8186+ if (unlikely(err))
8187+ goto out_unlock;
8188+
b752ccd1 8189+ if (!au_test_nfsd()) {
1facf9fc 8190+ err = au_vdir_fill_de(file, dirent, filldir);
8191+ fsstack_copy_attr_atime(inode,
8192+ au_h_iptr(inode, au_ibstart(inode)));
8193+ } else {
8194+ /*
8195+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
8196+ * encode_fh() and others.
8197+ */
8198+ struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
8199+
8200+ di_read_unlock(dentry, AuLock_IR);
8201+ si_read_unlock(sb);
1facf9fc 8202+ err = au_vdir_fill_de(file, dirent, filldir);
1facf9fc 8203+ fsstack_copy_attr_atime(inode, h_inode);
8204+ fi_write_unlock(file);
8205+
8206+ AuTraceErr(err);
8207+ return err;
8208+ }
8209+
4f0767ce 8210+out_unlock:
1facf9fc 8211+ di_read_unlock(dentry, AuLock_IR);
8212+ fi_write_unlock(file);
4f0767ce 8213+out:
1facf9fc 8214+ si_read_unlock(sb);
8215+ return err;
8216+}
8217+
8218+/* ---------------------------------------------------------------------- */
8219+
8220+#define AuTestEmpty_WHONLY 1
dece6358
AM
8221+#define AuTestEmpty_CALLED (1 << 1)
8222+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 8223+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
8224+#define au_fset_testempty(flags, name) \
8225+ do { (flags) |= AuTestEmpty_##name; } while (0)
8226+#define au_fclr_testempty(flags, name) \
8227+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 8228+
dece6358
AM
8229+#ifndef CONFIG_AUFS_SHWH
8230+#undef AuTestEmpty_SHWH
8231+#define AuTestEmpty_SHWH 0
8232+#endif
8233+
1facf9fc 8234+struct test_empty_arg {
1308ab2a 8235+ struct au_nhash *whlist;
1facf9fc 8236+ unsigned int flags;
8237+ int err;
8238+ aufs_bindex_t bindex;
8239+};
8240+
8241+static int test_empty_cb(void *__arg, const char *__name, int namelen,
dece6358
AM
8242+ loff_t offset __maybe_unused, u64 ino,
8243+ unsigned int d_type)
1facf9fc 8244+{
8245+ struct test_empty_arg *arg = __arg;
8246+ char *name = (void *)__name;
8247+
8248+ arg->err = 0;
8249+ au_fset_testempty(arg->flags, CALLED);
8250+ /* smp_mb(); */
8251+ if (name[0] == '.'
8252+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
8253+ goto out; /* success */
8254+
8255+ if (namelen <= AUFS_WH_PFX_LEN
8256+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
8257+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 8258+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8259+ arg->err = -ENOTEMPTY;
8260+ goto out;
8261+ }
8262+
8263+ name += AUFS_WH_PFX_LEN;
8264+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 8265+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8266+ arg->err = au_nhash_append_wh
1308ab2a 8267+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 8268+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 8269+
4f0767ce 8270+out:
1facf9fc 8271+ /* smp_mb(); */
8272+ AuTraceErr(arg->err);
8273+ return arg->err;
8274+}
8275+
8276+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8277+{
8278+ int err;
8279+ struct file *h_file;
8280+
8281+ h_file = au_h_open(dentry, arg->bindex,
8282+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
8283+ /*file*/NULL);
8284+ err = PTR_ERR(h_file);
8285+ if (IS_ERR(h_file))
8286+ goto out;
8287+
8288+ err = 0;
8289+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
8290+ && !h_file->f_dentry->d_inode->i_nlink)
8291+ goto out_put;
8292+
8293+ do {
8294+ arg->err = 0;
8295+ au_fclr_testempty(arg->flags, CALLED);
8296+ /* smp_mb(); */
8297+ err = vfsub_readdir(h_file, test_empty_cb, arg);
8298+ if (err >= 0)
8299+ err = arg->err;
8300+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
8301+
4f0767ce 8302+out_put:
1facf9fc 8303+ fput(h_file);
8304+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 8305+out:
1facf9fc 8306+ return err;
8307+}
8308+
8309+struct do_test_empty_args {
8310+ int *errp;
8311+ struct dentry *dentry;
8312+ struct test_empty_arg *arg;
8313+};
8314+
8315+static void call_do_test_empty(void *args)
8316+{
8317+ struct do_test_empty_args *a = args;
8318+ *a->errp = do_test_empty(a->dentry, a->arg);
8319+}
8320+
8321+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8322+{
8323+ int err, wkq_err;
8324+ struct dentry *h_dentry;
8325+ struct inode *h_inode;
8326+
8327+ h_dentry = au_h_dptr(dentry, arg->bindex);
8328+ h_inode = h_dentry->d_inode;
53392da6 8329+ /* todo: i_mode changes anytime? */
1facf9fc 8330+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
8331+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
8332+ mutex_unlock(&h_inode->i_mutex);
8333+ if (!err)
8334+ err = do_test_empty(dentry, arg);
8335+ else {
8336+ struct do_test_empty_args args = {
8337+ .errp = &err,
8338+ .dentry = dentry,
8339+ .arg = arg
8340+ };
8341+ unsigned int flags = arg->flags;
8342+
8343+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
8344+ if (unlikely(wkq_err))
8345+ err = wkq_err;
8346+ arg->flags = flags;
8347+ }
8348+
8349+ return err;
8350+}
8351+
8352+int au_test_empty_lower(struct dentry *dentry)
8353+{
8354+ int err;
1308ab2a 8355+ unsigned int rdhash;
1facf9fc 8356+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 8357+ struct au_nhash whlist;
1facf9fc 8358+ struct test_empty_arg arg;
1facf9fc 8359+
dece6358
AM
8360+ SiMustAnyLock(dentry->d_sb);
8361+
1308ab2a 8362+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
8363+ if (!rdhash)
8364+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
8365+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 8366+ if (unlikely(err))
1facf9fc 8367+ goto out;
8368+
1facf9fc 8369+ arg.flags = 0;
1308ab2a 8370+ arg.whlist = &whlist;
8371+ bstart = au_dbstart(dentry);
dece6358
AM
8372+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8373+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8374+ arg.bindex = bstart;
8375+ err = do_test_empty(dentry, &arg);
8376+ if (unlikely(err))
8377+ goto out_whlist;
8378+
8379+ au_fset_testempty(arg.flags, WHONLY);
8380+ btail = au_dbtaildir(dentry);
8381+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
8382+ struct dentry *h_dentry;
8383+
8384+ h_dentry = au_h_dptr(dentry, bindex);
8385+ if (h_dentry && h_dentry->d_inode) {
8386+ arg.bindex = bindex;
8387+ err = do_test_empty(dentry, &arg);
8388+ }
8389+ }
8390+
4f0767ce 8391+out_whlist:
1308ab2a 8392+ au_nhash_wh_free(&whlist);
4f0767ce 8393+out:
1facf9fc 8394+ return err;
8395+}
8396+
8397+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
8398+{
8399+ int err;
8400+ struct test_empty_arg arg;
8401+ aufs_bindex_t bindex, btail;
8402+
8403+ err = 0;
1308ab2a 8404+ arg.whlist = whlist;
1facf9fc 8405+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
8406+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8407+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8408+ btail = au_dbtaildir(dentry);
8409+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
8410+ struct dentry *h_dentry;
8411+
8412+ h_dentry = au_h_dptr(dentry, bindex);
8413+ if (h_dentry && h_dentry->d_inode) {
8414+ arg.bindex = bindex;
8415+ err = sio_test_empty(dentry, &arg);
8416+ }
8417+ }
8418+
8419+ return err;
8420+}
8421+
8422+/* ---------------------------------------------------------------------- */
8423+
8424+const struct file_operations aufs_dir_fop = {
4a4d8108 8425+ .owner = THIS_MODULE,
027c5e7a 8426+ .llseek = default_llseek,
1facf9fc 8427+ .read = generic_read_dir,
8428+ .readdir = aufs_readdir,
8429+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
8430+#ifdef CONFIG_COMPAT
8431+ .compat_ioctl = aufs_compat_ioctl_dir,
8432+#endif
1facf9fc 8433+ .open = aufs_open_dir,
8434+ .release = aufs_release_dir,
4a4d8108 8435+ .flush = aufs_flush_dir,
1facf9fc 8436+ .fsync = aufs_fsync_dir
8437+};
7f207e10
AM
8438diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
8439--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8440+++ linux/fs/aufs/dir.h 2011-08-24 13:30:24.731313534 +0200
b752ccd1 8441@@ -0,0 +1,138 @@
1facf9fc 8442+/*
027c5e7a 8443+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 8444+ *
8445+ * This program, aufs is free software; you can redistribute it and/or modify
8446+ * it under the terms of the GNU General Public License as published by
8447+ * the Free Software Foundation; either version 2 of the License, or
8448+ * (at your option) any later version.
dece6358
AM
8449+ *
8450+ * This program is distributed in the hope that it will be useful,
8451+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8452+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8453+ * GNU General Public License for more details.
8454+ *
8455+ * You should have received a copy of the GNU General Public License
8456+ * along with this program; if not, write to the Free Software
8457+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8458+ */
8459+
8460+/*
8461+ * directory operations
8462+ */
8463+
8464+#ifndef __AUFS_DIR_H__
8465+#define __AUFS_DIR_H__
8466+
8467+#ifdef __KERNEL__
8468+
8469+#include <linux/fs.h>
8470+#include <linux/aufs_type.h>
8471+
8472+/* ---------------------------------------------------------------------- */
8473+
8474+/* need to be faster and smaller */
8475+
8476+struct au_nhash {
dece6358
AM
8477+ unsigned int nh_num;
8478+ struct hlist_head *nh_head;
1facf9fc 8479+};
8480+
8481+struct au_vdir_destr {
8482+ unsigned char len;
8483+ unsigned char name[0];
8484+} __packed;
8485+
8486+struct au_vdir_dehstr {
8487+ struct hlist_node hash;
8488+ struct au_vdir_destr *str;
4a4d8108 8489+} ____cacheline_aligned_in_smp;
1facf9fc 8490+
8491+struct au_vdir_de {
8492+ ino_t de_ino;
8493+ unsigned char de_type;
8494+ /* caution: packed */
8495+ struct au_vdir_destr de_str;
8496+} __packed;
8497+
8498+struct au_vdir_wh {
8499+ struct hlist_node wh_hash;
dece6358
AM
8500+#ifdef CONFIG_AUFS_SHWH
8501+ ino_t wh_ino;
1facf9fc 8502+ aufs_bindex_t wh_bindex;
dece6358
AM
8503+ unsigned char wh_type;
8504+#else
8505+ aufs_bindex_t wh_bindex;
8506+#endif
8507+ /* caution: packed */
1facf9fc 8508+ struct au_vdir_destr wh_str;
8509+} __packed;
8510+
8511+union au_vdir_deblk_p {
8512+ unsigned char *deblk;
8513+ struct au_vdir_de *de;
8514+};
8515+
8516+struct au_vdir {
8517+ unsigned char **vd_deblk;
8518+ unsigned long vd_nblk;
1facf9fc 8519+ struct {
8520+ unsigned long ul;
8521+ union au_vdir_deblk_p p;
8522+ } vd_last;
8523+
8524+ unsigned long vd_version;
dece6358 8525+ unsigned int vd_deblk_sz;
1facf9fc 8526+ unsigned long vd_jiffy;
4a4d8108 8527+} ____cacheline_aligned_in_smp;
1facf9fc 8528+
8529+/* ---------------------------------------------------------------------- */
8530+
8531+/* dir.c */
8532+extern const struct file_operations aufs_dir_fop;
8533+void au_add_nlink(struct inode *dir, struct inode *h_dir);
8534+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 8535+loff_t au_dir_size(struct file *file, struct dentry *dentry);
1facf9fc 8536+int au_test_empty_lower(struct dentry *dentry);
8537+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
8538+
8539+/* vdir.c */
1308ab2a 8540+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
8541+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
8542+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 8543+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
8544+ int limit);
dece6358
AM
8545+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
8546+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
8547+ unsigned int d_type, aufs_bindex_t bindex,
8548+ unsigned char shwh);
1facf9fc 8549+void au_vdir_free(struct au_vdir *vdir);
8550+int au_vdir_init(struct file *file);
8551+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
8552+
8553+/* ioctl.c */
8554+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
8555+
1308ab2a 8556+#ifdef CONFIG_AUFS_RDU
8557+/* rdu.c */
8558+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
8559+#ifdef CONFIG_COMPAT
8560+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8561+ unsigned long arg);
8562+#endif
1308ab2a 8563+#else
8564+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
8565+ unsigned long arg)
8566+{
8567+ return -EINVAL;
8568+}
b752ccd1
AM
8569+#ifdef CONFIG_COMPAT
8570+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8571+ unsigned long arg)
8572+{
8573+ return -EINVAL;
8574+}
8575+#endif
1308ab2a 8576+#endif
8577+
1facf9fc 8578+#endif /* __KERNEL__ */
8579+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
8580diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
8581--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 8582+++ linux/fs/aufs/dynop.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8583@@ -0,0 +1,377 @@
1facf9fc 8584+/*
027c5e7a 8585+ * Copyright (C) 2010-2011 Junjiro R. Okajima
1facf9fc 8586+ *
8587+ * This program, aufs is free software; you can redistribute it and/or modify
8588+ * it under the terms of the GNU General Public License as published by
8589+ * the Free Software Foundation; either version 2 of the License, or
8590+ * (at your option) any later version.
dece6358
AM
8591+ *
8592+ * This program is distributed in the hope that it will be useful,
8593+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8594+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8595+ * GNU General Public License for more details.
8596+ *
8597+ * You should have received a copy of the GNU General Public License
8598+ * along with this program; if not, write to the Free Software
8599+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8600+ */
8601+
8602+/*
4a4d8108 8603+ * dynamically customizable operations for regular files
1facf9fc 8604+ */
8605+
1facf9fc 8606+#include "aufs.h"
8607+
4a4d8108 8608+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 8609+
4a4d8108
AM
8610+/*
8611+ * How large will these lists be?
8612+ * Usually just a few elements, 20-30 at most for each, I guess.
8613+ */
8614+static struct au_splhead dynop[AuDyLast];
8615+
8616+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 8617+{
4a4d8108
AM
8618+ struct au_dykey *key, *tmp;
8619+ struct list_head *head;
1facf9fc 8620+
4a4d8108
AM
8621+ key = NULL;
8622+ head = &spl->head;
8623+ rcu_read_lock();
8624+ list_for_each_entry_rcu(tmp, head, dk_list)
8625+ if (tmp->dk_op.dy_hop == h_op) {
8626+ key = tmp;
8627+ kref_get(&key->dk_kref);
8628+ break;
8629+ }
8630+ rcu_read_unlock();
8631+
8632+ return key;
1facf9fc 8633+}
8634+
4a4d8108 8635+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 8636+{
4a4d8108
AM
8637+ struct au_dykey **k, *found;
8638+ const void *h_op = key->dk_op.dy_hop;
8639+ int i;
1facf9fc 8640+
4a4d8108
AM
8641+ found = NULL;
8642+ k = br->br_dykey;
8643+ for (i = 0; i < AuBrDynOp; i++)
8644+ if (k[i]) {
8645+ if (k[i]->dk_op.dy_hop == h_op) {
8646+ found = k[i];
8647+ break;
8648+ }
8649+ } else
8650+ break;
8651+ if (!found) {
8652+ spin_lock(&br->br_dykey_lock);
8653+ for (; i < AuBrDynOp; i++)
8654+ if (k[i]) {
8655+ if (k[i]->dk_op.dy_hop == h_op) {
8656+ found = k[i];
8657+ break;
8658+ }
8659+ } else {
8660+ k[i] = key;
8661+ break;
8662+ }
8663+ spin_unlock(&br->br_dykey_lock);
8664+ BUG_ON(i == AuBrDynOp); /* expand the array */
8665+ }
8666+
8667+ return found;
1facf9fc 8668+}
8669+
4a4d8108
AM
8670+/* kref_get() if @key is already added */
8671+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
8672+{
8673+ struct au_dykey *tmp, *found;
8674+ struct list_head *head;
8675+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 8676+
4a4d8108
AM
8677+ found = NULL;
8678+ head = &spl->head;
8679+ spin_lock(&spl->spin);
8680+ list_for_each_entry(tmp, head, dk_list)
8681+ if (tmp->dk_op.dy_hop == h_op) {
8682+ kref_get(&tmp->dk_kref);
8683+ found = tmp;
8684+ break;
8685+ }
8686+ if (!found)
8687+ list_add_rcu(&key->dk_list, head);
8688+ spin_unlock(&spl->spin);
1facf9fc 8689+
4a4d8108
AM
8690+ if (!found)
8691+ DyPrSym(key);
8692+ return found;
8693+}
8694+
8695+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 8696+{
4a4d8108
AM
8697+ struct au_dykey *key;
8698+
8699+ key = container_of(rcu, struct au_dykey, dk_rcu);
8700+ DyPrSym(key);
8701+ kfree(key);
1facf9fc 8702+}
8703+
4a4d8108
AM
8704+static void dy_free(struct kref *kref)
8705+{
8706+ struct au_dykey *key;
8707+ struct au_splhead *spl;
1facf9fc 8708+
4a4d8108
AM
8709+ key = container_of(kref, struct au_dykey, dk_kref);
8710+ spl = dynop + key->dk_op.dy_type;
8711+ au_spl_del_rcu(&key->dk_list, spl);
8712+ call_rcu(&key->dk_rcu, dy_free_rcu);
8713+}
8714+
8715+void au_dy_put(struct au_dykey *key)
1facf9fc 8716+{
4a4d8108
AM
8717+ kref_put(&key->dk_kref, dy_free);
8718+}
1facf9fc 8719+
4a4d8108
AM
8720+/* ---------------------------------------------------------------------- */
8721+
8722+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
8723+
8724+#ifdef CONFIG_AUFS_DEBUG
8725+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 8726+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
8727+#else
8728+#define DyDbgDeclare(cnt) do {} while (0)
8729+#define DyDbgInc(cnt) do {} while (0)
8730+#endif
8731+
8732+#define DySet(func, dst, src, h_op, h_sb) do { \
8733+ DyDbgInc(cnt); \
8734+ if (h_op->func) { \
8735+ if (src.func) \
8736+ dst.func = src.func; \
8737+ else \
8738+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
8739+ } \
8740+} while (0)
8741+
8742+#define DySetForce(func, dst, src) do { \
8743+ AuDebugOn(!src.func); \
8744+ DyDbgInc(cnt); \
8745+ dst.func = src.func; \
8746+} while (0)
8747+
8748+#define DySetAop(func) \
8749+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
8750+#define DySetAopForce(func) \
8751+ DySetForce(func, dyaop->da_op, aufs_aop)
8752+
8753+static void dy_aop(struct au_dykey *key, const void *h_op,
8754+ struct super_block *h_sb __maybe_unused)
8755+{
8756+ struct au_dyaop *dyaop = (void *)key;
8757+ const struct address_space_operations *h_aop = h_op;
8758+ DyDbgDeclare(cnt);
8759+
8760+ AuDbg("%s\n", au_sbtype(h_sb));
8761+
8762+ DySetAop(writepage);
8763+ DySetAopForce(readpage); /* force */
4a4d8108
AM
8764+ DySetAop(writepages);
8765+ DySetAop(set_page_dirty);
8766+ DySetAop(readpages);
8767+ DySetAop(write_begin);
8768+ DySetAop(write_end);
8769+ DySetAop(bmap);
8770+ DySetAop(invalidatepage);
8771+ DySetAop(releasepage);
027c5e7a 8772+ DySetAop(freepage);
4a4d8108
AM
8773+ /* these two will be changed according to an aufs mount option */
8774+ DySetAop(direct_IO);
8775+ DySetAop(get_xip_mem);
8776+ DySetAop(migratepage);
8777+ DySetAop(launder_page);
8778+ DySetAop(is_partially_uptodate);
8779+ DySetAop(error_remove_page);
8780+
8781+ DyDbgSize(cnt, *h_aop);
8782+ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
8783+}
8784+
4a4d8108
AM
8785+/* ---------------------------------------------------------------------- */
8786+
8787+static void dy_bug(struct kref *kref)
8788+{
8789+ BUG();
8790+}
8791+
8792+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
8793+{
8794+ struct au_dykey *key, *old;
8795+ struct au_splhead *spl;
b752ccd1 8796+ struct op {
4a4d8108 8797+ unsigned int sz;
b752ccd1
AM
8798+ void (*set)(struct au_dykey *key, const void *h_op,
8799+ struct super_block *h_sb __maybe_unused);
8800+ };
8801+ static const struct op a[] = {
4a4d8108
AM
8802+ [AuDy_AOP] = {
8803+ .sz = sizeof(struct au_dyaop),
b752ccd1 8804+ .set = dy_aop
4a4d8108 8805+ }
b752ccd1
AM
8806+ };
8807+ const struct op *p;
4a4d8108
AM
8808+
8809+ spl = dynop + op->dy_type;
8810+ key = dy_gfind_get(spl, op->dy_hop);
8811+ if (key)
8812+ goto out_add; /* success */
8813+
8814+ p = a + op->dy_type;
8815+ key = kzalloc(p->sz, GFP_NOFS);
8816+ if (unlikely(!key)) {
8817+ key = ERR_PTR(-ENOMEM);
8818+ goto out;
8819+ }
8820+
8821+ key->dk_op.dy_hop = op->dy_hop;
8822+ kref_init(&key->dk_kref);
b752ccd1 8823+ p->set(key, op->dy_hop, br->br_mnt->mnt_sb);
4a4d8108
AM
8824+ old = dy_gadd(spl, key);
8825+ if (old) {
8826+ kfree(key);
8827+ key = old;
8828+ }
8829+
8830+out_add:
8831+ old = dy_bradd(br, key);
8832+ if (old)
8833+ /* its ref-count should never be zero here */
8834+ kref_put(&key->dk_kref, dy_bug);
8835+out:
8836+ return key;
8837+}
8838+
8839+/* ---------------------------------------------------------------------- */
8840+/*
8841+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
8842+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
8843+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
8844+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
8845+ * See the aufs manual for details.
8846+ *
8847+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
8848+ * performance of fadvise() and madvise() may be affected.
8849+ */
8850+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
8851+{
8852+ if (!do_dx) {
8853+ dyaop->da_op.direct_IO = NULL;
8854+ dyaop->da_op.get_xip_mem = NULL;
8855+ } else {
8856+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
8857+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
8858+ if (!dyaop->da_get_xip_mem)
8859+ dyaop->da_op.get_xip_mem = NULL;
8860+ }
8861+}
8862+
8863+static struct au_dyaop *dy_aget(struct au_branch *br,
8864+ const struct address_space_operations *h_aop,
8865+ int do_dx)
8866+{
8867+ struct au_dyaop *dyaop;
8868+ struct au_dynop op;
8869+
8870+ op.dy_type = AuDy_AOP;
8871+ op.dy_haop = h_aop;
8872+ dyaop = (void *)dy_get(&op, br);
8873+ if (IS_ERR(dyaop))
8874+ goto out;
8875+ dy_adx(dyaop, do_dx);
8876+
8877+out:
8878+ return dyaop;
8879+}
8880+
8881+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
8882+ struct inode *h_inode)
8883+{
8884+ int err, do_dx;
8885+ struct super_block *sb;
8886+ struct au_branch *br;
8887+ struct au_dyaop *dyaop;
8888+
8889+ AuDebugOn(!S_ISREG(h_inode->i_mode));
8890+ IiMustWriteLock(inode);
8891+
8892+ sb = inode->i_sb;
8893+ br = au_sbr(sb, bindex);
8894+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
8895+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
8896+ err = PTR_ERR(dyaop);
8897+ if (IS_ERR(dyaop))
8898+ /* unnecessary to call dy_fput() */
8899+ goto out;
8900+
8901+ err = 0;
8902+ inode->i_mapping->a_ops = &dyaop->da_op;
8903+
8904+out:
8905+ return err;
8906+}
8907+
b752ccd1
AM
8908+/*
8909+ * Is it safe to replace a_ops while the inode/file is in operation?
8910+ * Yes, I hope so.
8911+ */
8912+int au_dy_irefresh(struct inode *inode)
8913+{
8914+ int err;
8915+ aufs_bindex_t bstart;
8916+ struct inode *h_inode;
8917+
8918+ err = 0;
8919+ if (S_ISREG(inode->i_mode)) {
8920+ bstart = au_ibstart(inode);
8921+ h_inode = au_h_iptr(inode, bstart);
8922+ err = au_dy_iaop(inode, bstart, h_inode);
8923+ }
8924+ return err;
8925+}
8926+
4a4d8108
AM
8927+void au_dy_arefresh(int do_dx)
8928+{
8929+ struct au_splhead *spl;
8930+ struct list_head *head;
8931+ struct au_dykey *key;
8932+
8933+ spl = dynop + AuDy_AOP;
8934+ head = &spl->head;
8935+ spin_lock(&spl->spin);
8936+ list_for_each_entry(key, head, dk_list)
8937+ dy_adx((void *)key, do_dx);
8938+ spin_unlock(&spl->spin);
8939+}
8940+
4a4d8108
AM
8941+/* ---------------------------------------------------------------------- */
8942+
8943+void __init au_dy_init(void)
8944+{
8945+ int i;
8946+
8947+ /* make sure that 'struct au_dykey *' can be any type */
8948+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
8949+
8950+ for (i = 0; i < AuDyLast; i++)
8951+ au_spl_init(dynop + i);
8952+}
8953+
8954+void au_dy_fin(void)
8955+{
8956+ int i;
8957+
8958+ for (i = 0; i < AuDyLast; i++)
8959+ WARN_ON(!list_empty(&dynop[i].head));
8960+}
7f207e10
AM
8961diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
8962--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8963+++ linux/fs/aufs/dynop.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8964@@ -0,0 +1,80 @@
4a4d8108 8965+/*
027c5e7a 8966+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
8967+ *
8968+ * This program, aufs is free software; you can redistribute it and/or modify
8969+ * it under the terms of the GNU General Public License as published by
8970+ * the Free Software Foundation; either version 2 of the License, or
8971+ * (at your option) any later version.
8972+ *
8973+ * This program is distributed in the hope that it will be useful,
8974+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8975+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8976+ * GNU General Public License for more details.
8977+ *
8978+ * You should have received a copy of the GNU General Public License
8979+ * along with this program; if not, write to the Free Software
8980+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8981+ */
8982+
8983+/*
8984+ * dynamically customizable operations (for regular files only)
8985+ */
8986+
8987+#ifndef __AUFS_DYNOP_H__
8988+#define __AUFS_DYNOP_H__
8989+
8990+#ifdef __KERNEL__
8991+
8992+#include <linux/fs.h>
8993+#include <linux/mm.h>
8994+#include <linux/rcupdate.h>
8995+#include <linux/aufs_type.h>
8996+#include "inode.h"
8997+
2cbb1c4b 8998+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
8999+
9000+struct au_dynop {
9001+ int dy_type;
9002+ union {
9003+ const void *dy_hop;
9004+ const struct address_space_operations *dy_haop;
4a4d8108
AM
9005+ };
9006+};
9007+
9008+struct au_dykey {
9009+ union {
9010+ struct list_head dk_list;
9011+ struct rcu_head dk_rcu;
9012+ };
9013+ struct au_dynop dk_op;
9014+
9015+ /*
9016+ * while this key is in the branch-local array, a kref is held. when the
9017+ * branch is removed, the kref is put.
9018+ */
9019+ struct kref dk_kref;
9020+};
9021+
9022+/* stop unioning since their sizes are very different from each other */
9023+struct au_dyaop {
9024+ struct au_dykey da_key;
9025+ struct address_space_operations da_op; /* not const */
9026+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
9027+ void **, unsigned long *);
9028+};
9029+
4a4d8108
AM
9030+/* ---------------------------------------------------------------------- */
9031+
9032+/* dynop.c */
9033+struct au_branch;
9034+void au_dy_put(struct au_dykey *key);
9035+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
9036+ struct inode *h_inode);
b752ccd1 9037+int au_dy_irefresh(struct inode *inode);
4a4d8108 9038+void au_dy_arefresh(int do_dio);
4a4d8108
AM
9039+
9040+void __init au_dy_init(void);
9041+void au_dy_fin(void);
9042+
4a4d8108
AM
9043+#endif /* __KERNEL__ */
9044+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
9045diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
9046--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9047+++ linux/fs/aufs/export.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9048@@ -0,0 +1,805 @@
4a4d8108 9049+/*
027c5e7a 9050+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
9051+ *
9052+ * This program, aufs is free software; you can redistribute it and/or modify
9053+ * it under the terms of the GNU General Public License as published by
9054+ * the Free Software Foundation; either version 2 of the License, or
9055+ * (at your option) any later version.
9056+ *
9057+ * This program is distributed in the hope that it will be useful,
9058+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9059+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9060+ * GNU General Public License for more details.
9061+ *
9062+ * You should have received a copy of the GNU General Public License
9063+ * along with this program; if not, write to the Free Software
9064+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9065+ */
9066+
9067+/*
9068+ * export via nfs
9069+ */
9070+
9071+#include <linux/exportfs.h>
9072+#include <linux/file.h>
9073+#include <linux/mnt_namespace.h>
9074+#include <linux/namei.h>
9075+#include <linux/nsproxy.h>
9076+#include <linux/random.h>
9077+#include <linux/writeback.h>
9078+#include "aufs.h"
9079+
9080+union conv {
9081+#ifdef CONFIG_AUFS_INO_T_64
9082+ __u32 a[2];
9083+#else
9084+ __u32 a[1];
9085+#endif
9086+ ino_t ino;
9087+};
9088+
9089+static ino_t decode_ino(__u32 *a)
9090+{
9091+ union conv u;
9092+
9093+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
9094+ u.a[0] = a[0];
9095+#ifdef CONFIG_AUFS_INO_T_64
9096+ u.a[1] = a[1];
9097+#endif
9098+ return u.ino;
9099+}
9100+
9101+static void encode_ino(__u32 *a, ino_t ino)
9102+{
9103+ union conv u;
9104+
9105+ u.ino = ino;
9106+ a[0] = u.a[0];
9107+#ifdef CONFIG_AUFS_INO_T_64
9108+ a[1] = u.a[1];
9109+#endif
9110+}
9111+
9112+/* NFS file handle */
9113+enum {
9114+ Fh_br_id,
9115+ Fh_sigen,
9116+#ifdef CONFIG_AUFS_INO_T_64
9117+ /* support 64bit inode number */
9118+ Fh_ino1,
9119+ Fh_ino2,
9120+ Fh_dir_ino1,
9121+ Fh_dir_ino2,
9122+#else
9123+ Fh_ino1,
9124+ Fh_dir_ino1,
9125+#endif
9126+ Fh_igen,
9127+ Fh_h_type,
9128+ Fh_tail,
9129+
9130+ Fh_ino = Fh_ino1,
9131+ Fh_dir_ino = Fh_dir_ino1
9132+};
9133+
9134+static int au_test_anon(struct dentry *dentry)
9135+{
027c5e7a 9136+ /* note: read d_flags without d_lock */
4a4d8108
AM
9137+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
9138+}
9139+
9140+/* ---------------------------------------------------------------------- */
9141+/* inode generation external table */
9142+
b752ccd1 9143+void au_xigen_inc(struct inode *inode)
4a4d8108 9144+{
4a4d8108
AM
9145+ loff_t pos;
9146+ ssize_t sz;
9147+ __u32 igen;
9148+ struct super_block *sb;
9149+ struct au_sbinfo *sbinfo;
9150+
4a4d8108 9151+ sb = inode->i_sb;
b752ccd1 9152+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 9153+
b752ccd1 9154+ sbinfo = au_sbi(sb);
1facf9fc 9155+ pos = inode->i_ino;
9156+ pos *= sizeof(igen);
9157+ igen = inode->i_generation + 1;
1facf9fc 9158+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
9159+ sizeof(igen), &pos);
9160+ if (sz == sizeof(igen))
b752ccd1 9161+ return; /* success */
1facf9fc 9162+
b752ccd1 9163+ if (unlikely(sz >= 0))
1facf9fc 9164+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 9165+}
9166+
9167+int au_xigen_new(struct inode *inode)
9168+{
9169+ int err;
9170+ loff_t pos;
9171+ ssize_t sz;
9172+ struct super_block *sb;
9173+ struct au_sbinfo *sbinfo;
9174+ struct file *file;
9175+
9176+ err = 0;
9177+ /* todo: dirty, at mount time */
9178+ if (inode->i_ino == AUFS_ROOT_INO)
9179+ goto out;
9180+ sb = inode->i_sb;
dece6358 9181+ SiMustAnyLock(sb);
1facf9fc 9182+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9183+ goto out;
9184+
9185+ err = -EFBIG;
9186+ pos = inode->i_ino;
9187+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
9188+ AuIOErr1("too large i%lld\n", pos);
9189+ goto out;
9190+ }
9191+ pos *= sizeof(inode->i_generation);
9192+
9193+ err = 0;
9194+ sbinfo = au_sbi(sb);
9195+ file = sbinfo->si_xigen;
9196+ BUG_ON(!file);
9197+
9198+ if (i_size_read(file->f_dentry->d_inode)
9199+ < pos + sizeof(inode->i_generation)) {
9200+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
9201+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
9202+ sizeof(inode->i_generation), &pos);
9203+ } else
9204+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
9205+ sizeof(inode->i_generation), &pos);
9206+ if (sz == sizeof(inode->i_generation))
9207+ goto out; /* success */
9208+
9209+ err = sz;
9210+ if (unlikely(sz >= 0)) {
9211+ err = -EIO;
9212+ AuIOErr("xigen error (%zd)\n", sz);
9213+ }
9214+
4f0767ce 9215+out:
1facf9fc 9216+ return err;
9217+}
9218+
9219+int au_xigen_set(struct super_block *sb, struct file *base)
9220+{
9221+ int err;
9222+ struct au_sbinfo *sbinfo;
9223+ struct file *file;
9224+
dece6358
AM
9225+ SiMustWriteLock(sb);
9226+
1facf9fc 9227+ sbinfo = au_sbi(sb);
9228+ file = au_xino_create2(base, sbinfo->si_xigen);
9229+ err = PTR_ERR(file);
9230+ if (IS_ERR(file))
9231+ goto out;
9232+ err = 0;
9233+ if (sbinfo->si_xigen)
9234+ fput(sbinfo->si_xigen);
9235+ sbinfo->si_xigen = file;
9236+
4f0767ce 9237+out:
1facf9fc 9238+ return err;
9239+}
9240+
9241+void au_xigen_clr(struct super_block *sb)
9242+{
9243+ struct au_sbinfo *sbinfo;
9244+
dece6358
AM
9245+ SiMustWriteLock(sb);
9246+
1facf9fc 9247+ sbinfo = au_sbi(sb);
9248+ if (sbinfo->si_xigen) {
9249+ fput(sbinfo->si_xigen);
9250+ sbinfo->si_xigen = NULL;
9251+ }
9252+}
9253+
9254+/* ---------------------------------------------------------------------- */
9255+
9256+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
9257+ ino_t dir_ino)
9258+{
9259+ struct dentry *dentry, *d;
9260+ struct inode *inode;
9261+ unsigned int sigen;
9262+
9263+ dentry = NULL;
9264+ inode = ilookup(sb, ino);
9265+ if (!inode)
9266+ goto out;
9267+
9268+ dentry = ERR_PTR(-ESTALE);
9269+ sigen = au_sigen(sb);
9270+ if (unlikely(is_bad_inode(inode)
9271+ || IS_DEADDIR(inode)
9272+ || sigen != au_iigen(inode)))
9273+ goto out_iput;
9274+
9275+ dentry = NULL;
9276+ if (!dir_ino || S_ISDIR(inode->i_mode))
9277+ dentry = d_find_alias(inode);
9278+ else {
027c5e7a
AM
9279+ spin_lock(&inode->i_lock);
9280+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
9281+ spin_lock(&d->d_lock);
1facf9fc 9282+ if (!au_test_anon(d)
9283+ && d->d_parent->d_inode->i_ino == dir_ino) {
027c5e7a
AM
9284+ dentry = dget_dlock(d);
9285+ spin_unlock(&d->d_lock);
1facf9fc 9286+ break;
9287+ }
027c5e7a
AM
9288+ spin_unlock(&d->d_lock);
9289+ }
9290+ spin_unlock(&inode->i_lock);
1facf9fc 9291+ }
027c5e7a 9292+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 9293+ /* need to refresh */
1facf9fc 9294+ dput(dentry);
2cbb1c4b 9295+ dentry = NULL;
1facf9fc 9296+ }
9297+
4f0767ce 9298+out_iput:
1facf9fc 9299+ iput(inode);
4f0767ce 9300+out:
2cbb1c4b 9301+ AuTraceErrPtr(dentry);
1facf9fc 9302+ return dentry;
9303+}
9304+
9305+/* ---------------------------------------------------------------------- */
9306+
9307+/* todo: dirty? */
9308+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
9309+
9310+struct au_compare_mnt_args {
9311+ /* input */
9312+ struct super_block *sb;
9313+
9314+ /* output */
9315+ struct vfsmount *mnt;
9316+};
9317+
9318+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
9319+{
9320+ struct au_compare_mnt_args *a = arg;
9321+
9322+ if (mnt->mnt_sb != a->sb)
9323+ return 0;
9324+ a->mnt = mntget(mnt);
9325+ return 1;
9326+}
9327+
1facf9fc 9328+static struct vfsmount *au_mnt_get(struct super_block *sb)
9329+{
4a4d8108
AM
9330+ int err;
9331+ struct au_compare_mnt_args args = {
9332+ .sb = sb
9333+ };
1facf9fc 9334+ struct mnt_namespace *ns;
1facf9fc 9335+
0c5527e5 9336+ br_read_lock(vfsmount_lock);
1facf9fc 9337+ /* no get/put ?? */
9338+ AuDebugOn(!current->nsproxy);
9339+ ns = current->nsproxy->mnt_ns;
9340+ AuDebugOn(!ns);
4a4d8108 9341+ err = iterate_mounts(au_compare_mnt, &args, ns->root);
0c5527e5 9342+ br_read_unlock(vfsmount_lock);
4a4d8108
AM
9343+ AuDebugOn(!err);
9344+ AuDebugOn(!args.mnt);
9345+ return args.mnt;
1facf9fc 9346+}
9347+
9348+struct au_nfsd_si_lock {
4a4d8108 9349+ unsigned int sigen;
027c5e7a 9350+ aufs_bindex_t bindex, br_id;
1facf9fc 9351+ unsigned char force_lock;
9352+};
9353+
027c5e7a
AM
9354+static int si_nfsd_read_lock(struct super_block *sb,
9355+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9356+{
027c5e7a 9357+ int err;
1facf9fc 9358+ aufs_bindex_t bindex;
9359+
9360+ si_read_lock(sb, AuLock_FLUSH);
9361+
9362+ /* branch id may be wrapped around */
027c5e7a 9363+ err = 0;
1facf9fc 9364+ bindex = au_br_index(sb, nsi_lock->br_id);
9365+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
9366+ goto out; /* success */
9367+
027c5e7a
AM
9368+ err = -ESTALE;
9369+ bindex = -1;
1facf9fc 9370+ if (!nsi_lock->force_lock)
9371+ si_read_unlock(sb);
1facf9fc 9372+
4f0767ce 9373+out:
027c5e7a
AM
9374+ nsi_lock->bindex = bindex;
9375+ return err;
1facf9fc 9376+}
9377+
9378+struct find_name_by_ino {
9379+ int called, found;
9380+ ino_t ino;
9381+ char *name;
9382+ int namelen;
9383+};
9384+
9385+static int
9386+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
9387+ u64 ino, unsigned int d_type)
9388+{
9389+ struct find_name_by_ino *a = arg;
9390+
9391+ a->called++;
9392+ if (a->ino != ino)
9393+ return 0;
9394+
9395+ memcpy(a->name, name, namelen);
9396+ a->namelen = namelen;
9397+ a->found = 1;
9398+ return 1;
9399+}
9400+
9401+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
9402+ struct au_nfsd_si_lock *nsi_lock)
9403+{
9404+ struct dentry *dentry, *parent;
9405+ struct file *file;
9406+ struct inode *dir;
9407+ struct find_name_by_ino arg;
9408+ int err;
9409+
9410+ parent = path->dentry;
9411+ if (nsi_lock)
9412+ si_read_unlock(parent->d_sb);
4a4d8108 9413+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 9414+ dentry = (void *)file;
9415+ if (IS_ERR(file))
9416+ goto out;
9417+
9418+ dentry = ERR_PTR(-ENOMEM);
4a4d8108 9419+ arg.name = __getname_gfp(GFP_NOFS);
1facf9fc 9420+ if (unlikely(!arg.name))
9421+ goto out_file;
9422+ arg.ino = ino;
9423+ arg.found = 0;
9424+ do {
9425+ arg.called = 0;
9426+ /* smp_mb(); */
9427+ err = vfsub_readdir(file, find_name_by_ino, &arg);
9428+ } while (!err && !arg.found && arg.called);
9429+ dentry = ERR_PTR(err);
9430+ if (unlikely(err))
9431+ goto out_name;
9432+ dentry = ERR_PTR(-ENOENT);
9433+ if (!arg.found)
9434+ goto out_name;
9435+
9436+ /* do not call au_lkup_one() */
9437+ dir = parent->d_inode;
9438+ mutex_lock(&dir->i_mutex);
9439+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
9440+ mutex_unlock(&dir->i_mutex);
9441+ AuTraceErrPtr(dentry);
9442+ if (IS_ERR(dentry))
9443+ goto out_name;
9444+ AuDebugOn(au_test_anon(dentry));
9445+ if (unlikely(!dentry->d_inode)) {
9446+ dput(dentry);
9447+ dentry = ERR_PTR(-ENOENT);
9448+ }
9449+
4f0767ce 9450+out_name:
1facf9fc 9451+ __putname(arg.name);
4f0767ce 9452+out_file:
1facf9fc 9453+ fput(file);
4f0767ce 9454+out:
1facf9fc 9455+ if (unlikely(nsi_lock
9456+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
9457+ if (!IS_ERR(dentry)) {
9458+ dput(dentry);
9459+ dentry = ERR_PTR(-ESTALE);
9460+ }
9461+ AuTraceErrPtr(dentry);
9462+ return dentry;
9463+}
9464+
9465+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
9466+ ino_t dir_ino,
9467+ struct au_nfsd_si_lock *nsi_lock)
9468+{
9469+ struct dentry *dentry;
9470+ struct path path;
9471+
9472+ if (dir_ino != AUFS_ROOT_INO) {
9473+ path.dentry = decode_by_ino(sb, dir_ino, 0);
9474+ dentry = path.dentry;
9475+ if (!path.dentry || IS_ERR(path.dentry))
9476+ goto out;
9477+ AuDebugOn(au_test_anon(path.dentry));
9478+ } else
9479+ path.dentry = dget(sb->s_root);
9480+
9481+ path.mnt = au_mnt_get(sb);
9482+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
9483+ path_put(&path);
9484+
4f0767ce 9485+out:
1facf9fc 9486+ AuTraceErrPtr(dentry);
9487+ return dentry;
9488+}
9489+
9490+/* ---------------------------------------------------------------------- */
9491+
9492+static int h_acceptable(void *expv, struct dentry *dentry)
9493+{
9494+ return 1;
9495+}
9496+
9497+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
9498+ char *buf, int len, struct super_block *sb)
9499+{
9500+ char *p;
9501+ int n;
9502+ struct path path;
9503+
9504+ p = d_path(h_rootpath, buf, len);
9505+ if (IS_ERR(p))
9506+ goto out;
9507+ n = strlen(p);
9508+
9509+ path.mnt = h_rootpath->mnt;
9510+ path.dentry = h_parent;
9511+ p = d_path(&path, buf, len);
9512+ if (IS_ERR(p))
9513+ goto out;
9514+ if (n != 1)
9515+ p += n;
9516+
9517+ path.mnt = au_mnt_get(sb);
9518+ path.dentry = sb->s_root;
9519+ p = d_path(&path, buf, len - strlen(p));
9520+ mntput(path.mnt);
9521+ if (IS_ERR(p))
9522+ goto out;
9523+ if (n != 1)
9524+ p[strlen(p)] = '/';
9525+
4f0767ce 9526+out:
1facf9fc 9527+ AuTraceErrPtr(p);
9528+ return p;
9529+}
9530+
9531+static
027c5e7a
AM
9532+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
9533+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9534+{
9535+ struct dentry *dentry, *h_parent, *root;
9536+ struct super_block *h_sb;
9537+ char *pathname, *p;
9538+ struct vfsmount *h_mnt;
9539+ struct au_branch *br;
9540+ int err;
9541+ struct path path;
9542+
027c5e7a 9543+ br = au_sbr(sb, nsi_lock->bindex);
1facf9fc 9544+ h_mnt = br->br_mnt;
9545+ h_sb = h_mnt->mnt_sb;
9546+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
9547+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
9548+ fh_len - Fh_tail, fh[Fh_h_type],
9549+ h_acceptable, /*context*/NULL);
9550+ dentry = h_parent;
9551+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
9552+ AuWarn1("%s decode_fh failed, %ld\n",
9553+ au_sbtype(h_sb), PTR_ERR(h_parent));
9554+ goto out;
9555+ }
9556+ dentry = NULL;
9557+ if (unlikely(au_test_anon(h_parent))) {
9558+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
9559+ au_sbtype(h_sb));
9560+ goto out_h_parent;
9561+ }
9562+
9563+ dentry = ERR_PTR(-ENOMEM);
9564+ pathname = (void *)__get_free_page(GFP_NOFS);
9565+ if (unlikely(!pathname))
9566+ goto out_h_parent;
9567+
9568+ root = sb->s_root;
9569+ path.mnt = h_mnt;
9570+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 9571+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 9572+ di_read_unlock(root, !AuLock_IR);
9573+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
9574+ dentry = (void *)p;
9575+ if (IS_ERR(p))
9576+ goto out_pathname;
9577+
9578+ si_read_unlock(sb);
9579+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
9580+ dentry = ERR_PTR(err);
9581+ if (unlikely(err))
9582+ goto out_relock;
9583+
9584+ dentry = ERR_PTR(-ENOENT);
9585+ AuDebugOn(au_test_anon(path.dentry));
9586+ if (unlikely(!path.dentry->d_inode))
9587+ goto out_path;
9588+
9589+ if (ino != path.dentry->d_inode->i_ino)
9590+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
9591+ else
9592+ dentry = dget(path.dentry);
9593+
4f0767ce 9594+out_path:
1facf9fc 9595+ path_put(&path);
4f0767ce 9596+out_relock:
1facf9fc 9597+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
9598+ if (!IS_ERR(dentry)) {
9599+ dput(dentry);
9600+ dentry = ERR_PTR(-ESTALE);
9601+ }
4f0767ce 9602+out_pathname:
1facf9fc 9603+ free_page((unsigned long)pathname);
4f0767ce 9604+out_h_parent:
1facf9fc 9605+ dput(h_parent);
4f0767ce 9606+out:
1facf9fc 9607+ AuTraceErrPtr(dentry);
9608+ return dentry;
9609+}
9610+
9611+/* ---------------------------------------------------------------------- */
9612+
9613+static struct dentry *
9614+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
9615+ int fh_type)
9616+{
9617+ struct dentry *dentry;
9618+ __u32 *fh = fid->raw;
027c5e7a 9619+ struct au_branch *br;
1facf9fc 9620+ ino_t ino, dir_ino;
1facf9fc 9621+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 9622+ .force_lock = 0
9623+ };
9624+
1facf9fc 9625+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
9626+ /* it should never happen, but the file handle is unreliable */
9627+ if (unlikely(fh_len < Fh_tail))
9628+ goto out;
9629+ nsi_lock.sigen = fh[Fh_sigen];
9630+ nsi_lock.br_id = fh[Fh_br_id];
9631+
1facf9fc 9632+ /* branch id may be wrapped around */
027c5e7a
AM
9633+ br = NULL;
9634+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 9635+ goto out;
9636+ nsi_lock.force_lock = 1;
9637+
9638+ /* is this inode still cached? */
9639+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
9640+ /* it should never happen */
9641+ if (unlikely(ino == AUFS_ROOT_INO))
9642+ goto out;
9643+
1facf9fc 9644+ dir_ino = decode_ino(fh + Fh_dir_ino);
9645+ dentry = decode_by_ino(sb, ino, dir_ino);
9646+ if (IS_ERR(dentry))
9647+ goto out_unlock;
9648+ if (dentry)
9649+ goto accept;
9650+
9651+ /* is the parent dir cached? */
027c5e7a
AM
9652+ br = au_sbr(sb, nsi_lock.bindex);
9653+ atomic_inc(&br->br_count);
1facf9fc 9654+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
9655+ if (IS_ERR(dentry))
9656+ goto out_unlock;
9657+ if (dentry)
9658+ goto accept;
9659+
9660+ /* lookup path */
027c5e7a 9661+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 9662+ if (IS_ERR(dentry))
9663+ goto out_unlock;
9664+ if (unlikely(!dentry))
9665+ /* todo?: make it ESTALE */
9666+ goto out_unlock;
9667+
4f0767ce 9668+accept:
027c5e7a
AM
9669+ if (!au_digen_test(dentry, au_sigen(sb))
9670+ && dentry->d_inode->i_generation == fh[Fh_igen])
1facf9fc 9671+ goto out_unlock; /* success */
9672+
9673+ dput(dentry);
9674+ dentry = ERR_PTR(-ESTALE);
4f0767ce 9675+out_unlock:
027c5e7a
AM
9676+ if (br)
9677+ atomic_dec(&br->br_count);
1facf9fc 9678+ si_read_unlock(sb);
4f0767ce 9679+out:
1facf9fc 9680+ AuTraceErrPtr(dentry);
9681+ return dentry;
9682+}
9683+
#if 0 /* reserved for future use */
/*
 * support subtreecheck option
 *
 * NOTE(review): this disabled code passes (sb, bindex, dir_ino, fh, fh_len)
 * to decode_by_path(), which does not match the decode_by_path() defined in
 * this file (sb, ino, fh, fh_len, nsi_lock) -- it needs updating before it
 * can be enabled.
 */
static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	struct dentry *parent;
	__u32 *fh = fid->raw;
	ino_t dir_ino;

	dir_ino = decode_ino(fh + Fh_dir_ino);
	parent = decode_by_ino(sb, dir_ino, 0);
	/* fall back to the path lookup only when nothing was found */
	if (!IS_ERR(parent) && !parent)
		parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
					dir_ino, fh, fh_len);

	AuTraceErrPtr(parent);
	return parent;
}
#endif
9706+
9707+/* ---------------------------------------------------------------------- */
9708+
9709+static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
9710+ int connectable)
9711+{
9712+ int err;
9713+ aufs_bindex_t bindex, bend;
9714+ struct super_block *sb, *h_sb;
9715+ struct inode *inode;
9716+ struct dentry *parent, *h_parent;
9717+ struct au_branch *br;
9718+
9719+ AuDebugOn(au_test_anon(dentry));
9720+
9721+ parent = NULL;
9722+ err = -ENOSPC;
9723+ if (unlikely(*max_len <= Fh_tail)) {
9724+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
9725+ goto out;
9726+ }
9727+
9728+ err = FILEID_ROOT;
9729+ if (IS_ROOT(dentry)) {
9730+ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
9731+ goto out;
9732+ }
9733+
1facf9fc 9734+ h_parent = NULL;
027c5e7a
AM
9735+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN);
9736+ if (unlikely(err))
9737+ goto out;
9738+
1facf9fc 9739+ inode = dentry->d_inode;
9740+ AuDebugOn(!inode);
027c5e7a 9741+ sb = dentry->d_sb;
1facf9fc 9742+#ifdef CONFIG_AUFS_DEBUG
9743+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9744+ AuWarn1("NFS-exporting requires xino\n");
9745+#endif
027c5e7a
AM
9746+ err = -EIO;
9747+ parent = dget_parent(dentry);
9748+ di_read_lock_parent(parent, !AuLock_IR);
1facf9fc 9749+ bend = au_dbtaildir(parent);
9750+ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
9751+ h_parent = au_h_dptr(parent, bindex);
9752+ if (h_parent) {
9753+ dget(h_parent);
9754+ break;
9755+ }
9756+ }
9757+ if (unlikely(!h_parent))
9758+ goto out_unlock;
9759+
9760+ err = -EPERM;
9761+ br = au_sbr(sb, bindex);
9762+ h_sb = br->br_mnt->mnt_sb;
9763+ if (unlikely(!h_sb->s_export_op)) {
9764+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
9765+ goto out_dput;
9766+ }
9767+
9768+ fh[Fh_br_id] = br->br_id;
9769+ fh[Fh_sigen] = au_sigen(sb);
9770+ encode_ino(fh + Fh_ino, inode->i_ino);
9771+ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
9772+ fh[Fh_igen] = inode->i_generation;
9773+
9774+ *max_len -= Fh_tail;
9775+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
9776+ max_len,
9777+ /*connectable or subtreecheck*/0);
9778+ err = fh[Fh_h_type];
9779+ *max_len += Fh_tail;
9780+ /* todo: macros? */
9781+ if (err != 255)
9782+ err = 99;
9783+ else
9784+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
9785+
4f0767ce 9786+out_dput:
1facf9fc 9787+ dput(h_parent);
4f0767ce 9788+out_unlock:
1facf9fc 9789+ di_read_unlock(parent, !AuLock_IR);
9790+ dput(parent);
9791+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 9792+out:
1facf9fc 9793+ if (unlikely(err < 0))
9794+ err = 255;
9795+ return err;
9796+}
9797+
9798+/* ---------------------------------------------------------------------- */
9799+
4a4d8108
AM
9800+static int aufs_commit_metadata(struct inode *inode)
9801+{
9802+ int err;
9803+ aufs_bindex_t bindex;
9804+ struct super_block *sb;
9805+ struct inode *h_inode;
9806+ int (*f)(struct inode *inode);
9807+
9808+ sb = inode->i_sb;
e49829fe 9809+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
9810+ ii_write_lock_child(inode);
9811+ bindex = au_ibstart(inode);
9812+ AuDebugOn(bindex < 0);
9813+ h_inode = au_h_iptr(inode, bindex);
9814+
9815+ f = h_inode->i_sb->s_export_op->commit_metadata;
9816+ if (f)
9817+ err = f(h_inode);
9818+ else {
9819+ struct writeback_control wbc = {
9820+ .sync_mode = WB_SYNC_ALL,
9821+ .nr_to_write = 0 /* metadata only */
9822+ };
9823+
9824+ err = sync_inode(h_inode, &wbc);
9825+ }
9826+
9827+ au_cpup_attr_timesizes(inode);
9828+ ii_write_unlock(inode);
9829+ si_read_unlock(sb);
9830+ return err;
9831+}
9832+
9833+/* ---------------------------------------------------------------------- */
9834+
1facf9fc 9835+static struct export_operations aufs_export_op = {
4a4d8108 9836+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 9837+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
9838+ .encode_fh = aufs_encode_fh,
9839+ .commit_metadata = aufs_commit_metadata
1facf9fc 9840+};
9841+
9842+void au_export_init(struct super_block *sb)
9843+{
9844+ struct au_sbinfo *sbinfo;
9845+ __u32 u;
9846+
9847+ sb->s_export_op = &aufs_export_op;
9848+ sbinfo = au_sbi(sb);
9849+ sbinfo->si_xigen = NULL;
9850+ get_random_bytes(&u, sizeof(u));
9851+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
9852+ atomic_set(&sbinfo->si_xigen_next, u);
9853+}
7f207e10
AM
9854diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
9855--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9856+++ linux/fs/aufs/file.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9857@@ -0,0 +1,676 @@
1facf9fc 9858+/*
027c5e7a 9859+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 9860+ *
9861+ * This program, aufs is free software; you can redistribute it and/or modify
9862+ * it under the terms of the GNU General Public License as published by
9863+ * the Free Software Foundation; either version 2 of the License, or
9864+ * (at your option) any later version.
dece6358
AM
9865+ *
9866+ * This program is distributed in the hope that it will be useful,
9867+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9868+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9869+ * GNU General Public License for more details.
9870+ *
9871+ * You should have received a copy of the GNU General Public License
9872+ * along with this program; if not, write to the Free Software
9873+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 9874+ */
9875+
9876+/*
4a4d8108 9877+ * handling file/dir, and address_space operation
1facf9fc 9878+ */
9879+
dece6358 9880+#include <linux/file.h>
4a4d8108
AM
9881+#include <linux/fsnotify.h>
9882+#include <linux/namei.h>
9883+#include <linux/pagemap.h>
1facf9fc 9884+#include "aufs.h"
9885+
4a4d8108
AM
/*
 * Convert open(2) flags into their read-only form: every flag which implies
 * writing, creating or truncating is removed, and O_RDONLY | O_NOATIME is
 * added.  All other bits of @flags are kept.
 */
unsigned int au_file_roflags(unsigned int flags)
{
	const unsigned int wr_flags
		= O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC;

	return (flags & ~wr_flags) | O_RDONLY | O_NOATIME;
}
9893+
9894+/* common functions to regular file and dir */
9895+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9896+ struct file *file)
1facf9fc 9897+{
1308ab2a 9898+ struct file *h_file;
4a4d8108
AM
9899+ struct dentry *h_dentry;
9900+ struct inode *h_inode;
9901+ struct super_block *sb;
9902+ struct au_branch *br;
9903+ struct path h_path;
9904+ int err, exec_flag;
1facf9fc 9905+
4a4d8108
AM
9906+ /* a race condition can happen between open and unlink/rmdir */
9907+ h_file = ERR_PTR(-ENOENT);
9908+ h_dentry = au_h_dptr(dentry, bindex);
b752ccd1 9909+ if (au_test_nfsd() && !h_dentry)
4a4d8108
AM
9910+ goto out;
9911+ h_inode = h_dentry->d_inode;
b752ccd1 9912+ if (au_test_nfsd() && !h_inode)
4a4d8108 9913+ goto out;
027c5e7a
AM
9914+ spin_lock(&h_dentry->d_lock);
9915+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
9916+ || !h_inode
9917+ /* || !dentry->d_inode->i_nlink */
9918+ ;
9919+ spin_unlock(&h_dentry->d_lock);
9920+ if (unlikely(err))
4a4d8108 9921+ goto out;
1facf9fc 9922+
4a4d8108
AM
9923+ sb = dentry->d_sb;
9924+ br = au_sbr(sb, bindex);
9925+ h_file = ERR_PTR(-EACCES);
2cbb1c4b 9926+ exec_flag = flags & __FMODE_EXEC;
4a4d8108 9927+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
027c5e7a 9928+ goto out;
1facf9fc 9929+
4a4d8108
AM
9930+ /* drop flags for writing */
9931+ if (au_test_ro(sb, bindex, dentry->d_inode))
9932+ flags = au_file_roflags(flags);
9933+ flags &= ~O_CREAT;
9934+ atomic_inc(&br->br_count);
9935+ h_path.dentry = h_dentry;
9936+ h_path.mnt = br->br_mnt;
9937+ if (!au_special_file(h_inode->i_mode))
9938+ h_file = vfsub_dentry_open(&h_path, flags);
9939+ else {
9940+ /* this block depends upon the configuration */
9941+ di_read_unlock(dentry, AuLock_IR);
9942+ fi_write_unlock(file);
9943+ si_read_unlock(sb);
9944+ h_file = vfsub_dentry_open(&h_path, flags);
9945+ si_noflush_read_lock(sb);
9946+ fi_write_lock(file);
9947+ di_read_lock_child(dentry, AuLock_IR);
dece6358 9948+ }
4a4d8108
AM
9949+ if (IS_ERR(h_file))
9950+ goto out_br;
dece6358 9951+
4a4d8108
AM
9952+ if (exec_flag) {
9953+ err = deny_write_access(h_file);
9954+ if (unlikely(err)) {
9955+ fput(h_file);
9956+ h_file = ERR_PTR(err);
9957+ goto out_br;
9958+ }
9959+ }
953406b4 9960+ fsnotify_open(h_file);
4a4d8108 9961+ goto out; /* success */
1facf9fc 9962+
4f0767ce 9963+out_br:
4a4d8108 9964+ atomic_dec(&br->br_count);
4f0767ce 9965+out:
4a4d8108
AM
9966+ return h_file;
9967+}
1308ab2a 9968+
4a4d8108
AM
9969+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
9970+ struct au_fidir *fidir)
1facf9fc 9971+{
dece6358 9972+ int err;
1facf9fc 9973+ struct dentry *dentry;
1308ab2a 9974+
4a4d8108
AM
9975+ err = au_finfo_init(file, fidir);
9976+ if (unlikely(err))
9977+ goto out;
1facf9fc 9978+
9979+ dentry = file->f_dentry;
4a4d8108
AM
9980+ di_read_lock_child(dentry, AuLock_IR);
9981+ err = open(file, vfsub_file_flags(file));
9982+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 9983+
4a4d8108
AM
9984+ fi_write_unlock(file);
9985+ if (unlikely(err)) {
9986+ au_fi(file)->fi_hdir = NULL;
9987+ au_finfo_fin(file);
1308ab2a 9988+ }
4a4d8108 9989+
4f0767ce 9990+out:
1308ab2a 9991+ return err;
9992+}
dece6358 9993+
4a4d8108 9994+int au_reopen_nondir(struct file *file)
1308ab2a 9995+{
4a4d8108
AM
9996+ int err;
9997+ aufs_bindex_t bstart;
9998+ struct dentry *dentry;
9999+ struct file *h_file, *h_file_tmp;
1308ab2a 10000+
4a4d8108
AM
10001+ dentry = file->f_dentry;
10002+ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
10003+ bstart = au_dbstart(dentry);
10004+ h_file_tmp = NULL;
10005+ if (au_fbstart(file) == bstart) {
10006+ h_file = au_hf_top(file);
10007+ if (file->f_mode == h_file->f_mode)
10008+ return 0; /* success */
10009+ h_file_tmp = h_file;
10010+ get_file(h_file_tmp);
10011+ au_set_h_fptr(file, bstart, NULL);
10012+ }
10013+ AuDebugOn(au_fi(file)->fi_hdir);
10014+ AuDebugOn(au_fbstart(file) < bstart);
1308ab2a 10015+
4a4d8108
AM
10016+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
10017+ file);
10018+ err = PTR_ERR(h_file);
10019+ if (IS_ERR(h_file))
10020+ goto out; /* todo: close all? */
10021+
10022+ err = 0;
10023+ au_set_fbstart(file, bstart);
10024+ au_set_h_fptr(file, bstart, h_file);
10025+ au_update_figen(file);
10026+ /* todo: necessary? */
10027+ /* file->f_ra = h_file->f_ra; */
10028+
4f0767ce 10029+out:
4a4d8108
AM
10030+ if (h_file_tmp)
10031+ fput(h_file_tmp);
10032+ return err;
1facf9fc 10033+}
10034+
1308ab2a 10035+/* ---------------------------------------------------------------------- */
10036+
4a4d8108
AM
10037+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
10038+ struct dentry *hi_wh)
1facf9fc 10039+{
4a4d8108
AM
10040+ int err;
10041+ aufs_bindex_t bstart;
10042+ struct au_dinfo *dinfo;
10043+ struct dentry *h_dentry;
10044+ struct au_hdentry *hdp;
1facf9fc 10045+
4a4d8108
AM
10046+ dinfo = au_di(file->f_dentry);
10047+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 10048+
4a4d8108
AM
10049+ bstart = dinfo->di_bstart;
10050+ dinfo->di_bstart = btgt;
10051+ hdp = dinfo->di_hdentry;
10052+ h_dentry = hdp[0 + btgt].hd_dentry;
10053+ hdp[0 + btgt].hd_dentry = hi_wh;
10054+ err = au_reopen_nondir(file);
10055+ hdp[0 + btgt].hd_dentry = h_dentry;
10056+ dinfo->di_bstart = bstart;
1facf9fc 10057+
1facf9fc 10058+ return err;
10059+}
10060+
4a4d8108
AM
10061+static int au_ready_to_write_wh(struct file *file, loff_t len,
10062+ aufs_bindex_t bcpup)
1facf9fc 10063+{
4a4d8108 10064+ int err;
027c5e7a
AM
10065+ struct inode *inode, *h_inode;
10066+ struct dentry *dentry, *h_dentry, *hi_wh;
1facf9fc 10067+
dece6358 10068+ dentry = file->f_dentry;
4a4d8108 10069+ au_update_dbstart(dentry);
dece6358 10070+ inode = dentry->d_inode;
027c5e7a
AM
10071+ h_inode = NULL;
10072+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
10073+ h_dentry = au_h_dptr(dentry, bcpup);
10074+ if (h_dentry)
10075+ h_inode = h_dentry->d_inode;
10076+ }
4a4d8108 10077+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 10078+ if (!hi_wh && !h_inode)
4a4d8108
AM
10079+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
10080+ else
10081+ /* already copied-up after unlink */
10082+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 10083+
4a4d8108
AM
10084+ if (!err
10085+ && inode->i_nlink > 1
10086+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
10087+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
1308ab2a 10088+
dece6358 10089+ return err;
1facf9fc 10090+}
10091+
4a4d8108
AM
10092+/*
10093+ * prepare the @file for writing.
10094+ */
10095+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 10096+{
4a4d8108 10097+ int err;
027c5e7a 10098+ aufs_bindex_t bstart, bcpup, dbstart;
4a4d8108
AM
10099+ struct dentry *dentry, *parent, *h_dentry;
10100+ struct inode *h_inode, *inode;
1facf9fc 10101+ struct super_block *sb;
4a4d8108 10102+ struct file *h_file;
1facf9fc 10103+
10104+ dentry = file->f_dentry;
1facf9fc 10105+ sb = dentry->d_sb;
4a4d8108
AM
10106+ inode = dentry->d_inode;
10107+ AuDebugOn(au_special_file(inode->i_mode));
10108+ bstart = au_fbstart(file);
10109+ err = au_test_ro(sb, bstart, inode);
10110+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
10111+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
1facf9fc 10112+ goto out;
4a4d8108 10113+ }
1facf9fc 10114+
027c5e7a 10115+ /* need to cpup or reopen */
4a4d8108
AM
10116+ parent = dget_parent(dentry);
10117+ di_write_lock_parent(parent);
10118+ err = AuWbrCopyup(au_sbi(sb), dentry);
10119+ bcpup = err;
10120+ if (unlikely(err < 0))
10121+ goto out_dgrade;
10122+ err = 0;
10123+
027c5e7a 10124+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
4a4d8108 10125+ err = au_cpup_dirs(dentry, bcpup);
1facf9fc 10126+ if (unlikely(err))
4a4d8108
AM
10127+ goto out_dgrade;
10128+ }
10129+
10130+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
10131+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10132+ if (unlikely(err))
10133+ goto out_dgrade;
10134+
10135+ h_dentry = au_hf_top(file)->f_dentry;
10136+ h_inode = h_dentry->d_inode;
027c5e7a
AM
10137+ dbstart = au_dbstart(dentry);
10138+ if (dbstart <= bcpup) {
10139+ h_dentry = au_h_dptr(dentry, bcpup);
10140+ AuDebugOn(!h_dentry);
10141+ h_inode = h_dentry->d_inode;
10142+ AuDebugOn(!h_inode);
10143+ bstart = bcpup;
10144+ }
10145+
10146+ if (dbstart <= bcpup /* just reopen */
10147+ || !d_unhashed(dentry) /* copyup and reopen */
10148+ ) {
10149+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10150+ h_file = au_h_open_pre(dentry, bstart);
10151+ if (IS_ERR(h_file)) {
10152+ err = PTR_ERR(h_file);
10153+ h_file = NULL;
10154+ } else {
10155+ di_downgrade_lock(parent, AuLock_IR);
10156+ if (dbstart > bcpup)
10157+ err = au_sio_cpup_simple(dentry, bcpup, len,
10158+ AuCpup_DTIME);
10159+ if (!err)
10160+ err = au_reopen_nondir(file);
10161+ }
10162+ mutex_unlock(&h_inode->i_mutex);
10163+ au_h_open_post(dentry, bstart, h_file);
10164+ } else { /* copyup as wh and reopen */
10165+ /*
10166+ * since writable hfsplus branch is not supported,
10167+ * h_open_pre/post() are unnecessary.
10168+ */
10169+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108
AM
10170+ err = au_ready_to_write_wh(file, len, bcpup);
10171+ di_downgrade_lock(parent, AuLock_IR);
027c5e7a 10172+ mutex_unlock(&h_inode->i_mutex);
4a4d8108 10173+ }
4a4d8108
AM
10174+
10175+ if (!err) {
10176+ au_pin_set_parent_lflag(pin, /*lflag*/0);
10177+ goto out_dput; /* success */
10178+ }
10179+ au_unpin(pin);
10180+ goto out_unlock;
1facf9fc 10181+
4f0767ce 10182+out_dgrade:
4a4d8108 10183+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 10184+out_unlock:
4a4d8108 10185+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10186+out_dput:
4a4d8108 10187+ dput(parent);
4f0767ce 10188+out:
1facf9fc 10189+ return err;
10190+}
10191+
4a4d8108
AM
10192+/* ---------------------------------------------------------------------- */
10193+
10194+int au_do_flush(struct file *file, fl_owner_t id,
10195+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 10196+{
4a4d8108 10197+ int err;
1308ab2a 10198+ struct dentry *dentry;
1facf9fc 10199+ struct super_block *sb;
4a4d8108 10200+ struct inode *inode;
1facf9fc 10201+
1facf9fc 10202+ dentry = file->f_dentry;
10203+ sb = dentry->d_sb;
dece6358 10204+ inode = dentry->d_inode;
4a4d8108
AM
10205+ si_noflush_read_lock(sb);
10206+ fi_read_lock(file);
b752ccd1 10207+ ii_read_lock_child(inode);
1facf9fc 10208+
4a4d8108
AM
10209+ err = flush(file, id);
10210+ au_cpup_attr_timesizes(inode);
1facf9fc 10211+
b752ccd1 10212+ ii_read_unlock(inode);
4a4d8108 10213+ fi_read_unlock(file);
1308ab2a 10214+ si_read_unlock(sb);
dece6358 10215+ return err;
1facf9fc 10216+}
10217+
4a4d8108
AM
10218+/* ---------------------------------------------------------------------- */
10219+
10220+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 10221+{
4a4d8108
AM
10222+ int err;
10223+ aufs_bindex_t bstart;
10224+ struct au_pin pin;
10225+ struct au_finfo *finfo;
10226+ struct dentry *dentry, *parent, *hi_wh;
10227+ struct inode *inode;
1facf9fc 10228+ struct super_block *sb;
10229+
4a4d8108
AM
10230+ FiMustWriteLock(file);
10231+
10232+ err = 0;
10233+ finfo = au_fi(file);
1308ab2a 10234+ dentry = file->f_dentry;
10235+ sb = dentry->d_sb;
4a4d8108
AM
10236+ inode = dentry->d_inode;
10237+ bstart = au_ibstart(inode);
027c5e7a 10238+ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
1308ab2a 10239+ goto out;
dece6358 10240+
4a4d8108
AM
10241+ parent = dget_parent(dentry);
10242+ if (au_test_ro(sb, bstart, inode)) {
10243+ di_read_lock_parent(parent, !AuLock_IR);
10244+ err = AuWbrCopyup(au_sbi(sb), dentry);
10245+ bstart = err;
10246+ di_read_unlock(parent, !AuLock_IR);
10247+ if (unlikely(err < 0))
10248+ goto out_parent;
10249+ err = 0;
1facf9fc 10250+ }
1facf9fc 10251+
4a4d8108
AM
10252+ di_read_lock_parent(parent, AuLock_IR);
10253+ hi_wh = au_hi_wh(inode, bstart);
7f207e10
AM
10254+ if (!S_ISDIR(inode->i_mode)
10255+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108
AM
10256+ && au_plink_test(inode)
10257+ && !d_unhashed(dentry)) {
10258+ err = au_test_and_cpup_dirs(dentry, bstart);
10259+ if (unlikely(err))
10260+ goto out_unlock;
10261+
10262+ /* always superio. */
10263+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
10264+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10265+ if (!err)
10266+ err = au_sio_cpup_simple(dentry, bstart, -1,
10267+ AuCpup_DTIME);
10268+ au_unpin(&pin);
10269+ } else if (hi_wh) {
10270+ /* already copied-up after unlink */
10271+ err = au_reopen_wh(file, bstart, hi_wh);
10272+ *need_reopen = 0;
10273+ }
1facf9fc 10274+
4f0767ce 10275+out_unlock:
4a4d8108 10276+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10277+out_parent:
4a4d8108 10278+ dput(parent);
4f0767ce 10279+out:
1308ab2a 10280+ return err;
dece6358 10281+}
1facf9fc 10282+
4a4d8108 10283+static void au_do_refresh_dir(struct file *file)
dece6358 10284+{
4a4d8108
AM
10285+ aufs_bindex_t bindex, bend, new_bindex, brid;
10286+ struct au_hfile *p, tmp, *q;
10287+ struct au_finfo *finfo;
1308ab2a 10288+ struct super_block *sb;
4a4d8108 10289+ struct au_fidir *fidir;
1facf9fc 10290+
4a4d8108 10291+ FiMustWriteLock(file);
1facf9fc 10292+
4a4d8108
AM
10293+ sb = file->f_dentry->d_sb;
10294+ finfo = au_fi(file);
10295+ fidir = finfo->fi_hdir;
10296+ AuDebugOn(!fidir);
10297+ p = fidir->fd_hfile + finfo->fi_btop;
10298+ brid = p->hf_br->br_id;
10299+ bend = fidir->fd_bbot;
10300+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
10301+ if (!p->hf_file)
10302+ continue;
1308ab2a 10303+
4a4d8108
AM
10304+ new_bindex = au_br_index(sb, p->hf_br->br_id);
10305+ if (new_bindex == bindex)
10306+ continue;
10307+ if (new_bindex < 0) {
10308+ au_set_h_fptr(file, bindex, NULL);
10309+ continue;
10310+ }
1308ab2a 10311+
4a4d8108
AM
10312+ /* swap two lower inode, and loop again */
10313+ q = fidir->fd_hfile + new_bindex;
10314+ tmp = *q;
10315+ *q = *p;
10316+ *p = tmp;
10317+ if (tmp.hf_file) {
10318+ bindex--;
10319+ p--;
10320+ }
10321+ }
1308ab2a 10322+
4a4d8108 10323+ p = fidir->fd_hfile;
027c5e7a 10324+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
4a4d8108
AM
10325+ bend = au_sbend(sb);
10326+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
10327+ finfo->fi_btop++, p++)
10328+ if (p->hf_file) {
10329+ if (p->hf_file->f_dentry
10330+ && p->hf_file->f_dentry->d_inode)
10331+ break;
10332+ else
10333+ au_hfput(p, file);
10334+ }
10335+ } else {
10336+ bend = au_br_index(sb, brid);
10337+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
10338+ finfo->fi_btop++, p++)
10339+ if (p->hf_file)
10340+ au_hfput(p, file);
10341+ bend = au_sbend(sb);
10342+ }
1308ab2a 10343+
4a4d8108
AM
10344+ p = fidir->fd_hfile + bend;
10345+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
10346+ fidir->fd_bbot--, p--)
10347+ if (p->hf_file) {
10348+ if (p->hf_file->f_dentry
10349+ && p->hf_file->f_dentry->d_inode)
10350+ break;
10351+ else
10352+ au_hfput(p, file);
10353+ }
10354+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 10355+}
10356+
4a4d8108
AM
10357+/*
10358+ * after branch manipulating, refresh the file.
10359+ */
10360+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 10361+{
4a4d8108
AM
10362+ int err, need_reopen;
10363+ aufs_bindex_t bend, bindex;
10364+ struct dentry *dentry;
1308ab2a 10365+ struct au_finfo *finfo;
4a4d8108 10366+ struct au_hfile *hfile;
1facf9fc 10367+
4a4d8108 10368+ dentry = file->f_dentry;
1308ab2a 10369+ finfo = au_fi(file);
4a4d8108
AM
10370+ if (!finfo->fi_hdir) {
10371+ hfile = &finfo->fi_htop;
10372+ AuDebugOn(!hfile->hf_file);
10373+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
10374+ AuDebugOn(bindex < 0);
10375+ if (bindex != finfo->fi_btop)
10376+ au_set_fbstart(file, bindex);
10377+ } else {
10378+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
10379+ if (unlikely(err))
10380+ goto out;
10381+ au_do_refresh_dir(file);
10382+ }
1facf9fc 10383+
4a4d8108
AM
10384+ err = 0;
10385+ need_reopen = 1;
10386+ if (!au_test_mmapped(file))
10387+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 10388+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
10389+ err = reopen(file);
10390+ if (!err) {
10391+ au_update_figen(file);
10392+ goto out; /* success */
10393+ }
10394+
10395+ /* error, close all lower files */
10396+ if (finfo->fi_hdir) {
10397+ bend = au_fbend_dir(file);
10398+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
10399+ au_set_h_fptr(file, bindex, NULL);
10400+ }
1facf9fc 10401+
4f0767ce 10402+out:
1facf9fc 10403+ return err;
10404+}
10405+
4a4d8108
AM
10406+/* common function to regular file and dir */
10407+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10408+ int wlock)
dece6358 10409+{
1308ab2a 10410+ int err;
4a4d8108
AM
10411+ unsigned int sigen, figen;
10412+ aufs_bindex_t bstart;
10413+ unsigned char pseudo_link;
10414+ struct dentry *dentry;
10415+ struct inode *inode;
1facf9fc 10416+
4a4d8108
AM
10417+ err = 0;
10418+ dentry = file->f_dentry;
10419+ inode = dentry->d_inode;
10420+ AuDebugOn(au_special_file(inode->i_mode));
10421+ sigen = au_sigen(dentry->d_sb);
10422+ fi_write_lock(file);
10423+ figen = au_figen(file);
10424+ di_write_lock_child(dentry);
10425+ bstart = au_dbstart(dentry);
10426+ pseudo_link = (bstart != au_ibstart(inode));
10427+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
10428+ if (!wlock) {
10429+ di_downgrade_lock(dentry, AuLock_IR);
10430+ fi_downgrade_lock(file);
10431+ }
10432+ goto out; /* success */
10433+ }
dece6358 10434+
4a4d8108 10435+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 10436+ if (au_digen_test(dentry, sigen)) {
4a4d8108 10437+ err = au_reval_dpath(dentry, sigen);
027c5e7a 10438+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 10439+ }
dece6358 10440+
027c5e7a
AM
10441+ if (!err)
10442+ err = refresh_file(file, reopen);
4a4d8108
AM
10443+ if (!err) {
10444+ if (!wlock) {
10445+ di_downgrade_lock(dentry, AuLock_IR);
10446+ fi_downgrade_lock(file);
10447+ }
10448+ } else {
10449+ di_write_unlock(dentry);
10450+ fi_write_unlock(file);
10451+ }
1facf9fc 10452+
4f0767ce 10453+out:
1308ab2a 10454+ return err;
10455+}
1facf9fc 10456+
4a4d8108
AM
10457+/* ---------------------------------------------------------------------- */
10458+
10459+/* cf. aufs_nopage() */
10460+/* for madvise(2) */
10461+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 10462+{
4a4d8108
AM
10463+ unlock_page(page);
10464+ return 0;
10465+}
1facf9fc 10466+
4a4d8108
AM
10467+/* it will never be called, but necessary to support O_DIRECT */
10468+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
10469+ const struct iovec *iov, loff_t offset,
10470+ unsigned long nr_segs)
10471+{ BUG(); return 0; }
1facf9fc 10472+
4a4d8108
AM
10473+/*
10474+ * it will never be called, but madvise and fadvise behaves differently
10475+ * when get_xip_mem is defined
10476+ */
10477+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
10478+ int create, void **kmem, unsigned long *pfn)
10479+{ BUG(); return 0; }
1facf9fc 10480+
4a4d8108
AM
/*
 * they will never be called.
 * With CONFIG_AUFS_DEBUG, each of these address_space operations is a stub
 * which reports itself through AuUnsupport() and returns zero (or nothing).
 */
#ifdef CONFIG_AUFS_DEBUG
static int aufs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_writepage(struct page *page, struct writeback_control *wbc)
{
	AuUnsupport();
	return 0;
}

static int aufs_set_page_dirty(struct page *page)
{
	AuUnsupport();
	return 0;
}

static void aufs_invalidatepage(struct page *page, unsigned long offset)
{
	AuUnsupport();
}

static int aufs_releasepage(struct page *page, gfp_t gfp)
{
	AuUnsupport();
	return 0;
}

static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
			    struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_launder_page(struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_is_partially_uptodate(struct page *page,
				      read_descriptor_t *desc,
				      unsigned long from)
{
	AuUnsupport();
	return 0;
}

static int aufs_error_remove_page(struct address_space *mapping,
				  struct page *page)
{
	AuUnsupport();
	return 0;
}
#endif /* CONFIG_AUFS_DEBUG */
10513+
10514+const struct address_space_operations aufs_aop = {
10515+ .readpage = aufs_readpage,
10516+ .direct_IO = aufs_direct_IO,
10517+ .get_xip_mem = aufs_get_xip_mem,
10518+#ifdef CONFIG_AUFS_DEBUG
10519+ .writepage = aufs_writepage,
4a4d8108
AM
10520+ /* no writepages, because of writepage */
10521+ .set_page_dirty = aufs_set_page_dirty,
10522+ /* no readpages, because of readpage */
10523+ .write_begin = aufs_write_begin,
10524+ .write_end = aufs_write_end,
10525+ /* no bmap, no block device */
10526+ .invalidatepage = aufs_invalidatepage,
10527+ .releasepage = aufs_releasepage,
10528+ .migratepage = aufs_migratepage,
10529+ .launder_page = aufs_launder_page,
10530+ .is_partially_uptodate = aufs_is_partially_uptodate,
10531+ .error_remove_page = aufs_error_remove_page
10532+#endif /* CONFIG_AUFS_DEBUG */
dece6358 10533+};
7f207e10
AM
10534diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
10535--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
10536+++ linux/fs/aufs/file.h 2011-08-24 13:30:24.731313534 +0200
10537@@ -0,0 +1,299 @@
4a4d8108 10538+/*
027c5e7a 10539+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10540+ *
10541+ * This program, aufs is free software; you can redistribute it and/or modify
10542+ * it under the terms of the GNU General Public License as published by
10543+ * the Free Software Foundation; either version 2 of the License, or
10544+ * (at your option) any later version.
10545+ *
10546+ * This program is distributed in the hope that it will be useful,
10547+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10548+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10549+ * GNU General Public License for more details.
10550+ *
10551+ * You should have received a copy of the GNU General Public License
10552+ * along with this program; if not, write to the Free Software
10553+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10554+ */
1facf9fc 10555+
4a4d8108
AM
10556+/*
10557+ * file operations
10558+ */
1facf9fc 10559+
4a4d8108
AM
10560+#ifndef __AUFS_FILE_H__
10561+#define __AUFS_FILE_H__
1facf9fc 10562+
4a4d8108 10563+#ifdef __KERNEL__
1facf9fc 10564+
2cbb1c4b 10565+#include <linux/file.h>
4a4d8108
AM
10566+#include <linux/fs.h>
10567+#include <linux/poll.h>
10568+#include <linux/aufs_type.h>
10569+#include "rwsem.h"
1facf9fc 10570+
4a4d8108
AM
10571+struct au_branch;
10572+struct au_hfile {
10573+ struct file *hf_file;
10574+ struct au_branch *hf_br;
10575+};
1facf9fc 10576+
4a4d8108
AM
10577+struct au_vdir;
10578+struct au_fidir {
10579+ aufs_bindex_t fd_bbot;
10580+ aufs_bindex_t fd_nent;
10581+ struct au_vdir *fd_vdir_cache;
10582+ struct au_hfile fd_hfile[];
10583+};
1facf9fc 10584+
4a4d8108 10585+static inline int au_fidir_sz(int nent)
dece6358 10586+{
4f0767ce
JR
10587+ AuDebugOn(nent < 0);
10588+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 10589+}
1facf9fc 10590+
4a4d8108
AM
10591+struct au_finfo {
10592+ atomic_t fi_generation;
dece6358 10593+
4a4d8108
AM
10594+ struct au_rwsem fi_rwsem;
10595+ aufs_bindex_t fi_btop;
10596+
10597+ /* do not union them */
10598+ struct { /* for non-dir */
10599+ struct au_hfile fi_htop;
2cbb1c4b 10600+ atomic_t fi_mmapped;
4a4d8108
AM
10601+ };
10602+ struct au_fidir *fi_hdir; /* for dir only */
10603+} ____cacheline_aligned_in_smp;
1facf9fc 10604+
4a4d8108 10605+/* ---------------------------------------------------------------------- */
1facf9fc 10606+
4a4d8108
AM
10607+/* file.c */
10608+extern const struct address_space_operations aufs_aop;
10609+unsigned int au_file_roflags(unsigned int flags);
10610+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
10611+ struct file *file);
10612+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
10613+ struct au_fidir *fidir);
10614+int au_reopen_nondir(struct file *file);
10615+struct au_pin;
10616+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
10617+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10618+ int wlock);
10619+int au_do_flush(struct file *file, fl_owner_t id,
10620+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 10621+
4a4d8108
AM
10622+/* poll.c */
10623+#ifdef CONFIG_AUFS_POLL
10624+unsigned int aufs_poll(struct file *file, poll_table *wait);
10625+#endif
1facf9fc 10626+
4a4d8108
AM
10627+#ifdef CONFIG_AUFS_BR_HFSPLUS
10628+/* hfsplus.c */
10629+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
10630+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
10631+ struct file *h_file);
10632+#else
10633+static inline
10634+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
dece6358 10635+{
4a4d8108
AM
10636+ return NULL;
10637+}
1facf9fc 10638+
4a4d8108
AM
10639+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
10640+ struct file *h_file);
10641+#endif
1facf9fc 10642+
4a4d8108
AM
10643+/* f_op.c */
10644+extern const struct file_operations aufs_file_fop;
4a4d8108
AM
10645+int au_do_open_nondir(struct file *file, int flags);
10646+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
10647+
10648+#ifdef CONFIG_AUFS_SP_IATTR
10649+/* f_op_sp.c */
10650+int au_special_file(umode_t mode);
10651+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
10652+#else
10653+AuStubInt0(au_special_file, umode_t mode)
10654+static inline void au_init_special_fop(struct inode *inode, umode_t mode,
10655+ dev_t rdev)
10656+{
10657+ init_special_inode(inode, mode, rdev);
10658+}
10659+#endif
1facf9fc 10660+
4a4d8108
AM
10661+/* finfo.c */
10662+void au_hfput(struct au_hfile *hf, struct file *file);
10663+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
10664+ struct file *h_file);
1facf9fc 10665+
4a4d8108 10666+void au_update_figen(struct file *file);
4a4d8108
AM
10667+struct au_fidir *au_fidir_alloc(struct super_block *sb);
10668+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 10669+
4a4d8108
AM
10670+void au_fi_init_once(void *_fi);
10671+void au_finfo_fin(struct file *file);
10672+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 10673+
4a4d8108
AM
10674+/* ioctl.c */
10675+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10676+#ifdef CONFIG_COMPAT
10677+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
10678+ unsigned long arg);
10679+#endif
1facf9fc 10680+
4a4d8108 10681+/* ---------------------------------------------------------------------- */
1facf9fc 10682+
4a4d8108
AM
10683+static inline struct au_finfo *au_fi(struct file *file)
10684+{
10685+ return file->private_data;
10686+}
1facf9fc 10687+
4a4d8108 10688+/* ---------------------------------------------------------------------- */
1facf9fc 10689+
4a4d8108
AM
10690+/*
10691+ * fi_read_lock, fi_write_lock,
10692+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
10693+ */
10694+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 10695+
4a4d8108
AM
10696+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
10697+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
10698+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 10699+
1308ab2a 10700+/* ---------------------------------------------------------------------- */
10701+
4a4d8108
AM
10702+/* todo: hard/soft set? */
10703+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 10704+{
4a4d8108
AM
10705+ FiMustAnyLock(file);
10706+ return au_fi(file)->fi_btop;
10707+}
dece6358 10708+
4a4d8108
AM
10709+static inline aufs_bindex_t au_fbend_dir(struct file *file)
10710+{
10711+ FiMustAnyLock(file);
10712+ AuDebugOn(!au_fi(file)->fi_hdir);
10713+ return au_fi(file)->fi_hdir->fd_bbot;
10714+}
1facf9fc 10715+
4a4d8108
AM
10716+static inline struct au_vdir *au_fvdir_cache(struct file *file)
10717+{
10718+ FiMustAnyLock(file);
10719+ AuDebugOn(!au_fi(file)->fi_hdir);
10720+ return au_fi(file)->fi_hdir->fd_vdir_cache;
10721+}
1facf9fc 10722+
4a4d8108
AM
10723+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
10724+{
10725+ FiMustWriteLock(file);
10726+ au_fi(file)->fi_btop = bindex;
10727+}
1facf9fc 10728+
4a4d8108
AM
10729+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
10730+{
10731+ FiMustWriteLock(file);
10732+ AuDebugOn(!au_fi(file)->fi_hdir);
10733+ au_fi(file)->fi_hdir->fd_bbot = bindex;
10734+}
1308ab2a 10735+
4a4d8108
AM
10736+static inline void au_set_fvdir_cache(struct file *file,
10737+ struct au_vdir *vdir_cache)
10738+{
10739+ FiMustWriteLock(file);
10740+ AuDebugOn(!au_fi(file)->fi_hdir);
10741+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
10742+}
dece6358 10743+
4a4d8108
AM
10744+static inline struct file *au_hf_top(struct file *file)
10745+{
10746+ FiMustAnyLock(file);
10747+ AuDebugOn(au_fi(file)->fi_hdir);
10748+ return au_fi(file)->fi_htop.hf_file;
10749+}
1facf9fc 10750+
4a4d8108
AM
10751+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
10752+{
10753+ FiMustAnyLock(file);
10754+ AuDebugOn(!au_fi(file)->fi_hdir);
10755+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
10756+}
10757+
4a4d8108
AM
10758+/* todo: memory barrier? */
10759+static inline unsigned int au_figen(struct file *f)
dece6358 10760+{
4a4d8108
AM
10761+ return atomic_read(&au_fi(f)->fi_generation);
10762+}
dece6358 10763+
2cbb1c4b
JR
10764+static inline void au_set_mmapped(struct file *f)
10765+{
10766+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
10767+ return;
10768+ pr_warning("fi_mmapped wrapped around\n");
10769+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
10770+ ;
10771+}
10772+
10773+static inline void au_unset_mmapped(struct file *f)
10774+{
10775+ atomic_dec(&au_fi(f)->fi_mmapped);
10776+}
10777+
4a4d8108
AM
10778+static inline int au_test_mmapped(struct file *f)
10779+{
2cbb1c4b
JR
10780+ return atomic_read(&au_fi(f)->fi_mmapped);
10781+}
10782+
10783+/* customize vma->vm_file */
10784+
10785+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
10786+ struct file *file)
10787+{
53392da6
AM
10788+ struct file *f;
10789+
10790+ f = vma->vm_file;
2cbb1c4b
JR
10791+ get_file(file);
10792+ vma->vm_file = file;
53392da6 10793+ fput(f);
2cbb1c4b
JR
10794+}
10795+
10796+#ifdef CONFIG_MMU
10797+#define AuDbgVmRegion(file, vma) do {} while (0)
10798+
10799+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10800+ struct file *file)
10801+{
10802+ au_do_vm_file_reset(vma, file);
10803+}
10804+#else
10805+#define AuDbgVmRegion(file, vma) \
10806+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
10807+
10808+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10809+ struct file *file)
10810+{
53392da6
AM
10811+ struct file *f;
10812+
2cbb1c4b 10813+ au_do_vm_file_reset(vma, file);
53392da6 10814+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
10815+ get_file(file);
10816+ vma->vm_region->vm_file = file;
53392da6 10817+ fput(f);
2cbb1c4b
JR
10818+}
10819+#endif /* CONFIG_MMU */
10820+
10821+/* handle vma->vm_prfile */
10822+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
10823+ struct file *file)
10824+{
10825+#ifdef CONFIG_AUFS_PROC_MAP
10826+ get_file(file);
10827+ vma->vm_prfile = file;
10828+#ifndef CONFIG_MMU
10829+ get_file(file);
10830+ vma->vm_region->vm_prfile = file;
10831+#endif
10832+#endif
4a4d8108 10833+}
1308ab2a 10834+
4a4d8108
AM
10835+#endif /* __KERNEL__ */
10836+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
10837diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
10838--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 10839+++ linux/fs/aufs/finfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 10840@@ -0,0 +1,153 @@
4a4d8108 10841+/*
027c5e7a 10842+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10843+ *
10844+ * This program, aufs is free software; you can redistribute it and/or modify
10845+ * it under the terms of the GNU General Public License as published by
10846+ * the Free Software Foundation; either version 2 of the License, or
10847+ * (at your option) any later version.
10848+ *
10849+ * This program is distributed in the hope that it will be useful,
10850+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10851+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10852+ * GNU General Public License for more details.
10853+ *
10854+ * You should have received a copy of the GNU General Public License
10855+ * along with this program; if not, write to the Free Software
10856+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10857+ */
1308ab2a 10858+
4a4d8108
AM
10859+/*
10860+ * file private data
10861+ */
1facf9fc 10862+
4a4d8108
AM
10863+#include <linux/file.h>
10864+#include "aufs.h"
1facf9fc 10865+
4a4d8108
AM
10866+void au_hfput(struct au_hfile *hf, struct file *file)
10867+{
10868+ /* todo: direct access f_flags */
2cbb1c4b 10869+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
10870+ allow_write_access(hf->hf_file);
10871+ fput(hf->hf_file);
10872+ hf->hf_file = NULL;
e49829fe 10873+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
10874+ hf->hf_br = NULL;
10875+}
1facf9fc 10876+
4a4d8108
AM
10877+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
10878+{
10879+ struct au_finfo *finfo = au_fi(file);
10880+ struct au_hfile *hf;
10881+ struct au_fidir *fidir;
10882+
10883+ fidir = finfo->fi_hdir;
10884+ if (!fidir) {
10885+ AuDebugOn(finfo->fi_btop != bindex);
10886+ hf = &finfo->fi_htop;
10887+ } else
10888+ hf = fidir->fd_hfile + bindex;
10889+
10890+ if (hf && hf->hf_file)
10891+ au_hfput(hf, file);
10892+ if (val) {
10893+ FiMustWriteLock(file);
10894+ hf->hf_file = val;
10895+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
1308ab2a 10896+ }
4a4d8108 10897+}
1facf9fc 10898+
4a4d8108
AM
10899+void au_update_figen(struct file *file)
10900+{
10901+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
10902+ /* smp_mb(); */ /* atomic_set */
1facf9fc 10903+}
10904+
4a4d8108
AM
10905+/* ---------------------------------------------------------------------- */
10906+
4a4d8108
AM
10907+struct au_fidir *au_fidir_alloc(struct super_block *sb)
10908+{
10909+ struct au_fidir *fidir;
10910+ int nbr;
10911+
10912+ nbr = au_sbend(sb) + 1;
10913+ if (nbr < 2)
10914+ nbr = 2; /* initial allocate for 2 branches */
10915+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
10916+ if (fidir) {
10917+ fidir->fd_bbot = -1;
10918+ fidir->fd_nent = nbr;
10919+ fidir->fd_vdir_cache = NULL;
10920+ }
10921+
10922+ return fidir;
10923+}
10924+
10925+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
10926+{
10927+ int err;
10928+ struct au_fidir *fidir, *p;
10929+
10930+ AuRwMustWriteLock(&finfo->fi_rwsem);
10931+ fidir = finfo->fi_hdir;
10932+ AuDebugOn(!fidir);
10933+
10934+ err = -ENOMEM;
10935+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
10936+ GFP_NOFS);
10937+ if (p) {
10938+ p->fd_nent = nbr;
10939+ finfo->fi_hdir = p;
10940+ err = 0;
10941+ }
1facf9fc 10942+
dece6358 10943+ return err;
1facf9fc 10944+}
1308ab2a 10945+
10946+/* ---------------------------------------------------------------------- */
10947+
4a4d8108 10948+void au_finfo_fin(struct file *file)
1308ab2a 10949+{
4a4d8108
AM
10950+ struct au_finfo *finfo;
10951+
7f207e10
AM
10952+ au_nfiles_dec(file->f_dentry->d_sb);
10953+
4a4d8108
AM
10954+ finfo = au_fi(file);
10955+ AuDebugOn(finfo->fi_hdir);
10956+ AuRwDestroy(&finfo->fi_rwsem);
10957+ au_cache_free_finfo(finfo);
1308ab2a 10958+}
1308ab2a 10959+
e49829fe 10960+void au_fi_init_once(void *_finfo)
4a4d8108 10961+{
e49829fe 10962+ struct au_finfo *finfo = _finfo;
2cbb1c4b 10963+ static struct lock_class_key aufs_fi;
1308ab2a 10964+
e49829fe
JR
10965+ au_rw_init(&finfo->fi_rwsem);
10966+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 10967+}
1308ab2a 10968+
4a4d8108
AM
10969+int au_finfo_init(struct file *file, struct au_fidir *fidir)
10970+{
10971+ int err;
10972+ struct au_finfo *finfo;
10973+ struct dentry *dentry;
10974+
10975+ err = -ENOMEM;
10976+ dentry = file->f_dentry;
10977+ finfo = au_cache_alloc_finfo();
10978+ if (unlikely(!finfo))
10979+ goto out;
10980+
10981+ err = 0;
7f207e10 10982+ au_nfiles_inc(dentry->d_sb);
4a4d8108
AM
10983+ au_rw_write_lock(&finfo->fi_rwsem);
10984+ finfo->fi_btop = -1;
10985+ finfo->fi_hdir = fidir;
10986+ atomic_set(&finfo->fi_generation, au_digen(dentry));
10987+ /* smp_mb(); */ /* atomic_set */
10988+
10989+ file->private_data = finfo;
10990+
10991+out:
10992+ return err;
10993+}
7f207e10
AM
10994diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
10995--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
10996+++ linux/fs/aufs/f_op.c 2011-08-24 13:30:24.731313534 +0200
10997@@ -0,0 +1,717 @@
dece6358 10998+/*
027c5e7a 10999+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
11000+ *
11001+ * This program, aufs is free software; you can redistribute it and/or modify
11002+ * it under the terms of the GNU General Public License as published by
11003+ * the Free Software Foundation; either version 2 of the License, or
11004+ * (at your option) any later version.
11005+ *
11006+ * This program is distributed in the hope that it will be useful,
11007+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11008+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11009+ * GNU General Public License for more details.
11010+ *
11011+ * You should have received a copy of the GNU General Public License
11012+ * along with this program; if not, write to the Free Software
11013+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11014+ */
1facf9fc 11015+
11016+/*
4a4d8108 11017+ * file and vm operations
1facf9fc 11018+ */
dece6358
AM
11019+
11020+#include <linux/file.h>
4a4d8108
AM
11021+#include <linux/fs_stack.h>
11022+#include <linux/mman.h>
11023+#include <linux/mm.h>
11024+#include <linux/security.h>
dece6358
AM
11025+#include "aufs.h"
11026+
4a4d8108 11027+int au_do_open_nondir(struct file *file, int flags)
1facf9fc 11028+{
4a4d8108
AM
11029+ int err;
11030+ aufs_bindex_t bindex;
11031+ struct file *h_file;
11032+ struct dentry *dentry;
11033+ struct au_finfo *finfo;
11034+
11035+ FiMustWriteLock(file);
11036+
4a4d8108 11037+ dentry = file->f_dentry;
027c5e7a
AM
11038+ err = au_d_alive(dentry);
11039+ if (unlikely(err))
11040+ goto out;
11041+
4a4d8108
AM
11042+ finfo = au_fi(file);
11043+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 11044+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108
AM
11045+ bindex = au_dbstart(dentry);
11046+ h_file = au_h_open(dentry, bindex, flags, file);
11047+ if (IS_ERR(h_file))
11048+ err = PTR_ERR(h_file);
11049+ else {
11050+ au_set_fbstart(file, bindex);
11051+ au_set_h_fptr(file, bindex, h_file);
11052+ au_update_figen(file);
11053+ /* todo: necessary? */
11054+ /* file->f_ra = h_file->f_ra; */
11055+ }
027c5e7a
AM
11056+
11057+out:
4a4d8108 11058+ return err;
1facf9fc 11059+}
11060+
4a4d8108
AM
11061+static int aufs_open_nondir(struct inode *inode __maybe_unused,
11062+ struct file *file)
1facf9fc 11063+{
4a4d8108 11064+ int err;
1308ab2a 11065+ struct super_block *sb;
1facf9fc 11066+
2cbb1c4b 11067+ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
4a4d8108
AM
11068+ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
11069+ file->f_mode);
1facf9fc 11070+
4a4d8108
AM
11071+ sb = file->f_dentry->d_sb;
11072+ si_read_lock(sb, AuLock_FLUSH);
11073+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
11074+ si_read_unlock(sb);
11075+ return err;
11076+}
1facf9fc 11077+
4a4d8108
AM
11078+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
11079+{
11080+ struct au_finfo *finfo;
11081+ aufs_bindex_t bindex;
1facf9fc 11082+
4a4d8108
AM
11083+ finfo = au_fi(file);
11084+ bindex = finfo->fi_btop;
0c5527e5
AM
11085+ if (bindex >= 0) {
11086+ /* remove me from sb->s_files */
11087+ file_sb_list_del(file);
4a4d8108 11088+ au_set_h_fptr(file, bindex, NULL);
0c5527e5 11089+ }
7f207e10 11090+
4a4d8108
AM
11091+ au_finfo_fin(file);
11092+ return 0;
1facf9fc 11093+}
11094+
4a4d8108
AM
11095+/* ---------------------------------------------------------------------- */
11096+
11097+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 11098+{
1308ab2a 11099+ int err;
4a4d8108
AM
11100+ struct file *h_file;
11101+
11102+ err = 0;
11103+ h_file = au_hf_top(file);
11104+ if (h_file)
11105+ err = vfsub_flush(h_file, id);
11106+ return err;
11107+}
11108+
11109+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
11110+{
11111+ return au_do_flush(file, id, au_do_flush_nondir);
11112+}
11113+
11114+/* ---------------------------------------------------------------------- */
11115+
11116+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
11117+ loff_t *ppos)
11118+{
11119+ ssize_t err;
dece6358 11120+ struct dentry *dentry;
4a4d8108 11121+ struct file *h_file;
dece6358 11122+ struct super_block *sb;
1facf9fc 11123+
dece6358
AM
11124+ dentry = file->f_dentry;
11125+ sb = dentry->d_sb;
e49829fe 11126+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11127+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
dece6358
AM
11128+ if (unlikely(err))
11129+ goto out;
1facf9fc 11130+
4a4d8108
AM
11131+ h_file = au_hf_top(file);
11132+ err = vfsub_read_u(h_file, buf, count, ppos);
11133+ /* todo: necessary? */
11134+ /* file->f_ra = h_file->f_ra; */
11135+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1308ab2a 11136+
4a4d8108
AM
11137+ di_read_unlock(dentry, AuLock_IR);
11138+ fi_read_unlock(file);
4f0767ce 11139+out:
dece6358
AM
11140+ si_read_unlock(sb);
11141+ return err;
11142+}
1facf9fc 11143+
e49829fe
JR
11144+/*
11145+ * todo: very ugly
11146+ * it safely acquires both i_mutex and si_rwsem (for read).
11147+ * if the plink maintenance mode continues forever (that is the problem),
11148+ * it may loop forever.
11149+ */
11150+static void au_mtx_and_read_lock(struct inode *inode)
11151+{
11152+ int err;
11153+ struct super_block *sb = inode->i_sb;
11154+
11155+ while (1) {
11156+ mutex_lock(&inode->i_mutex);
11157+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11158+ if (!err)
11159+ break;
11160+ mutex_unlock(&inode->i_mutex);
11161+ si_read_lock(sb, AuLock_NOPLMW);
11162+ si_read_unlock(sb);
11163+ }
11164+}
11165+
4a4d8108
AM
11166+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
11167+ size_t count, loff_t *ppos)
dece6358 11168+{
4a4d8108
AM
11169+ ssize_t err;
11170+ struct au_pin pin;
dece6358 11171+ struct dentry *dentry;
4a4d8108 11172+ struct inode *inode;
4a4d8108
AM
11173+ struct file *h_file;
11174+ char __user *buf = (char __user *)ubuf;
1facf9fc 11175+
dece6358 11176+ dentry = file->f_dentry;
4a4d8108 11177+ inode = dentry->d_inode;
e49829fe 11178+ au_mtx_and_read_lock(inode);
1facf9fc 11179+
4a4d8108
AM
11180+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11181+ if (unlikely(err))
11182+ goto out;
1facf9fc 11183+
4a4d8108
AM
11184+ err = au_ready_to_write(file, -1, &pin);
11185+ di_downgrade_lock(dentry, AuLock_IR);
11186+ if (unlikely(err))
11187+ goto out_unlock;
1facf9fc 11188+
4a4d8108
AM
11189+ h_file = au_hf_top(file);
11190+ au_unpin(&pin);
11191+ err = vfsub_write_u(h_file, buf, count, ppos);
11192+ au_cpup_attr_timesizes(inode);
11193+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11194+
4f0767ce 11195+out_unlock:
4a4d8108
AM
11196+ di_read_unlock(dentry, AuLock_IR);
11197+ fi_write_unlock(file);
4f0767ce 11198+out:
e49829fe 11199+ si_read_unlock(inode->i_sb);
4a4d8108 11200+ mutex_unlock(&inode->i_mutex);
dece6358
AM
11201+ return err;
11202+}
1facf9fc 11203+
4a4d8108
AM
11204+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
11205+ const struct iovec *iov, unsigned long nv, loff_t pos)
dece6358 11206+{
4a4d8108
AM
11207+ ssize_t err;
11208+ struct file *file;
11209+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
11210+ loff_t);
1facf9fc 11211+
4a4d8108
AM
11212+ err = security_file_permission(h_file, rw);
11213+ if (unlikely(err))
11214+ goto out;
1facf9fc 11215+
4a4d8108
AM
11216+ err = -ENOSYS;
11217+ func = NULL;
11218+ if (rw == MAY_READ)
11219+ func = h_file->f_op->aio_read;
11220+ else if (rw == MAY_WRITE)
11221+ func = h_file->f_op->aio_write;
11222+ if (func) {
11223+ file = kio->ki_filp;
11224+ kio->ki_filp = h_file;
2cbb1c4b 11225+ lockdep_off();
4a4d8108 11226+ err = func(kio, iov, nv, pos);
2cbb1c4b 11227+ lockdep_on();
4a4d8108
AM
11228+ kio->ki_filp = file;
11229+ } else
11230+ /* currently there is no such fs */
11231+ WARN_ON_ONCE(1);
1facf9fc 11232+
4f0767ce 11233+out:
dece6358
AM
11234+ return err;
11235+}
1facf9fc 11236+
4a4d8108
AM
11237+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
11238+ unsigned long nv, loff_t pos)
1facf9fc 11239+{
4a4d8108
AM
11240+ ssize_t err;
11241+ struct file *file, *h_file;
11242+ struct dentry *dentry;
dece6358 11243+ struct super_block *sb;
1facf9fc 11244+
4a4d8108 11245+ file = kio->ki_filp;
dece6358 11246+ dentry = file->f_dentry;
1308ab2a 11247+ sb = dentry->d_sb;
e49829fe 11248+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11249+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11250+ if (unlikely(err))
11251+ goto out;
11252+
11253+ h_file = au_hf_top(file);
11254+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
11255+ /* todo: necessary? */
11256+ /* file->f_ra = h_file->f_ra; */
11257+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
11258+ di_read_unlock(dentry, AuLock_IR);
11259+ fi_read_unlock(file);
1facf9fc 11260+
4f0767ce 11261+out:
4a4d8108 11262+ si_read_unlock(sb);
1308ab2a 11263+ return err;
11264+}
1facf9fc 11265+
4a4d8108
AM
11266+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
11267+ unsigned long nv, loff_t pos)
1308ab2a 11268+{
4a4d8108
AM
11269+ ssize_t err;
11270+ struct au_pin pin;
11271+ struct dentry *dentry;
11272+ struct inode *inode;
4a4d8108 11273+ struct file *file, *h_file;
1308ab2a 11274+
4a4d8108 11275+ file = kio->ki_filp;
1308ab2a 11276+ dentry = file->f_dentry;
1308ab2a 11277+ inode = dentry->d_inode;
e49829fe
JR
11278+ au_mtx_and_read_lock(inode);
11279+
4a4d8108
AM
11280+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11281+ if (unlikely(err))
1308ab2a 11282+ goto out;
1facf9fc 11283+
4a4d8108
AM
11284+ err = au_ready_to_write(file, -1, &pin);
11285+ di_downgrade_lock(dentry, AuLock_IR);
dece6358 11286+ if (unlikely(err))
4a4d8108 11287+ goto out_unlock;
1facf9fc 11288+
4a4d8108
AM
11289+ au_unpin(&pin);
11290+ h_file = au_hf_top(file);
11291+ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
11292+ au_cpup_attr_timesizes(inode);
11293+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11294+
4f0767ce 11295+out_unlock:
4a4d8108
AM
11296+ di_read_unlock(dentry, AuLock_IR);
11297+ fi_write_unlock(file);
4f0767ce 11298+out:
e49829fe 11299+ si_read_unlock(inode->i_sb);
4a4d8108 11300+ mutex_unlock(&inode->i_mutex);
dece6358 11301+ return err;
1facf9fc 11302+}
11303+
4a4d8108
AM
11304+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
11305+ struct pipe_inode_info *pipe, size_t len,
11306+ unsigned int flags)
1facf9fc 11307+{
4a4d8108
AM
11308+ ssize_t err;
11309+ struct file *h_file;
11310+ struct dentry *dentry;
dece6358 11311+ struct super_block *sb;
1facf9fc 11312+
dece6358 11313+ dentry = file->f_dentry;
dece6358 11314+ sb = dentry->d_sb;
e49829fe 11315+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11316+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11317+ if (unlikely(err))
dece6358 11318+ goto out;
1facf9fc 11319+
4a4d8108
AM
11320+ err = -EINVAL;
11321+ h_file = au_hf_top(file);
11322+ if (au_test_loopback_kthread()) {
87a755f4
AM
11323+ au_warn_loopback(h_file->f_dentry->d_sb);
11324+ if (file->f_mapping != h_file->f_mapping) {
11325+ file->f_mapping = h_file->f_mapping;
11326+ smp_mb(); /* unnecessary? */
11327+ }
1308ab2a 11328+ }
4a4d8108
AM
11329+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
11330+ /* todo: necessary? */
11331+ /* file->f_ra = h_file->f_ra; */
11332+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1facf9fc 11333+
4a4d8108
AM
11334+ di_read_unlock(dentry, AuLock_IR);
11335+ fi_read_unlock(file);
1facf9fc 11336+
4f0767ce 11337+out:
4a4d8108 11338+ si_read_unlock(sb);
dece6358 11339+ return err;
1facf9fc 11340+}
11341+
4a4d8108
AM
11342+static ssize_t
11343+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
11344+ size_t len, unsigned int flags)
1facf9fc 11345+{
4a4d8108
AM
11346+ ssize_t err;
11347+ struct au_pin pin;
11348+ struct dentry *dentry;
11349+ struct inode *inode;
4a4d8108 11350+ struct file *h_file;
1facf9fc 11351+
4a4d8108
AM
11352+ dentry = file->f_dentry;
11353+ inode = dentry->d_inode;
e49829fe 11354+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11355+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11356+ if (unlikely(err))
11357+ goto out;
1facf9fc 11358+
4a4d8108
AM
11359+ err = au_ready_to_write(file, -1, &pin);
11360+ di_downgrade_lock(dentry, AuLock_IR);
11361+ if (unlikely(err))
11362+ goto out_unlock;
1facf9fc 11363+
4a4d8108
AM
11364+ h_file = au_hf_top(file);
11365+ au_unpin(&pin);
11366+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
11367+ au_cpup_attr_timesizes(inode);
11368+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11369+
4f0767ce 11370+out_unlock:
4a4d8108
AM
11371+ di_read_unlock(dentry, AuLock_IR);
11372+ fi_write_unlock(file);
4f0767ce 11373+out:
e49829fe 11374+ si_read_unlock(inode->i_sb);
4a4d8108
AM
11375+ mutex_unlock(&inode->i_mutex);
11376+ return err;
11377+}
1facf9fc 11378+
4a4d8108
AM
11379+/* ---------------------------------------------------------------------- */
11380+
4a4d8108
AM
11381+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
11382+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
1308ab2a 11383+
4a4d8108 11384+static unsigned long au_arch_prot_conv(unsigned long flags)
dece6358 11385+{
4a4d8108
AM
11386+ /* currently ppc64 only */
11387+#ifdef CONFIG_PPC64
11388+ /* cf. linux/arch/powerpc/include/asm/mman.h */
11389+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
11390+ return AuConv_VM_PROT(flags, SAO);
11391+#else
11392+ AuDebugOn(arch_calc_vm_prot_bits(-1));
11393+ return 0;
11394+#endif
dece6358
AM
11395+}
11396+
4a4d8108 11397+static unsigned long au_prot_conv(unsigned long flags)
dece6358 11398+{
4a4d8108
AM
11399+ return AuConv_VM_PROT(flags, READ)
11400+ | AuConv_VM_PROT(flags, WRITE)
11401+ | AuConv_VM_PROT(flags, EXEC)
11402+ | au_arch_prot_conv(flags);
dece6358
AM
11403+}
11404+
4a4d8108
AM
11405+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
11406+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
dece6358 11407+
4a4d8108 11408+static unsigned long au_flag_conv(unsigned long flags)
dece6358 11409+{
4a4d8108
AM
11410+ return AuConv_VM_MAP(flags, GROWSDOWN)
11411+ | AuConv_VM_MAP(flags, DENYWRITE)
11412+ | AuConv_VM_MAP(flags, EXECUTABLE)
11413+ | AuConv_VM_MAP(flags, LOCKED);
dece6358 11414+}
1308ab2a 11415+/*
4a4d8108
AM
11416+ * This is another ugly approach to keep the lock order, particularly
11417+ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
11418+ * find it in git-log, if you want.
1308ab2a 11419+ *
4a4d8108
AM
11420+ * native readdir: i_mutex, copy_to_user, mmap_sem
11421+ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
1308ab2a 11422+ *
4a4d8108
AM
11423+ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
11424+ * acquire aufs rwsem. It introduces a circular locking dependency.
11425+ * To address this problem, aufs_mmap() delegates the part which requires aufs
11426+ * rwsem to its internal workqueue.
1308ab2a 11427+ */
11428+
4a4d8108
AM
11429+struct au_mmap_pre_args {
11430+ /* input */
11431+ struct file *file;
11432+ struct vm_area_struct *vma;
1308ab2a 11433+
4a4d8108
AM
11434+ /* output */
11435+ int *errp;
11436+ struct file *h_file;
11437+ struct au_branch *br;
4a4d8108 11438+};
dece6358 11439+
4a4d8108 11440+static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
2cbb1c4b 11441+ struct file **h_file, struct au_branch **br)
dece6358 11442+{
4a4d8108
AM
11443+ int err;
11444+ aufs_bindex_t bstart;
11445+ const unsigned char wlock
11446+ = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
11447+ struct dentry *dentry;
11448+ struct super_block *sb;
1308ab2a 11449+
4a4d8108
AM
11450+ dentry = file->f_dentry;
11451+ sb = dentry->d_sb;
e49829fe 11452+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108
AM
11453+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11454+ if (unlikely(err))
11455+ goto out;
11456+
4a4d8108
AM
11457+ if (wlock) {
11458+ struct au_pin pin;
11459+
11460+ err = au_ready_to_write(file, -1, &pin);
11461+ di_write_unlock(dentry);
11462+ if (unlikely(err))
11463+ goto out_unlock;
11464+ au_unpin(&pin);
11465+ } else
11466+ di_write_unlock(dentry);
11467+ bstart = au_fbstart(file);
11468+ *br = au_sbr(sb, bstart);
11469+ *h_file = au_hf_top(file);
11470+ get_file(*h_file);
2cbb1c4b 11471+ au_set_mmapped(file);
4a4d8108
AM
11472+
11473+out_unlock:
11474+ fi_write_unlock(file);
11475+out:
11476+ si_read_unlock(sb);
11477+ return err;
dece6358
AM
11478+}
11479+
4a4d8108 11480+static void au_call_mmap_pre(void *args)
dece6358 11481+{
4a4d8108 11482+ struct au_mmap_pre_args *a = args;
2cbb1c4b 11483+ *a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->br);
dece6358
AM
11484+}
11485+
4a4d8108 11486+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 11487+{
4a4d8108 11488+ int err, wkq_err;
2cbb1c4b 11489+ unsigned long prot;
4a4d8108
AM
11490+ struct au_mmap_pre_args args = {
11491+ .file = file,
11492+ .vma = vma,
11493+ .errp = &err
11494+ };
11495+
2cbb1c4b 11496+ AuDbgVmRegion(file, vma);
b752ccd1 11497+ wkq_err = au_wkq_wait_pre(au_call_mmap_pre, &args);
4a4d8108
AM
11498+ if (unlikely(wkq_err))
11499+ err = wkq_err;
11500+ if (unlikely(err))
11501+ goto out;
1308ab2a 11502+
2cbb1c4b
JR
11503+ au_vm_file_reset(vma, args.h_file);
11504+ prot = au_prot_conv(vma->vm_flags);
11505+ err = security_file_mmap(args.h_file, /*reqprot*/prot, prot,
11506+ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
4a4d8108 11507+ if (unlikely(err))
2cbb1c4b 11508+ goto out_reset;
4a4d8108 11509+
2cbb1c4b
JR
11510+ err = args.h_file->f_op->mmap(args.h_file, vma);
11511+ if (unlikely(err))
11512+ goto out_reset;
4a4d8108 11513+
2cbb1c4b 11514+ au_vm_prfile_set(vma, file);
4a4d8108
AM
11515+ vfsub_file_accessed(args.h_file);
11516+ /* update without lock, I don't think it is a problem */
2cbb1c4b
JR
11517+ fsstack_copy_attr_atime(file->f_dentry->d_inode,
11518+ args.h_file->f_dentry->d_inode);
11519+ goto out_fput; /* success */
4a4d8108 11520+
2cbb1c4b
JR
11521+out_reset:
11522+ au_unset_mmapped(file);
11523+ au_vm_file_reset(vma, file);
11524+out_fput:
4a4d8108 11525+ fput(args.h_file);
4f0767ce 11526+out:
4a4d8108
AM
11527+ return err;
11528+}
11529+
11530+/* ---------------------------------------------------------------------- */
11531+
b752ccd1 11532+static int aufs_fsync_nondir(struct file *file, int datasync)
4a4d8108
AM
11533+{
11534+ int err;
11535+ struct au_pin pin;
b752ccd1 11536+ struct dentry *dentry;
4a4d8108
AM
11537+ struct inode *inode;
11538+ struct file *h_file;
11539+ struct super_block *sb;
11540+
b752ccd1 11541+ dentry = file->f_dentry;
4a4d8108
AM
11542+ inode = dentry->d_inode;
11543+ IMustLock(file->f_mapping->host);
11544+ if (inode != file->f_mapping->host) {
11545+ mutex_unlock(&file->f_mapping->host->i_mutex);
11546+ mutex_lock(&inode->i_mutex);
11547+ }
11548+ IMustLock(inode);
11549+
11550+ sb = dentry->d_sb;
e49829fe
JR
11551+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11552+ if (unlikely(err))
11553+ goto out;
4a4d8108
AM
11554+
11555+ err = 0; /* -EBADF; */ /* posix? */
11556+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
e49829fe 11557+ goto out_si;
4a4d8108
AM
11558+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11559+ if (unlikely(err))
e49829fe 11560+ goto out_si;
4a4d8108
AM
11561+
11562+ err = au_ready_to_write(file, -1, &pin);
11563+ di_downgrade_lock(dentry, AuLock_IR);
11564+ if (unlikely(err))
11565+ goto out_unlock;
11566+ au_unpin(&pin);
11567+
11568+ err = -EINVAL;
11569+ h_file = au_hf_top(file);
53392da6
AM
11570+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
11571+ au_cpup_attr_timesizes(inode);
4a4d8108 11572+
4f0767ce 11573+out_unlock:
4a4d8108 11574+ di_read_unlock(dentry, AuLock_IR);
1308ab2a 11575+ fi_write_unlock(file);
e49829fe 11576+out_si:
953406b4 11577+ si_read_unlock(sb);
e49829fe 11578+out:
4a4d8108
AM
11579+ if (inode != file->f_mapping->host) {
11580+ mutex_unlock(&inode->i_mutex);
11581+ mutex_lock(&file->f_mapping->host->i_mutex);
11582+ }
11583+ return err;
dece6358
AM
11584+}
11585+
4a4d8108
AM
11586+/* no one supports this operation, currently */
11587+#if 0
11588+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 11589+{
4a4d8108
AM
11590+ int err;
11591+ struct au_pin pin;
1308ab2a 11592+ struct dentry *dentry;
4a4d8108
AM
11593+ struct inode *inode;
11594+ struct file *file, *h_file;
1308ab2a 11595+
4a4d8108 11596+ file = kio->ki_filp;
1308ab2a 11597+ dentry = file->f_dentry;
4a4d8108 11598+ inode = dentry->d_inode;
e49829fe 11599+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11600+
11601+ err = 0; /* -EBADF; */ /* posix? */
11602+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
11603+ goto out;
11604+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11605+ if (unlikely(err))
1308ab2a 11606+ goto out;
11607+
4a4d8108
AM
11608+ err = au_ready_to_write(file, -1, &pin);
11609+ di_downgrade_lock(dentry, AuLock_IR);
11610+ if (unlikely(err))
11611+ goto out_unlock;
11612+ au_unpin(&pin);
1308ab2a 11613+
4a4d8108
AM
11614+ err = -ENOSYS;
11615+ h_file = au_hf_top(file);
11616+ if (h_file->f_op && h_file->f_op->aio_fsync) {
11617+ struct dentry *h_d;
11618+ struct mutex *h_mtx;
1308ab2a 11619+
4a4d8108
AM
11620+ h_d = h_file->f_dentry;
11621+ h_mtx = &h_d->d_inode->i_mutex;
11622+ if (!is_sync_kiocb(kio)) {
11623+ get_file(h_file);
11624+ fput(file);
11625+ }
11626+ kio->ki_filp = h_file;
11627+ err = h_file->f_op->aio_fsync(kio, datasync);
11628+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
11629+ if (!err)
11630+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
11631+ /*ignore*/
11632+ au_cpup_attr_timesizes(inode);
11633+ mutex_unlock(h_mtx);
11634+ }
1308ab2a 11635+
4f0767ce 11636+out_unlock:
4a4d8108
AM
11637+ di_read_unlock(dentry, AuLock_IR);
11638+ fi_write_unlock(file);
4f0767ce 11639+out:
e49829fe 11640+ si_read_unlock(inode->sb);
4a4d8108
AM
11641+ mutex_unlock(&inode->i_mutex);
11642+ return err;
dece6358 11643+}
4a4d8108 11644+#endif
dece6358 11645+
4a4d8108 11646+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 11647+{
4a4d8108
AM
11648+ int err;
11649+ struct file *h_file;
11650+ struct dentry *dentry;
11651+ struct super_block *sb;
1308ab2a 11652+
4a4d8108
AM
11653+ dentry = file->f_dentry;
11654+ sb = dentry->d_sb;
e49829fe 11655+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11656+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11657+ if (unlikely(err))
11658+ goto out;
11659+
11660+ h_file = au_hf_top(file);
11661+ if (h_file->f_op && h_file->f_op->fasync)
11662+ err = h_file->f_op->fasync(fd, h_file, flag);
11663+
11664+ di_read_unlock(dentry, AuLock_IR);
11665+ fi_read_unlock(file);
1308ab2a 11666+
4f0767ce 11667+out:
4a4d8108 11668+ si_read_unlock(sb);
1308ab2a 11669+ return err;
dece6358 11670+}
4a4d8108
AM
11671+
11672+/* ---------------------------------------------------------------------- */
11673+
11674+/* no one supports this operation, currently */
11675+#if 0
11676+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
11677+ size_t len, loff_t *pos , int more)
11678+{
11679+}
11680+#endif
11681+
11682+/* ---------------------------------------------------------------------- */
11683+
11684+const struct file_operations aufs_file_fop = {
11685+ .owner = THIS_MODULE,
2cbb1c4b 11686+
027c5e7a 11687+ .llseek = default_llseek,
4a4d8108
AM
11688+
11689+ .read = aufs_read,
11690+ .write = aufs_write,
11691+ .aio_read = aufs_aio_read,
11692+ .aio_write = aufs_aio_write,
11693+#ifdef CONFIG_AUFS_POLL
11694+ .poll = aufs_poll,
11695+#endif
11696+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1
AM
11697+#ifdef CONFIG_COMPAT
11698+ .compat_ioctl = aufs_ioctl_nondir, /* same */
11699+#endif
4a4d8108
AM
11700+ .mmap = aufs_mmap,
11701+ .open = aufs_open_nondir,
11702+ .flush = aufs_flush_nondir,
11703+ .release = aufs_release_nondir,
11704+ .fsync = aufs_fsync_nondir,
11705+ /* .aio_fsync = aufs_aio_fsync_nondir, */
11706+ .fasync = aufs_fasync,
11707+ /* .sendpage = aufs_sendpage, */
11708+ .splice_write = aufs_splice_write,
11709+ .splice_read = aufs_splice_read,
11710+#if 0
11711+ .aio_splice_write = aufs_aio_splice_write,
11712+ .aio_splice_read = aufs_aio_splice_read
11713+#endif
11714+};
7f207e10
AM
11715diff -urN /usr/share/empty/fs/aufs/f_op_sp.c linux/fs/aufs/f_op_sp.c
11716--- /usr/share/empty/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
53392da6 11717+++ linux/fs/aufs/f_op_sp.c 2011-08-24 13:30:24.731313534 +0200
e49829fe 11718@@ -0,0 +1,299 @@
1308ab2a 11719+/*
027c5e7a 11720+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 11721+ *
11722+ * This program, aufs is free software; you can redistribute it and/or modify
11723+ * it under the terms of the GNU General Public License as published by
11724+ * the Free Software Foundation; either version 2 of the License, or
11725+ * (at your option) any later version.
11726+ *
11727+ * This program is distributed in the hope that it will be useful,
11728+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11729+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11730+ * GNU General Public License for more details.
11731+ *
11732+ * You should have received a copy of the GNU General Public License
11733+ * along with this program; if not, write to the Free Software
11734+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11735+ */
dece6358 11736+
1308ab2a 11737+/*
4a4d8108
AM
11738+ * file operations for special files.
11739+ * while they exist in aufs virtually,
11740+ * their file I/O is handled out of aufs.
1308ab2a 11741+ */
11742+
4a4d8108
AM
11743+#include <linux/fs_stack.h>
11744+#include "aufs.h"
1308ab2a 11745+
4a4d8108
AM
11746+static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
11747+ unsigned long nv, loff_t pos)
dece6358 11748+{
4a4d8108
AM
11749+ ssize_t err;
11750+ aufs_bindex_t bstart;
11751+ unsigned char wbr;
11752+ struct file *file, *h_file;
11753+ struct super_block *sb;
1308ab2a 11754+
4a4d8108
AM
11755+ file = kio->ki_filp;
11756+ sb = file->f_dentry->d_sb;
11757+ si_read_lock(sb, AuLock_FLUSH);
11758+ fi_read_lock(file);
11759+ bstart = au_fbstart(file);
11760+ h_file = au_hf_top(file);
11761+ fi_read_unlock(file);
11762+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11763+ si_read_unlock(sb);
11764+
11765+ /* do not change the file in kio */
11766+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
11767+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
11768+ if (err > 0 && wbr)
11769+ file_accessed(h_file);
11770+
11771+ return err;
11772+}
11773+
11774+static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
11775+ unsigned long nv, loff_t pos)
11776+{
11777+ ssize_t err;
11778+ aufs_bindex_t bstart;
11779+ unsigned char wbr;
11780+ struct super_block *sb;
11781+ struct file *file, *h_file;
11782+
11783+ file = kio->ki_filp;
11784+ sb = file->f_dentry->d_sb;
11785+ si_read_lock(sb, AuLock_FLUSH);
11786+ fi_read_lock(file);
11787+ bstart = au_fbstart(file);
11788+ h_file = au_hf_top(file);
11789+ fi_read_unlock(file);
11790+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11791+ si_read_unlock(sb);
11792+
11793+ /* do not change the file in kio */
11794+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
11795+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
11796+ if (err > 0 && wbr)
11797+ file_update_time(h_file);
11798+
11799+ return err;
11800+}
11801+
11802+/* ---------------------------------------------------------------------- */
11803+
11804+static int aufs_release_sp(struct inode *inode, struct file *file)
11805+{
11806+ int err;
11807+ struct file *h_file;
11808+
11809+ fi_read_lock(file);
11810+ h_file = au_hf_top(file);
11811+ fi_read_unlock(file);
11812+ /* close this fifo in aufs */
11813+ err = h_file->f_op->release(inode, file); /* ignore */
11814+ aufs_release_nondir(inode, file); /* ignore */
11815+ return err;
11816+}
11817+
11818+/* ---------------------------------------------------------------------- */
11819+
11820+/* currently, only FIFO is supported */
4f0767ce
JR
11821+enum {
11822+ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
11823+ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
11824+ AuSp_Last
11825+};
4a4d8108
AM
11826+static int aufs_open_sp(struct inode *inode, struct file *file);
11827+static struct au_sp_fop {
11828+ int done;
11829+ struct file_operations fop; /* not 'const' */
11830+ spinlock_t spin;
11831+} au_sp_fop[AuSp_Last] = {
11832+ [AuSp_FIFO] = {
11833+ .fop = {
11834+ .owner = THIS_MODULE,
11835+ .open = aufs_open_sp
11836+ }
11837+ }
11838+};
11839+
11840+static void au_init_fop_sp(struct file *file)
11841+{
11842+ struct au_sp_fop *p;
11843+ int i;
11844+ struct file *h_file;
11845+
11846+ p = au_sp_fop;
11847+ if (unlikely(!p->done)) {
11848+ /* initialize first time only */
11849+ static DEFINE_SPINLOCK(spin);
11850+
11851+ spin_lock(&spin);
11852+ if (!p->done) {
11853+ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
11854+ != AuSp_Last);
11855+ for (i = 0; i < AuSp_Last; i++)
11856+ spin_lock_init(&p[i].spin);
11857+ p->done = 1;
11858+ }
11859+ spin_unlock(&spin);
11860+ }
11861+
11862+ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
11863+ case FMODE_READ:
11864+ i = AuSp_FIFO_R;
11865+ break;
11866+ case FMODE_WRITE:
11867+ i = AuSp_FIFO_W;
11868+ break;
11869+ case FMODE_READ | FMODE_WRITE:
11870+ i = AuSp_FIFO_RW;
11871+ break;
11872+ default:
11873+ BUG();
11874+ }
11875+
11876+ p += i;
11877+ if (unlikely(!p->done)) {
11878+ /* initialize first time only */
11879+ h_file = au_hf_top(file);
11880+ spin_lock(&p->spin);
11881+ if (!p->done) {
11882+ p->fop = *h_file->f_op;
11883+ p->fop.owner = THIS_MODULE;
11884+ if (p->fop.aio_read)
11885+ p->fop.aio_read = aufs_aio_read_sp;
11886+ if (p->fop.aio_write)
11887+ p->fop.aio_write = aufs_aio_write_sp;
11888+ p->fop.release = aufs_release_sp;
11889+ p->done = 1;
11890+ }
11891+ spin_unlock(&p->spin);
11892+ }
11893+ file->f_op = &p->fop;
11894+}
11895+
11896+static int au_cpup_sp(struct dentry *dentry)
11897+{
11898+ int err;
11899+ aufs_bindex_t bcpup;
11900+ struct au_pin pin;
11901+ struct au_wr_dir_args wr_dir_args = {
11902+ .force_btgt = -1,
11903+ .flags = 0
11904+ };
11905+
11906+ AuDbg("%.*s\n", AuDLNPair(dentry));
11907+
11908+ di_read_unlock(dentry, AuLock_IR);
11909+ di_write_lock_child(dentry);
11910+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11911+ if (unlikely(err < 0))
11912+ goto out;
11913+ bcpup = err;
11914+ err = 0;
11915+ if (bcpup == au_dbstart(dentry))
11916+ goto out; /* success */
11917+
11918+ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
11919+ AuPin_MNT_WRITE);
11920+ if (!err) {
11921+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
11922+ au_unpin(&pin);
11923+ }
11924+
4f0767ce 11925+out:
4a4d8108
AM
11926+ di_downgrade_lock(dentry, AuLock_IR);
11927+ return err;
11928+}
11929+
11930+static int au_do_open_sp(struct file *file, int flags)
11931+{
11932+ int err;
11933+ struct dentry *dentry;
11934+ struct super_block *sb;
11935+ struct file *h_file;
11936+ struct inode *h_inode;
11937+
11938+ dentry = file->f_dentry;
11939+ AuDbg("%.*s\n", AuDLNPair(dentry));
11940+
11941+ /*
11942+ * try copying-up.
11943+ * operate on the ro branch is not an error.
11944+ */
11945+ au_cpup_sp(dentry); /* ignore */
11946+
11947+ /* prepare h_file */
11948+ err = au_do_open_nondir(file, vfsub_file_flags(file));
11949+ if (unlikely(err))
11950+ goto out;
11951+
11952+ sb = dentry->d_sb;
11953+ h_file = au_hf_top(file);
11954+ h_inode = h_file->f_dentry->d_inode;
11955+ di_read_unlock(dentry, AuLock_IR);
11956+ fi_write_unlock(file);
11957+ si_read_unlock(sb);
11958+ /* open this fifo in aufs */
11959+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
11960+ si_noflush_read_lock(sb);
11961+ fi_write_lock(file);
11962+ di_read_lock_child(dentry, AuLock_IR);
11963+ if (!err)
11964+ au_init_fop_sp(file);
4a4d8108 11965+
4f0767ce 11966+out:
4a4d8108
AM
11967+ return err;
11968+}
11969+
11970+static int aufs_open_sp(struct inode *inode, struct file *file)
11971+{
11972+ int err;
11973+ struct super_block *sb;
11974+
11975+ sb = file->f_dentry->d_sb;
11976+ si_read_lock(sb, AuLock_FLUSH);
11977+ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
11978+ si_read_unlock(sb);
11979+ return err;
11980+}
11981+
11982+/* ---------------------------------------------------------------------- */
11983+
11984+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
11985+{
11986+ init_special_inode(inode, mode, rdev);
11987+
11988+ switch (mode & S_IFMT) {
11989+ case S_IFIFO:
11990+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
11991+ /*FALLTHROUGH*/
11992+ case S_IFCHR:
11993+ case S_IFBLK:
11994+ case S_IFSOCK:
11995+ break;
11996+ default:
11997+ AuDebugOn(1);
11998+ }
11999+}
12000+
12001+int au_special_file(umode_t mode)
12002+{
12003+ int ret;
12004+
12005+ ret = 0;
12006+ switch (mode & S_IFMT) {
12007+ case S_IFIFO:
12008+#if 0
12009+ case S_IFCHR:
12010+ case S_IFBLK:
12011+ case S_IFSOCK:
12012+#endif
12013+ ret = 1;
12014+ }
12015+
12016+ return ret;
12017+}
7f207e10
AM
12018diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
12019--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
53392da6 12020+++ linux/fs/aufs/fstype.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12021@@ -0,0 +1,497 @@
12022+/*
027c5e7a 12023+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
12024+ *
12025+ * This program, aufs is free software; you can redistribute it and/or modify
12026+ * it under the terms of the GNU General Public License as published by
12027+ * the Free Software Foundation; either version 2 of the License, or
12028+ * (at your option) any later version.
12029+ *
12030+ * This program is distributed in the hope that it will be useful,
12031+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12032+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12033+ * GNU General Public License for more details.
12034+ *
12035+ * You should have received a copy of the GNU General Public License
12036+ * along with this program; if not, write to the Free Software
12037+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12038+ */
12039+
12040+/*
12041+ * judging filesystem type
12042+ */
12043+
12044+#ifndef __AUFS_FSTYPE_H__
12045+#define __AUFS_FSTYPE_H__
12046+
12047+#ifdef __KERNEL__
12048+
12049+#include <linux/fs.h>
12050+#include <linux/magic.h>
12051+#include <linux/romfs_fs.h>
12052+#include <linux/aufs_type.h>
12053+
12054+static inline int au_test_aufs(struct super_block *sb)
12055+{
12056+ return sb->s_magic == AUFS_SUPER_MAGIC;
12057+}
12058+
12059+static inline const char *au_sbtype(struct super_block *sb)
12060+{
12061+ return sb->s_type->name;
12062+}
1308ab2a 12063+
12064+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
12065+{
12066+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
12067+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
12068+#else
12069+ return 0;
12070+#endif
12071+}
12072+
1308ab2a 12073+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 12074+{
1308ab2a 12075+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
12076+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
12077+#else
12078+ return 0;
12079+#endif
12080+}
12081+
1308ab2a 12082+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 12083+{
1308ab2a 12084+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
12085+ return sb->s_magic == CRAMFS_MAGIC;
12086+#endif
12087+ return 0;
12088+}
12089+
12090+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
12091+{
12092+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
12093+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
12094+#else
12095+ return 0;
12096+#endif
12097+}
12098+
1308ab2a 12099+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 12100+{
1308ab2a 12101+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
12102+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
12103+#else
12104+ return 0;
12105+#endif
12106+}
12107+
1308ab2a 12108+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 12109+{
1308ab2a 12110+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
12111+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
12112+#else
12113+ return 0;
12114+#endif
12115+}
12116+
1308ab2a 12117+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 12118+{
1308ab2a 12119+#ifdef CONFIG_TMPFS
12120+ return sb->s_magic == TMPFS_MAGIC;
12121+#else
12122+ return 0;
dece6358 12123+#endif
dece6358
AM
12124+}
12125+
1308ab2a 12126+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 12127+{
1308ab2a 12128+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
12129+ return !strcmp(au_sbtype(sb), "ecryptfs");
12130+#else
12131+ return 0;
12132+#endif
1facf9fc 12133+}
12134+
1308ab2a 12135+static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
1facf9fc 12136+{
1308ab2a 12137+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
12138+ return sb->s_magic == SMB_SUPER_MAGIC;
12139+#else
12140+ return 0;
1facf9fc 12141+#endif
1facf9fc 12142+}
12143+
1308ab2a 12144+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
1facf9fc 12145+{
1308ab2a 12146+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
12147+ return sb->s_magic == OCFS2_SUPER_MAGIC;
12148+#else
12149+ return 0;
12150+#endif
1facf9fc 12151+}
12152+
1308ab2a 12153+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
1facf9fc 12154+{
1308ab2a 12155+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
12156+ return sb->s_magic == DLMFS_MAGIC;
12157+#else
12158+ return 0;
12159+#endif
1facf9fc 12160+}
12161+
1308ab2a 12162+static inline int au_test_coda(struct super_block *sb __maybe_unused)
1facf9fc 12163+{
1308ab2a 12164+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
12165+ return sb->s_magic == CODA_SUPER_MAGIC;
12166+#else
12167+ return 0;
12168+#endif
12169+}
12170+
12171+static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
12172+{
12173+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
12174+ return sb->s_magic == V9FS_MAGIC;
12175+#else
12176+ return 0;
12177+#endif
12178+}
12179+
12180+static inline int au_test_ext4(struct super_block *sb __maybe_unused)
12181+{
12182+#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
12183+ return sb->s_magic == EXT4_SUPER_MAGIC;
12184+#else
12185+ return 0;
12186+#endif
12187+}
12188+
12189+static inline int au_test_sysv(struct super_block *sb __maybe_unused)
12190+{
12191+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
12192+ return !strcmp(au_sbtype(sb), "sysv");
12193+#else
12194+ return 0;
12195+#endif
12196+}
12197+
12198+static inline int au_test_ramfs(struct super_block *sb)
12199+{
12200+ return sb->s_magic == RAMFS_MAGIC;
12201+}
12202+
12203+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
12204+{
12205+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
12206+ return sb->s_magic == UBIFS_SUPER_MAGIC;
12207+#else
12208+ return 0;
12209+#endif
12210+}
12211+
12212+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
12213+{
12214+#ifdef CONFIG_PROC_FS
12215+ return sb->s_magic == PROC_SUPER_MAGIC;
12216+#else
12217+ return 0;
12218+#endif
12219+}
12220+
12221+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
12222+{
12223+#ifdef CONFIG_SYSFS
12224+ return sb->s_magic == SYSFS_MAGIC;
12225+#else
12226+ return 0;
12227+#endif
12228+}
12229+
12230+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
12231+{
12232+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
12233+ return sb->s_magic == CONFIGFS_MAGIC;
12234+#else
12235+ return 0;
12236+#endif
12237+}
12238+
12239+static inline int au_test_minix(struct super_block *sb __maybe_unused)
12240+{
12241+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
12242+ return sb->s_magic == MINIX3_SUPER_MAGIC
12243+ || sb->s_magic == MINIX2_SUPER_MAGIC
12244+ || sb->s_magic == MINIX2_SUPER_MAGIC2
12245+ || sb->s_magic == MINIX_SUPER_MAGIC
12246+ || sb->s_magic == MINIX_SUPER_MAGIC2;
12247+#else
12248+ return 0;
12249+#endif
12250+}
12251+
12252+static inline int au_test_cifs(struct super_block *sb __maybe_unused)
12253+{
12254+#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE)
12255+ return sb->s_magic == CIFS_MAGIC_NUMBER;
12256+#else
12257+ return 0;
12258+#endif
12259+}
12260+
12261+static inline int au_test_fat(struct super_block *sb __maybe_unused)
12262+{
12263+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
12264+ return sb->s_magic == MSDOS_SUPER_MAGIC;
12265+#else
12266+ return 0;
12267+#endif
12268+}
12269+
12270+static inline int au_test_msdos(struct super_block *sb)
12271+{
12272+ return au_test_fat(sb);
12273+}
12274+
12275+static inline int au_test_vfat(struct super_block *sb)
12276+{
12277+ return au_test_fat(sb);
12278+}
12279+
12280+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
12281+{
12282+#ifdef CONFIG_SECURITYFS
12283+ return sb->s_magic == SECURITYFS_MAGIC;
12284+#else
12285+ return 0;
12286+#endif
12287+}
12288+
12289+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
12290+{
12291+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
12292+ return sb->s_magic == SQUASHFS_MAGIC;
12293+#else
12294+ return 0;
12295+#endif
12296+}
12297+
12298+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
12299+{
12300+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
12301+ return sb->s_magic == BTRFS_SUPER_MAGIC;
12302+#else
12303+ return 0;
12304+#endif
12305+}
12306+
12307+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
12308+{
12309+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
12310+ return sb->s_magic == XENFS_SUPER_MAGIC;
12311+#else
12312+ return 0;
12313+#endif
12314+}
12315+
12316+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
12317+{
12318+#ifdef CONFIG_DEBUG_FS
12319+ return sb->s_magic == DEBUGFS_MAGIC;
12320+#else
12321+ return 0;
12322+#endif
12323+}
12324+
12325+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
12326+{
12327+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
12328+ return sb->s_magic == NILFS_SUPER_MAGIC;
12329+#else
12330+ return 0;
12331+#endif
12332+}
12333+
4a4d8108
AM
12334+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
12335+{
12336+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
12337+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
12338+#else
12339+ return 0;
12340+#endif
12341+}
12342+
1308ab2a 12343+/* ---------------------------------------------------------------------- */
12344+/*
12345+ * they can't be an aufs branch.
12346+ */
12347+static inline int au_test_fs_unsuppoted(struct super_block *sb)
12348+{
12349+ return
12350+#ifndef CONFIG_AUFS_BR_RAMFS
12351+ au_test_ramfs(sb) ||
12352+#endif
12353+ au_test_procfs(sb)
12354+ || au_test_sysfs(sb)
12355+ || au_test_configfs(sb)
12356+ || au_test_debugfs(sb)
12357+ || au_test_securityfs(sb)
12358+ || au_test_xenfs(sb)
12359+ || au_test_ecryptfs(sb)
12360+ /* || !strcmp(au_sbtype(sb), "unionfs") */
12361+ || au_test_aufs(sb); /* will be supported in next version */
12362+}
12363+
12364+/*
12365+ * If the filesystem supports NFS-export, then it has to support NULL as
12366+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
12367+ * We can apply this principle when we handle a lower filesystem.
12368+ */
12369+static inline int au_test_fs_null_nd(struct super_block *sb)
12370+{
12371+ return !!sb->s_export_op;
12372+}
12373+
12374+static inline int au_test_fs_remote(struct super_block *sb)
12375+{
12376+ return !au_test_tmpfs(sb)
12377+#ifdef CONFIG_AUFS_BR_RAMFS
12378+ && !au_test_ramfs(sb)
12379+#endif
12380+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
12381+}
12382+
12383+/* ---------------------------------------------------------------------- */
12384+
12385+/*
12386+ * Note: these functions (below) are created after reading ->getattr() in all
12387+ * filesystems under linux/fs. It means we have to do so on every update...
12388+ */
12389+
12390+/*
12391+ * some filesystems require getattr to refresh the inode attributes before
12392+ * referencing.
12393+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
12394+ * and leave the work for d_revalidate()
12395+ */
12396+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
12397+{
12398+ return au_test_nfs(sb)
12399+ || au_test_fuse(sb)
12400+ /* || au_test_smbfs(sb) */ /* untested */
12401+ /* || au_test_ocfs2(sb) */ /* untested */
12402+ /* || au_test_btrfs(sb) */ /* untested */
12403+ /* || au_test_coda(sb) */ /* untested */
12404+ /* || au_test_v9fs(sb) */ /* untested */
12405+ ;
12406+}
12407+
12408+/*
12409+ * filesystems which don't maintain i_size or i_blocks.
12410+ */
12411+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
12412+{
12413+ return au_test_xfs(sb)
4a4d8108
AM
12414+ || au_test_btrfs(sb)
12415+ || au_test_ubifs(sb)
12416+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 12417+ /* || au_test_ext4(sb) */ /* untested */
12418+ /* || au_test_ocfs2(sb) */ /* untested */
12419+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
12420+ /* || au_test_sysv(sb) */ /* untested */
1308ab2a 12421+ /* || au_test_minix(sb) */ /* untested */
12422+ ;
12423+}
12424+
12425+/*
12426+ * filesystems which don't store the correct value in some of their inode
12427+ * attributes.
12428+ */
12429+static inline int au_test_fs_bad_iattr(struct super_block *sb)
12430+{
12431+ return au_test_fs_bad_iattr_size(sb)
12432+ /* || au_test_cifs(sb) */ /* untested */
12433+ || au_test_fat(sb)
12434+ || au_test_msdos(sb)
12435+ || au_test_vfat(sb);
1facf9fc 12436+}
12437+
12438+/* they don't check i_nlink in link(2) */
12439+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
12440+{
12441+ return au_test_tmpfs(sb)
12442+#ifdef CONFIG_AUFS_BR_RAMFS
12443+ || au_test_ramfs(sb)
12444+#endif
4a4d8108
AM
12445+ || au_test_ubifs(sb)
12446+ || au_test_btrfs(sb)
12447+ || au_test_hfsplus(sb);
1facf9fc 12448+}
12449+
12450+/*
12451+ * filesystems which set S_NOATIME and S_NOCMTIME.
12452+ */
12453+static inline int au_test_fs_notime(struct super_block *sb)
12454+{
12455+ return au_test_nfs(sb)
12456+ || au_test_fuse(sb)
dece6358 12457+ || au_test_ubifs(sb)
1facf9fc 12458+ /* || au_test_cifs(sb) */ /* untested */
1facf9fc 12459+ ;
12460+}
12461+
12462+/*
12463+ * filesystems which require replacing i_mapping.
12464+ */
12465+static inline int au_test_fs_bad_mapping(struct super_block *sb)
12466+{
dece6358
AM
12467+ return au_test_fuse(sb)
12468+ || au_test_ubifs(sb);
1facf9fc 12469+}
12470+
12471+/* temporary support for i#1 in cramfs */
12472+static inline int au_test_fs_unique_ino(struct inode *inode)
12473+{
12474+ if (au_test_cramfs(inode->i_sb))
12475+ return inode->i_ino != 1;
12476+ return 1;
12477+}
12478+
12479+/* ---------------------------------------------------------------------- */
12480+
12481+/*
12482+ * the filesystem where the xino files are placed must support i/o after unlink
12483+ * and maintain i_size and i_blocks.
12484+ */
12485+static inline int au_test_fs_bad_xino(struct super_block *sb)
12486+{
12487+ return au_test_fs_remote(sb)
12488+ || au_test_fs_bad_iattr_size(sb)
12489+#ifdef CONFIG_AUFS_BR_RAMFS
12490+ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
12491+#else
12492+ || !au_test_fs_null_nd(sb) /* to keep xino code simple */
12493+#endif
12494+ /* don't want unnecessary work for xino */
12495+ || au_test_aufs(sb)
1308ab2a 12496+ || au_test_ecryptfs(sb)
12497+ || au_test_nilfs(sb);
1facf9fc 12498+}
12499+
12500+static inline int au_test_fs_trunc_xino(struct super_block *sb)
12501+{
12502+ return au_test_tmpfs(sb)
12503+ || au_test_ramfs(sb);
12504+}
12505+
12506+/*
12507+ * test if the @sb is real-readonly.
12508+ */
12509+static inline int au_test_fs_rr(struct super_block *sb)
12510+{
12511+ return au_test_squashfs(sb)
12512+ || au_test_iso9660(sb)
12513+ || au_test_cramfs(sb)
12514+ || au_test_romfs(sb);
12515+}
12516+
12517+#endif /* __KERNEL__ */
12518+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
12519diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
12520--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12521+++ linux/fs/aufs/hfsnotify.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 12522@@ -0,0 +1,247 @@
1facf9fc 12523+/*
027c5e7a 12524+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 12525+ *
12526+ * This program, aufs is free software; you can redistribute it and/or modify
12527+ * it under the terms of the GNU General Public License as published by
12528+ * the Free Software Foundation; either version 2 of the License, or
12529+ * (at your option) any later version.
dece6358
AM
12530+ *
12531+ * This program is distributed in the hope that it will be useful,
12532+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12533+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12534+ * GNU General Public License for more details.
12535+ *
12536+ * You should have received a copy of the GNU General Public License
12537+ * along with this program; if not, write to the Free Software
12538+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 12539+ */
12540+
12541+/*
4a4d8108 12542+ * fsnotify for the lower directories
1facf9fc 12543+ */
12544+
12545+#include "aufs.h"
12546+
4a4d8108
AM
12547+/* FS_IN_IGNORED is unnecessary */
12548+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
12549+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 12550+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
1facf9fc 12551+
0c5527e5 12552+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 12553+{
0c5527e5
AM
12554+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
12555+ hn_mark);
4a4d8108 12556+ AuDbg("here\n");
7f207e10
AM
12557+ hn->hn_mark_dead = 1;
12558+ smp_mb();
12559+ wake_up_all(&au_hfsn_wq);
4a4d8108 12560+}
1facf9fc 12561+
027c5e7a 12562+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 12563+{
027c5e7a
AM
12564+ struct au_hnotify *hn;
12565+ struct super_block *sb;
12566+ struct au_branch *br;
0c5527e5 12567+ struct fsnotify_mark *mark;
027c5e7a 12568+ aufs_bindex_t bindex;
1facf9fc 12569+
027c5e7a
AM
12570+ hn = hinode->hi_notify;
12571+ sb = hn->hn_aufs_inode->i_sb;
12572+ bindex = au_br_index(sb, hinode->hi_id);
12573+ br = au_sbr(sb, bindex);
7f207e10 12574+ hn->hn_mark_dead = 0;
0c5527e5
AM
12575+ mark = &hn->hn_mark;
12576+ fsnotify_init_mark(mark, au_hfsn_free_mark);
12577+ mark->mask = AuHfsnMask;
7f207e10
AM
12578+ /*
12579+ * by udba rename or rmdir, aufs assigns a new inode to the known
12580+ * h_inode, so specify 1 to allow dups.
12581+ */
027c5e7a
AM
12582+ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode,
12583+ /*mnt*/NULL, /*allow_dups*/1);
1facf9fc 12584+}
12585+
027c5e7a 12586+static void au_hfsn_free(struct au_hinode *hinode)
1facf9fc 12587+{
027c5e7a 12588+ struct au_hnotify *hn;
0c5527e5 12589+ struct fsnotify_mark *mark;
953406b4 12590+
027c5e7a 12591+ hn = hinode->hi_notify;
0c5527e5
AM
12592+ mark = &hn->hn_mark;
12593+ fsnotify_destroy_mark(mark);
12594+ fsnotify_put_mark(mark);
7f207e10
AM
12595+
12596+ /* TODO: bad approach */
12597+ wait_event(au_hfsn_wq, hn->hn_mark_dead);
1facf9fc 12598+}
12599+
12600+/* ---------------------------------------------------------------------- */
12601+
4a4d8108 12602+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 12603+{
0c5527e5 12604+ struct fsnotify_mark *mark;
1facf9fc 12605+
0c5527e5
AM
12606+ mark = &hinode->hi_notify->hn_mark;
12607+ spin_lock(&mark->lock);
1facf9fc 12608+ if (do_set) {
0c5527e5
AM
12609+ AuDebugOn(mark->mask & AuHfsnMask);
12610+ mark->mask |= AuHfsnMask;
1facf9fc 12611+ } else {
0c5527e5
AM
12612+ AuDebugOn(!(mark->mask & AuHfsnMask));
12613+ mark->mask &= ~AuHfsnMask;
1facf9fc 12614+ }
0c5527e5 12615+ spin_unlock(&mark->lock);
4a4d8108 12616+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 12617+}
12618+
4a4d8108 12619+/* ---------------------------------------------------------------------- */
1facf9fc 12620+
4a4d8108
AM
12621+/* #define AuDbgHnotify */
12622+#ifdef AuDbgHnotify
12623+static char *au_hfsn_name(u32 mask)
12624+{
12625+#ifdef CONFIG_AUFS_DEBUG
12626+#define test_ret(flag) if (mask & flag) \
12627+ return #flag;
12628+ test_ret(FS_ACCESS);
12629+ test_ret(FS_MODIFY);
12630+ test_ret(FS_ATTRIB);
12631+ test_ret(FS_CLOSE_WRITE);
12632+ test_ret(FS_CLOSE_NOWRITE);
12633+ test_ret(FS_OPEN);
12634+ test_ret(FS_MOVED_FROM);
12635+ test_ret(FS_MOVED_TO);
12636+ test_ret(FS_CREATE);
12637+ test_ret(FS_DELETE);
12638+ test_ret(FS_DELETE_SELF);
12639+ test_ret(FS_MOVE_SELF);
12640+ test_ret(FS_UNMOUNT);
12641+ test_ret(FS_Q_OVERFLOW);
12642+ test_ret(FS_IN_IGNORED);
12643+ test_ret(FS_IN_ISDIR);
12644+ test_ret(FS_IN_ONESHOT);
12645+ test_ret(FS_EVENT_ON_CHILD);
12646+ return "";
12647+#undef test_ret
12648+#else
12649+ return "??";
12650+#endif
1facf9fc 12651+}
4a4d8108 12652+#endif
1facf9fc 12653+
12654+/* ---------------------------------------------------------------------- */
12655+
4a4d8108 12656+static int au_hfsn_handle_event(struct fsnotify_group *group,
0c5527e5
AM
12657+ struct fsnotify_mark *inode_mark,
12658+ struct fsnotify_mark *vfsmount_mark,
4a4d8108 12659+ struct fsnotify_event *event)
1facf9fc 12660+{
12661+ int err;
4a4d8108
AM
12662+ struct au_hnotify *hnotify;
12663+ struct inode *h_dir, *h_inode;
12664+ __u32 mask;
4a4d8108
AM
12665+ struct qstr h_child_qstr = {
12666+ .name = event->file_name,
12667+ .len = event->name_len
12668+ };
12669+
12670+ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 12671+
12672+ err = 0;
0c5527e5 12673+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108
AM
12674+ mask = event->mask;
12675+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 12676+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 12677+ goto out;
1facf9fc 12678+
4a4d8108
AM
12679+ h_dir = event->to_tell;
12680+ h_inode = event->inode;
12681+#ifdef AuDbgHnotify
12682+ au_debug(1);
12683+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
12684+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
12685+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
12686+ h_dir->i_ino, mask, au_hfsn_name(mask),
12687+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
12688+ /* WARN_ON(1); */
1facf9fc 12689+ }
4a4d8108 12690+ au_debug(0);
1facf9fc 12691+#endif
4a4d8108 12692+
0c5527e5
AM
12693+ AuDebugOn(!inode_mark);
12694+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
12695+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 12696+
4a4d8108
AM
12697+out:
12698+ return err;
12699+}
1facf9fc 12700+
027c5e7a 12701+/* isn't it a waste to ask every registered 'group'? */
7f207e10 12702+/* copied from linux/fs/notify/inotify/inotify_fsnotify.c */
4a4d8108 12703+/* it should be exported to modules */
7f207e10
AM
12704+static bool au_hfsn_should_send_event(struct fsnotify_group *group,
12705+ struct inode *h_inode,
0c5527e5
AM
12706+ struct fsnotify_mark *inode_mark,
12707+ struct fsnotify_mark *vfsmount_mark,
12708+ __u32 mask, void *data, int data_type)
4a4d8108 12709+{
4a4d8108 12710+ mask = (mask & ~FS_EVENT_ON_CHILD);
7f207e10 12711+ return inode_mark->mask & mask;
4a4d8108
AM
12712+}
12713+
12714+static struct fsnotify_ops au_hfsn_ops = {
12715+ .should_send_event = au_hfsn_should_send_event,
12716+ .handle_event = au_hfsn_handle_event
12717+};
12718+
12719+/* ---------------------------------------------------------------------- */
12720+
027c5e7a
AM
12721+static void au_hfsn_fin_br(struct au_branch *br)
12722+{
12723+ if (br->br_hfsn_group)
12724+ fsnotify_put_group(br->br_hfsn_group);
12725+}
12726+
12727+static int au_hfsn_init_br(struct au_branch *br, int perm)
12728+{
12729+ br->br_hfsn_group = NULL;
12730+ br->br_hfsn_ops = au_hfsn_ops;
12731+ return 0;
12732+}
12733+
12734+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
4a4d8108
AM
12735+{
12736+ int err;
1facf9fc 12737+
4a4d8108 12738+ err = 0;
027c5e7a
AM
12739+ if (udba != AuOpt_UDBA_HNOTIFY
12740+ || !au_br_hnotifyable(perm)) {
12741+ au_hfsn_fin_br(br);
12742+ br->br_hfsn_group = NULL;
12743+ goto out;
12744+ }
12745+
12746+ if (br->br_hfsn_group)
12747+ goto out;
12748+
12749+ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops);
12750+ if (IS_ERR(br->br_hfsn_group)) {
12751+ err = PTR_ERR(br->br_hfsn_group);
0c5527e5 12752+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
027c5e7a 12753+ br->br_hfsn_group = NULL;
4a4d8108 12754+ }
1facf9fc 12755+
027c5e7a 12756+out:
1facf9fc 12757+ AuTraceErr(err);
12758+ return err;
12759+}
12760+
4a4d8108
AM
12761+const struct au_hnotify_op au_hnotify_op = {
12762+ .ctl = au_hfsn_ctl,
12763+ .alloc = au_hfsn_alloc,
12764+ .free = au_hfsn_free,
1facf9fc 12765+
027c5e7a
AM
12766+ .reset_br = au_hfsn_reset_br,
12767+ .fin_br = au_hfsn_fin_br,
12768+ .init_br = au_hfsn_init_br
4a4d8108 12769+};
7f207e10
AM
12770diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
12771--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12772+++ linux/fs/aufs/hfsplus.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12773@@ -0,0 +1,58 @@
12774+/*
027c5e7a 12775+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
12776+ *
12777+ * This program, aufs is free software; you can redistribute it and/or modify
12778+ * it under the terms of the GNU General Public License as published by
12779+ * the Free Software Foundation; either version 2 of the License, or
12780+ * (at your option) any later version.
12781+ *
12782+ * This program is distributed in the hope that it will be useful,
12783+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12784+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12785+ * GNU General Public License for more details.
12786+ *
12787+ * You should have received a copy of the GNU General Public License
12788+ * along with this program; if not, write to the Free Software
12789+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12790+ */
1facf9fc 12791+
4a4d8108
AM
12792+/*
12793+ * special support for filesystems which acquire an inode mutex
12794+ * at the final close of a file, e.g. hfsplus.
12795+ *
12796+ * This trick is very simple and stupid: just open the file before the really
12797+ * necessary open, to tell hfsplus that this is not the final close.
12798+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
12799+ * and au_h_open_post() after releasing it.
12800+ */
1facf9fc 12801+
4a4d8108
AM
12802+#include <linux/file.h>
12803+#include "aufs.h"
1facf9fc 12804+
4a4d8108
AM
12805+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
12806+{
12807+ struct file *h_file;
12808+ struct dentry *h_dentry;
1facf9fc 12809+
4a4d8108
AM
12810+ h_dentry = au_h_dptr(dentry, bindex);
12811+ AuDebugOn(!h_dentry);
12812+ AuDebugOn(!h_dentry->d_inode);
12813+ IMustLock(h_dentry->d_inode);
12814+
12815+ h_file = NULL;
12816+ if (au_test_hfsplus(h_dentry->d_sb)
12817+ && S_ISREG(h_dentry->d_inode->i_mode))
12818+ h_file = au_h_open(dentry, bindex,
12819+ O_RDONLY | O_NOATIME | O_LARGEFILE,
12820+ /*file*/NULL);
12821+ return h_file;
1facf9fc 12822+}
12823+
4a4d8108
AM
12824+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12825+ struct file *h_file)
12826+{
12827+ if (h_file) {
12828+ fput(h_file);
12829+ au_sbr_put(dentry->d_sb, bindex);
12830+ }
12831+}
7f207e10
AM
12832diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
12833--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
12834+++ linux/fs/aufs/hnotify.c 2011-08-24 13:30:24.731313534 +0200
12835@@ -0,0 +1,712 @@
e49829fe 12836+/*
027c5e7a 12837+ * Copyright (C) 2005-2011 Junjiro R. Okajima
e49829fe
JR
12838+ *
12839+ * This program, aufs is free software; you can redistribute it and/or modify
12840+ * it under the terms of the GNU General Public License as published by
12841+ * the Free Software Foundation; either version 2 of the License, or
12842+ * (at your option) any later version.
12843+ *
12844+ * This program is distributed in the hope that it will be useful,
12845+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12846+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12847+ * GNU General Public License for more details.
12848+ *
12849+ * You should have received a copy of the GNU General Public License
12850+ * along with this program; if not, write to the Free Software
12851+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12852+ */
12853+
12854+/*
7f207e10 12855+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
12856+ */
12857+
12858+#include "aufs.h"
12859+
027c5e7a 12860+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
12861+{
12862+ int err;
7f207e10 12863+ struct au_hnotify *hn;
1facf9fc 12864+
4a4d8108
AM
12865+ err = -ENOMEM;
12866+ hn = au_cache_alloc_hnotify();
12867+ if (hn) {
12868+ hn->hn_aufs_inode = inode;
027c5e7a
AM
12869+ hinode->hi_notify = hn;
12870+ err = au_hnotify_op.alloc(hinode);
12871+ AuTraceErr(err);
12872+ if (unlikely(err)) {
12873+ hinode->hi_notify = NULL;
4a4d8108
AM
12874+ au_cache_free_hnotify(hn);
12875+ /*
12876+ * The upper dir was removed by udba, but the same named
12877+ * dir is left. In this case, aufs assigns a new inode
12878+ * number and sets the monitor again.
12879+ * For the lower dir, the old monitor is still left.
12880+ */
12881+ if (err == -EEXIST)
12882+ err = 0;
12883+ }
1308ab2a 12884+ }
1308ab2a 12885+
027c5e7a 12886+ AuTraceErr(err);
1308ab2a 12887+ return err;
dece6358 12888+}
1facf9fc 12889+
4a4d8108 12890+void au_hn_free(struct au_hinode *hinode)
dece6358 12891+{
4a4d8108 12892+ struct au_hnotify *hn;
1facf9fc 12893+
4a4d8108
AM
12894+ hn = hinode->hi_notify;
12895+ if (hn) {
027c5e7a 12896+ au_hnotify_op.free(hinode);
4a4d8108
AM
12897+ au_cache_free_hnotify(hn);
12898+ hinode->hi_notify = NULL;
12899+ }
12900+}
dece6358 12901+
4a4d8108 12902+/* ---------------------------------------------------------------------- */
dece6358 12903+
4a4d8108
AM
12904+void au_hn_ctl(struct au_hinode *hinode, int do_set)
12905+{
12906+ if (hinode->hi_notify)
12907+ au_hnotify_op.ctl(hinode, do_set);
12908+}
12909+
12910+void au_hn_reset(struct inode *inode, unsigned int flags)
12911+{
12912+ aufs_bindex_t bindex, bend;
12913+ struct inode *hi;
12914+ struct dentry *iwhdentry;
1facf9fc 12915+
1308ab2a 12916+ bend = au_ibend(inode);
4a4d8108
AM
12917+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
12918+ hi = au_h_iptr(inode, bindex);
12919+ if (!hi)
12920+ continue;
1308ab2a 12921+
4a4d8108
AM
12922+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
12923+ iwhdentry = au_hi_wh(inode, bindex);
12924+ if (iwhdentry)
12925+ dget(iwhdentry);
12926+ au_igrab(hi);
12927+ au_set_h_iptr(inode, bindex, NULL, 0);
12928+ au_set_h_iptr(inode, bindex, au_igrab(hi),
12929+ flags & ~AuHi_XINO);
12930+ iput(hi);
12931+ dput(iwhdentry);
12932+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 12933+ }
1facf9fc 12934+}
12935+
1308ab2a 12936+/* ---------------------------------------------------------------------- */
1facf9fc 12937+
4a4d8108 12938+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 12939+{
4a4d8108
AM
12940+ int err;
12941+ aufs_bindex_t bindex, bend, bfound, bstart;
12942+ struct inode *h_i;
1facf9fc 12943+
4a4d8108
AM
12944+ err = 0;
12945+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
12946+ pr_warning("branch root dir was changed\n");
12947+ goto out;
12948+ }
1facf9fc 12949+
4a4d8108
AM
12950+ bfound = -1;
12951+ bend = au_ibend(inode);
12952+ bstart = au_ibstart(inode);
12953+#if 0 /* reserved for future use */
12954+ if (bindex == bend) {
12955+ /* keep this ino in rename case */
12956+ goto out;
12957+ }
12958+#endif
12959+ for (bindex = bstart; bindex <= bend; bindex++)
12960+ if (au_h_iptr(inode, bindex) == h_inode) {
12961+ bfound = bindex;
12962+ break;
12963+ }
12964+ if (bfound < 0)
1308ab2a 12965+ goto out;
1facf9fc 12966+
4a4d8108
AM
12967+ for (bindex = bstart; bindex <= bend; bindex++) {
12968+ h_i = au_h_iptr(inode, bindex);
12969+ if (!h_i)
12970+ continue;
1facf9fc 12971+
4a4d8108
AM
12972+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
12973+ /* ignore this error */
12974+ /* bad action? */
1facf9fc 12975+ }
1facf9fc 12976+
4a4d8108 12977+ /* children inode number will be broken */
1facf9fc 12978+
4f0767ce 12979+out:
4a4d8108
AM
12980+ AuTraceErr(err);
12981+ return err;
1facf9fc 12982+}
12983+
4a4d8108 12984+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 12985+{
4a4d8108
AM
12986+ int err, i, j, ndentry;
12987+ struct au_dcsub_pages dpages;
12988+ struct au_dpage *dpage;
12989+ struct dentry **dentries;
1facf9fc 12990+
4a4d8108
AM
12991+ err = au_dpages_init(&dpages, GFP_NOFS);
12992+ if (unlikely(err))
12993+ goto out;
12994+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
12995+ if (unlikely(err))
12996+ goto out_dpages;
1facf9fc 12997+
4a4d8108
AM
12998+ for (i = 0; i < dpages.ndpage; i++) {
12999+ dpage = dpages.dpages + i;
13000+ dentries = dpage->dentries;
13001+ ndentry = dpage->ndentry;
13002+ for (j = 0; j < ndentry; j++) {
13003+ struct dentry *d;
13004+
13005+ d = dentries[j];
13006+ if (IS_ROOT(d))
13007+ continue;
13008+
4a4d8108
AM
13009+ au_digen_dec(d);
13010+ if (d->d_inode)
13011+ /* todo: reset children xino?
13012+ cached children only? */
13013+ au_iigen_dec(d->d_inode);
1308ab2a 13014+ }
dece6358 13015+ }
1facf9fc 13016+
4f0767ce 13017+out_dpages:
4a4d8108 13018+ au_dpages_free(&dpages);
dece6358 13019+
027c5e7a 13020+#if 0
4a4d8108
AM
13021+ /* discard children */
13022+ dentry_unhash(dentry);
13023+ dput(dentry);
027c5e7a 13024+#endif
4f0767ce 13025+out:
dece6358
AM
13026+ return err;
13027+}
13028+
1308ab2a 13029+/*
4a4d8108 13030+ * return 0 if processed, otherwise the caller falls back to hn_gen_by_name().
1308ab2a 13031+ */
4a4d8108
AM
13032+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
13033+ const unsigned int isdir)
dece6358 13034+{
1308ab2a 13035+ int err;
4a4d8108
AM
13036+ struct dentry *d;
13037+ struct qstr *dname;
1facf9fc 13038+
4a4d8108
AM
13039+ err = 1;
13040+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13041+ pr_warning("branch root dir was changed\n");
13042+ err = 0;
13043+ goto out;
13044+ }
dece6358 13045+
4a4d8108
AM
13046+ if (!isdir) {
13047+ AuDebugOn(!name);
13048+ au_iigen_dec(inode);
027c5e7a 13049+ spin_lock(&inode->i_lock);
4a4d8108 13050+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
027c5e7a 13051+ spin_lock(&d->d_lock);
4a4d8108
AM
13052+ dname = &d->d_name;
13053+ if (dname->len != nlen
027c5e7a
AM
13054+ || memcmp(dname->name, name, nlen)) {
13055+ spin_unlock(&d->d_lock);
4a4d8108 13056+ continue; /* a different name, not the alias we want */
027c5e7a 13057+ }
4a4d8108 13058+ err = 0;
4a4d8108
AM
13059+ au_digen_dec(d);
13060+ spin_unlock(&d->d_lock);
13061+ break;
1facf9fc 13062+ }
027c5e7a 13063+ spin_unlock(&inode->i_lock);
1308ab2a 13064+ } else {
027c5e7a 13065+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13066+ d = d_find_alias(inode);
13067+ if (!d) {
13068+ au_iigen_dec(inode);
13069+ goto out;
13070+ }
1facf9fc 13071+
027c5e7a 13072+ spin_lock(&d->d_lock);
4a4d8108 13073+ dname = &d->d_name;
027c5e7a
AM
13074+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
13075+ spin_unlock(&d->d_lock);
4a4d8108 13076+ err = hn_gen_tree(d);
027c5e7a
AM
13077+ spin_lock(&d->d_lock);
13078+ }
13079+ spin_unlock(&d->d_lock);
4a4d8108
AM
13080+ dput(d);
13081+ }
1facf9fc 13082+
4f0767ce 13083+out:
4a4d8108 13084+ AuTraceErr(err);
1308ab2a 13085+ return err;
13086+}
dece6358 13087+
4a4d8108 13088+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 13089+{
4a4d8108
AM
13090+ int err;
13091+ struct inode *inode;
1facf9fc 13092+
4a4d8108
AM
13093+ inode = dentry->d_inode;
13094+ if (IS_ROOT(dentry)
13095+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
13096+ ) {
13097+ pr_warning("branch root dir was changed\n");
13098+ return 0;
13099+ }
1308ab2a 13100+
4a4d8108
AM
13101+ err = 0;
13102+ if (!isdir) {
4a4d8108
AM
13103+ au_digen_dec(dentry);
13104+ if (inode)
13105+ au_iigen_dec(inode);
13106+ } else {
027c5e7a 13107+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13108+ if (inode)
13109+ err = hn_gen_tree(dentry);
13110+ }
13111+
13112+ AuTraceErr(err);
13113+ return err;
1facf9fc 13114+}
13115+
4a4d8108 13116+/* ---------------------------------------------------------------------- */
1facf9fc 13117+
4a4d8108
AM
13118+/* hnotify job flags */
13119+#define AuHnJob_XINO0 1
13120+#define AuHnJob_GEN (1 << 1)
13121+#define AuHnJob_DIRENT (1 << 2)
13122+#define AuHnJob_ISDIR (1 << 3)
13123+#define AuHnJob_TRYXINO0 (1 << 4)
13124+#define AuHnJob_MNTPNT (1 << 5)
13125+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
13126+#define au_fset_hnjob(flags, name) \
13127+ do { (flags) |= AuHnJob_##name; } while (0)
13128+#define au_fclr_hnjob(flags, name) \
13129+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 13130+
4a4d8108
AM
13131+enum {
13132+ AuHn_CHILD,
13133+ AuHn_PARENT,
13134+ AuHnLast
13135+};
1facf9fc 13136+
4a4d8108
AM
13137+struct au_hnotify_args {
13138+ struct inode *h_dir, *dir, *h_child_inode;
13139+ u32 mask;
13140+ unsigned int flags[AuHnLast];
13141+ unsigned int h_child_nlen;
13142+ char h_child_name[];
13143+};
1facf9fc 13144+
4a4d8108
AM
13145+struct hn_job_args {
13146+ unsigned int flags;
13147+ struct inode *inode, *h_inode, *dir, *h_dir;
13148+ struct dentry *dentry;
13149+ char *h_name;
13150+ int h_nlen;
13151+};
1308ab2a 13152+
4a4d8108
AM
13153+static int hn_job(struct hn_job_args *a)
13154+{
13155+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
1308ab2a 13156+
4a4d8108
AM
13157+ /* reset xino */
13158+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
13159+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 13160+
4a4d8108
AM
13161+ if (au_ftest_hnjob(a->flags, TRYXINO0)
13162+ && a->inode
13163+ && a->h_inode) {
13164+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
13165+ if (!a->h_inode->i_nlink)
13166+ hn_xino(a->inode, a->h_inode); /* ignore this error */
13167+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 13168+ }
1facf9fc 13169+
4a4d8108
AM
13170+ /* make the generation obsolete */
13171+ if (au_ftest_hnjob(a->flags, GEN)) {
13172+ int err = -1;
13173+ if (a->inode)
13174+ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
13175+ isdir);
13176+ if (err && a->dentry)
13177+ hn_gen_by_name(a->dentry, isdir);
13178+ /* ignore this error */
1facf9fc 13179+ }
1facf9fc 13180+
4a4d8108
AM
13181+ /* make dir entries obsolete */
13182+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
13183+ struct au_vdir *vdir;
1facf9fc 13184+
4a4d8108
AM
13185+ vdir = au_ivdir(a->inode);
13186+ if (vdir)
13187+ vdir->vd_jiffy = 0;
13188+ /* IMustLock(a->inode); */
13189+ /* a->inode->i_version++; */
13190+ }
1facf9fc 13191+
4a4d8108
AM
13192+ /* can do nothing but warn */
13193+ if (au_ftest_hnjob(a->flags, MNTPNT)
13194+ && a->dentry
13195+ && d_mountpoint(a->dentry))
13196+ pr_warning("mount-point %.*s is removed or renamed\n",
13197+ AuDLNPair(a->dentry));
1facf9fc 13198+
4a4d8108 13199+ return 0;
1308ab2a 13200+}
1facf9fc 13201+
1308ab2a 13202+/* ---------------------------------------------------------------------- */
1facf9fc 13203+
4a4d8108
AM
13204+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
13205+ struct inode *dir)
1308ab2a 13206+{
4a4d8108
AM
13207+ struct dentry *dentry, *d, *parent;
13208+ struct qstr *dname;
1308ab2a 13209+
4a4d8108
AM
13210+ parent = d_find_alias(dir);
13211+ if (!parent)
13212+ return NULL;
1308ab2a 13213+
4a4d8108 13214+ dentry = NULL;
027c5e7a 13215+ spin_lock(&parent->d_lock);
4a4d8108
AM
13216+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
13217+ /* AuDbg("%.*s\n", AuDLNPair(d)); */
027c5e7a 13218+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
13219+ dname = &d->d_name;
13220+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
13221+ goto cont_unlock;
13222+ if (au_di(d))
13223+ au_digen_dec(d);
13224+ else
13225+ goto cont_unlock;
13226+ if (d->d_count) {
13227+ dentry = dget_dlock(d);
4a4d8108 13228+ spin_unlock(&d->d_lock);
027c5e7a 13229+ break;
dece6358 13230+ }
1facf9fc 13231+
027c5e7a
AM
13232+ cont_unlock:
13233+ spin_unlock(&d->d_lock);
1308ab2a 13234+ }
027c5e7a 13235+ spin_unlock(&parent->d_lock);
4a4d8108 13236+ dput(parent);
1facf9fc 13237+
4a4d8108
AM
13238+ if (dentry)
13239+ di_write_lock_child(dentry);
1308ab2a 13240+
4a4d8108
AM
13241+ return dentry;
13242+}
dece6358 13243+
4a4d8108
AM
13244+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
13245+ aufs_bindex_t bindex, ino_t h_ino)
13246+{
13247+ struct inode *inode;
13248+ ino_t ino;
13249+ int err;
13250+
13251+ inode = NULL;
13252+ err = au_xino_read(sb, bindex, h_ino, &ino);
13253+ if (!err && ino)
13254+ inode = ilookup(sb, ino);
13255+ if (!inode)
13256+ goto out;
13257+
13258+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13259+ pr_warning("wrong root branch\n");
13260+ iput(inode);
13261+ inode = NULL;
13262+ goto out;
1308ab2a 13263+ }
13264+
4a4d8108 13265+ ii_write_lock_child(inode);
1308ab2a 13266+
4f0767ce 13267+out:
4a4d8108 13268+ return inode;
dece6358
AM
13269+}
13270+
4a4d8108 13271+static void au_hn_bh(void *_args)
1facf9fc 13272+{
4a4d8108
AM
13273+ struct au_hnotify_args *a = _args;
13274+ struct super_block *sb;
13275+ aufs_bindex_t bindex, bend, bfound;
13276+ unsigned char xino, try_iput;
1facf9fc 13277+ int err;
1308ab2a 13278+ struct inode *inode;
4a4d8108
AM
13279+ ino_t h_ino;
13280+ struct hn_job_args args;
13281+ struct dentry *dentry;
13282+ struct au_sbinfo *sbinfo;
1facf9fc 13283+
4a4d8108
AM
13284+ AuDebugOn(!_args);
13285+ AuDebugOn(!a->h_dir);
13286+ AuDebugOn(!a->dir);
13287+ AuDebugOn(!a->mask);
13288+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
13289+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
13290+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 13291+
4a4d8108
AM
13292+ inode = NULL;
13293+ dentry = NULL;
13294+ /*
13295+ * do not lock a->dir->i_mutex here
13296+ * because of d_revalidate() may cause a deadlock.
13297+ */
13298+ sb = a->dir->i_sb;
13299+ AuDebugOn(!sb);
13300+ sbinfo = au_sbi(sb);
13301+ AuDebugOn(!sbinfo);
7f207e10 13302+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 13303+
4a4d8108
AM
13304+ ii_read_lock_parent(a->dir);
13305+ bfound = -1;
13306+ bend = au_ibend(a->dir);
13307+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
13308+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
13309+ bfound = bindex;
13310+ break;
13311+ }
13312+ ii_read_unlock(a->dir);
13313+ if (unlikely(bfound < 0))
13314+ goto out;
1facf9fc 13315+
4a4d8108
AM
13316+ xino = !!au_opt_test(au_mntflags(sb), XINO);
13317+ h_ino = 0;
13318+ if (a->h_child_inode)
13319+ h_ino = a->h_child_inode->i_ino;
1facf9fc 13320+
4a4d8108
AM
13321+ if (a->h_child_nlen
13322+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
13323+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
13324+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
13325+ a->dir);
13326+ try_iput = 0;
13327+ if (dentry)
13328+ inode = dentry->d_inode;
13329+ if (xino && !inode && h_ino
13330+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
13331+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
13332+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
13333+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
13334+ try_iput = 1;
13335+ }
1facf9fc 13336+
4a4d8108
AM
13337+ args.flags = a->flags[AuHn_CHILD];
13338+ args.dentry = dentry;
13339+ args.inode = inode;
13340+ args.h_inode = a->h_child_inode;
13341+ args.dir = a->dir;
13342+ args.h_dir = a->h_dir;
13343+ args.h_name = a->h_child_name;
13344+ args.h_nlen = a->h_child_nlen;
13345+ err = hn_job(&args);
13346+ if (dentry) {
027c5e7a 13347+ if (au_di(dentry))
4a4d8108
AM
13348+ di_write_unlock(dentry);
13349+ dput(dentry);
13350+ }
13351+ if (inode && try_iput) {
13352+ ii_write_unlock(inode);
13353+ iput(inode);
13354+ }
1facf9fc 13355+
4a4d8108
AM
13356+ ii_write_lock_parent(a->dir);
13357+ args.flags = a->flags[AuHn_PARENT];
13358+ args.dentry = NULL;
13359+ args.inode = a->dir;
13360+ args.h_inode = a->h_dir;
13361+ args.dir = NULL;
13362+ args.h_dir = NULL;
13363+ args.h_name = NULL;
13364+ args.h_nlen = 0;
13365+ err = hn_job(&args);
13366+ ii_write_unlock(a->dir);
1facf9fc 13367+
4f0767ce 13368+out:
4a4d8108
AM
13369+ iput(a->h_child_inode);
13370+ iput(a->h_dir);
13371+ iput(a->dir);
027c5e7a
AM
13372+ si_write_unlock(sb);
13373+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 13374+ kfree(a);
dece6358 13375+}
1facf9fc 13376+
4a4d8108
AM
13377+/* ---------------------------------------------------------------------- */
13378+
13379+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
13380+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 13381+{
4a4d8108 13382+ int err, len;
53392da6 13383+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
13384+ unsigned char isdir, isroot, wh;
13385+ struct inode *dir;
13386+ struct au_hnotify_args *args;
13387+ char *p, *h_child_name;
dece6358 13388+
1308ab2a 13389+ err = 0;
4a4d8108
AM
13390+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
13391+ dir = igrab(hnotify->hn_aufs_inode);
13392+ if (!dir)
13393+ goto out;
1facf9fc 13394+
4a4d8108
AM
13395+ isroot = (dir->i_ino == AUFS_ROOT_INO);
13396+ wh = 0;
13397+ h_child_name = (void *)h_child_qstr->name;
13398+ len = h_child_qstr->len;
13399+ if (h_child_name) {
13400+ if (len > AUFS_WH_PFX_LEN
13401+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
13402+ h_child_name += AUFS_WH_PFX_LEN;
13403+ len -= AUFS_WH_PFX_LEN;
13404+ wh = 1;
13405+ }
1facf9fc 13406+ }
dece6358 13407+
4a4d8108
AM
13408+ isdir = 0;
13409+ if (h_child_inode)
13410+ isdir = !!S_ISDIR(h_child_inode->i_mode);
13411+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
13412+ flags[AuHn_CHILD] = 0;
13413+ if (isdir)
13414+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
13415+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
13416+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
13417+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
13418+ case FS_MOVED_FROM:
13419+ case FS_MOVED_TO:
13420+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
13421+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13422+ /*FALLTHROUGH*/
13423+ case FS_CREATE:
13424+ AuDebugOn(!h_child_name || !h_child_inode);
13425+ break;
1facf9fc 13426+
4a4d8108
AM
13427+ case FS_DELETE:
13428+ /*
13429+ * aufs will never be able to get this child inode.
13430+ * revalidation should be in d_revalidate()
13431+ * by checking i_nlink, i_generation or d_unhashed().
13432+ */
13433+ AuDebugOn(!h_child_name);
13434+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
13435+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13436+ break;
dece6358 13437+
4a4d8108
AM
13438+ default:
13439+ AuDebugOn(1);
13440+ }
1308ab2a 13441+
4a4d8108
AM
13442+ if (wh)
13443+ h_child_inode = NULL;
1308ab2a 13444+
4a4d8108
AM
13445+ err = -ENOMEM;
13446+ /* on success, iput() and kfree() are left to au_hn_bh() */
4a4d8108 13447+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
13448+ if (unlikely(!args)) {
13449+ AuErr1("no memory\n");
13450+ iput(dir);
13451+ goto out;
13452+ }
13453+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
13454+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
13455+ args->mask = mask;
13456+ args->dir = dir;
13457+ args->h_dir = igrab(h_dir);
13458+ if (h_child_inode)
13459+ h_child_inode = igrab(h_child_inode); /* can be NULL */
13460+ args->h_child_inode = h_child_inode;
13461+ args->h_child_nlen = len;
13462+ if (len) {
13463+ p = (void *)args;
13464+ p += sizeof(*args);
13465+ memcpy(p, h_child_name, len);
13466+ p[len] = 0;
1308ab2a 13467+ }
1308ab2a 13468+
53392da6
AM
13469+ f = 0;
13470+ if (!dir->i_nlink)
13471+ f = AuWkq_NEST;
13472+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
13473+ if (unlikely(err)) {
13474+ pr_err("wkq %d\n", err);
13475+ iput(args->h_child_inode);
13476+ iput(args->h_dir);
13477+ iput(args->dir);
13478+ kfree(args);
1facf9fc 13479+ }
1facf9fc 13480+
4a4d8108 13481+out:
1facf9fc 13482+ return err;
13483+}
13484+
027c5e7a
AM
13485+/* ---------------------------------------------------------------------- */
13486+
13487+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
13488+{
13489+ int err;
13490+
13491+ AuDebugOn(!(udba & AuOptMask_UDBA));
13492+
13493+ err = 0;
13494+ if (au_hnotify_op.reset_br)
13495+ err = au_hnotify_op.reset_br(udba, br, perm);
13496+
13497+ return err;
13498+}
13499+
13500+int au_hnotify_init_br(struct au_branch *br, int perm)
13501+{
13502+ int err;
13503+
13504+ err = 0;
13505+ if (au_hnotify_op.init_br)
13506+ err = au_hnotify_op.init_br(br, perm);
13507+
13508+ return err;
13509+}
13510+
13511+void au_hnotify_fin_br(struct au_branch *br)
13512+{
13513+ if (au_hnotify_op.fin_br)
13514+ au_hnotify_op.fin_br(br);
13515+}
13516+
4a4d8108
AM
13517+static void au_hn_destroy_cache(void)
13518+{
13519+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
13520+ au_cachep[AuCache_HNOTIFY] = NULL;
13521+}
1308ab2a 13522+
4a4d8108 13523+int __init au_hnotify_init(void)
1facf9fc 13524+{
1308ab2a 13525+ int err;
1308ab2a 13526+
4a4d8108
AM
13527+ err = -ENOMEM;
13528+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
13529+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
13530+ err = 0;
13531+ if (au_hnotify_op.init)
13532+ err = au_hnotify_op.init();
4a4d8108
AM
13533+ if (unlikely(err))
13534+ au_hn_destroy_cache();
1308ab2a 13535+ }
1308ab2a 13536+ AuTraceErr(err);
4a4d8108 13537+ return err;
1308ab2a 13538+}
13539+
4a4d8108 13540+void au_hnotify_fin(void)
1308ab2a 13541+{
027c5e7a
AM
13542+ if (au_hnotify_op.fin)
13543+ au_hnotify_op.fin();
4a4d8108
AM
13544+ /* cf. au_cache_fin() */
13545+ if (au_cachep[AuCache_HNOTIFY])
13546+ au_hn_destroy_cache();
dece6358 13547+}
7f207e10
AM
13548diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
13549--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13550+++ linux/fs/aufs/iinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13551@@ -0,0 +1,264 @@
dece6358 13552+/*
027c5e7a 13553+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
13554+ *
13555+ * This program, aufs is free software; you can redistribute it and/or modify
13556+ * it under the terms of the GNU General Public License as published by
13557+ * the Free Software Foundation; either version 2 of the License, or
13558+ * (at your option) any later version.
13559+ *
13560+ * This program is distributed in the hope that it will be useful,
13561+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13562+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13563+ * GNU General Public License for more details.
13564+ *
13565+ * You should have received a copy of the GNU General Public License
13566+ * along with this program; if not, write to the Free Software
13567+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13568+ */
1facf9fc 13569+
dece6358 13570+/*
4a4d8108 13571+ * inode private data
dece6358 13572+ */
1facf9fc 13573+
1308ab2a 13574+#include "aufs.h"
1facf9fc 13575+
4a4d8108 13576+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 13577+{
4a4d8108 13578+ struct inode *h_inode;
1facf9fc 13579+
4a4d8108 13580+ IiMustAnyLock(inode);
1facf9fc 13581+
4a4d8108
AM
13582+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
13583+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13584+ return h_inode;
13585+}
1facf9fc 13586+
4a4d8108
AM
13587+/* todo: hard/soft set? */
13588+void au_hiput(struct au_hinode *hinode)
13589+{
13590+ au_hn_free(hinode);
13591+ dput(hinode->hi_whdentry);
13592+ iput(hinode->hi_inode);
13593+}
1facf9fc 13594+
4a4d8108
AM
13595+unsigned int au_hi_flags(struct inode *inode, int isdir)
13596+{
13597+ unsigned int flags;
13598+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 13599+
4a4d8108
AM
13600+ flags = 0;
13601+ if (au_opt_test(mnt_flags, XINO))
13602+ au_fset_hi(flags, XINO);
13603+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
13604+ au_fset_hi(flags, HNOTIFY);
13605+ return flags;
1facf9fc 13606+}
13607+
4a4d8108
AM
13608+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
13609+ struct inode *h_inode, unsigned int flags)
1308ab2a 13610+{
4a4d8108
AM
13611+ struct au_hinode *hinode;
13612+ struct inode *hi;
13613+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 13614+
4a4d8108 13615+ IiMustWriteLock(inode);
dece6358 13616+
4a4d8108
AM
13617+ hinode = iinfo->ii_hinode + bindex;
13618+ hi = hinode->hi_inode;
13619+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13620+
13621+ if (hi)
13622+ au_hiput(hinode);
13623+ hinode->hi_inode = h_inode;
13624+ if (h_inode) {
13625+ int err;
13626+ struct super_block *sb = inode->i_sb;
13627+ struct au_branch *br;
13628+
027c5e7a
AM
13629+ AuDebugOn(inode->i_mode
13630+ && (h_inode->i_mode & S_IFMT)
13631+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
13632+ if (bindex == iinfo->ii_bstart)
13633+ au_cpup_igen(inode, h_inode);
13634+ br = au_sbr(sb, bindex);
13635+ hinode->hi_id = br->br_id;
13636+ if (au_ftest_hi(flags, XINO)) {
13637+ err = au_xino_write(sb, bindex, h_inode->i_ino,
13638+ inode->i_ino);
13639+ if (unlikely(err))
13640+ AuIOErr1("failed au_xino_write() %d\n", err);
13641+ }
13642+
13643+ if (au_ftest_hi(flags, HNOTIFY)
13644+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 13645+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
13646+ if (unlikely(err))
13647+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 13648+ }
13649+ }
4a4d8108 13650+}
dece6358 13651+
4a4d8108
AM
13652+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
13653+ struct dentry *h_wh)
13654+{
13655+ struct au_hinode *hinode;
dece6358 13656+
4a4d8108
AM
13657+ IiMustWriteLock(inode);
13658+
13659+ hinode = au_ii(inode)->ii_hinode + bindex;
13660+ AuDebugOn(hinode->hi_whdentry);
13661+ hinode->hi_whdentry = h_wh;
1facf9fc 13662+}
13663+
4a4d8108 13664+void au_update_iigen(struct inode *inode)
1308ab2a 13665+{
4a4d8108
AM
13666+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
13667+ /* smp_mb(); */ /* atomic_set */
13668+}
1facf9fc 13669+
4a4d8108
AM
13670+/* it may be called at remount time, too */
13671+void au_update_ibrange(struct inode *inode, int do_put_zero)
13672+{
13673+ struct au_iinfo *iinfo;
027c5e7a 13674+ aufs_bindex_t bindex, bend;
1facf9fc 13675+
4a4d8108 13676+ iinfo = au_ii(inode);
027c5e7a 13677+ if (!iinfo)
4a4d8108 13678+ return;
1facf9fc 13679+
4a4d8108 13680+ IiMustWriteLock(inode);
1facf9fc 13681+
027c5e7a 13682+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
13683+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13684+ bindex++) {
13685+ struct inode *h_i;
1facf9fc 13686+
4a4d8108 13687+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
027c5e7a
AM
13688+ if (h_i && !h_i->i_nlink)
13689+ au_set_h_iptr(inode, bindex, NULL, 0);
13690+ }
4a4d8108
AM
13691+ }
13692+
027c5e7a
AM
13693+ iinfo->ii_bstart = -1;
13694+ iinfo->ii_bend = -1;
13695+ bend = au_sbend(inode->i_sb);
13696+ for (bindex = 0; bindex <= bend; bindex++)
13697+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13698+ iinfo->ii_bstart = bindex;
4a4d8108 13699+ break;
027c5e7a
AM
13700+ }
13701+ if (iinfo->ii_bstart >= 0)
13702+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
13703+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13704+ iinfo->ii_bend = bindex;
13705+ break;
13706+ }
13707+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 13708+}
1facf9fc 13709+
dece6358 13710+/* ---------------------------------------------------------------------- */
1facf9fc 13711+
4a4d8108 13712+void au_icntnr_init_once(void *_c)
dece6358 13713+{
4a4d8108
AM
13714+ struct au_icntnr *c = _c;
13715+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 13716+ static struct lock_class_key aufs_ii;
1facf9fc 13717+
4a4d8108 13718+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 13719+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
13720+ inode_init_once(&c->vfs_inode);
13721+}
1facf9fc 13722+
4a4d8108
AM
13723+int au_iinfo_init(struct inode *inode)
13724+{
13725+ struct au_iinfo *iinfo;
13726+ struct super_block *sb;
13727+ int nbr, i;
1facf9fc 13728+
4a4d8108
AM
13729+ sb = inode->i_sb;
13730+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
13731+ nbr = au_sbend(sb) + 1;
13732+ if (unlikely(nbr <= 0))
13733+ nbr = 1;
13734+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
13735+ if (iinfo->ii_hinode) {
7f207e10 13736+ au_ninodes_inc(sb);
4a4d8108
AM
13737+ for (i = 0; i < nbr; i++)
13738+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 13739+
4a4d8108
AM
13740+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
13741+ /* smp_mb(); */ /* atomic_set */
13742+ iinfo->ii_bstart = -1;
13743+ iinfo->ii_bend = -1;
13744+ iinfo->ii_vdir = NULL;
13745+ return 0;
1308ab2a 13746+ }
4a4d8108
AM
13747+ return -ENOMEM;
13748+}
1facf9fc 13749+
4a4d8108
AM
13750+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
13751+{
13752+ int err, sz;
13753+ struct au_hinode *hip;
1facf9fc 13754+
4a4d8108
AM
13755+ AuRwMustWriteLock(&iinfo->ii_rwsem);
13756+
13757+ err = -ENOMEM;
13758+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
13759+ if (!sz)
13760+ sz = sizeof(*hip);
13761+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
13762+ if (hip) {
13763+ iinfo->ii_hinode = hip;
13764+ err = 0;
1308ab2a 13765+ }
4a4d8108 13766+
1308ab2a 13767+ return err;
1facf9fc 13768+}
13769+
4a4d8108 13770+void au_iinfo_fin(struct inode *inode)
1facf9fc 13771+{
4a4d8108
AM
13772+ struct au_iinfo *iinfo;
13773+ struct au_hinode *hi;
13774+ struct super_block *sb;
b752ccd1
AM
13775+ aufs_bindex_t bindex, bend;
13776+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 13777+
4a4d8108
AM
13778+ iinfo = au_ii(inode);
13779+ /* bad_inode case */
13780+ if (!iinfo)
13781+ return;
1308ab2a 13782+
b752ccd1 13783+ sb = inode->i_sb;
7f207e10 13784+ au_ninodes_dec(sb);
b752ccd1
AM
13785+ if (si_pid_test(sb))
13786+ au_xino_delete_inode(inode, unlinked);
13787+ else {
13788+ /*
13789+ * it is safe to hide the dependency between sbinfo and
13790+ * sb->s_umount.
13791+ */
13792+ lockdep_off();
13793+ si_noflush_read_lock(sb);
13794+ au_xino_delete_inode(inode, unlinked);
13795+ si_read_unlock(sb);
13796+ lockdep_on();
13797+ }
13798+
4a4d8108
AM
13799+ if (iinfo->ii_vdir)
13800+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 13801+
b752ccd1
AM
13802+ bindex = iinfo->ii_bstart;
13803+ if (bindex >= 0) {
13804+ hi = iinfo->ii_hinode + bindex;
4a4d8108 13805+ bend = iinfo->ii_bend;
b752ccd1
AM
13806+ while (bindex++ <= bend) {
13807+ if (hi->hi_inode)
4a4d8108 13808+ au_hiput(hi);
4a4d8108
AM
13809+ hi++;
13810+ }
13811+ }
4a4d8108 13812+ kfree(iinfo->ii_hinode);
027c5e7a 13813+ iinfo->ii_hinode = NULL;
4a4d8108 13814+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 13815+}
7f207e10
AM
13816diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
13817--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13818+++ linux/fs/aufs/inode.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13819@@ -0,0 +1,471 @@
4a4d8108 13820+/*
027c5e7a 13821+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
13822+ *
13823+ * This program, aufs is free software; you can redistribute it and/or modify
13824+ * it under the terms of the GNU General Public License as published by
13825+ * the Free Software Foundation; either version 2 of the License, or
13826+ * (at your option) any later version.
13827+ *
13828+ * This program is distributed in the hope that it will be useful,
13829+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13830+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13831+ * GNU General Public License for more details.
13832+ *
13833+ * You should have received a copy of the GNU General Public License
13834+ * along with this program; if not, write to the Free Software
13835+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13836+ */
1facf9fc 13837+
4a4d8108
AM
13838+/*
13839+ * inode functions
13840+ */
1facf9fc 13841+
4a4d8108 13842+#include "aufs.h"
1308ab2a 13843+
4a4d8108
AM
13844+struct inode *au_igrab(struct inode *inode)
13845+{
13846+ if (inode) {
13847+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 13848+ ihold(inode);
1facf9fc 13849+ }
4a4d8108
AM
13850+ return inode;
13851+}
1facf9fc 13852+
4a4d8108
AM
13853+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
13854+{
13855+ au_cpup_attr_all(inode, /*force*/0);
13856+ au_update_iigen(inode);
13857+ if (do_version)
13858+ inode->i_version++;
dece6358 13859+}
1facf9fc 13860+
027c5e7a 13861+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 13862+{
4a4d8108 13863+ int err, e;
027c5e7a 13864+ umode_t type;
4a4d8108 13865+ aufs_bindex_t bindex, new_bindex;
1308ab2a 13866+ struct super_block *sb;
4a4d8108 13867+ struct au_iinfo *iinfo;
027c5e7a 13868+ struct au_hinode *p, *q, tmp;
1facf9fc 13869+
4a4d8108 13870+ IiMustWriteLock(inode);
1facf9fc 13871+
027c5e7a 13872+ *update = 0;
4a4d8108 13873+ sb = inode->i_sb;
027c5e7a 13874+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
13875+ iinfo = au_ii(inode);
13876+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
13877+ if (unlikely(err))
1308ab2a 13878+ goto out;
1facf9fc 13879+
027c5e7a 13880+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 13881+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
13882+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13883+ bindex++, p++) {
13884+ if (!p->hi_inode)
13885+ continue;
1facf9fc 13886+
027c5e7a 13887+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
13888+ new_bindex = au_br_index(sb, p->hi_id);
13889+ if (new_bindex == bindex)
13890+ continue;
1facf9fc 13891+
4a4d8108 13892+ if (new_bindex < 0) {
027c5e7a 13893+ *update = 1;
4a4d8108
AM
13894+ au_hiput(p);
13895+ p->hi_inode = NULL;
13896+ continue;
1308ab2a 13897+ }
4a4d8108
AM
13898+
13899+ if (new_bindex < iinfo->ii_bstart)
13900+ iinfo->ii_bstart = new_bindex;
13901+ if (iinfo->ii_bend < new_bindex)
13902+ iinfo->ii_bend = new_bindex;
13903+ /* swap the two lower inodes, and loop again */
13904+ q = iinfo->ii_hinode + new_bindex;
13905+ tmp = *q;
13906+ *q = *p;
13907+ *p = tmp;
13908+ if (tmp.hi_inode) {
13909+ bindex--;
13910+ p--;
1308ab2a 13911+ }
13912+ }
4a4d8108
AM
13913+ au_update_ibrange(inode, /*do_put_zero*/0);
13914+ e = au_dy_irefresh(inode);
13915+ if (unlikely(e && !err))
13916+ err = e;
1facf9fc 13917+
4f0767ce 13918+out:
027c5e7a
AM
13919+ AuTraceErr(err);
13920+ return err;
13921+}
13922+
13923+int au_refresh_hinode_self(struct inode *inode)
13924+{
13925+ int err, update;
13926+
13927+ err = au_ii_refresh(inode, &update);
13928+ if (!err)
13929+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
13930+
13931+ AuTraceErr(err);
4a4d8108
AM
13932+ return err;
13933+}
1facf9fc 13934+
4a4d8108
AM
13935+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
13936+{
027c5e7a 13937+ int err, e, update;
4a4d8108 13938+ unsigned int flags;
027c5e7a 13939+ umode_t mode;
4a4d8108 13940+ aufs_bindex_t bindex, bend;
027c5e7a 13941+ unsigned char isdir;
4a4d8108
AM
13942+ struct au_hinode *p;
13943+ struct au_iinfo *iinfo;
1facf9fc 13944+
027c5e7a 13945+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
13946+ if (unlikely(err))
13947+ goto out;
13948+
13949+ update = 0;
13950+ iinfo = au_ii(inode);
13951+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
13952+ mode = (inode->i_mode & S_IFMT);
13953+ isdir = S_ISDIR(mode);
4a4d8108
AM
13954+ flags = au_hi_flags(inode, isdir);
13955+ bend = au_dbend(dentry);
13956+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
13957+ struct inode *h_i;
13958+ struct dentry *h_d;
13959+
13960+ h_d = au_h_dptr(dentry, bindex);
13961+ if (!h_d || !h_d->d_inode)
13962+ continue;
13963+
027c5e7a 13964+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
4a4d8108
AM
13965+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
13966+ h_i = au_h_iptr(inode, bindex);
13967+ if (h_i) {
13968+ if (h_i == h_d->d_inode)
13969+ continue;
13970+ err = -EIO;
13971+ break;
13972+ }
13973+ }
13974+ if (bindex < iinfo->ii_bstart)
13975+ iinfo->ii_bstart = bindex;
13976+ if (iinfo->ii_bend < bindex)
13977+ iinfo->ii_bend = bindex;
13978+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
13979+ update = 1;
1308ab2a 13980+ }
4a4d8108
AM
13981+ au_update_ibrange(inode, /*do_put_zero*/0);
13982+ e = au_dy_irefresh(inode);
13983+ if (unlikely(e && !err))
13984+ err = e;
027c5e7a
AM
13985+ if (!err)
13986+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 13987+
4f0767ce 13988+out:
4a4d8108 13989+ AuTraceErr(err);
1308ab2a 13990+ return err;
dece6358
AM
13991+}
13992+
4a4d8108 13993+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 13994+{
4a4d8108
AM
13995+ int err;
13996+ unsigned int flags;
13997+ umode_t mode;
13998+ aufs_bindex_t bindex, bstart, btail;
13999+ unsigned char isdir;
14000+ struct dentry *h_dentry;
14001+ struct inode *h_inode;
14002+ struct au_iinfo *iinfo;
dece6358 14003+
4a4d8108 14004+ IiMustWriteLock(inode);
dece6358 14005+
4a4d8108
AM
14006+ err = 0;
14007+ isdir = 0;
14008+ bstart = au_dbstart(dentry);
14009+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
14010+ mode = h_inode->i_mode;
14011+ switch (mode & S_IFMT) {
14012+ case S_IFREG:
14013+ btail = au_dbtail(dentry);
14014+ inode->i_op = &aufs_iop;
14015+ inode->i_fop = &aufs_file_fop;
14016+ err = au_dy_iaop(inode, bstart, h_inode);
14017+ if (unlikely(err))
14018+ goto out;
14019+ break;
14020+ case S_IFDIR:
14021+ isdir = 1;
14022+ btail = au_dbtaildir(dentry);
14023+ inode->i_op = &aufs_dir_iop;
14024+ inode->i_fop = &aufs_dir_fop;
14025+ break;
14026+ case S_IFLNK:
14027+ btail = au_dbtail(dentry);
14028+ inode->i_op = &aufs_symlink_iop;
14029+ break;
14030+ case S_IFBLK:
14031+ case S_IFCHR:
14032+ case S_IFIFO:
14033+ case S_IFSOCK:
14034+ btail = au_dbtail(dentry);
14035+ inode->i_op = &aufs_iop;
14036+ au_init_special_fop(inode, mode, h_inode->i_rdev);
14037+ break;
14038+ default:
14039+ AuIOErr("Unknown file type 0%o\n", mode);
14040+ err = -EIO;
1308ab2a 14041+ goto out;
4a4d8108 14042+ }
dece6358 14043+
4a4d8108
AM
14044+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
14045+ flags = au_hi_flags(inode, isdir);
14046+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14047+ && au_ftest_hi(flags, HNOTIFY)
14048+ && dentry->d_name.len > AUFS_WH_PFX_LEN
14049+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14050+ au_fclr_hi(flags, HNOTIFY);
14051+ iinfo = au_ii(inode);
14052+ iinfo->ii_bstart = bstart;
14053+ iinfo->ii_bend = btail;
14054+ for (bindex = bstart; bindex <= btail; bindex++) {
14055+ h_dentry = au_h_dptr(dentry, bindex);
14056+ if (h_dentry)
14057+ au_set_h_iptr(inode, bindex,
14058+ au_igrab(h_dentry->d_inode), flags);
14059+ }
14060+ au_cpup_attr_all(inode, /*force*/1);
dece6358 14061+
4f0767ce 14062+out:
4a4d8108
AM
14063+ return err;
14064+}
dece6358 14065+
027c5e7a
AM
14066+/*
14067+ * only a zero (matched) return leaves iinfo write_locked
14068+ * minus: errno
14069+ * zero: success, matched
14070+ * plus: no error, but unmatched
14071+ */
14072+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
14073+{
14074+ int err;
14075+ aufs_bindex_t bindex, bend;
14076+ struct inode *h_inode, *h_dinode;
dece6358 14077+
4a4d8108
AM
14078+ /*
14079+ * before this function, if aufs got any iinfo lock, it must be only
14080+ * one, the parent dir.
14081+ * it can happen by UDBA and the obsoleted inode number.
14082+ */
14083+ err = -EIO;
14084+ if (unlikely(inode->i_ino == parent_ino(dentry)))
14085+ goto out;
14086+
027c5e7a 14087+ err = 1;
4a4d8108
AM
14088+ ii_write_lock_new_child(inode);
14089+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14090+ bend = au_ibend(inode);
14091+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14092+ h_inode = au_h_iptr(inode, bindex);
14093+ if (h_inode && h_inode == h_dinode) {
4a4d8108 14094+ err = 0;
027c5e7a 14095+ if (au_iigen_test(inode, au_digen(dentry)))
4a4d8108
AM
14096+ err = au_refresh_hinode(inode, dentry);
14097+ break;
1308ab2a 14098+ }
1facf9fc 14099+ }
dece6358 14100+
4a4d8108
AM
14101+ if (unlikely(err))
14102+ ii_write_unlock(inode);
4f0767ce 14103+out:
1facf9fc 14104+ return err;
14105+}
1facf9fc 14106+
4a4d8108
AM
14107+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14108+ unsigned int d_type, ino_t *ino)
1facf9fc 14109+{
4a4d8108
AM
14110+ int err;
14111+ struct mutex *mtx;
1facf9fc 14112+
b752ccd1 14113+ /* protect a hardlinked inode number from a race condition */
4a4d8108 14114+ mtx = NULL;
b752ccd1 14115+ if (d_type != DT_DIR) {
4a4d8108
AM
14116+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14117+ mutex_lock(mtx);
14118+ }
14119+ err = au_xino_read(sb, bindex, h_ino, ino);
14120+ if (unlikely(err))
14121+ goto out;
1308ab2a 14122+
4a4d8108
AM
14123+ if (!*ino) {
14124+ err = -EIO;
14125+ *ino = au_xino_new_ino(sb);
14126+ if (unlikely(!*ino))
1facf9fc 14127+ goto out;
4a4d8108
AM
14128+ err = au_xino_write(sb, bindex, h_ino, *ino);
14129+ if (unlikely(err))
1308ab2a 14130+ goto out;
1308ab2a 14131+ }
1facf9fc 14132+
4f0767ce 14133+out:
b752ccd1 14134+ if (mtx)
4a4d8108 14135+ mutex_unlock(mtx);
1facf9fc 14136+ return err;
14137+}
14138+
4a4d8108
AM
14139+/* on success, returns with iinfo write_locked */
14140+/* todo: return with unlocked? */
14141+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 14142+{
b752ccd1 14143+ struct inode *inode, *h_inode;
4a4d8108
AM
14144+ struct dentry *h_dentry;
14145+ struct super_block *sb;
b752ccd1 14146+ struct mutex *mtx;
4a4d8108 14147+ ino_t h_ino, ino;
027c5e7a 14148+ int err;
4a4d8108 14149+ aufs_bindex_t bstart;
1facf9fc 14150+
4a4d8108
AM
14151+ sb = dentry->d_sb;
14152+ bstart = au_dbstart(dentry);
14153+ h_dentry = au_h_dptr(dentry, bstart);
b752ccd1
AM
14154+ h_inode = h_dentry->d_inode;
14155+ h_ino = h_inode->i_ino;
14156+
14157+ /*
14158+ * stop 'race'-ing between hardlinks under different
14159+ * parents.
14160+ */
14161+ mtx = NULL;
14162+ if (!S_ISDIR(h_inode->i_mode))
14163+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
14164+
4f0767ce 14165+new_ino:
b752ccd1
AM
14166+ if (mtx)
14167+ mutex_lock(mtx);
4a4d8108
AM
14168+ err = au_xino_read(sb, bstart, h_ino, &ino);
14169+ inode = ERR_PTR(err);
14170+ if (unlikely(err))
14171+ goto out;
b752ccd1 14172+
4a4d8108
AM
14173+ if (!ino) {
14174+ ino = au_xino_new_ino(sb);
14175+ if (unlikely(!ino)) {
14176+ inode = ERR_PTR(-EIO);
dece6358
AM
14177+ goto out;
14178+ }
14179+ }
1facf9fc 14180+
4a4d8108
AM
14181+ AuDbg("i%lu\n", (unsigned long)ino);
14182+ inode = au_iget_locked(sb, ino);
14183+ err = PTR_ERR(inode);
14184+ if (IS_ERR(inode))
1facf9fc 14185+ goto out;
1facf9fc 14186+
4a4d8108
AM
14187+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14188+ if (inode->i_state & I_NEW) {
14189+ ii_write_lock_new_child(inode);
14190+ err = set_inode(inode, dentry);
14191+ if (!err) {
14192+ unlock_new_inode(inode);
14193+ goto out; /* success */
14194+ }
1308ab2a 14195+
027c5e7a
AM
14196+ /*
14197+ * iget_failed() calls iput(), but we need to call
14198+ * ii_write_unlock() after iget_failed(). so take an extra
14199+ * i_count reference beforehand as a dirty hack.
14200+ */
14201+ atomic_inc(&inode->i_count);
4a4d8108 14202+ iget_failed(inode);
027c5e7a
AM
14203+ ii_write_unlock(inode);
14204+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
14205+ /* ignore this error */
14206+ goto out_iput;
14207+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
14208+ /*
14209+ * horrible race condition between lookup, readdir and copyup
14210+ * (or something).
14211+ */
14212+ if (mtx)
14213+ mutex_unlock(mtx);
027c5e7a
AM
14214+ err = reval_inode(inode, dentry);
14215+ if (unlikely(err < 0)) {
14216+ mtx = NULL;
14217+ goto out_iput;
14218+ }
14219+
b752ccd1
AM
14220+ if (!err) {
14221+ mtx = NULL;
4a4d8108 14222+ goto out; /* success */
b752ccd1
AM
14223+ } else if (mtx)
14224+ mutex_lock(mtx);
4a4d8108
AM
14225+ }
14226+
14227+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14228+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14229+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
14230+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14231+ (unsigned long)h_ino, (unsigned long)ino);
14232+ ino = 0;
14233+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
14234+ if (!err) {
14235+ iput(inode);
b752ccd1
AM
14236+ if (mtx)
14237+ mutex_unlock(mtx);
4a4d8108
AM
14238+ goto new_ino;
14239+ }
1308ab2a 14240+
4f0767ce 14241+out_iput:
4a4d8108 14242+ iput(inode);
4a4d8108 14243+ inode = ERR_PTR(err);
4f0767ce 14244+out:
b752ccd1
AM
14245+ if (mtx)
14246+ mutex_unlock(mtx);
4a4d8108 14247+ return inode;
1facf9fc 14248+}
14249+
4a4d8108 14250+/* ---------------------------------------------------------------------- */
1facf9fc 14251+
4a4d8108
AM
14252+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14253+ struct inode *inode)
14254+{
14255+ int err;
1facf9fc 14256+
4a4d8108 14257+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 14258+
4a4d8108
AM
14259+ /* pseudo-link after flushed may happen out of bounds */
14260+ if (!err
14261+ && inode
14262+ && au_ibstart(inode) <= bindex
14263+ && bindex <= au_ibend(inode)) {
14264+ /*
14265+ * permission check is unnecessary since vfsub routine
14266+ * will be called later
14267+ */
14268+ struct inode *hi = au_h_iptr(inode, bindex);
14269+ if (hi)
14270+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 14271+ }
14272+
4a4d8108
AM
14273+ return err;
14274+}
dece6358 14275+
4a4d8108
AM
14276+int au_test_h_perm(struct inode *h_inode, int mask)
14277+{
14278+ if (!current_fsuid())
14279+ return 0;
14280+ return inode_permission(h_inode, mask);
14281+}
1facf9fc 14282+
4a4d8108
AM
14283+int au_test_h_perm_sio(struct inode *h_inode, int mask)
14284+{
14285+ if (au_test_nfs(h_inode->i_sb)
14286+ && (mask & MAY_WRITE)
14287+ && S_ISDIR(h_inode->i_mode))
14288+ mask |= MAY_READ; /* force permission check */
14289+ return au_test_h_perm(h_inode, mask);
1facf9fc 14290+}
7f207e10
AM
14291diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
14292--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
14293+++ linux/fs/aufs/inode.h 2011-08-24 13:30:24.734646739 +0200
14294@@ -0,0 +1,556 @@
4a4d8108 14295+/*
027c5e7a 14296+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14297+ *
14298+ * This program, aufs is free software; you can redistribute it and/or modify
14299+ * it under the terms of the GNU General Public License as published by
14300+ * the Free Software Foundation; either version 2 of the License, or
14301+ * (at your option) any later version.
14302+ *
14303+ * This program is distributed in the hope that it will be useful,
14304+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14305+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14306+ * GNU General Public License for more details.
14307+ *
14308+ * You should have received a copy of the GNU General Public License
14309+ * along with this program; if not, write to the Free Software
14310+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14311+ */
1facf9fc 14312+
1308ab2a 14313+/*
4a4d8108 14314+ * inode operations
1308ab2a 14315+ */
dece6358 14316+
4a4d8108
AM
14317+#ifndef __AUFS_INODE_H__
14318+#define __AUFS_INODE_H__
dece6358 14319+
4a4d8108 14320+#ifdef __KERNEL__
1308ab2a 14321+
4a4d8108
AM
14322+#include <linux/fs.h>
14323+#include <linux/fsnotify.h>
14324+#include <linux/aufs_type.h>
14325+#include "rwsem.h"
1308ab2a 14326+
4a4d8108 14327+struct vfsmount;
1facf9fc 14328+
4a4d8108
AM
14329+struct au_hnotify {
14330+#ifdef CONFIG_AUFS_HNOTIFY
14331+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 14332+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 14333+ struct fsnotify_mark hn_mark;
7f207e10 14334+ int hn_mark_dead;
4a4d8108 14335+#endif
7f207e10 14336+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
14337+#endif
14338+} ____cacheline_aligned_in_smp;
1facf9fc 14339+
4a4d8108
AM
14340+struct au_hinode {
14341+ struct inode *hi_inode;
14342+ aufs_bindex_t hi_id;
14343+#ifdef CONFIG_AUFS_HNOTIFY
14344+ struct au_hnotify *hi_notify;
14345+#endif
dece6358 14346+
4a4d8108
AM
14347+ /* reference to the copied-up whiteout with get/put */
14348+ struct dentry *hi_whdentry;
14349+};
dece6358 14350+
4a4d8108
AM
14351+struct au_vdir;
14352+struct au_iinfo {
14353+ atomic_t ii_generation;
14354+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 14355+
4a4d8108
AM
14356+ struct au_rwsem ii_rwsem;
14357+ aufs_bindex_t ii_bstart, ii_bend;
14358+ __u32 ii_higen;
14359+ struct au_hinode *ii_hinode;
14360+ struct au_vdir *ii_vdir;
14361+};
1facf9fc 14362+
4a4d8108
AM
14363+struct au_icntnr {
14364+ struct au_iinfo iinfo;
14365+ struct inode vfs_inode;
14366+} ____cacheline_aligned_in_smp;
1308ab2a 14367+
4a4d8108
AM
14368+/* au_pin flags */
14369+#define AuPin_DI_LOCKED 1
14370+#define AuPin_MNT_WRITE (1 << 1)
14371+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
14372+#define au_fset_pin(flags, name) \
14373+ do { (flags) |= AuPin_##name; } while (0)
14374+#define au_fclr_pin(flags, name) \
14375+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
14376+
14377+struct au_pin {
14378+ /* input */
14379+ struct dentry *dentry;
14380+ unsigned int udba;
14381+ unsigned char lsc_di, lsc_hi, flags;
14382+ aufs_bindex_t bindex;
14383+
14384+ /* output */
14385+ struct dentry *parent;
14386+ struct au_hinode *hdir;
14387+ struct vfsmount *h_mnt;
14388+};
1facf9fc 14389+
1308ab2a 14390+/* ---------------------------------------------------------------------- */
14391+
4a4d8108 14392+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 14393+{
4a4d8108 14394+ struct au_iinfo *iinfo;
1facf9fc 14395+
4a4d8108
AM
14396+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14397+ if (iinfo->ii_hinode)
14398+ return iinfo;
14399+ return NULL; /* debugging bad_inode case */
14400+}
1facf9fc 14401+
4a4d8108 14402+/* ---------------------------------------------------------------------- */
1facf9fc 14403+
4a4d8108
AM
14404+/* inode.c */
14405+struct inode *au_igrab(struct inode *inode);
027c5e7a 14406+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
14407+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
14408+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14409+ unsigned int d_type, ino_t *ino);
14410+struct inode *au_new_inode(struct dentry *dentry, int must_new);
14411+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14412+ struct inode *inode);
14413+int au_test_h_perm(struct inode *h_inode, int mask);
14414+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 14415+
4a4d8108
AM
14416+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
14417+ ino_t h_ino, unsigned int d_type, ino_t *ino)
14418+{
14419+#ifdef CONFIG_AUFS_SHWH
14420+ return au_ino(sb, bindex, h_ino, d_type, ino);
14421+#else
14422+ return 0;
14423+#endif
14424+}
1facf9fc 14425+
4a4d8108
AM
14426+/* i_op.c */
14427+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
1308ab2a 14428+
4a4d8108
AM
14429+/* au_wr_dir flags */
14430+#define AuWrDir_ADD_ENTRY 1
14431+#define AuWrDir_ISDIR (1 << 1)
14432+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
14433+#define au_fset_wrdir(flags, name) \
14434+ do { (flags) |= AuWrDir_##name; } while (0)
14435+#define au_fclr_wrdir(flags, name) \
14436+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 14437+
4a4d8108
AM
14438+struct au_wr_dir_args {
14439+ aufs_bindex_t force_btgt;
14440+ unsigned char flags;
14441+};
14442+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
14443+ struct au_wr_dir_args *args);
dece6358 14444+
4a4d8108
AM
14445+struct dentry *au_pinned_h_parent(struct au_pin *pin);
14446+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
14447+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
14448+ unsigned int udba, unsigned char flags);
14449+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
14450+ unsigned int udba, unsigned char flags) __must_check;
14451+int au_do_pin(struct au_pin *pin) __must_check;
14452+void au_unpin(struct au_pin *pin);
1facf9fc 14453+
4a4d8108
AM
14454+/* i_op_add.c */
14455+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
14456+ struct dentry *h_parent, int isdir);
14457+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
14458+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
14459+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
14460+ struct nameidata *nd);
14461+int aufs_link(struct dentry *src_dentry, struct inode *dir,
14462+ struct dentry *dentry);
14463+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
1facf9fc 14464+
4a4d8108
AM
14465+/* i_op_del.c */
14466+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
14467+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
14468+ struct dentry *h_parent, int isdir);
14469+int aufs_unlink(struct inode *dir, struct dentry *dentry);
14470+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 14471+
4a4d8108
AM
14472+/* i_op_ren.c */
14473+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
14474+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
14475+ struct inode *dir, struct dentry *dentry);
1facf9fc 14476+
4a4d8108
AM
14477+/* iinfo.c */
14478+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
14479+void au_hiput(struct au_hinode *hinode);
14480+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14481+ struct dentry *h_wh);
14482+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 14483+
4a4d8108
AM
14484+/* hinode flags */
14485+#define AuHi_XINO 1
14486+#define AuHi_HNOTIFY (1 << 1)
14487+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
14488+#define au_fset_hi(flags, name) \
14489+ do { (flags) |= AuHi_##name; } while (0)
14490+#define au_fclr_hi(flags, name) \
14491+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 14492+
4a4d8108
AM
14493+#ifndef CONFIG_AUFS_HNOTIFY
14494+#undef AuHi_HNOTIFY
14495+#define AuHi_HNOTIFY 0
14496+#endif
1facf9fc 14497+
4a4d8108
AM
14498+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14499+ struct inode *h_inode, unsigned int flags);
1facf9fc 14500+
4a4d8108
AM
14501+void au_update_iigen(struct inode *inode);
14502+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 14503+
4a4d8108
AM
14504+void au_icntnr_init_once(void *_c);
14505+int au_iinfo_init(struct inode *inode);
14506+void au_iinfo_fin(struct inode *inode);
14507+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 14508+
e49829fe 14509+#ifdef CONFIG_PROC_FS
4a4d8108 14510+/* plink.c */
e49829fe
JR
14511+int au_plink_maint(struct super_block *sb, int flags);
14512+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
14513+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
14514+#ifdef CONFIG_AUFS_DEBUG
14515+void au_plink_list(struct super_block *sb);
14516+#else
14517+AuStubVoid(au_plink_list, struct super_block *sb)
14518+#endif
14519+int au_plink_test(struct inode *inode);
14520+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
14521+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
14522+ struct dentry *h_dentry);
e49829fe
JR
14523+void au_plink_put(struct super_block *sb, int verbose);
14524+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 14525+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
14526+#else
14527+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
14528+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
14529+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
14530+AuStubVoid(au_plink_list, struct super_block *sb);
14531+AuStubInt0(au_plink_test, struct inode *inode);
14532+AuStub(struct dentry *, au_plink_lkup, return NULL,
14533+ struct inode *inode, aufs_bindex_t bindex);
14534+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
14535+ struct dentry *h_dentry);
14536+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
14537+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
14538+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
14539+#endif /* CONFIG_PROC_FS */
1facf9fc 14540+
4a4d8108 14541+/* ---------------------------------------------------------------------- */
1308ab2a 14542+
4a4d8108
AM
14543+/* lock subclass for iinfo */
14544+enum {
14545+ AuLsc_II_CHILD, /* child first */
14546+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
14547+ AuLsc_II_CHILD3, /* copyup dirs */
14548+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
14549+ AuLsc_II_PARENT2,
14550+ AuLsc_II_PARENT3, /* copyup dirs */
14551+ AuLsc_II_NEW_CHILD
14552+};
1308ab2a 14553+
1facf9fc 14554+/*
4a4d8108
AM
14555+ * ii_read_lock_child, ii_write_lock_child,
14556+ * ii_read_lock_child2, ii_write_lock_child2,
14557+ * ii_read_lock_child3, ii_write_lock_child3,
14558+ * ii_read_lock_parent, ii_write_lock_parent,
14559+ * ii_read_lock_parent2, ii_write_lock_parent2,
14560+ * ii_read_lock_parent3, ii_write_lock_parent3,
14561+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 14562+ */
4a4d8108
AM
14563+#define AuReadLockFunc(name, lsc) \
14564+static inline void ii_read_lock_##name(struct inode *i) \
14565+{ \
14566+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14567+}
14568+
14569+#define AuWriteLockFunc(name, lsc) \
14570+static inline void ii_write_lock_##name(struct inode *i) \
14571+{ \
14572+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14573+}
14574+
14575+#define AuRWLockFuncs(name, lsc) \
14576+ AuReadLockFunc(name, lsc) \
14577+ AuWriteLockFunc(name, lsc)
14578+
14579+AuRWLockFuncs(child, CHILD);
14580+AuRWLockFuncs(child2, CHILD2);
14581+AuRWLockFuncs(child3, CHILD3);
14582+AuRWLockFuncs(parent, PARENT);
14583+AuRWLockFuncs(parent2, PARENT2);
14584+AuRWLockFuncs(parent3, PARENT3);
14585+AuRWLockFuncs(new_child, NEW_CHILD);
14586+
14587+#undef AuReadLockFunc
14588+#undef AuWriteLockFunc
14589+#undef AuRWLockFuncs
1facf9fc 14590+
14591+/*
4a4d8108 14592+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 14593+ */
4a4d8108 14594+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 14595+
4a4d8108
AM
14596+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
14597+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
14598+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 14599+
4a4d8108 14600+/* ---------------------------------------------------------------------- */
1308ab2a 14601+
027c5e7a
AM
14602+static inline void au_icntnr_init(struct au_icntnr *c)
14603+{
14604+#ifdef CONFIG_AUFS_DEBUG
14605+ c->vfs_inode.i_mode = 0;
14606+#endif
14607+}
14608+
4a4d8108
AM
14609+static inline unsigned int au_iigen(struct inode *inode)
14610+{
14611+ return atomic_read(&au_ii(inode)->ii_generation);
14612+}
1308ab2a 14613+
4a4d8108
AM
14614+/* tiny test for inode number */
14615+/* tmpfs generation is too rough */
14616+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
14617+{
14618+ struct au_iinfo *iinfo;
1308ab2a 14619+
4a4d8108
AM
14620+ iinfo = au_ii(inode);
14621+ AuRwMustAnyLock(&iinfo->ii_rwsem);
14622+ return !(iinfo->ii_hsb1 == h_inode->i_sb
14623+ && iinfo->ii_higen == h_inode->i_generation);
14624+}
1308ab2a 14625+
4a4d8108
AM
14626+static inline void au_iigen_dec(struct inode *inode)
14627+{
e49829fe 14628+ atomic_dec(&au_ii(inode)->ii_generation);
027c5e7a
AM
14629+}
14630+
14631+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
14632+{
14633+ int err;
14634+
14635+ err = 0;
14636+ if (unlikely(inode && au_iigen(inode) != sigen))
14637+ err = -EIO;
14638+
14639+ return err;
4a4d8108 14640+}
1308ab2a 14641+
4a4d8108 14642+/* ---------------------------------------------------------------------- */
1308ab2a 14643+
4a4d8108
AM
14644+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
14645+ aufs_bindex_t bindex)
14646+{
14647+ IiMustAnyLock(inode);
14648+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
14649+}
1308ab2a 14650+
4a4d8108
AM
14651+static inline aufs_bindex_t au_ibstart(struct inode *inode)
14652+{
14653+ IiMustAnyLock(inode);
14654+ return au_ii(inode)->ii_bstart;
14655+}
1308ab2a 14656+
4a4d8108
AM
14657+static inline aufs_bindex_t au_ibend(struct inode *inode)
14658+{
14659+ IiMustAnyLock(inode);
14660+ return au_ii(inode)->ii_bend;
14661+}
1308ab2a 14662+
4a4d8108
AM
14663+static inline struct au_vdir *au_ivdir(struct inode *inode)
14664+{
14665+ IiMustAnyLock(inode);
14666+ return au_ii(inode)->ii_vdir;
14667+}
1308ab2a 14668+
4a4d8108
AM
14669+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
14670+{
14671+ IiMustAnyLock(inode);
14672+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
14673+}
1308ab2a 14674+
4a4d8108 14675+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14676+{
4a4d8108
AM
14677+ IiMustWriteLock(inode);
14678+ au_ii(inode)->ii_bstart = bindex;
14679+}
1308ab2a 14680+
4a4d8108
AM
14681+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
14682+{
14683+ IiMustWriteLock(inode);
14684+ au_ii(inode)->ii_bend = bindex;
1308ab2a 14685+}
14686+
4a4d8108
AM
14687+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
14688+{
14689+ IiMustWriteLock(inode);
14690+ au_ii(inode)->ii_vdir = vdir;
14691+}
1facf9fc 14692+
4a4d8108 14693+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14694+{
4a4d8108
AM
14695+ IiMustAnyLock(inode);
14696+ return au_ii(inode)->ii_hinode + bindex;
14697+}
dece6358 14698+
4a4d8108 14699+/* ---------------------------------------------------------------------- */
1facf9fc 14700+
4a4d8108
AM
14701+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
14702+{
14703+ if (pin)
14704+ return pin->parent;
14705+ return NULL;
1facf9fc 14706+}
14707+
4a4d8108 14708+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 14709+{
4a4d8108
AM
14710+ if (pin && pin->hdir)
14711+ return pin->hdir->hi_inode;
14712+ return NULL;
1308ab2a 14713+}
1facf9fc 14714+
4a4d8108
AM
14715+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
14716+{
14717+ if (pin)
14718+ return pin->hdir;
14719+ return NULL;
14720+}
1facf9fc 14721+
4a4d8108 14722+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 14723+{
4a4d8108
AM
14724+ if (pin)
14725+ pin->dentry = dentry;
14726+}
1308ab2a 14727+
4a4d8108
AM
14728+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
14729+ unsigned char lflag)
14730+{
14731+ if (pin) {
7f207e10 14732+ if (lflag)
4a4d8108 14733+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 14734+ else
4a4d8108 14735+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 14736+ }
4a4d8108
AM
14737+}
14738+
14739+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
14740+{
14741+ if (pin) {
14742+ dput(pin->parent);
14743+ pin->parent = dget(parent);
1facf9fc 14744+ }
4a4d8108 14745+}
1facf9fc 14746+
4a4d8108
AM
14747+/* ---------------------------------------------------------------------- */
14748+
027c5e7a 14749+struct au_branch;
4a4d8108
AM
14750+#ifdef CONFIG_AUFS_HNOTIFY
14751+struct au_hnotify_op {
14752+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a
AM
14753+ int (*alloc)(struct au_hinode *hinode);
14754+ void (*free)(struct au_hinode *hinode);
4a4d8108
AM
14755+
14756+ void (*fin)(void);
14757+ int (*init)(void);
027c5e7a
AM
14758+
14759+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
14760+ void (*fin_br)(struct au_branch *br);
14761+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
14762+};
14763+
14764+/* hnotify.c */
027c5e7a 14765+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
14766+void au_hn_free(struct au_hinode *hinode);
14767+void au_hn_ctl(struct au_hinode *hinode, int do_set);
14768+void au_hn_reset(struct inode *inode, unsigned int flags);
14769+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
14770+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
14771+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
14772+int au_hnotify_init_br(struct au_branch *br, int perm);
14773+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
14774+int __init au_hnotify_init(void);
14775+void au_hnotify_fin(void);
14776+
7f207e10 14777+/* hfsnotify.c */
4a4d8108
AM
14778+extern const struct au_hnotify_op au_hnotify_op;
14779+
14780+static inline
14781+void au_hn_init(struct au_hinode *hinode)
14782+{
14783+ hinode->hi_notify = NULL;
1308ab2a 14784+}
14785+
53392da6
AM
14786+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14787+{
14788+ return hinode->hi_notify;
14789+}
14790+
4a4d8108
AM
14791+#else
14792+static inline
14793+int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
027c5e7a 14794+ struct inode *inode __maybe_unused)
1308ab2a 14795+{
4a4d8108
AM
14796+ return -EOPNOTSUPP;
14797+}
1308ab2a 14798+
53392da6
AM
14799+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14800+{
14801+ return NULL;
14802+}
14803+
4a4d8108
AM
14804+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
14805+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
14806+ int do_set __maybe_unused)
14807+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
14808+ unsigned int flags __maybe_unused)
027c5e7a
AM
14809+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
14810+ struct au_branch *br __maybe_unused,
14811+ int perm __maybe_unused)
14812+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
14813+ int perm __maybe_unused)
14814+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
14815+AuStubInt0(__init au_hnotify_init, void)
14816+AuStubVoid(au_hnotify_fin, void)
14817+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
14818+#endif /* CONFIG_AUFS_HNOTIFY */
14819+
14820+static inline void au_hn_suspend(struct au_hinode *hdir)
14821+{
14822+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 14823+}
14824+
4a4d8108 14825+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 14826+{
4a4d8108
AM
14827+ au_hn_ctl(hdir, /*do_set*/1);
14828+}
1308ab2a 14829+
4a4d8108
AM
14830+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
14831+{
14832+ mutex_lock(&hdir->hi_inode->i_mutex);
14833+ au_hn_suspend(hdir);
14834+}
dece6358 14835+
4a4d8108
AM
14836+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
14837+ unsigned int sc __maybe_unused)
14838+{
14839+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
14840+ au_hn_suspend(hdir);
1facf9fc 14841+}
1facf9fc 14842+
4a4d8108
AM
14843+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
14844+{
14845+ au_hn_resume(hdir);
14846+ mutex_unlock(&hdir->hi_inode->i_mutex);
14847+}
14848+
14849+#endif /* __KERNEL__ */
14850+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
14851diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
14852--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
53392da6 14853+++ linux/fs/aufs/ioctl.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 14854@@ -0,0 +1,158 @@
4a4d8108 14855+/*
027c5e7a 14856+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14857+ *
14858+ * This program, aufs is free software; you can redistribute it and/or modify
14859+ * it under the terms of the GNU General Public License as published by
14860+ * the Free Software Foundation; either version 2 of the License, or
14861+ * (at your option) any later version.
14862+ *
14863+ * This program is distributed in the hope that it will be useful,
14864+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14865+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14866+ * GNU General Public License for more details.
14867+ *
14868+ * You should have received a copy of the GNU General Public License
14869+ * along with this program; if not, write to the Free Software
14870+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14871+ */
14872+
14873+/*
14874+ * ioctl
14875+ * plink-management and readdir in userspace.
14876+ * assist the pathconf(3) wrapper library.
14877+ */
14878+
14879+#include <linux/file.h>
14880+#include "aufs.h"
14881+
14882+static int au_wbr_fd(struct path *path)
14883+{
14884+ int err, fd;
14885+ aufs_bindex_t wbi, bindex, bend;
14886+ struct file *h_file;
14887+ struct super_block *sb;
14888+ struct dentry *root;
14889+ struct au_branch *wbr;
14890+
14891+ err = get_unused_fd();
14892+ if (unlikely(err < 0))
14893+ goto out;
14894+ fd = err;
14895+
14896+ wbi = 0;
14897+ sb = path->dentry->d_sb;
14898+ root = sb->s_root;
14899+ aufs_read_lock(root, AuLock_IR);
14900+ wbr = au_sbr(sb, wbi);
14901+ if (!(path->mnt->mnt_flags & MNT_READONLY)
14902+ && !au_br_writable(wbr->br_perm)) {
14903+ bend = au_sbend(sb);
14904+ for (bindex = 1; bindex <= bend; bindex++) {
14905+ wbr = au_sbr(sb, bindex);
14906+ if (au_br_writable(wbr->br_perm)) {
14907+ wbi = bindex;
14908+ break;
14909+ }
14910+ }
14911+ wbr = au_sbr(sb, wbi);
14912+ }
14913+ AuDbg("wbi %d\n", wbi);
14914+ h_file = au_h_open(root, wbi, O_RDONLY | O_DIRECTORY | O_LARGEFILE,
14915+ NULL);
14916+ aufs_read_unlock(root, AuLock_IR);
14917+ err = PTR_ERR(h_file);
14918+ if (IS_ERR(h_file))
14919+ goto out_fd;
14920+
14921+ atomic_dec(&wbr->br_count); /* cf. au_h_open() */
14922+ fd_install(fd, h_file);
14923+ err = fd;
14924+ goto out; /* success */
14925+
4f0767ce 14926+out_fd:
4a4d8108 14927+ put_unused_fd(fd);
4f0767ce 14928+out:
4a4d8108
AM
14929+ return err;
14930+}
14931+
14932+/* ---------------------------------------------------------------------- */
14933+
14934+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
14935+{
14936+ long err;
14937+
14938+ switch (cmd) {
4a4d8108
AM
14939+ case AUFS_CTL_RDU:
14940+ case AUFS_CTL_RDU_INO:
14941+ err = au_rdu_ioctl(file, cmd, arg);
14942+ break;
14943+
14944+ case AUFS_CTL_WBR_FD:
14945+ err = au_wbr_fd(&file->f_path);
14946+ break;
14947+
027c5e7a
AM
14948+ case AUFS_CTL_IBUSY:
14949+ err = au_ibusy_ioctl(file, arg);
14950+ break;
14951+
4a4d8108
AM
14952+ default:
14953+ /* do not call the lower */
14954+ AuDbg("0x%x\n", cmd);
14955+ err = -ENOTTY;
14956+ }
14957+
14958+ AuTraceErr(err);
14959+ return err;
14960+}
14961+
14962+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
14963+{
14964+ long err;
14965+
14966+ switch (cmd) {
14967+ case AUFS_CTL_WBR_FD:
14968+ err = au_wbr_fd(&file->f_path);
14969+ break;
14970+
14971+ default:
14972+ /* do not call the lower */
14973+ AuDbg("0x%x\n", cmd);
14974+ err = -ENOTTY;
14975+ }
14976+
14977+ AuTraceErr(err);
14978+ return err;
14979+}
b752ccd1
AM
14980+
14981+#ifdef CONFIG_COMPAT
14982+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
14983+ unsigned long arg)
14984+{
14985+ long err;
14986+
14987+ switch (cmd) {
14988+ case AUFS_CTL_RDU:
14989+ case AUFS_CTL_RDU_INO:
14990+ err = au_rdu_compat_ioctl(file, cmd, arg);
14991+ break;
14992+
027c5e7a
AM
14993+ case AUFS_CTL_IBUSY:
14994+ err = au_ibusy_compat_ioctl(file, arg);
14995+ break;
14996+
b752ccd1
AM
14997+ default:
14998+ err = aufs_ioctl_dir(file, cmd, arg);
14999+ }
15000+
15001+ AuTraceErr(err);
15002+ return err;
15003+}
15004+
15005+#if 0 /* unused yet */
15006+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15007+ unsigned long arg)
15008+{
15009+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
15010+}
15011+#endif
15012+#endif
7f207e10
AM
15013diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
15014--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
53392da6 15015+++ linux/fs/aufs/i_op_add.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 15016@@ -0,0 +1,711 @@
4a4d8108 15017+/*
027c5e7a 15018+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15019+ *
15020+ * This program, aufs is free software; you can redistribute it and/or modify
15021+ * it under the terms of the GNU General Public License as published by
15022+ * the Free Software Foundation; either version 2 of the License, or
15023+ * (at your option) any later version.
15024+ *
15025+ * This program is distributed in the hope that it will be useful,
15026+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15027+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15028+ * GNU General Public License for more details.
15029+ *
15030+ * You should have received a copy of the GNU General Public License
15031+ * along with this program; if not, write to the Free Software
15032+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15033+ */
15034+
15035+/*
15036+ * inode operations (add entry)
15037+ */
15038+
15039+#include "aufs.h"
15040+
15041+/*
15042+ * final procedure of adding a new entry, except link(2).
15043+ * remove whiteout, instantiate, copyup the parent dir's times and size
15044+ * and update version.
15045+ * if it failed, re-create the removed whiteout.
15046+ */
15047+static int epilog(struct inode *dir, aufs_bindex_t bindex,
15048+ struct dentry *wh_dentry, struct dentry *dentry)
15049+{
15050+ int err, rerr;
15051+ aufs_bindex_t bwh;
15052+ struct path h_path;
15053+ struct inode *inode, *h_dir;
15054+ struct dentry *wh;
15055+
15056+ bwh = -1;
15057+ if (wh_dentry) {
15058+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
15059+ IMustLock(h_dir);
15060+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
15061+ bwh = au_dbwh(dentry);
15062+ h_path.dentry = wh_dentry;
15063+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
15064+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
15065+ dentry);
15066+ if (unlikely(err))
15067+ goto out;
15068+ }
15069+
15070+ inode = au_new_inode(dentry, /*must_new*/1);
15071+ if (!IS_ERR(inode)) {
15072+ d_instantiate(dentry, inode);
15073+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
15074+ IMustLock(dir);
15075+ if (au_ibstart(dir) == au_dbstart(dentry))
15076+ au_cpup_attr_timesizes(dir);
15077+ dir->i_version++;
15078+ return 0; /* success */
15079+ }
15080+
15081+ err = PTR_ERR(inode);
15082+ if (!wh_dentry)
15083+ goto out;
15084+
15085+ /* revert */
15086+ /* dir inode is locked */
15087+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
15088+ rerr = PTR_ERR(wh);
15089+ if (IS_ERR(wh)) {
15090+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
15091+ AuDLNPair(dentry), err, rerr);
15092+ err = -EIO;
15093+ } else
15094+ dput(wh);
15095+
4f0767ce 15096+out:
4a4d8108
AM
15097+ return err;
15098+}
15099+
027c5e7a
AM
15100+static int au_d_may_add(struct dentry *dentry)
15101+{
15102+ int err;
15103+
15104+ err = 0;
15105+ if (unlikely(d_unhashed(dentry)))
15106+ err = -ENOENT;
15107+ if (unlikely(dentry->d_inode))
15108+ err = -EEXIST;
15109+ return err;
15110+}
15111+
4a4d8108
AM
15112+/*
15113+ * simple tests for the adding inode operations.
15114+ * following the checks in vfs, plus the parent-child relationship.
15115+ */
15116+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15117+ struct dentry *h_parent, int isdir)
15118+{
15119+ int err;
15120+ umode_t h_mode;
15121+ struct dentry *h_dentry;
15122+ struct inode *h_inode;
15123+
15124+ err = -ENAMETOOLONG;
15125+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15126+ goto out;
15127+
15128+ h_dentry = au_h_dptr(dentry, bindex);
15129+ h_inode = h_dentry->d_inode;
15130+ if (!dentry->d_inode) {
15131+ err = -EEXIST;
15132+ if (unlikely(h_inode))
15133+ goto out;
15134+ } else {
15135+ /* rename(2) case */
15136+ err = -EIO;
15137+ if (unlikely(!h_inode || !h_inode->i_nlink))
15138+ goto out;
15139+
15140+ h_mode = h_inode->i_mode;
15141+ if (!isdir) {
15142+ err = -EISDIR;
15143+ if (unlikely(S_ISDIR(h_mode)))
15144+ goto out;
15145+ } else if (unlikely(!S_ISDIR(h_mode))) {
15146+ err = -ENOTDIR;
15147+ goto out;
15148+ }
15149+ }
15150+
15151+ err = 0;
15152+ /* expected parent dir is locked */
15153+ if (unlikely(h_parent != h_dentry->d_parent))
15154+ err = -EIO;
15155+
4f0767ce 15156+out:
4a4d8108
AM
15157+ AuTraceErr(err);
15158+ return err;
15159+}
15160+
15161+/*
15162+ * initial procedure of adding a new entry.
15163+ * prepare writable branch and the parent dir, lock it,
15164+ * and lookup whiteout for the new entry.
15165+ */
15166+static struct dentry*
15167+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
15168+ struct dentry *src_dentry, struct au_pin *pin,
15169+ struct au_wr_dir_args *wr_dir_args)
15170+{
15171+ struct dentry *wh_dentry, *h_parent;
15172+ struct super_block *sb;
15173+ struct au_branch *br;
15174+ int err;
15175+ unsigned int udba;
15176+ aufs_bindex_t bcpup;
15177+
15178+ AuDbg("%.*s\n", AuDLNPair(dentry));
15179+
15180+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
15181+ bcpup = err;
15182+ wh_dentry = ERR_PTR(err);
15183+ if (unlikely(err < 0))
15184+ goto out;
15185+
15186+ sb = dentry->d_sb;
15187+ udba = au_opt_udba(sb);
15188+ err = au_pin(pin, dentry, bcpup, udba,
15189+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15190+ wh_dentry = ERR_PTR(err);
15191+ if (unlikely(err))
15192+ goto out;
15193+
15194+ h_parent = au_pinned_h_parent(pin);
15195+ if (udba != AuOpt_UDBA_NONE
15196+ && au_dbstart(dentry) == bcpup)
15197+ err = au_may_add(dentry, bcpup, h_parent,
15198+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
15199+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15200+ err = -ENAMETOOLONG;
15201+ wh_dentry = ERR_PTR(err);
15202+ if (unlikely(err))
15203+ goto out_unpin;
15204+
15205+ br = au_sbr(sb, bcpup);
15206+ if (dt) {
15207+ struct path tmp = {
15208+ .dentry = h_parent,
15209+ .mnt = br->br_mnt
15210+ };
15211+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
15212+ }
15213+
15214+ wh_dentry = NULL;
15215+ if (bcpup != au_dbwh(dentry))
15216+ goto out; /* success */
15217+
15218+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
15219+
4f0767ce 15220+out_unpin:
4a4d8108
AM
15221+ if (IS_ERR(wh_dentry))
15222+ au_unpin(pin);
4f0767ce 15223+out:
4a4d8108
AM
15224+ return wh_dentry;
15225+}
15226+
15227+/* ---------------------------------------------------------------------- */
15228+
15229+enum { Mknod, Symlink, Creat };
15230+struct simple_arg {
15231+ int type;
15232+ union {
15233+ struct {
15234+ int mode;
15235+ struct nameidata *nd;
15236+ } c;
15237+ struct {
15238+ const char *symname;
15239+ } s;
15240+ struct {
15241+ int mode;
15242+ dev_t dev;
15243+ } m;
15244+ } u;
15245+};
15246+
15247+static int add_simple(struct inode *dir, struct dentry *dentry,
15248+ struct simple_arg *arg)
15249+{
15250+ int err;
15251+ aufs_bindex_t bstart;
15252+ unsigned char created;
15253+ struct au_dtime dt;
15254+ struct au_pin pin;
15255+ struct path h_path;
15256+ struct dentry *wh_dentry, *parent;
15257+ struct inode *h_dir;
15258+ struct au_wr_dir_args wr_dir_args = {
15259+ .force_btgt = -1,
15260+ .flags = AuWrDir_ADD_ENTRY
15261+ };
15262+
15263+ AuDbg("%.*s\n", AuDLNPair(dentry));
15264+ IMustLock(dir);
15265+
15266+ parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15267+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15268+ if (unlikely(err))
15269+ goto out;
15270+ err = au_d_may_add(dentry);
15271+ if (unlikely(err))
15272+ goto out_unlock;
4a4d8108
AM
15273+ di_write_lock_parent(parent);
15274+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
15275+ &wr_dir_args);
15276+ err = PTR_ERR(wh_dentry);
15277+ if (IS_ERR(wh_dentry))
027c5e7a 15278+ goto out_parent;
4a4d8108
AM
15279+
15280+ bstart = au_dbstart(dentry);
15281+ h_path.dentry = au_h_dptr(dentry, bstart);
15282+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
15283+ h_dir = au_pinned_h_dir(&pin);
15284+ switch (arg->type) {
15285+ case Creat:
15286+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
15287+ break;
15288+ case Symlink:
15289+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
15290+ break;
15291+ case Mknod:
15292+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
15293+ break;
15294+ default:
15295+ BUG();
15296+ }
15297+ created = !err;
15298+ if (!err)
15299+ err = epilog(dir, bstart, wh_dentry, dentry);
15300+
15301+ /* revert */
15302+ if (unlikely(created && err && h_path.dentry->d_inode)) {
15303+ int rerr;
15304+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
15305+ if (rerr) {
15306+ AuIOErr("%.*s revert failure(%d, %d)\n",
15307+ AuDLNPair(dentry), err, rerr);
15308+ err = -EIO;
15309+ }
15310+ au_dtime_revert(&dt);
4a4d8108
AM
15311+ }
15312+
15313+ au_unpin(&pin);
15314+ dput(wh_dentry);
15315+
027c5e7a
AM
15316+out_parent:
15317+ di_write_unlock(parent);
15318+out_unlock:
4a4d8108
AM
15319+ if (unlikely(err)) {
15320+ au_update_dbstart(dentry);
15321+ d_drop(dentry);
15322+ }
4a4d8108 15323+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15324+out:
4a4d8108
AM
15325+ return err;
15326+}
15327+
15328+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
15329+{
15330+ struct simple_arg arg = {
15331+ .type = Mknod,
15332+ .u.m = {
15333+ .mode = mode,
15334+ .dev = dev
15335+ }
15336+ };
15337+ return add_simple(dir, dentry, &arg);
15338+}
15339+
15340+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
15341+{
15342+ struct simple_arg arg = {
15343+ .type = Symlink,
15344+ .u.s.symname = symname
15345+ };
15346+ return add_simple(dir, dentry, &arg);
15347+}
15348+
15349+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15350+ struct nameidata *nd)
15351+{
15352+ struct simple_arg arg = {
15353+ .type = Creat,
15354+ .u.c = {
15355+ .mode = mode,
15356+ .nd = nd
15357+ }
15358+ };
15359+ return add_simple(dir, dentry, &arg);
15360+}
15361+
15362+/* ---------------------------------------------------------------------- */
15363+
15364+struct au_link_args {
15365+ aufs_bindex_t bdst, bsrc;
15366+ struct au_pin pin;
15367+ struct path h_path;
15368+ struct dentry *src_parent, *parent;
15369+};
15370+
15371+static int au_cpup_before_link(struct dentry *src_dentry,
15372+ struct au_link_args *a)
15373+{
15374+ int err;
15375+ struct dentry *h_src_dentry;
15376+ struct mutex *h_mtx;
15377+ struct file *h_file;
15378+
15379+ di_read_lock_parent(a->src_parent, AuLock_IR);
15380+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
15381+ if (unlikely(err))
15382+ goto out;
15383+
15384+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
15385+ h_mtx = &h_src_dentry->d_inode->i_mutex;
15386+ err = au_pin(&a->pin, src_dentry, a->bdst,
15387+ au_opt_udba(src_dentry->d_sb),
15388+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15389+ if (unlikely(err))
15390+ goto out;
15391+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15392+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15393+ if (IS_ERR(h_file)) {
15394+ err = PTR_ERR(h_file);
15395+ h_file = NULL;
15396+ } else
15397+ err = au_sio_cpup_simple(src_dentry, a->bdst, a->bsrc,
15398+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
15399+ mutex_unlock(h_mtx);
15400+ au_h_open_post(src_dentry, a->bsrc, h_file);
15401+ au_unpin(&a->pin);
15402+
4f0767ce 15403+out:
4a4d8108
AM
15404+ di_read_unlock(a->src_parent, AuLock_IR);
15405+ return err;
15406+}
15407+
15408+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
15409+{
15410+ int err;
15411+ unsigned char plink;
15412+ struct inode *h_inode, *inode;
15413+ struct dentry *h_src_dentry;
15414+ struct super_block *sb;
15415+ struct file *h_file;
15416+
15417+ plink = 0;
15418+ h_inode = NULL;
15419+ sb = src_dentry->d_sb;
15420+ inode = src_dentry->d_inode;
15421+ if (au_ibstart(inode) <= a->bdst)
15422+ h_inode = au_h_iptr(inode, a->bdst);
15423+ if (!h_inode || !h_inode->i_nlink) {
15424+ /* copyup src_dentry as the name of dentry. */
15425+ au_set_dbstart(src_dentry, a->bdst);
15426+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
15427+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
15428+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
15429+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15430+ if (IS_ERR(h_file)) {
15431+ err = PTR_ERR(h_file);
15432+ h_file = NULL;
15433+ } else
15434+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
15435+ -1, AuCpup_KEEPLINO,
15436+ a->parent);
15437+ mutex_unlock(&h_inode->i_mutex);
15438+ au_h_open_post(src_dentry, a->bsrc, h_file);
15439+ au_set_h_dptr(src_dentry, a->bdst, NULL);
15440+ au_set_dbstart(src_dentry, a->bsrc);
15441+ } else {
15442+ /* the inode of src_dentry already exists on a.bdst branch */
15443+ h_src_dentry = d_find_alias(h_inode);
15444+ if (!h_src_dentry && au_plink_test(inode)) {
15445+ plink = 1;
15446+ h_src_dentry = au_plink_lkup(inode, a->bdst);
15447+ err = PTR_ERR(h_src_dentry);
15448+ if (IS_ERR(h_src_dentry))
15449+ goto out;
15450+
15451+ if (unlikely(!h_src_dentry->d_inode)) {
15452+ dput(h_src_dentry);
15453+ h_src_dentry = NULL;
15454+ }
15455+
15456+ }
15457+ if (h_src_dentry) {
15458+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15459+ &a->h_path);
15460+ dput(h_src_dentry);
15461+ } else {
15462+ AuIOErr("no dentry found for hi%lu on b%d\n",
15463+ h_inode->i_ino, a->bdst);
15464+ err = -EIO;
15465+ }
15466+ }
15467+
15468+ if (!err && !plink)
15469+ au_plink_append(inode, a->bdst, a->h_path.dentry);
15470+
15471+out:
2cbb1c4b 15472+ AuTraceErr(err);
4a4d8108
AM
15473+ return err;
15474+}
15475+
15476+int aufs_link(struct dentry *src_dentry, struct inode *dir,
15477+ struct dentry *dentry)
15478+{
15479+ int err, rerr;
15480+ struct au_dtime dt;
15481+ struct au_link_args *a;
15482+ struct dentry *wh_dentry, *h_src_dentry;
15483+ struct inode *inode;
15484+ struct super_block *sb;
15485+ struct au_wr_dir_args wr_dir_args = {
15486+ /* .force_btgt = -1, */
15487+ .flags = AuWrDir_ADD_ENTRY
15488+ };
15489+
15490+ IMustLock(dir);
15491+ inode = src_dentry->d_inode;
15492+ IMustLock(inode);
15493+
4a4d8108
AM
15494+ err = -ENOMEM;
15495+ a = kzalloc(sizeof(*a), GFP_NOFS);
15496+ if (unlikely(!a))
15497+ goto out;
15498+
15499+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15500+ err = aufs_read_and_write_lock2(dentry, src_dentry,
15501+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
15502+ if (unlikely(err))
15503+ goto out_kfree;
027c5e7a
AM
15504+ err = au_d_hashed_positive(src_dentry);
15505+ if (unlikely(err))
15506+ goto out_unlock;
15507+ err = au_d_may_add(dentry);
15508+ if (unlikely(err))
15509+ goto out_unlock;
e49829fe 15510+
4a4d8108 15511+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 15512+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
15513+
15514+ di_write_lock_parent(a->parent);
15515+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
15516+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
15517+ &wr_dir_args);
15518+ err = PTR_ERR(wh_dentry);
15519+ if (IS_ERR(wh_dentry))
027c5e7a 15520+ goto out_parent;
4a4d8108
AM
15521+
15522+ err = 0;
15523+ sb = dentry->d_sb;
15524+ a->bdst = au_dbstart(dentry);
15525+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
15526+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
15527+ a->bsrc = au_ibstart(inode);
15528+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15529+ if (!h_src_dentry) {
15530+ a->bsrc = au_dbstart(src_dentry);
15531+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15532+ AuDebugOn(!h_src_dentry);
15533+ } else if (IS_ERR(h_src_dentry))
15534+ goto out_parent;
15535+
4a4d8108
AM
15536+ if (au_opt_test(au_mntflags(sb), PLINK)) {
15537+ if (a->bdst < a->bsrc
15538+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
15539+ err = au_cpup_or_link(src_dentry, a);
2cbb1c4b 15540+ else
4a4d8108
AM
15541+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15542+ &a->h_path);
2cbb1c4b 15543+ dput(h_src_dentry);
4a4d8108
AM
15544+ } else {
15545+ /*
15546+ * copyup src_dentry to the branch we process,
15547+ * and then link(2) to it.
15548+ */
2cbb1c4b 15549+ dput(h_src_dentry);
4a4d8108
AM
15550+ if (a->bdst < a->bsrc
15551+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
15552+ au_unpin(&a->pin);
15553+ di_write_unlock(a->parent);
15554+ err = au_cpup_before_link(src_dentry, a);
15555+ di_write_lock_parent(a->parent);
15556+ if (!err)
15557+ err = au_pin(&a->pin, dentry, a->bdst,
15558+ au_opt_udba(sb),
15559+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15560+ if (unlikely(err))
15561+ goto out_wh;
15562+ }
15563+ if (!err) {
15564+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
15565+ err = -ENOENT;
15566+ if (h_src_dentry && h_src_dentry->d_inode)
15567+ err = vfsub_link(h_src_dentry,
15568+ au_pinned_h_dir(&a->pin),
15569+ &a->h_path);
15570+ }
15571+ }
15572+ if (unlikely(err))
15573+ goto out_unpin;
15574+
15575+ if (wh_dentry) {
15576+ a->h_path.dentry = wh_dentry;
15577+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
15578+ dentry);
15579+ if (unlikely(err))
15580+ goto out_revert;
15581+ }
15582+
15583+ dir->i_version++;
15584+ if (au_ibstart(dir) == au_dbstart(dentry))
15585+ au_cpup_attr_timesizes(dir);
15586+ inc_nlink(inode);
15587+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
15588+ d_instantiate(dentry, au_igrab(inode));
15589+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
15590+ /* some filesystem calls d_drop() */
15591+ d_drop(dentry);
15592+ goto out_unpin; /* success */
15593+
4f0767ce 15594+out_revert:
4a4d8108 15595+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
027c5e7a
AM
15596+ if (unlikely(rerr)) {
15597+ AuIOErr("%.*s reverting failed(%d, %d)\n",
15598+ AuDLNPair(dentry), err, rerr);
15599+ err = -EIO;
15600+ }
4a4d8108 15601+ au_dtime_revert(&dt);
4f0767ce 15602+out_unpin:
4a4d8108 15603+ au_unpin(&a->pin);
4f0767ce 15604+out_wh:
4a4d8108 15605+ dput(wh_dentry);
027c5e7a
AM
15606+out_parent:
15607+ di_write_unlock(a->parent);
15608+ dput(a->src_parent);
4f0767ce 15609+out_unlock:
4a4d8108
AM
15610+ if (unlikely(err)) {
15611+ au_update_dbstart(dentry);
15612+ d_drop(dentry);
15613+ }
4a4d8108 15614+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 15615+out_kfree:
4a4d8108 15616+ kfree(a);
4f0767ce 15617+out:
4a4d8108
AM
15618+ return err;
15619+}
15620+
15621+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
15622+{
15623+ int err, rerr;
15624+ aufs_bindex_t bindex;
15625+ unsigned char diropq;
15626+ struct path h_path;
15627+ struct dentry *wh_dentry, *parent, *opq_dentry;
15628+ struct mutex *h_mtx;
15629+ struct super_block *sb;
15630+ struct {
15631+ struct au_pin pin;
15632+ struct au_dtime dt;
15633+ } *a; /* reduce the stack usage */
15634+ struct au_wr_dir_args wr_dir_args = {
15635+ .force_btgt = -1,
15636+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
15637+ };
15638+
15639+ IMustLock(dir);
15640+
15641+ err = -ENOMEM;
15642+ a = kmalloc(sizeof(*a), GFP_NOFS);
15643+ if (unlikely(!a))
15644+ goto out;
15645+
027c5e7a
AM
15646+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15647+ if (unlikely(err))
15648+ goto out_free;
15649+ err = au_d_may_add(dentry);
15650+ if (unlikely(err))
15651+ goto out_unlock;
15652+
4a4d8108
AM
15653+ parent = dentry->d_parent; /* dir inode is locked */
15654+ di_write_lock_parent(parent);
15655+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
15656+ &a->pin, &wr_dir_args);
15657+ err = PTR_ERR(wh_dentry);
15658+ if (IS_ERR(wh_dentry))
027c5e7a 15659+ goto out_parent;
4a4d8108
AM
15660+
15661+ sb = dentry->d_sb;
15662+ bindex = au_dbstart(dentry);
15663+ h_path.dentry = au_h_dptr(dentry, bindex);
15664+ h_path.mnt = au_sbr_mnt(sb, bindex);
15665+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
15666+ if (unlikely(err))
027c5e7a 15667+ goto out_unpin;
4a4d8108
AM
15668+
15669+ /* make the dir opaque */
15670+ diropq = 0;
15671+ h_mtx = &h_path.dentry->d_inode->i_mutex;
15672+ if (wh_dentry
15673+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
15674+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15675+ opq_dentry = au_diropq_create(dentry, bindex);
15676+ mutex_unlock(h_mtx);
15677+ err = PTR_ERR(opq_dentry);
15678+ if (IS_ERR(opq_dentry))
15679+ goto out_dir;
15680+ dput(opq_dentry);
15681+ diropq = 1;
15682+ }
15683+
15684+ err = epilog(dir, bindex, wh_dentry, dentry);
15685+ if (!err) {
15686+ inc_nlink(dir);
027c5e7a 15687+ goto out_unpin; /* success */
4a4d8108
AM
15688+ }
15689+
15690+ /* revert */
15691+ if (diropq) {
15692+ AuLabel(revert opq);
15693+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15694+ rerr = au_diropq_remove(dentry, bindex);
15695+ mutex_unlock(h_mtx);
15696+ if (rerr) {
15697+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
15698+ AuDLNPair(dentry), err, rerr);
15699+ err = -EIO;
15700+ }
15701+ }
15702+
4f0767ce 15703+out_dir:
4a4d8108
AM
15704+ AuLabel(revert dir);
15705+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
15706+ if (rerr) {
15707+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
15708+ AuDLNPair(dentry), err, rerr);
15709+ err = -EIO;
15710+ }
4a4d8108 15711+ au_dtime_revert(&a->dt);
027c5e7a 15712+out_unpin:
4a4d8108
AM
15713+ au_unpin(&a->pin);
15714+ dput(wh_dentry);
027c5e7a
AM
15715+out_parent:
15716+ di_write_unlock(parent);
15717+out_unlock:
4a4d8108
AM
15718+ if (unlikely(err)) {
15719+ au_update_dbstart(dentry);
15720+ d_drop(dentry);
15721+ }
4a4d8108 15722+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15723+out_free:
4a4d8108 15724+ kfree(a);
4f0767ce 15725+out:
4a4d8108
AM
15726+ return err;
15727+}
7f207e10
AM
15728diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
15729--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
53392da6 15730+++ linux/fs/aufs/i_op.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 15731@@ -0,0 +1,976 @@
4a4d8108 15732+/*
027c5e7a 15733+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15734+ *
15735+ * This program, aufs is free software; you can redistribute it and/or modify
15736+ * it under the terms of the GNU General Public License as published by
15737+ * the Free Software Foundation; either version 2 of the License, or
15738+ * (at your option) any later version.
15739+ *
15740+ * This program is distributed in the hope that it will be useful,
15741+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15742+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15743+ * GNU General Public License for more details.
15744+ *
15745+ * You should have received a copy of the GNU General Public License
15746+ * along with this program; if not, write to the Free Software
15747+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15748+ */
1facf9fc 15749+
1308ab2a 15750+/*
4a4d8108 15751+ * inode operations (except add/del/rename)
1308ab2a 15752+ */
4a4d8108
AM
15753+
15754+#include <linux/device_cgroup.h>
15755+#include <linux/fs_stack.h>
15756+#include <linux/mm.h>
15757+#include <linux/namei.h>
15758+#include <linux/security.h>
15759+#include <linux/uaccess.h>
15760+#include "aufs.h"
15761+
027c5e7a 15762+static int h_permission(struct inode *h_inode, int mask, unsigned int flags,
4a4d8108 15763+ struct vfsmount *h_mnt, int brperm)
1facf9fc 15764+{
1308ab2a 15765+ int err;
4a4d8108 15766+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 15767+
4a4d8108
AM
15768+ err = -EACCES;
15769+ if ((write_mask && IS_IMMUTABLE(h_inode))
15770+ || ((mask & MAY_EXEC)
15771+ && S_ISREG(h_inode->i_mode)
15772+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
15773+ || !(h_inode->i_mode & S_IXUGO))))
15774+ goto out;
15775+
15776+ /*
15777+ * - skip the lower fs test in the case of write to ro branch.
15778+ * - nfs dir permission write check is optimized, but a policy for
15779+ * link/rename requires a real check.
15780+ */
15781+ if ((write_mask && !au_br_writable(brperm))
15782+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
15783+ && write_mask && !(mask & MAY_READ))
15784+ || !h_inode->i_op->permission) {
15785+ /* AuLabel(generic_permission); */
027c5e7a 15786+ err = generic_permission(h_inode, mask, flags,
4a4d8108 15787+ h_inode->i_op->check_acl);
1308ab2a 15788+ } else {
4a4d8108 15789+ /* AuLabel(h_inode->permission); */
027c5e7a 15790+ err = h_inode->i_op->permission(h_inode, mask, flags);
4a4d8108
AM
15791+ AuTraceErr(err);
15792+ }
1facf9fc 15793+
4a4d8108
AM
15794+ if (!err)
15795+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 15796+ if (!err)
4a4d8108 15797+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
15798+
15799+#if 0
15800+ if (!err) {
15801+ /* todo: do we need to call ima_path_check()? */
15802+ struct path h_path = {
15803+ .dentry =
15804+ .mnt = h_mnt
15805+ };
15806+ err = ima_path_check(&h_path,
15807+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
15808+ IMA_COUNT_LEAVE);
1308ab2a 15809+ }
4a4d8108 15810+#endif
dece6358 15811+
4f0767ce 15812+out:
1308ab2a 15813+ return err;
15814+}
dece6358 15815+
027c5e7a 15816+static int aufs_permission(struct inode *inode, int mask, unsigned int flags)
1308ab2a 15817+{
15818+ int err;
4a4d8108
AM
15819+ aufs_bindex_t bindex, bend;
15820+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
15821+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
15822+ struct inode *h_inode;
15823+ struct super_block *sb;
15824+ struct au_branch *br;
1facf9fc 15825+
027c5e7a
AM
15826+ /* todo: support rcu-walk? */
15827+ if (flags & IPERM_FLAG_RCU)
15828+ return -ECHILD;
15829+
4a4d8108
AM
15830+ sb = inode->i_sb;
15831+ si_read_lock(sb, AuLock_FLUSH);
15832+ ii_read_lock_child(inode);
027c5e7a
AM
15833+#if 0
15834+ err = au_iigen_test(inode, au_sigen(sb));
15835+ if (unlikely(err))
15836+ goto out;
15837+#endif
dece6358 15838+
4a4d8108
AM
15839+ if (!isdir || write_mask) {
15840+ err = au_busy_or_stale();
15841+ h_inode = au_h_iptr(inode, au_ibstart(inode));
15842+ if (unlikely(!h_inode
15843+ || (h_inode->i_mode & S_IFMT)
15844+ != (inode->i_mode & S_IFMT)))
15845+ goto out;
1facf9fc 15846+
4a4d8108
AM
15847+ err = 0;
15848+ bindex = au_ibstart(inode);
15849+ br = au_sbr(sb, bindex);
027c5e7a
AM
15850+ err = h_permission(h_inode, mask, flags, br->br_mnt,
15851+ br->br_perm);
4a4d8108
AM
15852+ if (write_mask
15853+ && !err
15854+ && !special_file(h_inode->i_mode)) {
15855+ /* test whether the upper writable branch exists */
15856+ err = -EROFS;
15857+ for (; bindex >= 0; bindex--)
15858+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
15859+ err = 0;
15860+ break;
15861+ }
15862+ }
15863+ goto out;
15864+ }
dece6358 15865+
4a4d8108 15866+ /* non-write to dir */
1308ab2a 15867+ err = 0;
4a4d8108
AM
15868+ bend = au_ibend(inode);
15869+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
15870+ h_inode = au_h_iptr(inode, bindex);
15871+ if (h_inode) {
15872+ err = au_busy_or_stale();
15873+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
15874+ break;
15875+
15876+ br = au_sbr(sb, bindex);
027c5e7a 15877+ err = h_permission(h_inode, mask, flags, br->br_mnt,
4a4d8108
AM
15878+ br->br_perm);
15879+ }
15880+ }
1308ab2a 15881+
4f0767ce 15882+out:
4a4d8108
AM
15883+ ii_read_unlock(inode);
15884+ si_read_unlock(sb);
1308ab2a 15885+ return err;
15886+}
15887+
4a4d8108 15888+/* ---------------------------------------------------------------------- */
1facf9fc 15889+
4a4d8108
AM
15890+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
15891+ struct nameidata *nd)
15892+{
15893+ struct dentry *ret, *parent;
b752ccd1 15894+ struct inode *inode;
4a4d8108
AM
15895+ struct super_block *sb;
15896+ int err, npositive;
dece6358 15897+
4a4d8108 15898+ IMustLock(dir);
1308ab2a 15899+
4a4d8108 15900+ sb = dir->i_sb;
7f207e10
AM
15901+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
15902+ ret = ERR_PTR(err);
15903+ if (unlikely(err))
15904+ goto out;
15905+
4a4d8108
AM
15906+ ret = ERR_PTR(-ENAMETOOLONG);
15907+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
7f207e10 15908+ goto out_si;
4a4d8108
AM
15909+ err = au_di_init(dentry);
15910+ ret = ERR_PTR(err);
15911+ if (unlikely(err))
7f207e10 15912+ goto out_si;
1308ab2a 15913+
027c5e7a 15914+ npositive = 0; /* suppress a warning */
4a4d8108
AM
15915+ parent = dentry->d_parent; /* dir inode is locked */
15916+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
15917+ err = au_alive_dir(parent);
15918+ if (!err)
15919+ err = au_digen_test(parent, au_sigen(sb));
15920+ if (!err) {
15921+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
15922+ /*type*/0, nd);
15923+ err = npositive;
15924+ }
4a4d8108 15925+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
15926+ ret = ERR_PTR(err);
15927+ if (unlikely(err < 0))
15928+ goto out_unlock;
1308ab2a 15929+
4a4d8108
AM
15930+ inode = NULL;
15931+ if (npositive) {
b752ccd1 15932+ inode = au_new_inode(dentry, /*must_new*/0);
4a4d8108 15933+ ret = (void *)inode;
1facf9fc 15934+ }
4a4d8108
AM
15935+ if (IS_ERR(inode))
15936+ goto out_unlock;
15937+
15938+ ret = d_splice_alias(inode, dentry);
7f207e10 15939+ if (unlikely(IS_ERR(ret) && inode)) {
4a4d8108 15940+ ii_write_unlock(inode);
7f207e10
AM
15941+ iput(inode);
15942+ }
1facf9fc 15943+
4f0767ce 15944+out_unlock:
4a4d8108 15945+ di_write_unlock(dentry);
7f207e10 15946+out_si:
4a4d8108 15947+ si_read_unlock(sb);
7f207e10 15948+out:
4a4d8108
AM
15949+ return ret;
15950+}
1facf9fc 15951+
4a4d8108 15952+/* ---------------------------------------------------------------------- */
1facf9fc 15953+
4a4d8108
AM
15954+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
15955+ const unsigned char add_entry, aufs_bindex_t bcpup,
15956+ aufs_bindex_t bstart)
15957+{
15958+ int err;
15959+ struct dentry *h_parent;
15960+ struct inode *h_dir;
1facf9fc 15961+
027c5e7a 15962+ if (add_entry)
4a4d8108 15963+ IMustLock(parent->d_inode);
027c5e7a 15964+ else
4a4d8108
AM
15965+ di_write_lock_parent(parent);
15966+
15967+ err = 0;
15968+ if (!au_h_dptr(parent, bcpup)) {
15969+ if (bstart < bcpup)
15970+ err = au_cpdown_dirs(dentry, bcpup);
15971+ else
15972+ err = au_cpup_dirs(dentry, bcpup);
15973+ }
15974+ if (!err && add_entry) {
15975+ h_parent = au_h_dptr(parent, bcpup);
15976+ h_dir = h_parent->d_inode;
15977+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
15978+ err = au_lkup_neg(dentry, bcpup);
15979+ /* todo: no unlock here */
15980+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
15981+
15982+ AuDbg("bcpup %d\n", bcpup);
15983+ if (!err) {
15984+ if (!dentry->d_inode)
15985+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
15986+ au_update_dbrange(dentry, /*do_put_zero*/0);
15987+ }
1308ab2a 15988+ }
1facf9fc 15989+
4a4d8108
AM
15990+ if (!add_entry)
15991+ di_write_unlock(parent);
15992+ if (!err)
15993+ err = bcpup; /* success */
1308ab2a 15994+
027c5e7a 15995+ AuTraceErr(err);
4a4d8108
AM
15996+ return err;
15997+}
1facf9fc 15998+
4a4d8108
AM
15999+/*
16000+ * decide the branch and the parent dir where we will create a new entry.
16001+ * returns new bindex or an error.
16002+ * copyup the parent dir if needed.
16003+ */
16004+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16005+ struct au_wr_dir_args *args)
16006+{
16007+ int err;
16008+ aufs_bindex_t bcpup, bstart, src_bstart;
16009+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
16010+ ADD_ENTRY);
16011+ struct super_block *sb;
16012+ struct dentry *parent;
16013+ struct au_sbinfo *sbinfo;
1facf9fc 16014+
4a4d8108
AM
16015+ sb = dentry->d_sb;
16016+ sbinfo = au_sbi(sb);
16017+ parent = dget_parent(dentry);
16018+ bstart = au_dbstart(dentry);
16019+ bcpup = bstart;
16020+ if (args->force_btgt < 0) {
16021+ if (src_dentry) {
16022+ src_bstart = au_dbstart(src_dentry);
16023+ if (src_bstart < bstart)
16024+ bcpup = src_bstart;
16025+ } else if (add_entry) {
16026+ err = AuWbrCreate(sbinfo, dentry,
16027+ au_ftest_wrdir(args->flags, ISDIR));
16028+ bcpup = err;
16029+ }
1facf9fc 16030+
4a4d8108
AM
16031+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
16032+ if (add_entry)
16033+ err = AuWbrCopyup(sbinfo, dentry);
16034+ else {
16035+ if (!IS_ROOT(dentry)) {
16036+ di_read_lock_parent(parent, !AuLock_IR);
16037+ err = AuWbrCopyup(sbinfo, dentry);
16038+ di_read_unlock(parent, !AuLock_IR);
16039+ } else
16040+ err = AuWbrCopyup(sbinfo, dentry);
16041+ }
16042+ bcpup = err;
16043+ if (unlikely(err < 0))
16044+ goto out;
16045+ }
16046+ } else {
16047+ bcpup = args->force_btgt;
16048+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1308ab2a 16049+ }
027c5e7a 16050+
4a4d8108
AM
16051+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
16052+ err = bcpup;
16053+ if (bcpup == bstart)
16054+ goto out; /* success */
4a4d8108
AM
16055+
16056+ /* copyup the new parent into the branch we process */
16057+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a
AM
16058+ if (err >= 0) {
16059+ if (!dentry->d_inode) {
16060+ au_set_h_dptr(dentry, bstart, NULL);
16061+ au_set_dbstart(dentry, bcpup);
16062+ au_set_dbend(dentry, bcpup);
16063+ }
16064+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
16065+ }
4a4d8108 16066+
4f0767ce 16067+out:
4a4d8108 16068+ dput(parent);
dece6358
AM
16069+ return err;
16070+}
1facf9fc 16071+
1308ab2a 16072+/* ---------------------------------------------------------------------- */
16073+
4a4d8108 16074+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 16075+{
4a4d8108
AM
16076+ if (pin && pin->parent)
16077+ return au_h_dptr(pin->parent, pin->bindex);
16078+ return NULL;
dece6358 16079+}
1facf9fc 16080+
4a4d8108 16081+void au_unpin(struct au_pin *p)
dece6358 16082+{
e49829fe 16083+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
4a4d8108
AM
16084+ mnt_drop_write(p->h_mnt);
16085+ if (!p->hdir)
16086+ return;
1facf9fc 16087+
4a4d8108
AM
16088+ au_hn_imtx_unlock(p->hdir);
16089+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16090+ di_read_unlock(p->parent, AuLock_IR);
16091+ iput(p->hdir->hi_inode);
16092+ dput(p->parent);
16093+ p->parent = NULL;
16094+ p->hdir = NULL;
16095+ p->h_mnt = NULL;
16096+}
1308ab2a 16097+
4a4d8108
AM
16098+int au_do_pin(struct au_pin *p)
16099+{
16100+ int err;
16101+ struct super_block *sb;
16102+ struct dentry *h_dentry, *h_parent;
16103+ struct au_branch *br;
16104+ struct inode *h_dir;
16105+
16106+ err = 0;
16107+ sb = p->dentry->d_sb;
16108+ br = au_sbr(sb, p->bindex);
16109+ if (IS_ROOT(p->dentry)) {
16110+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16111+ p->h_mnt = br->br_mnt;
16112+ err = mnt_want_write(p->h_mnt);
16113+ if (unlikely(err)) {
16114+ au_fclr_pin(p->flags, MNT_WRITE);
16115+ goto out_err;
16116+ }
16117+ }
dece6358 16118+ goto out;
1facf9fc 16119+ }
16120+
4a4d8108
AM
16121+ h_dentry = NULL;
16122+ if (p->bindex <= au_dbend(p->dentry))
16123+ h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 16124+
4a4d8108
AM
16125+ p->parent = dget_parent(p->dentry);
16126+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16127+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 16128+
4a4d8108
AM
16129+ h_dir = NULL;
16130+ h_parent = au_h_dptr(p->parent, p->bindex);
16131+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
16132+ if (p->hdir)
16133+ h_dir = p->hdir->hi_inode;
dece6358 16134+
b752ccd1
AM
16135+ /*
16136+ * udba case, or
16137+ * if DI_LOCKED is not set, then p->parent may be different
16138+ * and h_parent can be NULL.
16139+ */
16140+ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
e49829fe 16141+ err = -EBUSY;
4a4d8108
AM
16142+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16143+ di_read_unlock(p->parent, AuLock_IR);
16144+ dput(p->parent);
16145+ p->parent = NULL;
16146+ goto out_err;
16147+ }
1308ab2a 16148+
4a4d8108
AM
16149+ au_igrab(h_dir);
16150+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
1308ab2a 16151+
4a4d8108
AM
16152+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
16153+ err = -EBUSY;
16154+ goto out_unpin;
16155+ }
16156+ if (h_dentry) {
16157+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
16158+ if (unlikely(err)) {
16159+ au_fclr_pin(p->flags, MNT_WRITE);
16160+ goto out_unpin;
16161+ }
1facf9fc 16162+ }
dece6358 16163+
4a4d8108
AM
16164+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16165+ p->h_mnt = br->br_mnt;
16166+ err = mnt_want_write(p->h_mnt);
dece6358 16167+ if (unlikely(err)) {
4a4d8108
AM
16168+ au_fclr_pin(p->flags, MNT_WRITE);
16169+ goto out_unpin;
dece6358
AM
16170+ }
16171+ }
4a4d8108
AM
16172+ goto out; /* success */
16173+
4f0767ce 16174+out_unpin:
4a4d8108 16175+ au_unpin(p);
4f0767ce 16176+out_err:
4a4d8108
AM
16177+ pr_err("err %d\n", err);
16178+ err = au_busy_or_stale();
4f0767ce 16179+out:
1facf9fc 16180+ return err;
16181+}
16182+
4a4d8108
AM
16183+void au_pin_init(struct au_pin *p, struct dentry *dentry,
16184+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16185+ unsigned int udba, unsigned char flags)
16186+{
16187+ p->dentry = dentry;
16188+ p->udba = udba;
16189+ p->lsc_di = lsc_di;
16190+ p->lsc_hi = lsc_hi;
16191+ p->flags = flags;
16192+ p->bindex = bindex;
16193+
16194+ p->parent = NULL;
16195+ p->hdir = NULL;
16196+ p->h_mnt = NULL;
16197+}
16198+
16199+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16200+ unsigned int udba, unsigned char flags)
16201+{
16202+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
16203+ udba, flags);
16204+ return au_do_pin(pin);
16205+}
16206+
dece6358
AM
16207+/* ---------------------------------------------------------------------- */
16208+
1308ab2a 16209+/*
4a4d8108
AM
16210+ * ->setattr() and ->getattr() are called in various cases.
16211+ * chmod, stat: dentry is revalidated.
16212+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
16213+ * unhashed.
16214+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 16215+ */
027c5e7a 16216+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
4a4d8108 16217+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 16218+{
4a4d8108
AM
16219+ int err;
16220+ struct inode *inode;
16221+ struct dentry *parent;
1facf9fc 16222+
1308ab2a 16223+ err = 0;
4a4d8108 16224+ inode = dentry->d_inode;
027c5e7a 16225+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
16226+ parent = dget_parent(dentry);
16227+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 16228+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
16229+ di_read_unlock(parent, AuLock_IR);
16230+ dput(parent);
dece6358 16231+ }
1facf9fc 16232+
4a4d8108 16233+ AuTraceErr(err);
1308ab2a 16234+ return err;
16235+}
dece6358 16236+
4a4d8108
AM
/* flag bits kept in struct au_icpup_args.flags */
#define AuIcpup_DID_CPUP	1

/* test, set and clear a flag by its short name, e.g. DID_CPUP */
#define au_ftest_icpup(flags, name)	((flags) & AuIcpup_##name)
#define au_fset_icpup(flags, name) \
	do { (flags) |= AuIcpup_##name; } while (0)
#define au_fclr_icpup(flags, name) \
	do { (flags) &= ~AuIcpup_##name; } while (0)
1308ab2a 16243+
4a4d8108
AM
16244+struct au_icpup_args {
16245+ unsigned char flags;
16246+ unsigned char pin_flags;
16247+ aufs_bindex_t btgt;
16248+ unsigned int udba;
16249+ struct au_pin pin;
16250+ struct path h_path;
16251+ struct inode *h_inode;
16252+};
1308ab2a 16253+
4a4d8108
AM
16254+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16255+ struct au_icpup_args *a)
1308ab2a 16256+{
16257+ int err;
4a4d8108 16258+ loff_t sz;
e49829fe 16259+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
16260+ struct dentry *hi_wh, *parent;
16261+ struct inode *inode;
16262+ struct file *h_file;
16263+ struct au_wr_dir_args wr_dir_args = {
16264+ .force_btgt = -1,
16265+ .flags = 0
16266+ };
16267+
16268+ bstart = au_dbstart(dentry);
16269+ inode = dentry->d_inode;
16270+ if (S_ISDIR(inode->i_mode))
16271+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
16272+ /* plink or hi_wh() case */
e49829fe 16273+ ibstart = au_ibstart(inode);
027c5e7a 16274+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 16275+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
16276+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
16277+ if (unlikely(err < 0))
16278+ goto out;
16279+ a->btgt = err;
16280+ if (err != bstart)
16281+ au_fset_icpup(a->flags, DID_CPUP);
16282+
16283+ err = 0;
16284+ a->pin_flags = AuPin_MNT_WRITE;
16285+ parent = NULL;
16286+ if (!IS_ROOT(dentry)) {
16287+ au_fset_pin(a->pin_flags, DI_LOCKED);
16288+ parent = dget_parent(dentry);
16289+ di_write_lock_parent(parent);
16290+ }
16291+
16292+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
16293+ if (unlikely(err))
16294+ goto out_parent;
16295+
16296+ a->h_path.dentry = au_h_dptr(dentry, bstart);
16297+ a->h_inode = a->h_path.dentry->d_inode;
16298+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16299+ sz = -1;
16300+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
16301+ sz = ia->ia_size;
16302+
16303+ h_file = NULL;
16304+ hi_wh = NULL;
027c5e7a 16305+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
16306+ hi_wh = au_hi_wh(inode, a->btgt);
16307+ if (!hi_wh) {
16308+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
16309+ if (unlikely(err))
16310+ goto out_unlock;
16311+ hi_wh = au_hi_wh(inode, a->btgt);
16312+ /* todo: revalidate hi_wh? */
16313+ }
16314+ }
16315+
16316+ if (parent) {
16317+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
16318+ di_downgrade_lock(parent, AuLock_IR);
16319+ dput(parent);
16320+ parent = NULL;
16321+ }
16322+ if (!au_ftest_icpup(a->flags, DID_CPUP))
16323+ goto out; /* success */
16324+
16325+ if (!d_unhashed(dentry)) {
16326+ h_file = au_h_open_pre(dentry, bstart);
16327+ if (IS_ERR(h_file)) {
16328+ err = PTR_ERR(h_file);
16329+ h_file = NULL;
16330+ } else
16331+ err = au_sio_cpup_simple(dentry, a->btgt, sz,
16332+ AuCpup_DTIME);
16333+ if (!err)
16334+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16335+ } else if (!hi_wh)
16336+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16337+ else
16338+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 16339+
4f0767ce 16340+out_unlock:
4a4d8108
AM
16341+ mutex_unlock(&a->h_inode->i_mutex);
16342+ au_h_open_post(dentry, bstart, h_file);
16343+ a->h_inode = a->h_path.dentry->d_inode;
dece6358 16344+ if (!err) {
4a4d8108 16345+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
dece6358 16346+ goto out; /* success */
1facf9fc 16347+ }
dece6358 16348+
4a4d8108 16349+ au_unpin(&a->pin);
4f0767ce 16350+out_parent:
4a4d8108
AM
16351+ if (parent) {
16352+ di_write_unlock(parent);
16353+ dput(parent);
16354+ }
4f0767ce 16355+out:
1facf9fc 16356+ return err;
16357+}
16358+
4a4d8108 16359+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 16360+{
4a4d8108
AM
16361+ int err;
16362+ struct inode *inode;
16363+ struct super_block *sb;
16364+ struct file *file;
16365+ struct au_icpup_args *a;
1facf9fc 16366+
4a4d8108
AM
16367+ inode = dentry->d_inode;
16368+ IMustLock(inode);
dece6358 16369+
4a4d8108
AM
16370+ err = -ENOMEM;
16371+ a = kzalloc(sizeof(*a), GFP_NOFS);
16372+ if (unlikely(!a))
16373+ goto out;
1facf9fc 16374+
4a4d8108
AM
16375+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
16376+ ia->ia_valid &= ~ATTR_MODE;
dece6358 16377+
4a4d8108
AM
16378+ file = NULL;
16379+ sb = dentry->d_sb;
e49829fe
JR
16380+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16381+ if (unlikely(err))
16382+ goto out_kfree;
16383+
4a4d8108
AM
16384+ if (ia->ia_valid & ATTR_FILE) {
16385+ /* currently ftruncate(2) only */
16386+ AuDebugOn(!S_ISREG(inode->i_mode));
16387+ file = ia->ia_file;
16388+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
16389+ if (unlikely(err))
16390+ goto out_si;
16391+ ia->ia_file = au_hf_top(file);
16392+ a->udba = AuOpt_UDBA_NONE;
16393+ } else {
16394+ /* fchmod() doesn't pass ia_file */
16395+ a->udba = au_opt_udba(sb);
027c5e7a
AM
16396+ di_write_lock_child(dentry);
16397+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
16398+ if (d_unhashed(dentry))
16399+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
16400+ if (a->udba != AuOpt_UDBA_NONE) {
16401+ AuDebugOn(IS_ROOT(dentry));
16402+ err = au_reval_for_attr(dentry, au_sigen(sb));
16403+ if (unlikely(err))
16404+ goto out_dentry;
16405+ }
dece6358 16406+ }
dece6358 16407+
4a4d8108
AM
16408+ err = au_pin_and_icpup(dentry, ia, a);
16409+ if (unlikely(err < 0))
16410+ goto out_dentry;
16411+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
16412+ ia->ia_file = NULL;
16413+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 16414+ }
dece6358 16415+
4a4d8108
AM
16416+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
16417+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
16418+ == (ATTR_MODE | ATTR_CTIME)) {
16419+ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
16420+ ia->ia_mode);
16421+ if (unlikely(err))
16422+ goto out_unlock;
16423+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
16424+ && (ia->ia_valid & ATTR_CTIME)) {
16425+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
16426+ if (unlikely(err))
16427+ goto out_unlock;
16428+ }
dece6358 16429+
4a4d8108
AM
16430+ if (ia->ia_valid & ATTR_SIZE) {
16431+ struct file *f;
1308ab2a 16432+
953406b4 16433+ if (ia->ia_size < i_size_read(inode))
4a4d8108 16434+ /* unmap only */
953406b4 16435+ truncate_setsize(inode, ia->ia_size);
1308ab2a 16436+
4a4d8108
AM
16437+ f = NULL;
16438+ if (ia->ia_valid & ATTR_FILE)
16439+ f = ia->ia_file;
16440+ mutex_unlock(&a->h_inode->i_mutex);
16441+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
16442+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16443+ } else
16444+ err = vfsub_notify_change(&a->h_path, ia);
16445+ if (!err)
16446+ au_cpup_attr_changeable(inode);
1308ab2a 16447+
4f0767ce 16448+out_unlock:
4a4d8108
AM
16449+ mutex_unlock(&a->h_inode->i_mutex);
16450+ au_unpin(&a->pin);
027c5e7a
AM
16451+ if (unlikely(err))
16452+ au_update_dbstart(dentry);
4f0767ce 16453+out_dentry:
4a4d8108
AM
16454+ di_write_unlock(dentry);
16455+ if (file) {
16456+ fi_write_unlock(file);
16457+ ia->ia_file = file;
16458+ ia->ia_valid |= ATTR_FILE;
16459+ }
4f0767ce 16460+out_si:
4a4d8108 16461+ si_read_unlock(sb);
e49829fe 16462+out_kfree:
4a4d8108 16463+ kfree(a);
4f0767ce 16464+out:
4a4d8108
AM
16465+ AuTraceErr(err);
16466+ return err;
1facf9fc 16467+}
16468+
4a4d8108
AM
16469+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
16470+ unsigned int nlink)
1facf9fc 16471+{
4a4d8108
AM
16472+ inode->i_mode = st->mode;
16473+ inode->i_uid = st->uid;
16474+ inode->i_gid = st->gid;
16475+ inode->i_atime = st->atime;
16476+ inode->i_mtime = st->mtime;
16477+ inode->i_ctime = st->ctime;
1facf9fc 16478+
4a4d8108
AM
16479+ au_cpup_attr_nlink(inode, /*force*/0);
16480+ if (S_ISDIR(inode->i_mode)) {
16481+ inode->i_nlink -= nlink;
16482+ inode->i_nlink += st->nlink;
16483+ }
1facf9fc 16484+
4a4d8108
AM
16485+ spin_lock(&inode->i_lock);
16486+ inode->i_blocks = st->blocks;
16487+ i_size_write(inode, st->size);
16488+ spin_unlock(&inode->i_lock);
1facf9fc 16489+}
16490+
4a4d8108
AM
16491+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
16492+ struct dentry *dentry, struct kstat *st)
1facf9fc 16493+{
4a4d8108
AM
16494+ int err;
16495+ unsigned int mnt_flags;
16496+ aufs_bindex_t bindex;
16497+ unsigned char udba_none, positive;
16498+ struct super_block *sb, *h_sb;
16499+ struct inode *inode;
16500+ struct vfsmount *h_mnt;
16501+ struct dentry *h_dentry;
1facf9fc 16502+
4a4d8108
AM
16503+ sb = dentry->d_sb;
16504+ inode = dentry->d_inode;
7f207e10
AM
16505+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16506+ if (unlikely(err))
16507+ goto out;
4a4d8108
AM
16508+ mnt_flags = au_mntflags(sb);
16509+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 16510+
4a4d8108 16511+ /* support fstat(2) */
027c5e7a 16512+ if (!d_unlinked(dentry) && !udba_none) {
4a4d8108 16513+ unsigned int sigen = au_sigen(sb);
027c5e7a
AM
16514+ err = au_digen_test(dentry, sigen);
16515+ if (!err) {
4a4d8108 16516+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a
AM
16517+ err = au_dbrange_test(dentry);
16518+ if (unlikely(err))
16519+ goto out_unlock;
16520+ } else {
4a4d8108
AM
16521+ AuDebugOn(IS_ROOT(dentry));
16522+ di_write_lock_child(dentry);
027c5e7a
AM
16523+ err = au_dbrange_test(dentry);
16524+ if (!err)
16525+ err = au_reval_for_attr(dentry, sigen);
4a4d8108
AM
16526+ di_downgrade_lock(dentry, AuLock_IR);
16527+ if (unlikely(err))
7f207e10 16528+ goto out_unlock;
4a4d8108
AM
16529+ }
16530+ } else
16531+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 16532+
4a4d8108
AM
16533+ bindex = au_ibstart(inode);
16534+ h_mnt = au_sbr_mnt(sb, bindex);
16535+ h_sb = h_mnt->mnt_sb;
16536+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
16537+ goto out_fill; /* success */
1facf9fc 16538+
4a4d8108
AM
16539+ h_dentry = NULL;
16540+ if (au_dbstart(dentry) == bindex)
16541+ h_dentry = dget(au_h_dptr(dentry, bindex));
16542+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
16543+ h_dentry = au_plink_lkup(inode, bindex);
16544+ if (IS_ERR(h_dentry))
16545+ goto out_fill; /* pretending success */
16546+ }
16547+ /* illegally overlapped or something */
16548+ if (unlikely(!h_dentry))
16549+ goto out_fill; /* pretending success */
16550+
16551+ positive = !!h_dentry->d_inode;
16552+ if (positive)
16553+ err = vfs_getattr(h_mnt, h_dentry, st);
16554+ dput(h_dentry);
16555+ if (!err) {
16556+ if (positive)
16557+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
16558+ goto out_fill; /* success */
1facf9fc 16559+ }
7f207e10
AM
16560+ AuTraceErr(err);
16561+ goto out_unlock;
4a4d8108 16562+
4f0767ce 16563+out_fill:
4a4d8108 16564+ generic_fillattr(inode, st);
7f207e10 16565+out_unlock:
4a4d8108
AM
16566+ di_read_unlock(dentry, AuLock_IR);
16567+ si_read_unlock(sb);
7f207e10
AM
16568+out:
16569+ AuTraceErr(err);
4a4d8108 16570+ return err;
1facf9fc 16571+}
16572+
16573+/* ---------------------------------------------------------------------- */
16574+
4a4d8108
AM
16575+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
16576+ int bufsiz)
1facf9fc 16577+{
16578+ int err;
4a4d8108
AM
16579+ struct super_block *sb;
16580+ struct dentry *h_dentry;
1facf9fc 16581+
4a4d8108
AM
16582+ err = -EINVAL;
16583+ h_dentry = au_h_dptr(dentry, bindex);
16584+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
16585+ goto out;
1facf9fc 16586+
4a4d8108
AM
16587+ err = security_inode_readlink(h_dentry);
16588+ if (unlikely(err))
dece6358 16589+ goto out;
1facf9fc 16590+
4a4d8108
AM
16591+ sb = dentry->d_sb;
16592+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
16593+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
16594+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
1facf9fc 16595+ }
4a4d8108 16596+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 16597+
4f0767ce 16598+out:
4a4d8108
AM
16599+ return err;
16600+}
1facf9fc 16601+
4a4d8108
AM
16602+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
16603+{
16604+ int err;
1facf9fc 16605+
027c5e7a
AM
16606+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16607+ if (unlikely(err))
16608+ goto out;
16609+ err = au_d_hashed_positive(dentry);
16610+ if (!err)
16611+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 16612+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16613+
027c5e7a 16614+out:
4a4d8108
AM
16615+ return err;
16616+}
1facf9fc 16617+
4a4d8108
AM
16618+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
16619+{
16620+ int err;
4a4d8108 16621+ mm_segment_t old_fs;
b752ccd1
AM
16622+ union {
16623+ char *k;
16624+ char __user *u;
16625+ } buf;
1facf9fc 16626+
4a4d8108 16627+ err = -ENOMEM;
b752ccd1
AM
16628+ buf.k = __getname_gfp(GFP_NOFS);
16629+ if (unlikely(!buf.k))
4a4d8108 16630+ goto out;
1facf9fc 16631+
027c5e7a
AM
16632+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16633+ if (unlikely(err))
16634+ goto out_name;
16635+
16636+ err = au_d_hashed_positive(dentry);
16637+ if (!err) {
16638+ old_fs = get_fs();
16639+ set_fs(KERNEL_DS);
16640+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
16641+ set_fs(old_fs);
16642+ }
4a4d8108 16643+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16644+
4a4d8108 16645+ if (err >= 0) {
b752ccd1 16646+ buf.k[err] = 0;
4a4d8108 16647+ /* will be freed by put_link */
b752ccd1 16648+ nd_set_link(nd, buf.k);
4a4d8108 16649+ return NULL; /* success */
1308ab2a 16650+ }
1facf9fc 16651+
027c5e7a
AM
16652+out_name:
16653+ __putname(buf.k);
4f0767ce 16654+out:
4a4d8108
AM
16655+ path_put(&nd->path);
16656+ AuTraceErr(err);
16657+ return ERR_PTR(err);
16658+}
1facf9fc 16659+
4a4d8108
AM
16660+static void aufs_put_link(struct dentry *dentry __maybe_unused,
16661+ struct nameidata *nd, void *cookie __maybe_unused)
16662+{
16663+ __putname(nd_get_link(nd));
16664+}
1facf9fc 16665+
4a4d8108 16666+/* ---------------------------------------------------------------------- */
1facf9fc 16667+
4a4d8108
AM
16668+static void aufs_truncate_range(struct inode *inode __maybe_unused,
16669+ loff_t start __maybe_unused,
16670+ loff_t end __maybe_unused)
16671+{
16672+ AuUnsupport();
16673+}
1facf9fc 16674+
4a4d8108 16675+/* ---------------------------------------------------------------------- */
1308ab2a 16676+
4a4d8108
AM
16677+struct inode_operations aufs_symlink_iop = {
16678+ .permission = aufs_permission,
16679+ .setattr = aufs_setattr,
16680+ .getattr = aufs_getattr,
16681+ .readlink = aufs_readlink,
16682+ .follow_link = aufs_follow_link,
16683+ .put_link = aufs_put_link
16684+};
16685+
16686+struct inode_operations aufs_dir_iop = {
16687+ .create = aufs_create,
16688+ .lookup = aufs_lookup,
16689+ .link = aufs_link,
16690+ .unlink = aufs_unlink,
16691+ .symlink = aufs_symlink,
16692+ .mkdir = aufs_mkdir,
16693+ .rmdir = aufs_rmdir,
16694+ .mknod = aufs_mknod,
16695+ .rename = aufs_rename,
16696+
16697+ .permission = aufs_permission,
16698+ .setattr = aufs_setattr,
16699+ .getattr = aufs_getattr
16700+};
16701+
16702+struct inode_operations aufs_iop = {
16703+ .permission = aufs_permission,
16704+ .setattr = aufs_setattr,
16705+ .getattr = aufs_getattr,
16706+ .truncate_range = aufs_truncate_range
16707+};
7f207e10
AM
16708diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
16709--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
16710+++ linux/fs/aufs/i_op_del.c 2011-08-24 13:30:24.731313534 +0200
16711@@ -0,0 +1,478 @@
1facf9fc 16712+/*
027c5e7a 16713+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 16714+ *
16715+ * This program, aufs is free software; you can redistribute it and/or modify
16716+ * it under the terms of the GNU General Public License as published by
16717+ * the Free Software Foundation; either version 2 of the License, or
16718+ * (at your option) any later version.
dece6358
AM
16719+ *
16720+ * This program is distributed in the hope that it will be useful,
16721+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16722+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16723+ * GNU General Public License for more details.
16724+ *
16725+ * You should have received a copy of the GNU General Public License
16726+ * along with this program; if not, write to the Free Software
16727+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 16728+ */
16729+
16730+/*
4a4d8108 16731+ * inode operations (del entry)
1308ab2a 16732+ */
dece6358 16733+
1308ab2a 16734+#include "aufs.h"
dece6358 16735+
4a4d8108
AM
16736+/*
16737+ * decide if a new whiteout for @dentry is necessary or not.
16738+ * when it is necessary, prepare the parent dir for the upper branch whose
16739+ * branch index is @bcpup for creation. the actual creation of the whiteout will
16740+ * be done by caller.
16741+ * return value:
16742+ * 0: wh is unnecessary
16743+ * plus: wh is necessary
16744+ * minus: error
16745+ */
16746+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 16747+{
4a4d8108
AM
16748+ int need_wh, err;
16749+ aufs_bindex_t bstart;
16750+ struct super_block *sb;
dece6358 16751+
4a4d8108
AM
16752+ sb = dentry->d_sb;
16753+ bstart = au_dbstart(dentry);
16754+ if (*bcpup < 0) {
16755+ *bcpup = bstart;
16756+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
16757+ err = AuWbrCopyup(au_sbi(sb), dentry);
16758+ *bcpup = err;
16759+ if (unlikely(err < 0))
16760+ goto out;
16761+ }
16762+ } else
16763+ AuDebugOn(bstart < *bcpup
16764+ || au_test_ro(sb, *bcpup, dentry->d_inode));
16765+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 16766+
4a4d8108
AM
16767+ if (*bcpup != bstart) {
16768+ err = au_cpup_dirs(dentry, *bcpup);
16769+ if (unlikely(err))
16770+ goto out;
16771+ need_wh = 1;
16772+ } else {
027c5e7a 16773+ struct au_dinfo *dinfo, *tmp;
4a4d8108 16774+
027c5e7a
AM
16775+ need_wh = -ENOMEM;
16776+ dinfo = au_di(dentry);
16777+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
16778+ if (tmp) {
16779+ au_di_cp(tmp, dinfo);
16780+ au_di_swap(tmp, dinfo);
16781+ /* returns the number of positive dentries */
16782+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
16783+ /*nd*/NULL);
16784+ au_di_swap(tmp, dinfo);
16785+ au_rw_write_unlock(&tmp->di_rwsem);
16786+ au_di_free(tmp);
4a4d8108
AM
16787+ }
16788+ }
16789+ AuDbg("need_wh %d\n", need_wh);
16790+ err = need_wh;
16791+
4f0767ce 16792+out:
4a4d8108 16793+ return err;
1facf9fc 16794+}
16795+
4a4d8108
AM
16796+/*
16797+ * simple tests for the del-entry operations.
16798+ * following the checks in vfs, plus the parent-child relationship.
16799+ */
16800+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16801+ struct dentry *h_parent, int isdir)
1facf9fc 16802+{
4a4d8108
AM
16803+ int err;
16804+ umode_t h_mode;
16805+ struct dentry *h_dentry, *h_latest;
1308ab2a 16806+ struct inode *h_inode;
1facf9fc 16807+
4a4d8108
AM
16808+ h_dentry = au_h_dptr(dentry, bindex);
16809+ h_inode = h_dentry->d_inode;
16810+ if (dentry->d_inode) {
16811+ err = -ENOENT;
16812+ if (unlikely(!h_inode || !h_inode->i_nlink))
16813+ goto out;
1facf9fc 16814+
4a4d8108
AM
16815+ h_mode = h_inode->i_mode;
16816+ if (!isdir) {
16817+ err = -EISDIR;
16818+ if (unlikely(S_ISDIR(h_mode)))
16819+ goto out;
16820+ } else if (unlikely(!S_ISDIR(h_mode))) {
16821+ err = -ENOTDIR;
16822+ goto out;
16823+ }
16824+ } else {
16825+ /* rename(2) case */
16826+ err = -EIO;
16827+ if (unlikely(h_inode))
16828+ goto out;
16829+ }
1facf9fc 16830+
4a4d8108
AM
16831+ err = -ENOENT;
16832+ /* expected parent dir is locked */
16833+ if (unlikely(h_parent != h_dentry->d_parent))
16834+ goto out;
16835+ err = 0;
16836+
16837+ /*
16838+ * rmdir of a dir may break the consistency on some filesystems.
16839+ * let's try a heavy test.
16840+ */
16841+ err = -EACCES;
16842+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
16843+ goto out;
16844+
16845+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
16846+ au_sbr(dentry->d_sb, bindex));
16847+ err = -EIO;
16848+ if (IS_ERR(h_latest))
16849+ goto out;
16850+ if (h_latest == h_dentry)
16851+ err = 0;
16852+ dput(h_latest);
16853+
4f0767ce 16854+out:
4a4d8108 16855+ return err;
1308ab2a 16856+}
1facf9fc 16857+
4a4d8108
AM
16858+/*
16859+ * decide the branch where we operate for @dentry. the branch index will be set
16860+ * to @rbcpup. after deciding it, 'pin' it and store the timestamps of the
16861+ * parent dir for reverting.
16862+ * when a new whiteout is necessary, create it.
16863+ */
16864+static struct dentry*
16865+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
16866+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 16867+{
4a4d8108
AM
16868+ struct dentry *wh_dentry;
16869+ struct super_block *sb;
16870+ struct path h_path;
16871+ int err, need_wh;
16872+ unsigned int udba;
16873+ aufs_bindex_t bcpup;
dece6358 16874+
4a4d8108
AM
16875+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
16876+ wh_dentry = ERR_PTR(need_wh);
16877+ if (unlikely(need_wh < 0))
16878+ goto out;
16879+
16880+ sb = dentry->d_sb;
16881+ udba = au_opt_udba(sb);
16882+ bcpup = *rbcpup;
16883+ err = au_pin(pin, dentry, bcpup, udba,
16884+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
16885+ wh_dentry = ERR_PTR(err);
16886+ if (unlikely(err))
16887+ goto out;
16888+
16889+ h_path.dentry = au_pinned_h_parent(pin);
16890+ if (udba != AuOpt_UDBA_NONE
16891+ && au_dbstart(dentry) == bcpup) {
16892+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
16893+ wh_dentry = ERR_PTR(err);
16894+ if (unlikely(err))
16895+ goto out_unpin;
16896+ }
16897+
16898+ h_path.mnt = au_sbr_mnt(sb, bcpup);
16899+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
16900+ wh_dentry = NULL;
16901+ if (!need_wh)
16902+ goto out; /* success, no need to create whiteout */
16903+
16904+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
16905+ if (IS_ERR(wh_dentry))
16906+ goto out_unpin;
16907+
16908+ /* returns with the parent locked and wh_dentry dget-ed */
16909+ goto out; /* success */
16910+
4f0767ce 16911+out_unpin:
4a4d8108 16912+ au_unpin(pin);
4f0767ce 16913+out:
4a4d8108 16914+ return wh_dentry;
1facf9fc 16915+}
16916+
4a4d8108
AM
16917+/*
16918+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
16919+ * in order to be revertible and save time for removing many child whiteouts
16920+ * under the dir.
16921+ * returns 1 when there are too many child whiteouts and the caller should
16922+ * remove them asynchronously. returns 0 when the number of children is small
16923+ * enough to remove now or the branch fs is a remote fs.
16924+ * otherwise return an error.
16925+ */
16926+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
16927+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 16928+{
4a4d8108
AM
16929+ int rmdir_later, err, dirwh;
16930+ struct dentry *h_dentry;
16931+ struct super_block *sb;
16932+
16933+ sb = dentry->d_sb;
16934+ SiMustAnyLock(sb);
16935+ h_dentry = au_h_dptr(dentry, bindex);
16936+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
16937+ if (unlikely(err))
16938+ goto out;
16939+
16940+ /* stop monitoring */
16941+ au_hn_free(au_hi(dentry->d_inode, bindex));
16942+
16943+ if (!au_test_fs_remote(h_dentry->d_sb)) {
16944+ dirwh = au_sbi(sb)->si_dirwh;
16945+ rmdir_later = (dirwh <= 1);
16946+ if (!rmdir_later)
16947+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
16948+ dirwh);
16949+ if (rmdir_later)
16950+ return rmdir_later;
16951+ }
1facf9fc 16952+
4a4d8108
AM
16953+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
16954+ if (unlikely(err)) {
16955+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
16956+ AuDLNPair(h_dentry), bindex, err);
16957+ err = 0;
16958+ }
dece6358 16959+
4f0767ce 16960+out:
4a4d8108
AM
16961+ AuTraceErr(err);
16962+ return err;
16963+}
1308ab2a 16964+
4a4d8108
AM
16965+/*
16966+ * final procedure for deleting an entry.
16967+ * maintain dentry and iattr.
16968+ */
16969+static void epilog(struct inode *dir, struct dentry *dentry,
16970+ aufs_bindex_t bindex)
16971+{
16972+ struct inode *inode;
1308ab2a 16973+
4a4d8108
AM
16974+ inode = dentry->d_inode;
16975+ d_drop(dentry);
16976+ inode->i_ctime = dir->i_ctime;
1308ab2a 16977+
4a4d8108
AM
16978+ if (au_ibstart(dir) == bindex)
16979+ au_cpup_attr_timesizes(dir);
16980+ dir->i_version++;
1facf9fc 16981+}
16982+
4a4d8108
AM
16983+/*
16984+ * when an error happened, remove the created whiteout and revert everything.
16985+ */
7f207e10
AM
16986+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
16987+ aufs_bindex_t bwh, struct dentry *wh_dentry,
16988+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 16989+{
4a4d8108
AM
16990+ int rerr;
16991+ struct path h_path = {
16992+ .dentry = wh_dentry,
7f207e10 16993+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 16994+ };
dece6358 16995+
7f207e10 16996+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
16997+ if (!rerr) {
16998+ au_set_dbwh(dentry, bwh);
16999+ au_dtime_revert(dt);
17000+ return 0;
17001+ }
dece6358 17002+
4a4d8108
AM
17003+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
17004+ AuDLNPair(dentry), err, rerr);
17005+ return -EIO;
1facf9fc 17006+}
17007+
4a4d8108 17008+/* ---------------------------------------------------------------------- */
1facf9fc 17009+
4a4d8108 17010+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 17011+{
4a4d8108
AM
17012+ int err;
17013+ aufs_bindex_t bwh, bindex, bstart;
17014+ struct au_dtime dt;
17015+ struct au_pin pin;
17016+ struct path h_path;
17017+ struct inode *inode, *h_dir;
17018+ struct dentry *parent, *wh_dentry;
1facf9fc 17019+
4a4d8108 17020+ IMustLock(dir);
027c5e7a
AM
17021+
17022+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17023+ if (unlikely(err))
17024+ goto out;
17025+ err = au_d_hashed_positive(dentry);
17026+ if (unlikely(err))
17027+ goto out_unlock;
4a4d8108 17028+ inode = dentry->d_inode;
4a4d8108 17029+ IMustLock(inode);
027c5e7a
AM
17030+ err = -EISDIR;
17031+ if (unlikely(S_ISDIR(inode->i_mode)))
17032+ goto out_unlock; /* possible? */
1facf9fc 17033+
4a4d8108
AM
17034+ bstart = au_dbstart(dentry);
17035+ bwh = au_dbwh(dentry);
17036+ bindex = -1;
027c5e7a
AM
17037+ parent = dentry->d_parent; /* dir inode is locked */
17038+ di_write_lock_parent(parent);
4a4d8108
AM
17039+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
17040+ err = PTR_ERR(wh_dentry);
17041+ if (IS_ERR(wh_dentry))
027c5e7a 17042+ goto out_parent;
1facf9fc 17043+
4a4d8108
AM
17044+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
17045+ h_path.dentry = au_h_dptr(dentry, bstart);
17046+ dget(h_path.dentry);
17047+ if (bindex == bstart) {
17048+ h_dir = au_pinned_h_dir(&pin);
17049+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17050+ } else {
17051+ /* dir inode is locked */
17052+ h_dir = wh_dentry->d_parent->d_inode;
17053+ IMustLock(h_dir);
17054+ err = 0;
17055+ }
dece6358 17056+
4a4d8108 17057+ if (!err) {
7f207e10 17058+ vfsub_drop_nlink(inode);
4a4d8108
AM
17059+ epilog(dir, dentry, bindex);
17060+
17061+ /* update target timestamps */
17062+ if (bindex == bstart) {
17063+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
17064+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
17065+ } else
17066+ /* todo: this timestamp may be reverted later */
17067+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 17068+ goto out_unpin; /* success */
1facf9fc 17069+ }
17070+
4a4d8108
AM
17071+ /* revert */
17072+ if (wh_dentry) {
17073+ int rerr;
17074+
7f207e10 17075+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17076+ if (rerr)
17077+ err = rerr;
dece6358 17078+ }
1facf9fc 17079+
027c5e7a 17080+out_unpin:
4a4d8108
AM
17081+ au_unpin(&pin);
17082+ dput(wh_dentry);
17083+ dput(h_path.dentry);
027c5e7a 17084+out_parent:
4a4d8108 17085+ di_write_unlock(parent);
027c5e7a 17086+out_unlock:
4a4d8108 17087+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 17088+out:
4a4d8108 17089+ return err;
dece6358
AM
17090+}
17091+
4a4d8108 17092+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 17093+{
4a4d8108
AM
17094+ int err, rmdir_later;
17095+ aufs_bindex_t bwh, bindex, bstart;
17096+ struct au_dtime dt;
17097+ struct au_pin pin;
17098+ struct inode *inode;
17099+ struct dentry *parent, *wh_dentry, *h_dentry;
17100+ struct au_whtmp_rmdir *args;
1facf9fc 17101+
4a4d8108 17102+ IMustLock(dir);
027c5e7a
AM
17103+
17104+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
17105+ if (unlikely(err))
4a4d8108 17106+ goto out;
53392da6
AM
17107+ err = au_alive_dir(dentry);
17108+ if (unlikely(err))
027c5e7a 17109+ goto out_unlock;
53392da6 17110+ inode = dentry->d_inode;
4a4d8108 17111+ IMustLock(inode);
027c5e7a
AM
17112+ err = -ENOTDIR;
17113+ if (unlikely(!S_ISDIR(inode->i_mode)))
17114+ goto out_unlock; /* possible? */
dece6358 17115+
4a4d8108
AM
17116+ err = -ENOMEM;
17117+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
17118+ if (unlikely(!args))
17119+ goto out_unlock;
dece6358 17120+
4a4d8108
AM
17121+ parent = dentry->d_parent; /* dir inode is locked */
17122+ di_write_lock_parent(parent);
17123+ err = au_test_empty(dentry, &args->whlist);
17124+ if (unlikely(err))
027c5e7a 17125+ goto out_parent;
1facf9fc 17126+
4a4d8108
AM
17127+ bstart = au_dbstart(dentry);
17128+ bwh = au_dbwh(dentry);
17129+ bindex = -1;
17130+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
17131+ err = PTR_ERR(wh_dentry);
17132+ if (IS_ERR(wh_dentry))
027c5e7a 17133+ goto out_parent;
1facf9fc 17134+
4a4d8108
AM
17135+ h_dentry = au_h_dptr(dentry, bstart);
17136+ dget(h_dentry);
17137+ rmdir_later = 0;
17138+ if (bindex == bstart) {
17139+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
17140+ if (err > 0) {
17141+ rmdir_later = err;
17142+ err = 0;
17143+ }
17144+ } else {
17145+ /* stop monitoring */
17146+ au_hn_free(au_hi(inode, bstart));
17147+
17148+ /* dir inode is locked */
17149+ IMustLock(wh_dentry->d_parent->d_inode);
1facf9fc 17150+ err = 0;
17151+ }
17152+
4a4d8108 17153+ if (!err) {
027c5e7a 17154+ vfsub_dead_dir(inode);
4a4d8108
AM
17155+ au_set_dbdiropq(dentry, -1);
17156+ epilog(dir, dentry, bindex);
1308ab2a 17157+
4a4d8108
AM
17158+ if (rmdir_later) {
17159+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
17160+ args = NULL;
17161+ }
1308ab2a 17162+
4a4d8108 17163+ goto out_unpin; /* success */
1facf9fc 17164+ }
17165+
4a4d8108
AM
17166+ /* revert */
17167+ AuLabel(revert);
17168+ if (wh_dentry) {
17169+ int rerr;
1308ab2a 17170+
7f207e10 17171+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17172+ if (rerr)
17173+ err = rerr;
1facf9fc 17174+ }
17175+
4f0767ce 17176+out_unpin:
4a4d8108
AM
17177+ au_unpin(&pin);
17178+ dput(wh_dentry);
17179+ dput(h_dentry);
027c5e7a 17180+out_parent:
4a4d8108
AM
17181+ di_write_unlock(parent);
17182+ if (args)
17183+ au_whtmp_rmdir_free(args);
4f0767ce 17184+out_unlock:
4a4d8108 17185+ aufs_read_unlock(dentry, AuLock_DW);
4f0767ce 17186+out:
4a4d8108
AM
17187+ AuTraceErr(err);
17188+ return err;
dece6358 17189+}
7f207e10
AM
17190diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
17191--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
53392da6 17192+++ linux/fs/aufs/i_op_ren.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 17193@@ -0,0 +1,1017 @@
1facf9fc 17194+/*
027c5e7a 17195+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 17196+ *
17197+ * This program, aufs is free software; you can redistribute it and/or modify
17198+ * it under the terms of the GNU General Public License as published by
17199+ * the Free Software Foundation; either version 2 of the License, or
17200+ * (at your option) any later version.
dece6358
AM
17201+ *
17202+ * This program is distributed in the hope that it will be useful,
17203+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17204+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17205+ * GNU General Public License for more details.
17206+ *
17207+ * You should have received a copy of the GNU General Public License
17208+ * along with this program; if not, write to the Free Software
17209+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17210+ */
17211+
17212+/*
4a4d8108
AM
17213+ * inode operation (rename entry)
17214+ * todo: this is crazy monster
1facf9fc 17215+ */
17216+
17217+#include "aufs.h"
17218+
4a4d8108
AM
17219+enum { AuSRC, AuDST, AuSrcDst };
17220+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 17221+
4a4d8108
AM
17222+#define AuRen_ISDIR 1
17223+#define AuRen_ISSAMEDIR (1 << 1)
17224+#define AuRen_WHSRC (1 << 2)
17225+#define AuRen_WHDST (1 << 3)
17226+#define AuRen_MNT_WRITE (1 << 4)
17227+#define AuRen_DT_DSTDIR (1 << 5)
17228+#define AuRen_DIROPQ (1 << 6)
17229+#define AuRen_CPUP (1 << 7)
17230+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
17231+#define au_fset_ren(flags, name) \
17232+ do { (flags) |= AuRen_##name; } while (0)
17233+#define au_fclr_ren(flags, name) \
17234+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 17235+
4a4d8108
AM
17236+struct au_ren_args {
17237+ struct {
17238+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
17239+ *wh_dentry;
17240+ struct inode *dir, *inode;
17241+ struct au_hinode *hdir;
17242+ struct au_dtime dt[AuParentChild];
17243+ aufs_bindex_t bstart;
17244+ } sd[AuSrcDst];
1facf9fc 17245+
4a4d8108
AM
17246+#define src_dentry sd[AuSRC].dentry
17247+#define src_dir sd[AuSRC].dir
17248+#define src_inode sd[AuSRC].inode
17249+#define src_h_dentry sd[AuSRC].h_dentry
17250+#define src_parent sd[AuSRC].parent
17251+#define src_h_parent sd[AuSRC].h_parent
17252+#define src_wh_dentry sd[AuSRC].wh_dentry
17253+#define src_hdir sd[AuSRC].hdir
17254+#define src_h_dir sd[AuSRC].hdir->hi_inode
17255+#define src_dt sd[AuSRC].dt
17256+#define src_bstart sd[AuSRC].bstart
1facf9fc 17257+
4a4d8108
AM
17258+#define dst_dentry sd[AuDST].dentry
17259+#define dst_dir sd[AuDST].dir
17260+#define dst_inode sd[AuDST].inode
17261+#define dst_h_dentry sd[AuDST].h_dentry
17262+#define dst_parent sd[AuDST].parent
17263+#define dst_h_parent sd[AuDST].h_parent
17264+#define dst_wh_dentry sd[AuDST].wh_dentry
17265+#define dst_hdir sd[AuDST].hdir
17266+#define dst_h_dir sd[AuDST].hdir->hi_inode
17267+#define dst_dt sd[AuDST].dt
17268+#define dst_bstart sd[AuDST].bstart
17269+
17270+ struct dentry *h_trap;
17271+ struct au_branch *br;
17272+ struct au_hinode *src_hinode;
17273+ struct path h_path;
17274+ struct au_nhash whlist;
027c5e7a 17275+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 17276+
1308ab2a 17277+ unsigned int flags;
1facf9fc 17278+
4a4d8108
AM
17279+ struct au_whtmp_rmdir *thargs;
17280+ struct dentry *h_dst;
17281+};
1308ab2a 17282+
4a4d8108 17283+/* ---------------------------------------------------------------------- */
1308ab2a 17284+
4a4d8108
AM
17285+/*
17286+ * functions for reverting.
17287+ * when an error happened in a single rename systemcall, we should revert
17288+ * everything as if nothing happened.
17289+ * we don't need to revert the copy-up/down of the parent dir since it is
17290+ * harmless.
17291+ */
1facf9fc 17292+
4a4d8108
AM
17293+#define RevertFailure(fmt, ...) do { \
17294+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
17295+ ##__VA_ARGS__, err, rerr); \
17296+ err = -EIO; \
17297+} while (0)
1facf9fc 17298+
4a4d8108 17299+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 17300+{
4a4d8108 17301+ int rerr;
1facf9fc 17302+
4a4d8108
AM
17303+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17304+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
17305+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 17306+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108
AM
17307+ if (rerr)
17308+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
17309+}
1facf9fc 17310+
4a4d8108
AM
17311+static void au_ren_rev_rename(int err, struct au_ren_args *a)
17312+{
17313+ int rerr;
1facf9fc 17314+
4a4d8108
AM
17315+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
17316+ a->br, /*nd*/NULL);
17317+ rerr = PTR_ERR(a->h_path.dentry);
17318+ if (IS_ERR(a->h_path.dentry)) {
17319+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
17320+ return;
1facf9fc 17321+ }
17322+
4a4d8108
AM
17323+ rerr = vfsub_rename(a->dst_h_dir,
17324+ au_h_dptr(a->src_dentry, a->btgt),
17325+ a->src_h_dir, &a->h_path);
17326+ d_drop(a->h_path.dentry);
17327+ dput(a->h_path.dentry);
17328+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
17329+ if (rerr)
17330+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
1facf9fc 17331+}
17332+
4a4d8108 17333+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
1facf9fc 17334+{
4a4d8108 17335+ int rerr;
1facf9fc 17336+
4a4d8108
AM
17337+ a->h_path.dentry = a->dst_h_dentry;
17338+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
17339+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
17340+ au_set_dbstart(a->src_dentry, a->src_bstart);
17341+ if (rerr)
17342+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
1facf9fc 17343+}
17344+
4a4d8108 17345+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 17346+{
4a4d8108 17347+ int rerr;
dece6358 17348+
4a4d8108
AM
17349+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
17350+ a->br, /*nd*/NULL);
17351+ rerr = PTR_ERR(a->h_path.dentry);
17352+ if (IS_ERR(a->h_path.dentry)) {
17353+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
17354+ return;
17355+ }
17356+ if (a->h_path.dentry->d_inode) {
17357+ d_drop(a->h_path.dentry);
17358+ dput(a->h_path.dentry);
17359+ return;
dece6358
AM
17360+ }
17361+
4a4d8108
AM
17362+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
17363+ d_drop(a->h_path.dentry);
17364+ dput(a->h_path.dentry);
17365+ if (!rerr)
17366+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
17367+ else
17368+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
17369+}
1308ab2a 17370+
4a4d8108
AM
17371+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
17372+{
17373+ int rerr;
1308ab2a 17374+
4a4d8108
AM
17375+ a->h_path.dentry = a->src_wh_dentry;
17376+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 17377+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108
AM
17378+ if (rerr)
17379+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
17380+}
4a4d8108 17381+#undef RevertFailure
1facf9fc 17382+
1308ab2a 17383+/* ---------------------------------------------------------------------- */
17384+
4a4d8108
AM
17385+/*
17386+ * when we have to copyup the renaming entry, do it with the rename-target name
17387+ * in order to minimize the cost (the later actual rename is unnecessary).
17388+ * otherwise rename it on the target branch.
17389+ */
17390+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 17391+{
dece6358 17392+ int err;
4a4d8108 17393+ struct dentry *d;
1facf9fc 17394+
4a4d8108
AM
17395+ d = a->src_dentry;
17396+ if (au_dbstart(d) == a->btgt) {
17397+ a->h_path.dentry = a->dst_h_dentry;
17398+ if (au_ftest_ren(a->flags, DIROPQ)
17399+ && au_dbdiropq(d) == a->btgt)
17400+ au_fclr_ren(a->flags, DIROPQ);
17401+ AuDebugOn(au_dbstart(d) != a->btgt);
17402+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
17403+ a->dst_h_dir, &a->h_path);
17404+ } else {
17405+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17406+ struct file *h_file;
1308ab2a 17407+
4a4d8108
AM
17408+ au_fset_ren(a->flags, CPUP);
17409+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17410+ au_set_dbstart(d, a->btgt);
17411+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
17412+ h_file = au_h_open_pre(d, a->src_bstart);
17413+ if (IS_ERR(h_file)) {
17414+ err = PTR_ERR(h_file);
17415+ h_file = NULL;
17416+ } else
17417+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
17418+ !AuCpup_DTIME, a->dst_parent);
17419+ mutex_unlock(h_mtx);
17420+ au_h_open_post(d, a->src_bstart, h_file);
17421+ if (!err) {
17422+ d = a->dst_dentry;
17423+ au_set_h_dptr(d, a->btgt, NULL);
17424+ au_update_dbstart(d);
17425+ } else {
17426+ au_set_h_dptr(d, a->btgt, NULL);
17427+ au_set_dbstart(d, a->src_bstart);
17428+ }
1308ab2a 17429+ }
027c5e7a
AM
17430+ if (!err && a->h_dst)
17431+ /* it will be set to dinfo later */
17432+ dget(a->h_dst);
1facf9fc 17433+
dece6358
AM
17434+ return err;
17435+}
1facf9fc 17436+
4a4d8108
AM
17437+/* cf. aufs_rmdir() */
17438+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 17439+{
4a4d8108
AM
17440+ int err;
17441+ struct inode *dir;
1facf9fc 17442+
4a4d8108
AM
17443+ dir = a->dst_dir;
17444+ SiMustAnyLock(dir->i_sb);
17445+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
17446+ au_sbi(dir->i_sb)->si_dirwh)
17447+ || au_test_fs_remote(a->h_dst->d_sb)) {
17448+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
17449+ if (unlikely(err))
17450+ pr_warning("failed removing whtmp dir %.*s (%d), "
17451+ "ignored.\n", AuDLNPair(a->h_dst), err);
17452+ } else {
17453+ au_nhash_wh_free(&a->thargs->whlist);
17454+ a->thargs->whlist = a->whlist;
17455+ a->whlist.nh_num = 0;
17456+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
17457+ dput(a->h_dst);
17458+ a->thargs = NULL;
17459+ }
17460+
17461+ return 0;
1308ab2a 17462+}
1facf9fc 17463+
4a4d8108
AM
17464+/* make it 'opaque' dir. */
17465+static int au_ren_diropq(struct au_ren_args *a)
17466+{
17467+ int err;
17468+ struct dentry *diropq;
1facf9fc 17469+
4a4d8108 17470+ err = 0;
027c5e7a 17471+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
17472+ a->src_hinode = au_hi(a->src_inode, a->btgt);
17473+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17474+ diropq = au_diropq_create(a->src_dentry, a->btgt);
17475+ au_hn_imtx_unlock(a->src_hinode);
17476+ if (IS_ERR(diropq))
17477+ err = PTR_ERR(diropq);
17478+ dput(diropq);
1facf9fc 17479+
4a4d8108
AM
17480+ return err;
17481+}
1facf9fc 17482+
4a4d8108
AM
17483+static int do_rename(struct au_ren_args *a)
17484+{
17485+ int err;
17486+ struct dentry *d, *h_d;
1facf9fc 17487+
4a4d8108
AM
17488+ /* prepare workqueue args for asynchronous rmdir */
17489+ h_d = a->dst_h_dentry;
17490+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
17491+ err = -ENOMEM;
17492+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
17493+ if (unlikely(!a->thargs))
17494+ goto out;
17495+ a->h_dst = dget(h_d);
17496+ }
1facf9fc 17497+
4a4d8108
AM
17498+ /* create whiteout for src_dentry */
17499+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
17500+ a->src_bwh = au_dbwh(a->src_dentry);
17501+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
17502+ a->src_wh_dentry
17503+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
17504+ err = PTR_ERR(a->src_wh_dentry);
17505+ if (IS_ERR(a->src_wh_dentry))
17506+ goto out_thargs;
17507+ }
1facf9fc 17508+
4a4d8108
AM
17509+ /* lookup whiteout for dentry */
17510+ if (au_ftest_ren(a->flags, WHDST)) {
17511+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
17512+ a->br);
17513+ err = PTR_ERR(h_d);
17514+ if (IS_ERR(h_d))
17515+ goto out_whsrc;
17516+ if (!h_d->d_inode)
17517+ dput(h_d);
17518+ else
17519+ a->dst_wh_dentry = h_d;
17520+ }
1facf9fc 17521+
4a4d8108
AM
17522+ /* rename dentry to tmpwh */
17523+ if (a->thargs) {
17524+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
17525+ if (unlikely(err))
17526+ goto out_whdst;
dece6358 17527+
4a4d8108
AM
17528+ d = a->dst_dentry;
17529+ au_set_h_dptr(d, a->btgt, NULL);
17530+ err = au_lkup_neg(d, a->btgt);
17531+ if (unlikely(err))
17532+ goto out_whtmp;
17533+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
17534+ }
1facf9fc 17535+
4a4d8108
AM
17536+ /* cpup src */
17537+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
17538+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17539+ struct file *h_file;
1facf9fc 17540+
4a4d8108
AM
17541+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17542+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
17543+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
17544+ if (IS_ERR(h_file)) {
17545+ err = PTR_ERR(h_file);
17546+ h_file = NULL;
17547+ } else
17548+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
17549+ !AuCpup_DTIME);
17550+ mutex_unlock(h_mtx);
17551+ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
17552+ if (unlikely(err))
17553+ goto out_whtmp;
17554+ }
1facf9fc 17555+
4a4d8108
AM
17556+ /* rename by vfs_rename or cpup */
17557+ d = a->dst_dentry;
17558+ if (au_ftest_ren(a->flags, ISDIR)
17559+ && (a->dst_wh_dentry
17560+ || au_dbdiropq(d) == a->btgt
17561+ /* hide the lower to keep xino */
17562+ || a->btgt < au_dbend(d)
17563+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
17564+ au_fset_ren(a->flags, DIROPQ);
17565+ err = au_ren_or_cpup(a);
17566+ if (unlikely(err))
17567+ /* leave the copied-up one */
17568+ goto out_whtmp;
1308ab2a 17569+
4a4d8108
AM
17570+ /* make dir opaque */
17571+ if (au_ftest_ren(a->flags, DIROPQ)) {
17572+ err = au_ren_diropq(a);
17573+ if (unlikely(err))
17574+ goto out_rename;
17575+ }
1308ab2a 17576+
4a4d8108
AM
17577+ /* update target timestamps */
17578+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
17579+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
17580+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
17581+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
1facf9fc 17582+
4a4d8108
AM
17583+ /* remove whiteout for dentry */
17584+ if (a->dst_wh_dentry) {
17585+ a->h_path.dentry = a->dst_wh_dentry;
17586+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
17587+ a->dst_dentry);
17588+ if (unlikely(err))
17589+ goto out_diropq;
17590+ }
1facf9fc 17591+
4a4d8108
AM
17592+ /* remove whtmp */
17593+ if (a->thargs)
17594+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 17595+
4a4d8108
AM
17596+ err = 0;
17597+ goto out_success;
17598+
4f0767ce 17599+out_diropq:
4a4d8108
AM
17600+ if (au_ftest_ren(a->flags, DIROPQ))
17601+ au_ren_rev_diropq(err, a);
4f0767ce 17602+out_rename:
4a4d8108
AM
17603+ if (!au_ftest_ren(a->flags, CPUP))
17604+ au_ren_rev_rename(err, a);
17605+ else
17606+ au_ren_rev_cpup(err, a);
027c5e7a 17607+ dput(a->h_dst);
4f0767ce 17608+out_whtmp:
4a4d8108
AM
17609+ if (a->thargs)
17610+ au_ren_rev_whtmp(err, a);
4f0767ce 17611+out_whdst:
4a4d8108
AM
17612+ dput(a->dst_wh_dentry);
17613+ a->dst_wh_dentry = NULL;
4f0767ce 17614+out_whsrc:
4a4d8108
AM
17615+ if (a->src_wh_dentry)
17616+ au_ren_rev_whsrc(err, a);
4f0767ce 17617+out_success:
4a4d8108
AM
17618+ dput(a->src_wh_dentry);
17619+ dput(a->dst_wh_dentry);
4f0767ce 17620+out_thargs:
4a4d8108
AM
17621+ if (a->thargs) {
17622+ dput(a->h_dst);
17623+ au_whtmp_rmdir_free(a->thargs);
17624+ a->thargs = NULL;
17625+ }
4f0767ce 17626+out:
4a4d8108 17627+ return err;
dece6358 17628+}
1facf9fc 17629+
1308ab2a 17630+/* ---------------------------------------------------------------------- */
1facf9fc 17631+
4a4d8108
AM
17632+/*
17633+ * test if @dentry dir can be a rename destination or not.
17634+ * success means, it is a logically empty dir.
17635+ */
17636+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 17637+{
4a4d8108 17638+ return au_test_empty(dentry, whlist);
1308ab2a 17639+}
1facf9fc 17640+
4a4d8108
AM
17641+/*
17642+ * test if @dentry dir can be a rename source or not.
17643+ * if it can, return 0 and @children is filled.
17644+ * success means,
17645+ * - it is a logically empty dir.
17646+ * - or, it exists on writable branch and has no children including whiteouts
17647+ * on the lower branch.
17648+ */
17649+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
17650+{
17651+ int err;
17652+ unsigned int rdhash;
17653+ aufs_bindex_t bstart;
1facf9fc 17654+
4a4d8108
AM
17655+ bstart = au_dbstart(dentry);
17656+ if (bstart != btgt) {
17657+ struct au_nhash whlist;
dece6358 17658+
4a4d8108
AM
17659+ SiMustAnyLock(dentry->d_sb);
17660+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
17661+ if (!rdhash)
17662+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
17663+ dentry));
17664+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
17665+ if (unlikely(err))
17666+ goto out;
17667+ err = au_test_empty(dentry, &whlist);
17668+ au_nhash_wh_free(&whlist);
17669+ goto out;
17670+ }
dece6358 17671+
4a4d8108
AM
17672+ if (bstart == au_dbtaildir(dentry))
17673+ return 0; /* success */
dece6358 17674+
4a4d8108 17675+ err = au_test_empty_lower(dentry);
1facf9fc 17676+
4f0767ce 17677+out:
4a4d8108
AM
17678+ if (err == -ENOTEMPTY) {
17679+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
17680+ " is not supported\n");
17681+ err = -EXDEV;
17682+ }
17683+ return err;
17684+}
1308ab2a 17685+
4a4d8108
AM
17686+/* side effect: sets whlist and h_dentry */
17687+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 17688+{
4a4d8108
AM
17689+ int err;
17690+ unsigned int rdhash;
17691+ struct dentry *d;
1facf9fc 17692+
4a4d8108
AM
17693+ d = a->dst_dentry;
17694+ SiMustAnyLock(d->d_sb);
1facf9fc 17695+
4a4d8108
AM
17696+ err = 0;
17697+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
17698+ rdhash = au_sbi(d->d_sb)->si_rdhash;
17699+ if (!rdhash)
17700+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
17701+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
17702+ if (unlikely(err))
17703+ goto out;
1308ab2a 17704+
4a4d8108
AM
17705+ au_set_dbstart(d, a->dst_bstart);
17706+ err = may_rename_dstdir(d, &a->whlist);
17707+ au_set_dbstart(d, a->btgt);
17708+ }
17709+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
17710+ if (unlikely(err))
17711+ goto out;
17712+
17713+ d = a->src_dentry;
17714+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
17715+ if (au_ftest_ren(a->flags, ISDIR)) {
17716+ err = may_rename_srcdir(d, a->btgt);
17717+ if (unlikely(err)) {
17718+ au_nhash_wh_free(&a->whlist);
17719+ a->whlist.nh_num = 0;
17720+ }
17721+ }
4f0767ce 17722+out:
4a4d8108 17723+ return err;
1facf9fc 17724+}
17725+
4a4d8108 17726+/* ---------------------------------------------------------------------- */
1facf9fc 17727+
4a4d8108
AM
17728+/*
17729+ * simple tests for rename.
17730+ * following the checks in vfs, plus the parent-child relationship.
17731+ */
17732+static int au_may_ren(struct au_ren_args *a)
17733+{
17734+ int err, isdir;
17735+ struct inode *h_inode;
1facf9fc 17736+
4a4d8108
AM
17737+ if (a->src_bstart == a->btgt) {
17738+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
17739+ au_ftest_ren(a->flags, ISDIR));
17740+ if (unlikely(err))
17741+ goto out;
17742+ err = -EINVAL;
17743+ if (unlikely(a->src_h_dentry == a->h_trap))
17744+ goto out;
17745+ }
1facf9fc 17746+
4a4d8108
AM
17747+ err = 0;
17748+ if (a->dst_bstart != a->btgt)
17749+ goto out;
1facf9fc 17750+
027c5e7a
AM
17751+ err = -ENOTEMPTY;
17752+ if (unlikely(a->dst_h_dentry == a->h_trap))
17753+ goto out;
17754+
4a4d8108
AM
17755+ err = -EIO;
17756+ h_inode = a->dst_h_dentry->d_inode;
17757+ isdir = !!au_ftest_ren(a->flags, ISDIR);
17758+ if (!a->dst_dentry->d_inode) {
17759+ if (unlikely(h_inode))
17760+ goto out;
17761+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
17762+ isdir);
17763+ } else {
17764+ if (unlikely(!h_inode || !h_inode->i_nlink))
17765+ goto out;
17766+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
17767+ isdir);
17768+ if (unlikely(err))
17769+ goto out;
4a4d8108 17770+ }
1facf9fc 17771+
4f0767ce 17772+out:
4a4d8108
AM
17773+ if (unlikely(err == -ENOENT || err == -EEXIST))
17774+ err = -EIO;
17775+ AuTraceErr(err);
17776+ return err;
17777+}
1facf9fc 17778+
1308ab2a 17779+/* ---------------------------------------------------------------------- */
1facf9fc 17780+
4a4d8108
AM
17781+/*
17782+ * locking order
17783+ * (VFS)
17784+ * - src_dir and dir by lock_rename()
17785+ * - inode if exists
17786+ * (aufs)
17787+ * - lock all
17788+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
17789+ * + si_read_lock
17790+ * + di_write_lock2_child()
17791+ * + di_write_lock_child()
17792+ * + ii_write_lock_child()
17793+ * + di_write_lock_child2()
17794+ * + ii_write_lock_child2()
17795+ * + src_parent and parent
17796+ * + di_write_lock_parent()
17797+ * + ii_write_lock_parent()
17798+ * + di_write_lock_parent2()
17799+ * + ii_write_lock_parent2()
17800+ * + lower src_dir and dir by vfsub_lock_rename()
17801+ * + verify every relationship between child and parent. if any
17802+ * of them failed, unlock all and return -EBUSY.
17803+ */
17804+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 17805+{
4a4d8108
AM
17806+ struct super_block *sb;
17807+
17808+ sb = a->dst_dentry->d_sb;
17809+ if (au_ftest_ren(a->flags, MNT_WRITE))
17810+ mnt_drop_write(a->br->br_mnt);
17811+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
17812+ a->dst_h_parent, a->dst_hdir);
1308ab2a 17813+}
17814+
4a4d8108 17815+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 17816+{
4a4d8108
AM
17817+ int err;
17818+ unsigned int udba;
1308ab2a 17819+
4a4d8108
AM
17820+ err = 0;
17821+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
17822+ a->src_hdir = au_hi(a->src_dir, a->btgt);
17823+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
17824+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
17825+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
17826+ a->dst_h_parent, a->dst_hdir);
17827+ udba = au_opt_udba(a->src_dentry->d_sb);
17828+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
17829+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
17830+ err = au_busy_or_stale();
17831+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
17832+ err = au_h_verify(a->src_h_dentry, udba,
17833+ a->src_h_parent->d_inode, a->src_h_parent,
17834+ a->br);
17835+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
17836+ err = au_h_verify(a->dst_h_dentry, udba,
17837+ a->dst_h_parent->d_inode, a->dst_h_parent,
17838+ a->br);
17839+ if (!err) {
17840+ err = mnt_want_write(a->br->br_mnt);
17841+ if (unlikely(err))
17842+ goto out_unlock;
17843+ au_fset_ren(a->flags, MNT_WRITE);
17844+ goto out; /* success */
17845+ }
17846+
17847+ err = au_busy_or_stale();
17848+
4f0767ce 17849+out_unlock:
4a4d8108 17850+ au_ren_unlock(a);
4f0767ce 17851+out:
4a4d8108 17852+ return err;
1facf9fc 17853+}
17854+
17855+/* ---------------------------------------------------------------------- */
17856+
4a4d8108 17857+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 17858+{
4a4d8108 17859+ struct inode *dir;
dece6358 17860+
4a4d8108
AM
17861+ dir = a->dst_dir;
17862+ dir->i_version++;
17863+ if (au_ftest_ren(a->flags, ISDIR)) {
17864+ /* is this updating defined in POSIX? */
17865+ au_cpup_attr_timesizes(a->src_inode);
17866+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 17867+ }
027c5e7a 17868+
4a4d8108
AM
17869+ if (au_ibstart(dir) == a->btgt)
17870+ au_cpup_attr_timesizes(dir);
dece6358 17871+
4a4d8108
AM
17872+ if (au_ftest_ren(a->flags, ISSAMEDIR))
17873+ return;
dece6358 17874+
4a4d8108
AM
17875+ dir = a->src_dir;
17876+ dir->i_version++;
17877+ if (au_ftest_ren(a->flags, ISDIR))
17878+ au_cpup_attr_nlink(dir, /*force*/1);
17879+ if (au_ibstart(dir) == a->btgt)
17880+ au_cpup_attr_timesizes(dir);
1facf9fc 17881+}
17882+
4a4d8108 17883+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 17884+{
4a4d8108
AM
17885+ aufs_bindex_t bend, bindex;
17886+ struct dentry *d, *h_d;
17887+ struct inode *i, *h_i;
17888+ struct super_block *sb;
dece6358 17889+
027c5e7a
AM
17890+ d = a->dst_dentry;
17891+ d_drop(d);
17892+ if (a->h_dst)
17893+ /* already dget-ed by au_ren_or_cpup() */
17894+ au_set_h_dptr(d, a->btgt, a->h_dst);
17895+
17896+ i = a->dst_inode;
17897+ if (i) {
17898+ if (!au_ftest_ren(a->flags, ISDIR))
17899+ vfsub_drop_nlink(i);
17900+ else {
17901+ vfsub_dead_dir(i);
17902+ au_cpup_attr_timesizes(i);
17903+ }
17904+ au_update_dbrange(d, /*do_put_zero*/1);
17905+ } else {
17906+ bend = a->btgt;
17907+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
17908+ au_set_h_dptr(d, bindex, NULL);
17909+ bend = au_dbend(d);
17910+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
17911+ au_set_h_dptr(d, bindex, NULL);
17912+ au_update_dbrange(d, /*do_put_zero*/0);
17913+ }
17914+
4a4d8108
AM
17915+ d = a->src_dentry;
17916+ au_set_dbwh(d, -1);
17917+ bend = au_dbend(d);
17918+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17919+ h_d = au_h_dptr(d, bindex);
17920+ if (h_d)
17921+ au_set_h_dptr(d, bindex, NULL);
17922+ }
17923+ au_set_dbend(d, a->btgt);
17924+
17925+ sb = d->d_sb;
17926+ i = a->src_inode;
17927+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
17928+ return; /* success */
17929+
17930+ bend = au_ibend(i);
17931+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17932+ h_i = au_h_iptr(i, bindex);
17933+ if (h_i) {
17934+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
17935+ /* ignore this error */
17936+ au_set_h_iptr(i, bindex, NULL, 0);
17937+ }
17938+ }
17939+ au_set_ibend(i, a->btgt);
1308ab2a 17940+}
dece6358 17941+
4a4d8108
AM
17942+/* ---------------------------------------------------------------------- */
17943+
17944+/* mainly for link(2) and rename(2) */
17945+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 17946+{
4a4d8108
AM
17947+ aufs_bindex_t bdiropq, bwh;
17948+ struct dentry *parent;
17949+ struct au_branch *br;
17950+
17951+ parent = dentry->d_parent;
17952+ IMustLock(parent->d_inode); /* dir is locked */
17953+
17954+ bdiropq = au_dbdiropq(parent);
17955+ bwh = au_dbwh(dentry);
17956+ br = au_sbr(dentry->d_sb, btgt);
17957+ if (au_br_rdonly(br)
17958+ || (0 <= bdiropq && bdiropq < btgt)
17959+ || (0 <= bwh && bwh < btgt))
17960+ btgt = -1;
17961+
17962+ AuDbg("btgt %d\n", btgt);
17963+ return btgt;
1facf9fc 17964+}
17965+
4a4d8108
AM
17966+/* sets src_bstart, dst_bstart and btgt */
17967+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 17968+{
4a4d8108
AM
17969+ int err;
17970+ struct au_wr_dir_args wr_dir_args = {
17971+ /* .force_btgt = -1, */
17972+ .flags = AuWrDir_ADD_ENTRY
17973+ };
dece6358 17974+
4a4d8108
AM
17975+ a->src_bstart = au_dbstart(a->src_dentry);
17976+ a->dst_bstart = au_dbstart(a->dst_dentry);
17977+ if (au_ftest_ren(a->flags, ISDIR))
17978+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
17979+ wr_dir_args.force_btgt = a->src_bstart;
17980+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
17981+ wr_dir_args.force_btgt = a->dst_bstart;
17982+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
17983+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
17984+ a->btgt = err;
dece6358 17985+
4a4d8108 17986+ return err;
1facf9fc 17987+}
17988+
4a4d8108 17989+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 17990+{
4a4d8108
AM
17991+ a->h_path.dentry = a->src_h_parent;
17992+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
17993+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
17994+ a->h_path.dentry = a->dst_h_parent;
17995+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
17996+ }
1facf9fc 17997+
4a4d8108
AM
17998+ au_fclr_ren(a->flags, DT_DSTDIR);
17999+ if (!au_ftest_ren(a->flags, ISDIR))
18000+ return;
dece6358 18001+
4a4d8108
AM
18002+ a->h_path.dentry = a->src_h_dentry;
18003+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
18004+ if (a->dst_h_dentry->d_inode) {
18005+ au_fset_ren(a->flags, DT_DSTDIR);
18006+ a->h_path.dentry = a->dst_h_dentry;
18007+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
18008+ }
1308ab2a 18009+}
dece6358 18010+
4a4d8108 18011+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 18012+{
4a4d8108
AM
18013+ struct dentry *h_d;
18014+ struct mutex *h_mtx;
18015+
18016+ au_dtime_revert(a->src_dt + AuPARENT);
18017+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
18018+ au_dtime_revert(a->dst_dt + AuPARENT);
18019+
18020+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
18021+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
18022+ h_mtx = &h_d->d_inode->i_mutex;
18023+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18024+ au_dtime_revert(a->src_dt + AuCHILD);
18025+ mutex_unlock(h_mtx);
18026+
18027+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
18028+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
18029+ h_mtx = &h_d->d_inode->i_mutex;
18030+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18031+ au_dtime_revert(a->dst_dt + AuCHILD);
18032+ mutex_unlock(h_mtx);
1facf9fc 18033+ }
18034+ }
18035+}
18036+
4a4d8108
AM
18037+/* ---------------------------------------------------------------------- */
18038+
18039+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
18040+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 18041+{
e49829fe 18042+ int err, flags;
4a4d8108
AM
18043+ /* reduce stack space */
18044+ struct au_ren_args *a;
18045+
18046+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
18047+ IMustLock(_src_dir);
18048+ IMustLock(_dst_dir);
18049+
18050+ err = -ENOMEM;
18051+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
18052+ a = kzalloc(sizeof(*a), GFP_NOFS);
18053+ if (unlikely(!a))
18054+ goto out;
18055+
18056+ a->src_dir = _src_dir;
18057+ a->src_dentry = _src_dentry;
18058+ a->src_inode = a->src_dentry->d_inode;
18059+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
18060+ a->dst_dir = _dst_dir;
18061+ a->dst_dentry = _dst_dentry;
18062+ a->dst_inode = a->dst_dentry->d_inode;
18063+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
18064+ if (a->dst_inode) {
18065+ IMustLock(a->dst_inode);
18066+ au_igrab(a->dst_inode);
1facf9fc 18067+ }
1facf9fc 18068+
4a4d8108 18069+ err = -ENOTDIR;
027c5e7a 18070+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
4a4d8108
AM
18071+ if (S_ISDIR(a->src_inode->i_mode)) {
18072+ au_fset_ren(a->flags, ISDIR);
18073+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
18074+ goto out_free;
e49829fe
JR
18075+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18076+ AuLock_DIR | flags);
4a4d8108 18077+ } else
e49829fe
JR
18078+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18079+ flags);
18080+ if (unlikely(err))
18081+ goto out_free;
1facf9fc 18082+
027c5e7a
AM
18083+ err = au_d_hashed_positive(a->src_dentry);
18084+ if (unlikely(err))
18085+ goto out_unlock;
18086+ err = -ENOENT;
18087+ if (a->dst_inode) {
18088+ /*
18089+ * If it is a dir, VFS unhash dst_dentry before this
18090+ * function. It means we cannot rely upon d_unhashed().
18091+ */
18092+ if (unlikely(!a->dst_inode->i_nlink))
18093+ goto out_unlock;
18094+ if (!S_ISDIR(a->dst_inode->i_mode)) {
18095+ err = au_d_hashed_positive(a->dst_dentry);
18096+ if (unlikely(err))
18097+ goto out_unlock;
18098+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
18099+ goto out_unlock;
18100+ } else if (unlikely(d_unhashed(a->dst_dentry)))
18101+ goto out_unlock;
18102+
4a4d8108
AM
18103+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
18104+ di_write_lock_parent(a->dst_parent);
1facf9fc 18105+
4a4d8108
AM
18106+ /* which branch we process */
18107+ err = au_ren_wbr(a);
18108+ if (unlikely(err < 0))
027c5e7a 18109+ goto out_parent;
4a4d8108
AM
18110+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
18111+ a->h_path.mnt = a->br->br_mnt;
1facf9fc 18112+
4a4d8108
AM
18113+ /* are they available to be renamed */
18114+ err = au_ren_may_dir(a);
18115+ if (unlikely(err))
18116+ goto out_children;
1facf9fc 18117+
4a4d8108
AM
18118+ /* prepare the writable parent dir on the same branch */
18119+ if (a->dst_bstart == a->btgt) {
18120+ au_fset_ren(a->flags, WHDST);
18121+ } else {
18122+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
18123+ if (unlikely(err))
18124+ goto out_children;
18125+ }
1facf9fc 18126+
4a4d8108
AM
18127+ if (a->src_dir != a->dst_dir) {
18128+ /*
18129+ * this temporary unlock is safe,
18130+ * because both dir->i_mutex are locked.
18131+ */
18132+ di_write_unlock(a->dst_parent);
18133+ di_write_lock_parent(a->src_parent);
18134+ err = au_wr_dir_need_wh(a->src_dentry,
18135+ au_ftest_ren(a->flags, ISDIR),
18136+ &a->btgt);
18137+ di_write_unlock(a->src_parent);
18138+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
18139+ au_fclr_ren(a->flags, ISSAMEDIR);
18140+ } else
18141+ err = au_wr_dir_need_wh(a->src_dentry,
18142+ au_ftest_ren(a->flags, ISDIR),
18143+ &a->btgt);
18144+ if (unlikely(err < 0))
18145+ goto out_children;
18146+ if (err)
18147+ au_fset_ren(a->flags, WHSRC);
1facf9fc 18148+
4a4d8108
AM
18149+ /* lock them all */
18150+ err = au_ren_lock(a);
18151+ if (unlikely(err))
18152+ goto out_children;
1facf9fc 18153+
4a4d8108
AM
18154+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
18155+ err = au_may_ren(a);
18156+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
18157+ err = -ENAMETOOLONG;
18158+ if (unlikely(err))
18159+ goto out_hdir;
1facf9fc 18160+
4a4d8108
AM
18161+ /* store timestamps to be revertible */
18162+ au_ren_dt(a);
1facf9fc 18163+
4a4d8108
AM
18164+ /* here we go */
18165+ err = do_rename(a);
18166+ if (unlikely(err))
18167+ goto out_dt;
18168+
18169+ /* update dir attributes */
18170+ au_ren_refresh_dir(a);
18171+
18172+ /* dput/iput all lower dentries */
18173+ au_ren_refresh(a);
18174+
18175+ goto out_hdir; /* success */
18176+
4f0767ce 18177+out_dt:
4a4d8108 18178+ au_ren_rev_dt(err, a);
4f0767ce 18179+out_hdir:
4a4d8108 18180+ au_ren_unlock(a);
4f0767ce 18181+out_children:
4a4d8108 18182+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
18183+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
18184+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
18185+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
18186+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 18187+ }
027c5e7a 18188+out_parent:
4a4d8108
AM
18189+ if (!err)
18190+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
18191+ else {
18192+ au_update_dbstart(a->dst_dentry);
18193+ if (!a->dst_inode)
18194+ d_drop(a->dst_dentry);
18195+ }
4a4d8108
AM
18196+ if (au_ftest_ren(a->flags, ISSAMEDIR))
18197+ di_write_unlock(a->dst_parent);
18198+ else
18199+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 18200+out_unlock:
4a4d8108 18201+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 18202+out_free:
4a4d8108
AM
18203+ iput(a->dst_inode);
18204+ if (a->thargs)
18205+ au_whtmp_rmdir_free(a->thargs);
18206+ kfree(a);
4f0767ce 18207+out:
4a4d8108
AM
18208+ AuTraceErr(err);
18209+ return err;
1308ab2a 18210+}
7f207e10
AM
18211diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
18212--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
53392da6 18213+++ linux/fs/aufs/Kconfig 2011-08-24 13:30:24.727980364 +0200
2cbb1c4b 18214@@ -0,0 +1,203 @@
4a4d8108
AM
18215+config AUFS_FS
18216+ tristate "Aufs (Advanced multi layered unification filesystem) support"
18217+ depends on EXPERIMENTAL
18218+ help
18219+ Aufs is a stackable unification filesystem such as Unionfs,
18220+ which unifies several directories and provides a merged single
18221+ directory.
18222+ In the early days, aufs was entirely re-designed and
18223+ re-implemented Unionfs Version 1.x series. Introducing many
18224+ original ideas, approaches and improvements, it becomes totally
18225+ different from Unionfs while keeping the basic features.
1facf9fc 18226+
4a4d8108
AM
18227+if AUFS_FS
18228+choice
18229+ prompt "Maximum number of branches"
18230+ default AUFS_BRANCH_MAX_127
18231+ help
18232+ Specifies the maximum number of branches (or member directories)
18233+ in a single aufs. The larger value consumes more system
18234+ resources and has a minor impact to performance.
18235+config AUFS_BRANCH_MAX_127
18236+ bool "127"
18237+ help
18238+ Specifies the maximum number of branches (or member directories)
18239+ in a single aufs. The larger value consumes more system
18240+ resources and has a minor impact to performance.
18241+config AUFS_BRANCH_MAX_511
18242+ bool "511"
18243+ help
18244+ Specifies the maximum number of branches (or member directories)
18245+ in a single aufs. The larger value consumes more system
18246+ resources and has a minor impact to performance.
18247+config AUFS_BRANCH_MAX_1023
18248+ bool "1023"
18249+ help
18250+ Specifies the maximum number of branches (or member directories)
18251+ in a single aufs. The larger value consumes more system
18252+ resources and has a minor impact to performance.
18253+config AUFS_BRANCH_MAX_32767
18254+ bool "32767"
18255+ help
18256+ Specifies the maximum number of branches (or member directories)
18257+ in a single aufs. The larger value consumes more system
18258+ resources and has a minor impact to performance.
18259+endchoice
1facf9fc 18260+
e49829fe
JR
18261+config AUFS_SBILIST
18262+ bool
18263+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
18264+ default y
18265+ help
18266+ Automatic configuration for internal use.
18267+ When aufs supports Magic SysRq or /proc, enabled automatically.
18268+
4a4d8108
AM
18269+config AUFS_HNOTIFY
18270+ bool "Detect direct branch access (bypassing aufs)"
18271+ help
18272+ If you want to modify files on branches directly, eg. bypassing aufs,
18273+ and want aufs to detect the changes of them fully, then enable this
18274+ option and use 'udba=notify' mount option.
7f207e10 18275+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
18276+ It will have a negative impact to the performance.
18277+ See detail in aufs.5.
dece6358 18278+
4a4d8108
AM
18279+choice
18280+ prompt "method" if AUFS_HNOTIFY
18281+ default AUFS_HFSNOTIFY
18282+config AUFS_HFSNOTIFY
18283+ bool "fsnotify"
18284+ select FSNOTIFY
4a4d8108 18285+endchoice
1facf9fc 18286+
4a4d8108
AM
18287+config AUFS_EXPORT
18288+ bool "NFS-exportable aufs"
2cbb1c4b 18289+ depends on EXPORTFS
4a4d8108
AM
18290+ help
18291+ If you want to export your mounted aufs via NFS, then enable this
18292+ option. There are several requirements for this configuration.
18293+ See detail in aufs.5.
1facf9fc 18294+
4a4d8108
AM
18295+config AUFS_INO_T_64
18296+ bool
18297+ depends on AUFS_EXPORT
18298+ depends on 64BIT && !(ALPHA || S390)
18299+ default y
18300+ help
18301+ Automatic configuration for internal use.
18302+ /* typedef unsigned long/int __kernel_ino_t */
18303+ /* alpha and s390x are int */
1facf9fc 18304+
4a4d8108
AM
18305+config AUFS_RDU
18306+ bool "Readdir in userspace"
18307+ help
18308+ Aufs has two methods to provide a merged view for a directory,
18309+ by a user-space library and by kernel-space natively. The latter
18310+ is always enabled but sometimes large and slow.
18311+ If you enable this option, install the library in aufs2-util
18312+ package, and set some environment variables for your readdir(3),
18313+ then the work will be handled in user-space which generally
18314+ shows better performance in most cases.
18315+ See detail in aufs.5.
1facf9fc 18316+
2cbb1c4b
JR
18317+config AUFS_PROC_MAP
18318+ bool "support for /proc/maps and lsof(1)"
18319+ depends on PROC_FS
18320+ help
18321+ When you issue mmap(2) in aufs, it is actually a direct mmap(2)
18322+ call to the file on the branch fs since the file in aufs is
18323+ purely virtual. And the file path printed in /proc/maps (and
18324+ others) will be the path on the branch fs. In most cases, it
18325+ does no harm. But some utilities like lsof(1) may be confused since
18326+ the utility or user may expect the file path in aufs to be
18327+ printed.
18328+ To address this issue, aufs provides a patch which introduces a
18329+ new member called vm_prfile into struct vm_area_struct. The patch
18330+ is meaningless without enabling this configuration since nobody
18331+ sets the new vm_prfile member.
18332+ If you don't apply the patch, then enabling this configuration
18333+ will cause a compile error.
18334+ This approach is fragile since if someone else makes some changes
18335+ around vm_file, then vm_prfile may not work anymore. As a
18336+ workaround for such a case, aufs provides this configuration. If you
18337+ disable it, then lsof(1) may produce incorrect result but the
18338+ problem will be gone even if the aufs patch is applied (I hope).
18339+
4a4d8108
AM
18340+config AUFS_SP_IATTR
18341+ bool "Respect the attributes (mtime/ctime mainly) of special files"
18342+ help
18343+ When you write something to a special file, some attributes of it
18344+ (mtime/ctime mainly) may be updated. Generally such updates are
18345+ less important (actually some device drivers and NFS ignore
18346+ it). But some applications (such as test programs) require
18347+ such updates. If you need these updates, then enable this
18348+ configuration which introduces some overhead.
18349+ Currently this configuration handles FIFO only.
1facf9fc 18350+
4a4d8108
AM
18351+config AUFS_SHWH
18352+ bool "Show whiteouts"
18353+ help
18354+ If you want to make the whiteouts in aufs visible, then enable
18355+ this option and specify 'shwh' mount option. Although it may
18356+ sound like philosophy or something, technically it
18357+ simply shows the names of whiteouts while keeping their behaviour.
1facf9fc 18358+
4a4d8108
AM
18359+config AUFS_BR_RAMFS
18360+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
18361+ help
18362+ If you want to use ramfs as an aufs branch fs, then enable this
18363+ option. Generally tmpfs is recommended.
18364+ Aufs prohibited them to be a branch fs by default, because
18365+ initramfs becomes unusable after switch_root or something
18366+ generally. If you set initramfs as an aufs branch and boot your
18367+ system by switch_root, you will meet a problem easily since the
18368+ files in initramfs may be inaccessible.
18369+ Unless you are going to use ramfs as an aufs branch fs without
18370+ switch_root or something, leave it N.
1facf9fc 18371+
4a4d8108
AM
18372+config AUFS_BR_FUSE
18373+ bool "Fuse fs as an aufs branch"
18374+ depends on FUSE_FS
18375+ select AUFS_POLL
18376+ help
18377+ If you want to use fuse-based userspace filesystem as an aufs
18378+ branch fs, then enable this option.
18379+ It implements the internal poll(2) operation which is
18380+ implemented by fuse only (currently).
1facf9fc 18381+
4a4d8108
AM
18382+config AUFS_POLL
18383+ bool
18384+ help
18385+ Automatic configuration for internal use.
1facf9fc 18386+
4a4d8108
AM
18387+config AUFS_BR_HFSPLUS
18388+ bool "Hfsplus as an aufs branch"
18389+ depends on HFSPLUS_FS
18390+ default y
18391+ help
18392+ If you want to use hfsplus fs as an aufs branch fs, then enable
18393+ this option. This option introduces a small overhead at
18394+ copying-up a file on hfsplus.
1facf9fc 18395+
4a4d8108
AM
18396+config AUFS_BDEV_LOOP
18397+ bool
18398+ depends on BLK_DEV_LOOP
18399+ default y
18400+ help
18401+ Automatic configuration for internal use.
18402+ Convert =[ym] into =y.
1308ab2a 18403+
4a4d8108
AM
18404+config AUFS_DEBUG
18405+ bool "Debug aufs"
18406+ help
18407+ Enable this to compile aufs internal debug code.
18408+ It will have a negative impact to the performance.
18409+
18410+config AUFS_MAGIC_SYSRQ
18411+ bool
18412+ depends on AUFS_DEBUG && MAGIC_SYSRQ
18413+ default y
18414+ help
18415+ Automatic configuration for internal use.
18416+ When aufs supports Magic SysRq, enabled automatically.
18417+endif
7f207e10
AM
18418diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
18419--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18420+++ linux/fs/aufs/loop.c 2011-08-24 13:30:24.734646739 +0200
87a755f4 18421@@ -0,0 +1,133 @@
1facf9fc 18422+/*
027c5e7a 18423+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18424+ *
18425+ * This program, aufs is free software; you can redistribute it and/or modify
18426+ * it under the terms of the GNU General Public License as published by
18427+ * the Free Software Foundation; either version 2 of the License, or
18428+ * (at your option) any later version.
dece6358
AM
18429+ *
18430+ * This program is distributed in the hope that it will be useful,
18431+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18432+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18433+ * GNU General Public License for more details.
18434+ *
18435+ * You should have received a copy of the GNU General Public License
18436+ * along with this program; if not, write to the Free Software
18437+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18438+ */
18439+
18440+/*
18441+ * support for loopback block device as a branch
18442+ */
18443+
18444+#include <linux/loop.h>
18445+#include "aufs.h"
18446+
18447+/*
18448+ * test if two lower dentries have overlapping branches.
18449+ */
b752ccd1 18450+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 18451+{
b752ccd1 18452+ struct super_block *h_sb;
1facf9fc 18453+ struct loop_device *l;
18454+
b752ccd1
AM
18455+ h_sb = h_adding->d_sb;
18456+ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
1facf9fc 18457+ return 0;
18458+
b752ccd1
AM
18459+ l = h_sb->s_bdev->bd_disk->private_data;
18460+ h_adding = l->lo_backing_file->f_dentry;
18461+ /*
18462+ * h_adding can be local NFS.
18463+ * in this case aufs cannot detect the loop.
18464+ */
18465+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 18466+ return 1;
b752ccd1 18467+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 18468+}
18469+
18470+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
18471+int au_test_loopback_kthread(void)
18472+{
b752ccd1
AM
18473+ int ret;
18474+ struct task_struct *tsk = current;
18475+
18476+ ret = 0;
18477+ if (tsk->flags & PF_KTHREAD) {
18478+ const char c = tsk->comm[4];
18479+ ret = ('0' <= c && c <= '9'
18480+ && !strncmp(tsk->comm, "loop", 4));
18481+ }
1facf9fc 18482+
b752ccd1 18483+ return ret;
1facf9fc 18484+}
87a755f4
AM
18485+
18486+/* ---------------------------------------------------------------------- */
18487+
18488+#define au_warn_loopback_step 16
18489+static int au_warn_loopback_nelem = au_warn_loopback_step;
18490+static unsigned long *au_warn_loopback_array;
18491+
18492+void au_warn_loopback(struct super_block *h_sb)
18493+{
18494+ int i, new_nelem;
18495+ unsigned long *a, magic;
18496+ static DEFINE_SPINLOCK(spin);
18497+
18498+ magic = h_sb->s_magic;
18499+ spin_lock(&spin);
18500+ a = au_warn_loopback_array;
18501+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
18502+ if (a[i] == magic) {
18503+ spin_unlock(&spin);
18504+ return;
18505+ }
18506+
18507+ /* h_sb is new to us; store it in the first unused (zero) slot and print it */
18508+ if (i < au_warn_loopback_nelem) {
18509+ a[i] = magic;
18510+ goto pr;
18511+ }
18512+
18513+ /* every slot is used; expand the array by one step (spinlock held, so GFP_ATOMIC) */
18514+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
18515+ a = au_kzrealloc(au_warn_loopback_array,
18516+ au_warn_loopback_nelem * sizeof(unsigned long),
18517+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
18518+ if (a) {
18519+ au_warn_loopback_nelem = new_nelem;
18520+ au_warn_loopback_array = a;
18521+ a[i] = magic;
18522+ goto pr;
18523+ }
18524+
18525+ spin_unlock(&spin);
18526+ AuWarn1("realloc failed, ignored\n");
18527+ return;
18528+
18529+pr:
18530+ spin_unlock(&spin);
18531+ pr_warning("you may want to try another patch for loopback file "
18532+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
18533+}
18534+
18535+int au_loopback_init(void)
18536+{
18537+ int err;
18538+ struct super_block *sb __maybe_unused;
18539+
18540+ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long));
18541+
18542+ err = 0;
18543+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
18544+ sizeof(unsigned long), GFP_NOFS);
18545+ if (unlikely(!au_warn_loopback_array))
18546+ err = -ENOMEM;
18547+
18548+ return err;
18549+}
18550+
18551+void au_loopback_fin(void)
18552+{
18553+ kfree(au_warn_loopback_array);
18554+}
7f207e10
AM
18555diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
18556--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18557+++ linux/fs/aufs/loop.h 2011-08-24 13:30:24.734646739 +0200
87a755f4 18558@@ -0,0 +1,50 @@
1facf9fc 18559+/*
027c5e7a 18560+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18561+ *
18562+ * This program, aufs is free software; you can redistribute it and/or modify
18563+ * it under the terms of the GNU General Public License as published by
18564+ * the Free Software Foundation; either version 2 of the License, or
18565+ * (at your option) any later version.
dece6358
AM
18566+ *
18567+ * This program is distributed in the hope that it will be useful,
18568+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18569+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18570+ * GNU General Public License for more details.
18571+ *
18572+ * You should have received a copy of the GNU General Public License
18573+ * along with this program; if not, write to the Free Software
18574+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18575+ */
18576+
18577+/*
18578+ * support for loopback mount as a branch
18579+ */
18580+
18581+#ifndef __AUFS_LOOP_H__
18582+#define __AUFS_LOOP_H__
18583+
18584+#ifdef __KERNEL__
18585+
dece6358
AM
18586+struct dentry;
18587+struct super_block;
1facf9fc 18588+
18589+#ifdef CONFIG_AUFS_BDEV_LOOP
18590+/* loop.c */
b752ccd1 18591+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 18592+int au_test_loopback_kthread(void);
87a755f4
AM
18593+void au_warn_loopback(struct super_block *h_sb);
18594+
18595+int au_loopback_init(void);
18596+void au_loopback_fin(void);
1facf9fc 18597+#else
4a4d8108 18598+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 18599+ struct dentry *h_adding)
4a4d8108 18600+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
18601+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
18602+
18603+AuStubInt0(au_loopback_init, void)
18604+AuStubVoid(au_loopback_fin, void)
1facf9fc 18605+#endif /* CONFIG_AUFS_BDEV_LOOP */
18606+
18607+#endif /* __KERNEL__ */
18608+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
18609diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
18610--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 18611+++ linux/fs/aufs/magic.mk 2011-08-24 13:30:24.734646739 +0200
4a4d8108 18612@@ -0,0 +1,54 @@
1facf9fc 18613+
18614+# defined in ${srctree}/fs/fuse/inode.c
18615+# tristate
18616+ifdef CONFIG_FUSE_FS
18617+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
18618+endif
18619+
18620+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
18621+# tristate
18622+ifdef CONFIG_OCFS2_FS
18623+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
18624+endif
18625+
18626+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
18627+# tristate
18628+ifdef CONFIG_OCFS2_FS_O2CB
18629+ccflags-y += -DDLMFS_MAGIC=0x76a9f425
18630+endif
18631+
1facf9fc 18632+# defined in ${srctree}/fs/cifs/cifsfs.c
18633+# tristate
18634+ifdef CONFIG_CIFS_FS
18635+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
18636+endif
18637+
18638+# defined in ${srctree}/fs/xfs/xfs_sb.h
18639+# tristate
18640+ifdef CONFIG_XFS_FS
18641+ccflags-y += -DXFS_SB_MAGIC=0x58465342
18642+endif
18643+
18644+# defined in ${srctree}/fs/configfs/mount.c
18645+# tristate
18646+ifdef CONFIG_CONFIGFS_FS
18647+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
18648+endif
18649+
18650+# defined in ${srctree}/fs/9p/v9fs.h
18651+# tristate
18652+ifdef CONFIG_9P_FS
18653+ccflags-y += -DV9FS_MAGIC=0x01021997
18654+endif
18655+
18656+# defined in ${srctree}/fs/ubifs/ubifs.h
18657+# tristate
18658+ifdef CONFIG_UBIFS_FS
18659+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
18660+endif
4a4d8108
AM
18661+
18662+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
18663+# tristate
18664+ifdef CONFIG_HFSPLUS_FS
18665+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
18666+endif
7f207e10
AM
18667diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
18668--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
53392da6 18669+++ linux/fs/aufs/Makefile 2011-08-24 13:30:24.727980364 +0200
7f207e10 18670@@ -0,0 +1,38 @@
4a4d8108
AM
18671+
18672+include ${src}/magic.mk
18673+ifeq (${CONFIG_AUFS_FS},m)
18674+include ${src}/conf.mk
18675+endif
18676+-include ${src}/priv_def.mk
18677+
18678+# cf. include/linux/kernel.h
18679+# enable pr_debug
18680+ccflags-y += -DDEBUG
7f207e10
AM
18681+# sparse doesn't allow spaces
18682+ccflags-y += -D'pr_fmt(fmt)=AUFS_NAME"\040%s:%d:%s[%d]:\040"fmt,__func__,__LINE__,current->comm,current->pid'
4a4d8108
AM
18683+
18684+obj-$(CONFIG_AUFS_FS) += aufs.o
18685+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
18686+ wkq.o vfsub.o dcsub.o \
e49829fe 18687+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
18688+ dinfo.o dentry.o \
18689+ dynop.o \
18690+ finfo.o file.o f_op.o \
18691+ dir.o vdir.o \
18692+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
18693+ ioctl.o
18694+
18695+# all are boolean
e49829fe 18696+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
18697+aufs-$(CONFIG_SYSFS) += sysfs.o
18698+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
18699+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
18700+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
18701+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108
AM
18702+aufs-$(CONFIG_AUFS_EXPORT) += export.o
18703+aufs-$(CONFIG_AUFS_POLL) += poll.o
18704+aufs-$(CONFIG_AUFS_RDU) += rdu.o
18705+aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
18706+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
18707+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
18708+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
18709diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
18710--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18711+++ linux/fs/aufs/module.c 2011-08-24 13:30:24.734646739 +0200
87a755f4 18712@@ -0,0 +1,189 @@
1facf9fc 18713+/*
027c5e7a 18714+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18715+ *
18716+ * This program, aufs is free software; you can redistribute it and/or modify
18717+ * it under the terms of the GNU General Public License as published by
18718+ * the Free Software Foundation; either version 2 of the License, or
18719+ * (at your option) any later version.
dece6358
AM
18720+ *
18721+ * This program is distributed in the hope that it will be useful,
18722+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18723+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18724+ * GNU General Public License for more details.
18725+ *
18726+ * You should have received a copy of the GNU General Public License
18727+ * along with this program; if not, write to the Free Software
18728+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18729+ */
18730+
18731+/*
18732+ * module global variables and operations
18733+ */
18734+
18735+#include <linux/module.h>
18736+#include <linux/seq_file.h>
18737+#include "aufs.h"
18738+
18739+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
18740+{
18741+ if (new_sz <= nused)
18742+ return p;
18743+
18744+ p = krealloc(p, new_sz, gfp);
18745+ if (p)
18746+ memset(p + nused, 0, new_sz - nused);
18747+ return p;
18748+}
18749+
18750+/* ---------------------------------------------------------------------- */
18751+
18752+/*
18753+ * aufs caches
18754+ */
18755+struct kmem_cache *au_cachep[AuCache_Last];
18756+static int __init au_cache_init(void)
18757+{
4a4d8108 18758+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 18759+ if (au_cachep[AuCache_DINFO])
027c5e7a 18760+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
18761+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
18762+ au_icntnr_init_once);
1facf9fc 18763+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
18764+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
18765+ au_fi_init_once);
1facf9fc 18766+ if (au_cachep[AuCache_FINFO])
18767+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
18768+ if (au_cachep[AuCache_VDIR])
18769+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
18770+ if (au_cachep[AuCache_DEHSTR])
18771+ return 0;
18772+
18773+ return -ENOMEM;
18774+}
18775+
18776+static void au_cache_fin(void)
18777+{
18778+ int i;
4a4d8108
AM
18779+
18780+ /* including AuCache_HNOTIFY */
1facf9fc 18781+ for (i = 0; i < AuCache_Last; i++)
18782+ if (au_cachep[i]) {
18783+ kmem_cache_destroy(au_cachep[i]);
18784+ au_cachep[i] = NULL;
18785+ }
18786+}
18787+
18788+/* ---------------------------------------------------------------------- */
18789+
18790+int au_dir_roflags;
18791+
e49829fe
JR
18792+#ifdef CONFIG_AUFS_SBILIST
18793+struct au_splhead au_sbilist;
18794+#endif
18795+
1facf9fc 18796+/*
18797+ * functions for module interface.
18798+ */
18799+MODULE_LICENSE("GPL");
18800+/* MODULE_LICENSE("GPL v2"); */
dece6358 18801+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 18802+MODULE_DESCRIPTION(AUFS_NAME
18803+ " -- Advanced multi layered unification filesystem");
18804+MODULE_VERSION(AUFS_VERSION);
18805+
1facf9fc 18806+/* this module parameter has no meaning when SYSFS is disabled */
18807+int sysaufs_brs = 1;
18808+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
18809+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
18810+
18811+/* ---------------------------------------------------------------------- */
18812+
18813+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
18814+
18815+int au_seq_path(struct seq_file *seq, struct path *path)
18816+{
18817+ return seq_path(seq, path, au_esc_chars);
18818+}
18819+
18820+/* ---------------------------------------------------------------------- */
18821+
18822+static int __init aufs_init(void)
18823+{
18824+ int err, i;
18825+ char *p;
18826+
18827+ p = au_esc_chars;
18828+ for (i = 1; i <= ' '; i++)
18829+ *p++ = i;
18830+ *p++ = '\\';
18831+ *p++ = '\x7f';
18832+ *p = 0;
18833+
18834+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
18835+
e49829fe 18836+ au_sbilist_init();
1facf9fc 18837+ sysaufs_brs_init();
18838+ au_debug_init();
4a4d8108 18839+ au_dy_init();
1facf9fc 18840+ err = sysaufs_init();
18841+ if (unlikely(err))
18842+ goto out;
e49829fe 18843+ err = au_procfs_init();
4f0767ce 18844+ if (unlikely(err))
953406b4 18845+ goto out_sysaufs;
e49829fe
JR
18846+ err = au_wkq_init();
18847+ if (unlikely(err))
18848+ goto out_procfs;
87a755f4 18849+ err = au_loopback_init();
1facf9fc 18850+ if (unlikely(err))
18851+ goto out_wkq;
87a755f4
AM
18852+ err = au_hnotify_init();
18853+ if (unlikely(err))
18854+ goto out_loopback;
1facf9fc 18855+ err = au_sysrq_init();
18856+ if (unlikely(err))
18857+ goto out_hin;
18858+ err = au_cache_init();
18859+ if (unlikely(err))
18860+ goto out_sysrq;
18861+ err = register_filesystem(&aufs_fs_type);
18862+ if (unlikely(err))
18863+ goto out_cache;
4a4d8108
AM
18864+ /* since we define pr_fmt, call printk directly */
18865+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 18866+ goto out; /* success */
18867+
4f0767ce 18868+out_cache:
1facf9fc 18869+ au_cache_fin();
4f0767ce 18870+out_sysrq:
1facf9fc 18871+ au_sysrq_fin();
4f0767ce 18872+out_hin:
4a4d8108 18873+ au_hnotify_fin();
87a755f4
AM
18874+out_loopback:
18875+ au_loopback_fin();
4f0767ce 18876+out_wkq:
1facf9fc 18877+ au_wkq_fin();
e49829fe
JR
18878+out_procfs:
18879+ au_procfs_fin();
4f0767ce 18880+out_sysaufs:
1facf9fc 18881+ sysaufs_fin();
4a4d8108 18882+ au_dy_fin();
4f0767ce 18883+out:
1facf9fc 18884+ return err;
18885+}
18886+
18887+static void __exit aufs_exit(void)
18888+{
18889+ unregister_filesystem(&aufs_fs_type);
18890+ au_cache_fin();
18891+ au_sysrq_fin();
4a4d8108 18892+ au_hnotify_fin();
87a755f4 18893+ au_loopback_fin();
1facf9fc 18894+ au_wkq_fin();
e49829fe 18895+ au_procfs_fin();
1facf9fc 18896+ sysaufs_fin();
4a4d8108 18897+ au_dy_fin();
1facf9fc 18898+}
18899+
18900+module_init(aufs_init);
18901+module_exit(aufs_exit);
7f207e10
AM
18902diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
18903--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18904+++ linux/fs/aufs/module.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 18905@@ -0,0 +1,91 @@
1facf9fc 18906+/*
027c5e7a 18907+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18908+ *
18909+ * This program, aufs is free software; you can redistribute it and/or modify
18910+ * it under the terms of the GNU General Public License as published by
18911+ * the Free Software Foundation; either version 2 of the License, or
18912+ * (at your option) any later version.
dece6358
AM
18913+ *
18914+ * This program is distributed in the hope that it will be useful,
18915+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18916+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18917+ * GNU General Public License for more details.
18918+ *
18919+ * You should have received a copy of the GNU General Public License
18920+ * along with this program; if not, write to the Free Software
18921+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18922+ */
18923+
18924+/*
18925+ * module initialization and module-global
18926+ */
18927+
18928+#ifndef __AUFS_MODULE_H__
18929+#define __AUFS_MODULE_H__
18930+
18931+#ifdef __KERNEL__
18932+
18933+#include <linux/slab.h>
18934+
dece6358
AM
18935+struct path;
18936+struct seq_file;
18937+
1facf9fc 18938+/* module parameters */
1facf9fc 18939+extern int sysaufs_brs;
18940+
18941+/* ---------------------------------------------------------------------- */
18942+
18943+extern int au_dir_roflags;
18944+
18945+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
18946+int au_seq_path(struct seq_file *seq, struct path *path);
18947+
e49829fe
JR
18948+#ifdef CONFIG_PROC_FS
18949+/* procfs.c */
18950+int __init au_procfs_init(void);
18951+void au_procfs_fin(void);
18952+#else
18953+AuStubInt0(au_procfs_init, void);
18954+AuStubVoid(au_procfs_fin, void);
18955+#endif
18956+
4f0767ce
JR
18957+/* ---------------------------------------------------------------------- */
18958+
18959+/* kmem cache */
1facf9fc 18960+enum {
18961+ AuCache_DINFO,
18962+ AuCache_ICNTNR,
18963+ AuCache_FINFO,
18964+ AuCache_VDIR,
18965+ AuCache_DEHSTR,
4a4d8108
AM
18966+#ifdef CONFIG_AUFS_HNOTIFY
18967+ AuCache_HNOTIFY,
1facf9fc 18968+#endif
18969+ AuCache_Last
18970+};
18971+
4a4d8108
AM
18972+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
18973+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
18974+#define AuCacheCtor(type, ctor) \
18975+ kmem_cache_create(#type, sizeof(struct type), \
18976+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 18977+
18978+extern struct kmem_cache *au_cachep[];
18979+
18980+#define AuCacheFuncs(name, index) \
4a4d8108 18981+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 18982+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 18983+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 18984+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
18985+
18986+AuCacheFuncs(dinfo, DINFO);
18987+AuCacheFuncs(icntnr, ICNTNR);
18988+AuCacheFuncs(finfo, FINFO);
18989+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
18990+AuCacheFuncs(vdir_dehstr, DEHSTR);
18991+#ifdef CONFIG_AUFS_HNOTIFY
18992+AuCacheFuncs(hnotify, HNOTIFY);
18993+#endif
1facf9fc 18994+
4a4d8108
AM
18995+#endif /* __KERNEL__ */
18996+#endif /* __AUFS_MODULE_H__ */
7f207e10
AM
18997diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
18998--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18999+++ linux/fs/aufs/opts.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 19000@@ -0,0 +1,1595 @@
1facf9fc 19001+/*
027c5e7a 19002+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 19003+ *
19004+ * This program, aufs is free software; you can redistribute it and/or modify
19005+ * it under the terms of the GNU General Public License as published by
19006+ * the Free Software Foundation; either version 2 of the License, or
19007+ * (at your option) any later version.
dece6358
AM
19008+ *
19009+ * This program is distributed in the hope that it will be useful,
19010+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19011+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19012+ * GNU General Public License for more details.
19013+ *
19014+ * You should have received a copy of the GNU General Public License
19015+ * along with this program; if not, write to the Free Software
19016+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19017+ */
19018+
19019+/*
19020+ * mount options/flags
19021+ */
19022+
dece6358 19023+#include <linux/file.h>
e49829fe 19024+#include <linux/jiffies.h>
dece6358 19025+#include <linux/namei.h>
1facf9fc 19026+#include <linux/types.h> /* a distribution requires */
19027+#include <linux/parser.h>
19028+#include "aufs.h"
19029+
19030+/* ---------------------------------------------------------------------- */
19031+
/*
 * Token values for the aufs mount options.
 * Opt_tail marks the end of a parsed au_opt array, Opt_err is the
 * terminating token of the "options" match table.
 */
enum {
	/* branch management */
	Opt_br,
	Opt_add,
	Opt_del,
	Opt_mod,
	Opt_reorder,
	Opt_append,
	Opt_prepend,
	Opt_idel,
	Opt_imod,
	Opt_ireorder,

	/* readdir and whiteout tuning */
	Opt_dirwh,
	Opt_rdcache,
	Opt_rdblk,
	Opt_rdhash,
	Opt_rendir,
	Opt_rdblk_def,
	Opt_rdhash_def,

	/* xino */
	Opt_xino,
	Opt_zxino,
	Opt_noxino,
	Opt_trunc_xino,
	Opt_trunc_xino_v,
	Opt_notrunc_xino,
	Opt_trunc_xino_path,
	Opt_itrunc_xino,
	Opt_trunc_xib,
	Opt_notrunc_xib,

	Opt_shwh,
	Opt_noshwh,

	/* pseudo-link */
	Opt_plink,
	Opt_noplink,
	Opt_list_plink,

	Opt_udba,

	Opt_dio,
	Opt_nodio,

	/* Opt_lock, Opt_unlock, */
	Opt_cmd,
	Opt_cmd_args,

	Opt_diropq_a,
	Opt_diropq_w,

	Opt_warn_perm,
	Opt_nowarn_perm,

	/* writable branch policies */
	Opt_wbr_copyup,
	Opt_wbr_create,

	Opt_refrof,
	Opt_norefrof,

	Opt_verbose,
	Opt_noverbose,

	Opt_sum,
	Opt_nosum,
	Opt_wsum,

	/* parser internal */
	Opt_tail,
	Opt_ignore,
	Opt_ignore_silent,
	Opt_err
};
19056+
19057+static match_table_t options = {
19058+ {Opt_br, "br=%s"},
19059+ {Opt_br, "br:%s"},
19060+
19061+ {Opt_add, "add=%d:%s"},
19062+ {Opt_add, "add:%d:%s"},
19063+ {Opt_add, "ins=%d:%s"},
19064+ {Opt_add, "ins:%d:%s"},
19065+ {Opt_append, "append=%s"},
19066+ {Opt_append, "append:%s"},
19067+ {Opt_prepend, "prepend=%s"},
19068+ {Opt_prepend, "prepend:%s"},
19069+
19070+ {Opt_del, "del=%s"},
19071+ {Opt_del, "del:%s"},
19072+ /* {Opt_idel, "idel:%d"}, */
19073+ {Opt_mod, "mod=%s"},
19074+ {Opt_mod, "mod:%s"},
19075+ /* {Opt_imod, "imod:%d:%s"}, */
19076+
19077+ {Opt_dirwh, "dirwh=%d"},
19078+
19079+ {Opt_xino, "xino=%s"},
19080+ {Opt_noxino, "noxino"},
19081+ {Opt_trunc_xino, "trunc_xino"},
19082+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
19083+ {Opt_notrunc_xino, "notrunc_xino"},
19084+ {Opt_trunc_xino_path, "trunc_xino=%s"},
19085+ {Opt_itrunc_xino, "itrunc_xino=%d"},
19086+ /* {Opt_zxino, "zxino=%s"}, */
19087+ {Opt_trunc_xib, "trunc_xib"},
19088+ {Opt_notrunc_xib, "notrunc_xib"},
19089+
e49829fe 19090+#ifdef CONFIG_PROC_FS
1facf9fc 19091+ {Opt_plink, "plink"},
e49829fe
JR
19092+#else
19093+ {Opt_ignore_silent, "plink"},
19094+#endif
19095+
1facf9fc 19096+ {Opt_noplink, "noplink"},
e49829fe 19097+
1facf9fc 19098+#ifdef CONFIG_AUFS_DEBUG
19099+ {Opt_list_plink, "list_plink"},
19100+#endif
19101+
19102+ {Opt_udba, "udba=%s"},
19103+
4a4d8108
AM
19104+ {Opt_dio, "dio"},
19105+ {Opt_nodio, "nodio"},
19106+
1facf9fc 19107+ {Opt_diropq_a, "diropq=always"},
19108+ {Opt_diropq_a, "diropq=a"},
19109+ {Opt_diropq_w, "diropq=whiteouted"},
19110+ {Opt_diropq_w, "diropq=w"},
19111+
19112+ {Opt_warn_perm, "warn_perm"},
19113+ {Opt_nowarn_perm, "nowarn_perm"},
19114+
19115+ /* keep them temporary */
19116+ {Opt_ignore_silent, "coo=%s"},
19117+ {Opt_ignore_silent, "nodlgt"},
19118+ {Opt_ignore_silent, "nodirperm1"},
1facf9fc 19119+ {Opt_ignore_silent, "clean_plink"},
19120+
dece6358
AM
19121+#ifdef CONFIG_AUFS_SHWH
19122+ {Opt_shwh, "shwh"},
19123+#endif
19124+ {Opt_noshwh, "noshwh"},
19125+
1facf9fc 19126+ {Opt_rendir, "rendir=%d"},
19127+
19128+ {Opt_refrof, "refrof"},
19129+ {Opt_norefrof, "norefrof"},
19130+
19131+ {Opt_verbose, "verbose"},
19132+ {Opt_verbose, "v"},
19133+ {Opt_noverbose, "noverbose"},
19134+ {Opt_noverbose, "quiet"},
19135+ {Opt_noverbose, "q"},
19136+ {Opt_noverbose, "silent"},
19137+
19138+ {Opt_sum, "sum"},
19139+ {Opt_nosum, "nosum"},
19140+ {Opt_wsum, "wsum"},
19141+
19142+ {Opt_rdcache, "rdcache=%d"},
19143+ {Opt_rdblk, "rdblk=%d"},
dece6358 19144+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 19145+ {Opt_rdhash, "rdhash=%d"},
dece6358 19146+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 19147+
19148+ {Opt_wbr_create, "create=%s"},
19149+ {Opt_wbr_create, "create_policy=%s"},
19150+ {Opt_wbr_copyup, "cpup=%s"},
19151+ {Opt_wbr_copyup, "copyup=%s"},
19152+ {Opt_wbr_copyup, "copyup_policy=%s"},
19153+
19154+ /* internal use for the scripts */
19155+ {Opt_ignore_silent, "si=%s"},
19156+
19157+ {Opt_br, "dirs=%s"},
19158+ {Opt_ignore, "debug=%d"},
19159+ {Opt_ignore, "delete=whiteout"},
19160+ {Opt_ignore, "delete=all"},
19161+ {Opt_ignore, "imap=%s"},
19162+
1308ab2a 19163+ /* temporary workaround, due to old mount(8)? */
19164+ {Opt_ignore_silent, "relatime"},
19165+
1facf9fc 19166+ {Opt_err, NULL}
19167+};
19168+
19169+/* ---------------------------------------------------------------------- */
19170+
19171+static const char *au_parser_pattern(int val, struct match_token *token)
19172+{
19173+ while (token->pattern) {
19174+ if (token->token == val)
19175+ return token->pattern;
19176+ token++;
19177+ }
19178+ BUG();
19179+ return "??";
19180+}
19181+
19182+/* ---------------------------------------------------------------------- */
19183+
19184+static match_table_t brperms = {
19185+ {AuBrPerm_RO, AUFS_BRPERM_RO},
19186+ {AuBrPerm_RR, AUFS_BRPERM_RR},
19187+ {AuBrPerm_RW, AUFS_BRPERM_RW},
19188+
19189+ {AuBrPerm_ROWH, AUFS_BRPERM_ROWH},
19190+ {AuBrPerm_RRWH, AUFS_BRPERM_RRWH},
19191+ {AuBrPerm_RWNoLinkWH, AUFS_BRPERM_RWNLWH},
19192+
19193+ {AuBrPerm_ROWH, "nfsro"},
19194+ {AuBrPerm_RO, NULL}
19195+};
19196+
4a4d8108 19197+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 19198+{
19199+ int val;
19200+ substring_t args[MAX_OPT_ARGS];
19201+
19202+ val = match_token(perm, brperms, args);
19203+ return val;
19204+}
19205+
19206+const char *au_optstr_br_perm(int brperm)
19207+{
19208+ return au_parser_pattern(brperm, (void *)brperms);
19209+}
19210+
19211+/* ---------------------------------------------------------------------- */
19212+
19213+static match_table_t udbalevel = {
19214+ {AuOpt_UDBA_REVAL, "reval"},
19215+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
19216+#ifdef CONFIG_AUFS_HNOTIFY
19217+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
19218+#ifdef CONFIG_AUFS_HFSNOTIFY
19219+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 19220+#endif
1facf9fc 19221+#endif
19222+ {-1, NULL}
19223+};
19224+
4a4d8108 19225+static int noinline_for_stack udba_val(char *str)
1facf9fc 19226+{
19227+ substring_t args[MAX_OPT_ARGS];
19228+
7f207e10 19229+ return match_token(str, udbalevel, args);
1facf9fc 19230+}
19231+
19232+const char *au_optstr_udba(int udba)
19233+{
19234+ return au_parser_pattern(udba, (void *)udbalevel);
19235+}
19236+
19237+/* ---------------------------------------------------------------------- */
19238+
19239+static match_table_t au_wbr_create_policy = {
19240+ {AuWbrCreate_TDP, "tdp"},
19241+ {AuWbrCreate_TDP, "top-down-parent"},
19242+ {AuWbrCreate_RR, "rr"},
19243+ {AuWbrCreate_RR, "round-robin"},
19244+ {AuWbrCreate_MFS, "mfs"},
19245+ {AuWbrCreate_MFS, "most-free-space"},
19246+ {AuWbrCreate_MFSV, "mfs:%d"},
19247+ {AuWbrCreate_MFSV, "most-free-space:%d"},
19248+
19249+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
19250+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
19251+ {AuWbrCreate_PMFS, "pmfs"},
19252+ {AuWbrCreate_PMFSV, "pmfs:%d"},
19253+
19254+ {-1, NULL}
19255+};
19256+
dece6358
AM
19257+/*
19258+ * cf. linux/lib/parser.c and cmdline.c
19259+ * gave up calling memparse() since it uses simple_strtoull() instead of
19260+ * strict_...().
19261+ */
4a4d8108
AM
19262+static int noinline_for_stack
19263+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 19264+{
19265+ int err;
19266+ unsigned int len;
19267+ char a[32];
19268+
19269+ err = -ERANGE;
19270+ len = s->to - s->from;
19271+ if (len + 1 <= sizeof(a)) {
19272+ memcpy(a, s->from, len);
19273+ a[len] = '\0';
19274+ err = strict_strtoull(a, 0, result);
19275+ }
19276+ return err;
19277+}
19278+
19279+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
19280+ struct au_opt_wbr_create *create)
19281+{
19282+ int err;
19283+ unsigned long long ull;
19284+
19285+ err = 0;
19286+ if (!au_match_ull(arg, &ull))
19287+ create->mfsrr_watermark = ull;
19288+ else {
4a4d8108 19289+ pr_err("bad integer in %s\n", str);
1facf9fc 19290+ err = -EINVAL;
19291+ }
19292+
19293+ return err;
19294+}
19295+
19296+static int au_wbr_mfs_sec(substring_t *arg, char *str,
19297+ struct au_opt_wbr_create *create)
19298+{
19299+ int n, err;
19300+
19301+ err = 0;
027c5e7a 19302+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 19303+ create->mfs_second = n;
19304+ else {
4a4d8108 19305+ pr_err("bad integer in %s\n", str);
1facf9fc 19306+ err = -EINVAL;
19307+ }
19308+
19309+ return err;
19310+}
19311+
4a4d8108
AM
19312+static int noinline_for_stack
19313+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 19314+{
19315+ int err, e;
19316+ substring_t args[MAX_OPT_ARGS];
19317+
19318+ err = match_token(str, au_wbr_create_policy, args);
19319+ create->wbr_create = err;
19320+ switch (err) {
19321+ case AuWbrCreate_MFSRRV:
19322+ e = au_wbr_mfs_wmark(&args[0], str, create);
19323+ if (!e)
19324+ e = au_wbr_mfs_sec(&args[1], str, create);
19325+ if (unlikely(e))
19326+ err = e;
19327+ break;
19328+ case AuWbrCreate_MFSRR:
19329+ e = au_wbr_mfs_wmark(&args[0], str, create);
19330+ if (unlikely(e)) {
19331+ err = e;
19332+ break;
19333+ }
19334+ /*FALLTHROUGH*/
19335+ case AuWbrCreate_MFS:
19336+ case AuWbrCreate_PMFS:
027c5e7a 19337+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 19338+ break;
19339+ case AuWbrCreate_MFSV:
19340+ case AuWbrCreate_PMFSV:
19341+ e = au_wbr_mfs_sec(&args[0], str, create);
19342+ if (unlikely(e))
19343+ err = e;
19344+ break;
19345+ }
19346+
19347+ return err;
19348+}
19349+
19350+const char *au_optstr_wbr_create(int wbr_create)
19351+{
19352+ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
19353+}
19354+
19355+static match_table_t au_wbr_copyup_policy = {
19356+ {AuWbrCopyup_TDP, "tdp"},
19357+ {AuWbrCopyup_TDP, "top-down-parent"},
19358+ {AuWbrCopyup_BUP, "bup"},
19359+ {AuWbrCopyup_BUP, "bottom-up-parent"},
19360+ {AuWbrCopyup_BU, "bu"},
19361+ {AuWbrCopyup_BU, "bottom-up"},
19362+ {-1, NULL}
19363+};
19364+
4a4d8108 19365+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 19366+{
19367+ substring_t args[MAX_OPT_ARGS];
19368+
19369+ return match_token(str, au_wbr_copyup_policy, args);
19370+}
19371+
19372+const char *au_optstr_wbr_copyup(int wbr_copyup)
19373+{
19374+ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
19375+}
19376+
19377+/* ---------------------------------------------------------------------- */
19378+
19379+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
19380+
/*
 * Print every parsed option in @opts via the aufs debug macros.
 * Compiled to an empty function unless CONFIG_AUFS_DEBUG is enabled.
 * An option type which is not handled here is a BUG().
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;

	/* the array is terminated by an Opt_tail entry */
	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch management */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* numeric tunables */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino and xib */
		case Opt_xino:
			AuDbg("xino {%s %.*s}\n",
			      opt->xino.path,
			      AuDLNPair(opt->xino.file->f_dentry));
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* simple flags */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* writable branch policies */
		case Opt_wbr_create:
			AuDbg("create %d, %s\n", opt->wbr_create.wbr_create,
			      au_optstr_wbr_create(opt->wbr_create.wbr_create));
			/* print the arguments of the policy too, if any */
			switch (opt->wbr_create.wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", opt->wbr_create.mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      opt->wbr_create.mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      opt->wbr_create.mfsrr_watermark,
				      opt->wbr_create.mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		default:
			BUG();
		}
	}
#endif
}
19562+
19563+void au_opts_free(struct au_opts *opts)
19564+{
19565+ struct au_opt *opt;
19566+
19567+ opt = opts->opt;
19568+ while (opt->type != Opt_tail) {
19569+ switch (opt->type) {
19570+ case Opt_add:
19571+ case Opt_append:
19572+ case Opt_prepend:
19573+ path_put(&opt->add.path);
19574+ break;
19575+ case Opt_del:
19576+ case Opt_idel:
19577+ path_put(&opt->del.h_path);
19578+ break;
19579+ case Opt_mod:
19580+ case Opt_imod:
19581+ dput(opt->mod.h_root);
19582+ break;
19583+ case Opt_xino:
19584+ fput(opt->xino.file);
19585+ break;
19586+ }
19587+ opt++;
19588+ }
19589+}
19590+
19591+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
19592+ aufs_bindex_t bindex)
19593+{
19594+ int err;
19595+ struct au_opt_add *add = &opt->add;
19596+ char *p;
19597+
19598+ add->bindex = bindex;
19599+ add->perm = AuBrPerm_Last;
19600+ add->pathname = opt_str;
19601+ p = strchr(opt_str, '=');
19602+ if (p) {
19603+ *p++ = 0;
19604+ if (*p)
19605+ add->perm = br_perm_val(p);
19606+ }
19607+
19608+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
19609+ if (!err) {
19610+ if (!p) {
19611+ add->perm = AuBrPerm_RO;
19612+ if (au_test_fs_rr(add->path.dentry->d_sb))
19613+ add->perm = AuBrPerm_RR;
19614+ else if (!bindex && !(sb_flags & MS_RDONLY))
19615+ add->perm = AuBrPerm_RW;
19616+ }
19617+ opt->type = Opt_add;
19618+ goto out;
19619+ }
4a4d8108 19620+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 19621+ err = -EINVAL;
19622+
4f0767ce 19623+out:
1facf9fc 19624+ return err;
19625+}
19626+
19627+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
19628+{
19629+ int err;
19630+
19631+ del->pathname = args[0].from;
19632+ AuDbg("del path %s\n", del->pathname);
19633+
19634+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
19635+ if (unlikely(err))
4a4d8108 19636+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 19637+
19638+ return err;
19639+}
19640+
#if 0 /* reserved for future use */
/*
 * Parse "idel:N": take the references of the branch at index @bindex.
 * Returns zero, or -EINVAL when @bindex is out of bounds.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	} else {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19665+
4a4d8108
AM
19666+static int noinline_for_stack
19667+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 19668+{
19669+ int err;
19670+ struct path path;
19671+ char *p;
19672+
19673+ err = -EINVAL;
19674+ mod->path = args[0].from;
19675+ p = strchr(mod->path, '=');
19676+ if (unlikely(!p)) {
4a4d8108 19677+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 19678+ goto out;
19679+ }
19680+
19681+ *p++ = 0;
19682+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
19683+ if (unlikely(err)) {
4a4d8108 19684+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 19685+ goto out;
19686+ }
19687+
19688+ mod->perm = br_perm_val(p);
19689+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
19690+ mod->h_root = dget(path.dentry);
19691+ path_put(&path);
19692+
4f0767ce 19693+out:
1facf9fc 19694+ return err;
19695+}
19696+
#if 0 /* reserved for future use */
/*
 * Parse "imod:N:perm": modify the permission of the branch at index
 * @bindex. Returns zero, or -EINVAL when @bindex is out of bounds.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	} else {
		err = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19723+
19724+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
19725+ substring_t args[])
19726+{
19727+ int err;
19728+ struct file *file;
19729+
19730+ file = au_xino_create(sb, args[0].from, /*silent*/0);
19731+ err = PTR_ERR(file);
19732+ if (IS_ERR(file))
19733+ goto out;
19734+
19735+ err = -EINVAL;
19736+ if (unlikely(file->f_dentry->d_sb == sb)) {
19737+ fput(file);
4a4d8108 19738+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 19739+ goto out;
19740+ }
19741+
19742+ err = 0;
19743+ xino->file = file;
19744+ xino->path = args[0].from;
19745+
4f0767ce 19746+out:
1facf9fc 19747+ return err;
19748+}
19749+
4a4d8108
AM
19750+static int noinline_for_stack
19751+au_opts_parse_xino_itrunc_path(struct super_block *sb,
19752+ struct au_opt_xino_itrunc *xino_itrunc,
19753+ substring_t args[])
1facf9fc 19754+{
19755+ int err;
19756+ aufs_bindex_t bend, bindex;
19757+ struct path path;
19758+ struct dentry *root;
19759+
19760+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
19761+ if (unlikely(err)) {
4a4d8108 19762+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 19763+ goto out;
19764+ }
19765+
19766+ xino_itrunc->bindex = -1;
19767+ root = sb->s_root;
19768+ aufs_read_lock(root, AuLock_FLUSH);
19769+ bend = au_sbend(sb);
19770+ for (bindex = 0; bindex <= bend; bindex++) {
19771+ if (au_h_dptr(root, bindex) == path.dentry) {
19772+ xino_itrunc->bindex = bindex;
19773+ break;
19774+ }
19775+ }
19776+ aufs_read_unlock(root, !AuLock_IR);
19777+ path_put(&path);
19778+
19779+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 19780+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 19781+ err = -EINVAL;
19782+ }
19783+
4f0767ce 19784+out:
1facf9fc 19785+ return err;
19786+}
19787+
19788+/* called without aufs lock */
19789+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
19790+{
19791+ int err, n, token;
19792+ aufs_bindex_t bindex;
19793+ unsigned char skipped;
19794+ struct dentry *root;
19795+ struct au_opt *opt, *opt_tail;
19796+ char *opt_str;
19797+ /* reduce the stack space */
19798+ union {
19799+ struct au_opt_xino_itrunc *xino_itrunc;
19800+ struct au_opt_wbr_create *create;
19801+ } u;
19802+ struct {
19803+ substring_t args[MAX_OPT_ARGS];
19804+ } *a;
19805+
19806+ err = -ENOMEM;
19807+ a = kmalloc(sizeof(*a), GFP_NOFS);
19808+ if (unlikely(!a))
19809+ goto out;
19810+
19811+ root = sb->s_root;
19812+ err = 0;
19813+ bindex = 0;
19814+ opt = opts->opt;
19815+ opt_tail = opt + opts->max_opt - 1;
19816+ opt->type = Opt_tail;
19817+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
19818+ err = -EINVAL;
19819+ skipped = 0;
19820+ token = match_token(opt_str, options, a->args);
19821+ switch (token) {
19822+ case Opt_br:
19823+ err = 0;
19824+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
19825+ && *opt_str) {
19826+ err = opt_add(opt, opt_str, opts->sb_flags,
19827+ bindex++);
19828+ if (unlikely(!err && ++opt > opt_tail)) {
19829+ err = -E2BIG;
19830+ break;
19831+ }
19832+ opt->type = Opt_tail;
19833+ skipped = 1;
19834+ }
19835+ break;
19836+ case Opt_add:
19837+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19838+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19839+ break;
19840+ }
19841+ bindex = n;
19842+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
19843+ bindex);
19844+ if (!err)
19845+ opt->type = token;
19846+ break;
19847+ case Opt_append:
19848+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
19849+ /*dummy bindex*/1);
19850+ if (!err)
19851+ opt->type = token;
19852+ break;
19853+ case Opt_prepend:
19854+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
19855+ /*bindex*/0);
19856+ if (!err)
19857+ opt->type = token;
19858+ break;
19859+ case Opt_del:
19860+ err = au_opts_parse_del(&opt->del, a->args);
19861+ if (!err)
19862+ opt->type = token;
19863+ break;
19864+#if 0 /* reserved for future use */
19865+ case Opt_idel:
19866+ del->pathname = "(indexed)";
19867+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 19868+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19869+ break;
19870+ }
19871+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
19872+ if (!err)
19873+ opt->type = token;
19874+ break;
19875+#endif
19876+ case Opt_mod:
19877+ err = au_opts_parse_mod(&opt->mod, a->args);
19878+ if (!err)
19879+ opt->type = token;
19880+ break;
19881+#ifdef IMOD /* reserved for future use */
19882+ case Opt_imod:
19883+ u.mod->path = "(indexed)";
19884+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19885+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19886+ break;
19887+ }
19888+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
19889+ if (!err)
19890+ opt->type = token;
19891+ break;
19892+#endif
19893+ case Opt_xino:
19894+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
19895+ if (!err)
19896+ opt->type = token;
19897+ break;
19898+
19899+ case Opt_trunc_xino_path:
19900+ err = au_opts_parse_xino_itrunc_path
19901+ (sb, &opt->xino_itrunc, a->args);
19902+ if (!err)
19903+ opt->type = token;
19904+ break;
19905+
19906+ case Opt_itrunc_xino:
19907+ u.xino_itrunc = &opt->xino_itrunc;
19908+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19909+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19910+ break;
19911+ }
19912+ u.xino_itrunc->bindex = n;
19913+ aufs_read_lock(root, AuLock_FLUSH);
19914+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 19915+ pr_err("out of bounds, %d\n", n);
1facf9fc 19916+ aufs_read_unlock(root, !AuLock_IR);
19917+ break;
19918+ }
19919+ aufs_read_unlock(root, !AuLock_IR);
19920+ err = 0;
19921+ opt->type = token;
19922+ break;
19923+
19924+ case Opt_dirwh:
19925+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
19926+ break;
19927+ err = 0;
19928+ opt->type = token;
19929+ break;
19930+
19931+ case Opt_rdcache:
027c5e7a
AM
19932+ if (unlikely(match_int(&a->args[0], &n))) {
19933+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19934+ break;
027c5e7a
AM
19935+ }
19936+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
19937+ pr_err("rdcache must be smaller than %d\n",
19938+ AUFS_RDCACHE_MAX);
19939+ break;
19940+ }
19941+ opt->rdcache = n;
1facf9fc 19942+ err = 0;
19943+ opt->type = token;
19944+ break;
19945+ case Opt_rdblk:
19946+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 19947+ || n < 0
1facf9fc 19948+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 19949+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19950+ break;
19951+ }
1308ab2a 19952+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
19953+ pr_err("rdblk must be larger than %d\n",
19954+ NAME_MAX);
1facf9fc 19955+ break;
19956+ }
19957+ opt->rdblk = n;
19958+ err = 0;
19959+ opt->type = token;
19960+ break;
19961+ case Opt_rdhash:
19962+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 19963+ || n < 0
1facf9fc 19964+ || n * sizeof(struct hlist_head)
19965+ > KMALLOC_MAX_SIZE)) {
4a4d8108 19966+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19967+ break;
19968+ }
19969+ opt->rdhash = n;
19970+ err = 0;
19971+ opt->type = token;
19972+ break;
19973+
19974+ case Opt_trunc_xino:
19975+ case Opt_notrunc_xino:
19976+ case Opt_noxino:
19977+ case Opt_trunc_xib:
19978+ case Opt_notrunc_xib:
dece6358
AM
19979+ case Opt_shwh:
19980+ case Opt_noshwh:
1facf9fc 19981+ case Opt_plink:
19982+ case Opt_noplink:
19983+ case Opt_list_plink:
4a4d8108
AM
19984+ case Opt_dio:
19985+ case Opt_nodio:
1facf9fc 19986+ case Opt_diropq_a:
19987+ case Opt_diropq_w:
19988+ case Opt_warn_perm:
19989+ case Opt_nowarn_perm:
19990+ case Opt_refrof:
19991+ case Opt_norefrof:
19992+ case Opt_verbose:
19993+ case Opt_noverbose:
19994+ case Opt_sum:
19995+ case Opt_nosum:
19996+ case Opt_wsum:
dece6358
AM
19997+ case Opt_rdblk_def:
19998+ case Opt_rdhash_def:
1facf9fc 19999+ err = 0;
20000+ opt->type = token;
20001+ break;
20002+
20003+ case Opt_udba:
20004+ opt->udba = udba_val(a->args[0].from);
20005+ if (opt->udba >= 0) {
20006+ err = 0;
20007+ opt->type = token;
20008+ } else
4a4d8108 20009+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20010+ break;
20011+
20012+ case Opt_wbr_create:
20013+ u.create = &opt->wbr_create;
20014+ u.create->wbr_create
20015+ = au_wbr_create_val(a->args[0].from, u.create);
20016+ if (u.create->wbr_create >= 0) {
20017+ err = 0;
20018+ opt->type = token;
20019+ } else
4a4d8108 20020+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20021+ break;
20022+ case Opt_wbr_copyup:
20023+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
20024+ if (opt->wbr_copyup >= 0) {
20025+ err = 0;
20026+ opt->type = token;
20027+ } else
4a4d8108 20028+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20029+ break;
20030+
20031+ case Opt_ignore:
4a4d8108 20032+ pr_warning("ignored %s\n", opt_str);
1facf9fc 20033+ /*FALLTHROUGH*/
20034+ case Opt_ignore_silent:
20035+ skipped = 1;
20036+ err = 0;
20037+ break;
20038+ case Opt_err:
4a4d8108 20039+ pr_err("unknown option %s\n", opt_str);
1facf9fc 20040+ break;
20041+ }
20042+
20043+ if (!err && !skipped) {
20044+ if (unlikely(++opt > opt_tail)) {
20045+ err = -E2BIG;
20046+ opt--;
20047+ opt->type = Opt_tail;
20048+ break;
20049+ }
20050+ opt->type = Opt_tail;
20051+ }
20052+ }
20053+
20054+ kfree(a);
20055+ dump_opts(opts);
20056+ if (unlikely(err))
20057+ au_opts_free(opts);
20058+
4f0767ce 20059+out:
1facf9fc 20060+ return err;
20061+}
20062+
20063+static int au_opt_wbr_create(struct super_block *sb,
20064+ struct au_opt_wbr_create *create)
20065+{
20066+ int err;
20067+ struct au_sbinfo *sbinfo;
20068+
dece6358
AM
20069+ SiMustWriteLock(sb);
20070+
1facf9fc 20071+ err = 1; /* handled */
20072+ sbinfo = au_sbi(sb);
20073+ if (sbinfo->si_wbr_create_ops->fin) {
20074+ err = sbinfo->si_wbr_create_ops->fin(sb);
20075+ if (!err)
20076+ err = 1;
20077+ }
20078+
20079+ sbinfo->si_wbr_create = create->wbr_create;
20080+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
20081+ switch (create->wbr_create) {
20082+ case AuWbrCreate_MFSRRV:
20083+ case AuWbrCreate_MFSRR:
20084+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
20085+ /*FALLTHROUGH*/
20086+ case AuWbrCreate_MFS:
20087+ case AuWbrCreate_MFSV:
20088+ case AuWbrCreate_PMFS:
20089+ case AuWbrCreate_PMFSV:
e49829fe
JR
20090+ sbinfo->si_wbr_mfs.mfs_expire
20091+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 20092+ break;
20093+ }
20094+
20095+ if (sbinfo->si_wbr_create_ops->init)
20096+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
20097+
20098+ return err;
20099+}
20100+
20101+/*
20102+ * returns,
20103+ * plus: processed without an error
20104+ * zero: unprocessed
20105+ */
20106+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
20107+ struct au_opts *opts)
20108+{
20109+ int err;
20110+ struct au_sbinfo *sbinfo;
20111+
dece6358
AM
20112+ SiMustWriteLock(sb);
20113+
1facf9fc 20114+ err = 1; /* handled */
20115+ sbinfo = au_sbi(sb);
20116+ switch (opt->type) {
20117+ case Opt_udba:
20118+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
20119+ sbinfo->si_mntflags |= opt->udba;
20120+ opts->given_udba |= opt->udba;
20121+ break;
20122+
20123+ case Opt_plink:
20124+ au_opt_set(sbinfo->si_mntflags, PLINK);
20125+ break;
20126+ case Opt_noplink:
20127+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 20128+ au_plink_put(sb, /*verbose*/1);
1facf9fc 20129+ au_opt_clr(sbinfo->si_mntflags, PLINK);
20130+ break;
20131+ case Opt_list_plink:
20132+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
20133+ au_plink_list(sb);
20134+ break;
20135+
4a4d8108
AM
20136+ case Opt_dio:
20137+ au_opt_set(sbinfo->si_mntflags, DIO);
20138+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20139+ break;
20140+ case Opt_nodio:
20141+ au_opt_clr(sbinfo->si_mntflags, DIO);
20142+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20143+ break;
20144+
1facf9fc 20145+ case Opt_diropq_a:
20146+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20147+ break;
20148+ case Opt_diropq_w:
20149+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20150+ break;
20151+
20152+ case Opt_warn_perm:
20153+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
20154+ break;
20155+ case Opt_nowarn_perm:
20156+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
20157+ break;
20158+
20159+ case Opt_refrof:
20160+ au_opt_set(sbinfo->si_mntflags, REFROF);
20161+ break;
20162+ case Opt_norefrof:
20163+ au_opt_clr(sbinfo->si_mntflags, REFROF);
20164+ break;
20165+
20166+ case Opt_verbose:
20167+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
20168+ break;
20169+ case Opt_noverbose:
20170+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
20171+ break;
20172+
20173+ case Opt_sum:
20174+ au_opt_set(sbinfo->si_mntflags, SUM);
20175+ break;
20176+ case Opt_wsum:
20177+ au_opt_clr(sbinfo->si_mntflags, SUM);
20178+ au_opt_set(sbinfo->si_mntflags, SUM_W);
20179+ case Opt_nosum:
20180+ au_opt_clr(sbinfo->si_mntflags, SUM);
20181+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
20182+ break;
20183+
20184+ case Opt_wbr_create:
20185+ err = au_opt_wbr_create(sb, &opt->wbr_create);
20186+ break;
20187+ case Opt_wbr_copyup:
20188+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
20189+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
20190+ break;
20191+
20192+ case Opt_dirwh:
20193+ sbinfo->si_dirwh = opt->dirwh;
20194+ break;
20195+
20196+ case Opt_rdcache:
e49829fe
JR
20197+ sbinfo->si_rdcache
20198+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 20199+ break;
20200+ case Opt_rdblk:
20201+ sbinfo->si_rdblk = opt->rdblk;
20202+ break;
dece6358
AM
20203+ case Opt_rdblk_def:
20204+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
20205+ break;
1facf9fc 20206+ case Opt_rdhash:
20207+ sbinfo->si_rdhash = opt->rdhash;
20208+ break;
dece6358
AM
20209+ case Opt_rdhash_def:
20210+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
20211+ break;
20212+
20213+ case Opt_shwh:
20214+ au_opt_set(sbinfo->si_mntflags, SHWH);
20215+ break;
20216+ case Opt_noshwh:
20217+ au_opt_clr(sbinfo->si_mntflags, SHWH);
20218+ break;
1facf9fc 20219+
20220+ case Opt_trunc_xino:
20221+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
20222+ break;
20223+ case Opt_notrunc_xino:
20224+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
20225+ break;
20226+
20227+ case Opt_trunc_xino_path:
20228+ case Opt_itrunc_xino:
20229+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
20230+ if (!err)
20231+ err = 1;
20232+ break;
20233+
20234+ case Opt_trunc_xib:
20235+ au_fset_opts(opts->flags, TRUNC_XIB);
20236+ break;
20237+ case Opt_notrunc_xib:
20238+ au_fclr_opts(opts->flags, TRUNC_XIB);
20239+ break;
20240+
20241+ default:
20242+ err = 0;
20243+ break;
20244+ }
20245+
20246+ return err;
20247+}
20248+
20249+/*
20250+ * returns tri-state.
20251+ * plus: processed without an error
20252+ * zero: unprocessed
20253+ * minus: error
20254+ */
20255+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
20256+ struct au_opts *opts)
20257+{
20258+ int err, do_refresh;
20259+
20260+ err = 0;
20261+ switch (opt->type) {
20262+ case Opt_append:
20263+ opt->add.bindex = au_sbend(sb) + 1;
20264+ if (opt->add.bindex < 0)
20265+ opt->add.bindex = 0;
20266+ goto add;
20267+ case Opt_prepend:
20268+ opt->add.bindex = 0;
20269+ add:
20270+ case Opt_add:
20271+ err = au_br_add(sb, &opt->add,
20272+ au_ftest_opts(opts->flags, REMOUNT));
20273+ if (!err) {
20274+ err = 1;
027c5e7a 20275+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20276+ }
20277+ break;
20278+
20279+ case Opt_del:
20280+ case Opt_idel:
20281+ err = au_br_del(sb, &opt->del,
20282+ au_ftest_opts(opts->flags, REMOUNT));
20283+ if (!err) {
20284+ err = 1;
20285+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 20286+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20287+ }
20288+ break;
20289+
20290+ case Opt_mod:
20291+ case Opt_imod:
20292+ err = au_br_mod(sb, &opt->mod,
20293+ au_ftest_opts(opts->flags, REMOUNT),
20294+ &do_refresh);
20295+ if (!err) {
20296+ err = 1;
027c5e7a
AM
20297+ if (do_refresh)
20298+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20299+ }
20300+ break;
20301+ }
20302+
20303+ return err;
20304+}
20305+
20306+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
20307+ struct au_opt_xino **opt_xino,
20308+ struct au_opts *opts)
20309+{
20310+ int err;
20311+ aufs_bindex_t bend, bindex;
20312+ struct dentry *root, *parent, *h_root;
20313+
20314+ err = 0;
20315+ switch (opt->type) {
20316+ case Opt_xino:
20317+ err = au_xino_set(sb, &opt->xino,
20318+ !!au_ftest_opts(opts->flags, REMOUNT));
20319+ if (unlikely(err))
20320+ break;
20321+
20322+ *opt_xino = &opt->xino;
20323+ au_xino_brid_set(sb, -1);
20324+
20325+ /* safe d_parent access */
20326+ parent = opt->xino.file->f_dentry->d_parent;
20327+ root = sb->s_root;
20328+ bend = au_sbend(sb);
20329+ for (bindex = 0; bindex <= bend; bindex++) {
20330+ h_root = au_h_dptr(root, bindex);
20331+ if (h_root == parent) {
20332+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
20333+ break;
20334+ }
20335+ }
20336+ break;
20337+
20338+ case Opt_noxino:
20339+ au_xino_clr(sb);
20340+ au_xino_brid_set(sb, -1);
20341+ *opt_xino = (void *)-1;
20342+ break;
20343+ }
20344+
20345+ return err;
20346+}
20347+
20348+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20349+ unsigned int pending)
20350+{
20351+ int err;
20352+ aufs_bindex_t bindex, bend;
20353+ unsigned char do_plink, skip, do_free;
20354+ struct au_branch *br;
20355+ struct au_wbr *wbr;
20356+ struct dentry *root;
20357+ struct inode *dir, *h_dir;
20358+ struct au_sbinfo *sbinfo;
20359+ struct au_hinode *hdir;
20360+
dece6358
AM
20361+ SiMustAnyLock(sb);
20362+
1facf9fc 20363+ sbinfo = au_sbi(sb);
20364+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
20365+
dece6358
AM
20366+ if (!(sb_flags & MS_RDONLY)) {
20367+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
4a4d8108 20368+ pr_warning("first branch should be rw\n");
dece6358 20369+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
4a4d8108 20370+ pr_warning("shwh should be used with ro\n");
dece6358 20371+ }
1facf9fc 20372+
4a4d8108 20373+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 20374+ && !au_opt_test(sbinfo->si_mntflags, XINO))
4a4d8108 20375+ pr_warning("udba=*notify requires xino\n");
1facf9fc 20376+
20377+ err = 0;
20378+ root = sb->s_root;
4a4d8108 20379+ dir = root->d_inode;
1facf9fc 20380+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
20381+ bend = au_sbend(sb);
20382+ for (bindex = 0; !err && bindex <= bend; bindex++) {
20383+ skip = 0;
20384+ h_dir = au_h_iptr(dir, bindex);
20385+ br = au_sbr(sb, bindex);
20386+ do_free = 0;
20387+
20388+ wbr = br->br_wbr;
20389+ if (wbr)
20390+ wbr_wh_read_lock(wbr);
20391+
20392+ switch (br->br_perm) {
20393+ case AuBrPerm_RO:
20394+ case AuBrPerm_ROWH:
20395+ case AuBrPerm_RR:
20396+ case AuBrPerm_RRWH:
20397+ do_free = !!wbr;
20398+ skip = (!wbr
20399+ || (!wbr->wbr_whbase
20400+ && !wbr->wbr_plink
20401+ && !wbr->wbr_orph));
20402+ break;
20403+
20404+ case AuBrPerm_RWNoLinkWH:
20405+ /* skip = (!br->br_whbase && !br->br_orph); */
20406+ skip = (!wbr || !wbr->wbr_whbase);
20407+ if (skip && wbr) {
20408+ if (do_plink)
20409+ skip = !!wbr->wbr_plink;
20410+ else
20411+ skip = !wbr->wbr_plink;
20412+ }
20413+ break;
20414+
20415+ case AuBrPerm_RW:
20416+ /* skip = (br->br_whbase && br->br_ohph); */
20417+ skip = (wbr && wbr->wbr_whbase);
20418+ if (skip) {
20419+ if (do_plink)
20420+ skip = !!wbr->wbr_plink;
20421+ else
20422+ skip = !wbr->wbr_plink;
20423+ }
20424+ break;
20425+
20426+ default:
20427+ BUG();
20428+ }
20429+ if (wbr)
20430+ wbr_wh_read_unlock(wbr);
20431+
20432+ if (skip)
20433+ continue;
20434+
20435+ hdir = au_hi(dir, bindex);
4a4d8108 20436+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 20437+ if (wbr)
20438+ wbr_wh_write_lock(wbr);
20439+ err = au_wh_init(au_h_dptr(root, bindex), br, sb);
20440+ if (wbr)
20441+ wbr_wh_write_unlock(wbr);
4a4d8108 20442+ au_hn_imtx_unlock(hdir);
1facf9fc 20443+
20444+ if (!err && do_free) {
20445+ kfree(wbr);
20446+ br->br_wbr = NULL;
20447+ }
20448+ }
20449+
20450+ return err;
20451+}
20452+
20453+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
20454+{
20455+ int err;
20456+ unsigned int tmp;
027c5e7a 20457+ aufs_bindex_t bindex, bend;
1facf9fc 20458+ struct au_opt *opt;
20459+ struct au_opt_xino *opt_xino, xino;
20460+ struct au_sbinfo *sbinfo;
027c5e7a 20461+ struct au_branch *br;
1facf9fc 20462+
dece6358
AM
20463+ SiMustWriteLock(sb);
20464+
1facf9fc 20465+ err = 0;
20466+ opt_xino = NULL;
20467+ opt = opts->opt;
20468+ while (err >= 0 && opt->type != Opt_tail)
20469+ err = au_opt_simple(sb, opt++, opts);
20470+ if (err > 0)
20471+ err = 0;
20472+ else if (unlikely(err < 0))
20473+ goto out;
20474+
20475+ /* disable xino and udba temporary */
20476+ sbinfo = au_sbi(sb);
20477+ tmp = sbinfo->si_mntflags;
20478+ au_opt_clr(sbinfo->si_mntflags, XINO);
20479+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
20480+
20481+ opt = opts->opt;
20482+ while (err >= 0 && opt->type != Opt_tail)
20483+ err = au_opt_br(sb, opt++, opts);
20484+ if (err > 0)
20485+ err = 0;
20486+ else if (unlikely(err < 0))
20487+ goto out;
20488+
20489+ bend = au_sbend(sb);
20490+ if (unlikely(bend < 0)) {
20491+ err = -EINVAL;
4a4d8108 20492+ pr_err("no branches\n");
1facf9fc 20493+ goto out;
20494+ }
20495+
20496+ if (au_opt_test(tmp, XINO))
20497+ au_opt_set(sbinfo->si_mntflags, XINO);
20498+ opt = opts->opt;
20499+ while (!err && opt->type != Opt_tail)
20500+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
20501+ if (unlikely(err))
20502+ goto out;
20503+
20504+ err = au_opts_verify(sb, sb->s_flags, tmp);
20505+ if (unlikely(err))
20506+ goto out;
20507+
20508+ /* restore xino */
20509+ if (au_opt_test(tmp, XINO) && !opt_xino) {
20510+ xino.file = au_xino_def(sb);
20511+ err = PTR_ERR(xino.file);
20512+ if (IS_ERR(xino.file))
20513+ goto out;
20514+
20515+ err = au_xino_set(sb, &xino, /*remount*/0);
20516+ fput(xino.file);
20517+ if (unlikely(err))
20518+ goto out;
20519+ }
20520+
20521+ /* restore udba */
027c5e7a 20522+ tmp &= AuOptMask_UDBA;
1facf9fc 20523+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
20524+ sbinfo->si_mntflags |= tmp;
20525+ bend = au_sbend(sb);
20526+ for (bindex = 0; bindex <= bend; bindex++) {
20527+ br = au_sbr(sb, bindex);
20528+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
20529+ if (unlikely(err))
20530+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
20531+ bindex, err);
20532+ /* go on even if err */
20533+ }
4a4d8108 20534+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
1facf9fc 20535+ struct inode *dir = sb->s_root->d_inode;
4a4d8108 20536+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 20537+ }
20538+
4f0767ce 20539+out:
1facf9fc 20540+ return err;
20541+}
20542+
20543+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
20544+{
20545+ int err, rerr;
20546+ struct inode *dir;
20547+ struct au_opt_xino *opt_xino;
20548+ struct au_opt *opt;
20549+ struct au_sbinfo *sbinfo;
20550+
dece6358
AM
20551+ SiMustWriteLock(sb);
20552+
1facf9fc 20553+ dir = sb->s_root->d_inode;
20554+ sbinfo = au_sbi(sb);
20555+ err = 0;
20556+ opt_xino = NULL;
20557+ opt = opts->opt;
20558+ while (err >= 0 && opt->type != Opt_tail) {
20559+ err = au_opt_simple(sb, opt, opts);
20560+ if (!err)
20561+ err = au_opt_br(sb, opt, opts);
20562+ if (!err)
20563+ err = au_opt_xino(sb, opt, &opt_xino, opts);
20564+ opt++;
20565+ }
20566+ if (err > 0)
20567+ err = 0;
20568+ AuTraceErr(err);
20569+ /* go on even err */
20570+
20571+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
20572+ if (unlikely(rerr && !err))
20573+ err = rerr;
20574+
20575+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
20576+ rerr = au_xib_trunc(sb);
20577+ if (unlikely(rerr && !err))
20578+ err = rerr;
20579+ }
20580+
20581+ /* will be handled by the caller */
027c5e7a 20582+ if (!au_ftest_opts(opts->flags, REFRESH)
1facf9fc 20583+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
027c5e7a 20584+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20585+
20586+ AuDbg("status 0x%x\n", opts->flags);
20587+ return err;
20588+}
20589+
20590+/* ---------------------------------------------------------------------- */
20591+
20592+unsigned int au_opt_udba(struct super_block *sb)
20593+{
20594+ return au_mntflags(sb) & AuOptMask_UDBA;
20595+}
7f207e10
AM
20596diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
20597--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
53392da6 20598+++ linux/fs/aufs/opts.h 2011-08-24 13:30:24.734646739 +0200
027c5e7a 20599@@ -0,0 +1,210 @@
1facf9fc 20600+/*
027c5e7a 20601+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20602+ *
20603+ * This program, aufs is free software; you can redistribute it and/or modify
20604+ * it under the terms of the GNU General Public License as published by
20605+ * the Free Software Foundation; either version 2 of the License, or
20606+ * (at your option) any later version.
dece6358
AM
20607+ *
20608+ * This program is distributed in the hope that it will be useful,
20609+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20610+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20611+ * GNU General Public License for more details.
20612+ *
20613+ * You should have received a copy of the GNU General Public License
20614+ * along with this program; if not, write to the Free Software
20615+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20616+ */
20617+
20618+/*
20619+ * mount options/flags
20620+ */
20621+
20622+#ifndef __AUFS_OPTS_H__
20623+#define __AUFS_OPTS_H__
20624+
20625+#ifdef __KERNEL__
20626+
dece6358 20627+#include <linux/path.h>
1facf9fc 20628+#include <linux/aufs_type.h>
20629+
dece6358
AM
20630+struct file;
20631+struct super_block;
20632+
1facf9fc 20633+/* ---------------------------------------------------------------------- */
20634+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* unimplemented */
#define AuOpt_REFROF		(1 << 8)	/* unimplemented */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/* a feature which is not configured gets a zero flag, so its test fails */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* the default mount flags */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all udba flags; they are set as a group by au_opt_set_udba() */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test/set/clear the flag AuOpt_<name> in @flags.
 * @flags is parenthesized in every macro, so that an expression such as
 * (a | b) can be passed without extra parentheses at the call site.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
/* the udba flags must be set by au_opt_set_udba() instead */
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* replace the whole udba group by the single flag AuOpt_<name> */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
1facf9fc 20683+
e49829fe
JR
20684+static inline unsigned int au_opts_plink(unsigned int mntflags)
20685+{
20686+#ifdef CONFIG_PROC_FS
20687+ return mntflags;
20688+#else
20689+ return mntflags & ~AuOpt_PLINK;
20690+#endif
20691+}
20692+
1facf9fc 20693+/* ---------------------------------------------------------------------- */
20694+
/* policies to select one among multiple writable branches */

/* for creating a new entry */
enum {
	AuWbrCreate_TDP = 0,	/* top down parent */
	AuWbrCreate_RR,		/* round robin */
	AuWbrCreate_MFS,	/* most free space */
	AuWbrCreate_MFSV,	/* mfs with seconds */
	AuWbrCreate_MFSRR,	/* mfs then rr */
	AuWbrCreate_MFSRRV,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS,	/* parent and mfs */
	AuWbrCreate_PMFSV,	/* parent and mfs with seconds */

	AuWbrCreate_Def = AuWbrCreate_TDP	/* the default policy */
};

/* for copy-up */
enum {
	AuWbrCopyup_TDP = 0,	/* top down parent */
	AuWbrCopyup_BUP,	/* bottom up parent */
	AuWbrCopyup_BU,		/* bottom up */

	AuWbrCopyup_Def = AuWbrCopyup_TDP	/* the default policy */
};
20716+
20717+/* ---------------------------------------------------------------------- */
20718+
20719+struct au_opt_add {
20720+ aufs_bindex_t bindex;
20721+ char *pathname;
20722+ int perm;
20723+ struct path path;
20724+};
20725+
20726+struct au_opt_del {
20727+ char *pathname;
20728+ struct path h_path;
20729+};
20730+
20731+struct au_opt_mod {
20732+ char *path;
20733+ int perm;
20734+ struct dentry *h_root;
20735+};
20736+
20737+struct au_opt_xino {
20738+ char *path;
20739+ struct file *file;
20740+};
20741+
20742+struct au_opt_xino_itrunc {
20743+ aufs_bindex_t bindex;
20744+};
20745+
20746+struct au_opt_wbr_create {
20747+ int wbr_create;
20748+ int mfs_second;
20749+ unsigned long long mfsrr_watermark;
20750+};
20751+
20752+struct au_opt {
20753+ int type;
20754+ union {
20755+ struct au_opt_xino xino;
20756+ struct au_opt_xino_itrunc xino_itrunc;
20757+ struct au_opt_add add;
20758+ struct au_opt_del del;
20759+ struct au_opt_mod mod;
20760+ int dirwh;
20761+ int rdcache;
20762+ unsigned int rdblk;
20763+ unsigned int rdhash;
20764+ int udba;
20765+ struct au_opt_wbr_create wbr_create;
20766+ int wbr_copyup;
20767+ };
20768+};
20769+
/* opts flags, kept in au_opts.flags */
#define AuOpts_REMOUNT		1
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)

/* test/set/clear the flag AuOpts_<name> in @flags */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) do { \
	(flags) |= AuOpts_##name; \
} while (0)
#define au_fclr_opts(flags, name) do { \
	(flags) &= ~AuOpts_##name; \
} while (0)
1facf9fc 20780+
/* the whole set of parsed options and the state of applying them */
struct au_opts {
	struct au_opt	*opt;		/* array, terminated by Opt_tail */
	int		max_opt;

	unsigned int	given_udba;
	unsigned int	flags;		/* AuOpts_XXX */
	unsigned long	sb_flags;	/* handed to au_opts_verify() */
};
20789+
20790+/* ---------------------------------------------------------------------- */
20791+
/* option value to string (presumably the name used in the mount options) */
const char *au_optstr_br_perm(int brperm);
const char *au_optstr_udba(int udba);
const char *au_optstr_wbr_copyup(int wbr_copyup);
const char *au_optstr_wbr_create(int wbr_create);

/* parsing and applying the options */
void au_opts_free(struct au_opts *opts);
int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
		   unsigned int pending);
int au_opts_mount(struct super_block *sb, struct au_opts *opts);
int au_opts_remount(struct super_block *sb, struct au_opts *opts);

/* the udba bits of the current mount flags */
unsigned int au_opt_udba(struct super_block *sb);
20805+
20806+/* ---------------------------------------------------------------------- */
20807+
20808+#endif /* __KERNEL__ */
20809+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
20810diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
20811--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
53392da6 20812+++ linux/fs/aufs/plink.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 20813@@ -0,0 +1,515 @@
1facf9fc 20814+/*
027c5e7a 20815+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20816+ *
20817+ * This program, aufs is free software; you can redistribute it and/or modify
20818+ * it under the terms of the GNU General Public License as published by
20819+ * the Free Software Foundation; either version 2 of the License, or
20820+ * (at your option) any later version.
dece6358
AM
20821+ *
20822+ * This program is distributed in the hope that it will be useful,
20823+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20824+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20825+ * GNU General Public License for more details.
20826+ *
20827+ * You should have received a copy of the GNU General Public License
20828+ * along with this program; if not, write to the Free Software
20829+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20830+ */
20831+
20832+/*
20833+ * pseudo-link
20834+ */
20835+
20836+#include "aufs.h"
20837+
20838+/*
e49829fe 20839+ * the pseudo-link maintenance mode.
1facf9fc 20840+ * during a user process maintains the pseudo-links,
20841+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
20842+ *
20843+ * Flags
20844+ * NOPLM:
20845+ * For entry functions which will handle plink, and i_mutex is already held
20846+ * in VFS.
20847+ * They cannot wait and should return an error at once.
20848+ * Callers has to check the error.
20849+ * NOPLMW:
20850+ * For entry functions which will handle plink, but i_mutex is not held
20851+ * in VFS.
20852+ * They can wait the plink maintenance mode to finish.
20853+ *
20854+ * They behave like F_SETLK and F_SETLKW.
20855+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 20856+ */
e49829fe
JR
20857+
20858+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 20859+{
e49829fe
JR
20860+ int err;
20861+ pid_t pid, ppid;
20862+ struct au_sbinfo *sbi;
dece6358
AM
20863+
20864+ SiMustAnyLock(sb);
20865+
e49829fe
JR
20866+ err = 0;
20867+ if (!au_opt_test(au_mntflags(sb), PLINK))
20868+ goto out;
20869+
20870+ sbi = au_sbi(sb);
20871+ pid = sbi->si_plink_maint_pid;
20872+ if (!pid || pid == current->pid)
20873+ goto out;
20874+
20875+ /* todo: it highly depends upon /sbin/mount.aufs */
20876+ rcu_read_lock();
20877+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
20878+ rcu_read_unlock();
20879+ if (pid == ppid)
20880+ goto out;
20881+
20882+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
20883+ /* if there is no i_mutex lock in VFS, we don't need to wait */
20884+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
20885+ while (sbi->si_plink_maint_pid) {
20886+ si_read_unlock(sb);
20887+ /* gave up wake_up_bit() */
20888+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
20889+
20890+ if (au_ftest_lock(flags, FLUSH))
20891+ au_nwt_flush(&sbi->si_nowait);
20892+ si_noflush_read_lock(sb);
20893+ }
20894+ } else if (au_ftest_lock(flags, NOPLM)) {
20895+ AuDbg("ppid %d, pid %d\n", ppid, pid);
20896+ err = -EAGAIN;
20897+ }
20898+
20899+out:
20900+ return err;
4a4d8108
AM
20901+}
20902+
e49829fe 20903+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 20904+{
4a4d8108 20905+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 20906+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 20907+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 20908+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
20909+}
20910+
e49829fe 20911+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
20912+{
20913+ int err;
4a4d8108
AM
20914+ struct au_sbinfo *sbinfo;
20915+
20916+ err = 0;
4a4d8108
AM
20917+ sbinfo = au_sbi(sb);
20918+ /* make sure i am the only one in this fs */
e49829fe
JR
20919+ si_write_lock(sb, AuLock_FLUSH);
20920+ if (au_opt_test(au_mntflags(sb), PLINK)) {
20921+ spin_lock(&sbinfo->si_plink_maint_lock);
20922+ if (!sbinfo->si_plink_maint_pid)
20923+ sbinfo->si_plink_maint_pid = current->pid;
20924+ else
20925+ err = -EBUSY;
20926+ spin_unlock(&sbinfo->si_plink_maint_lock);
20927+ }
4a4d8108
AM
20928+ si_write_unlock(sb);
20929+
20930+ return err;
1facf9fc 20931+}
20932+
20933+/* ---------------------------------------------------------------------- */
20934+
20935+struct pseudo_link {
4a4d8108
AM
20936+ union {
20937+ struct list_head list;
20938+ struct rcu_head rcu;
20939+ };
1facf9fc 20940+ struct inode *inode;
20941+};
20942+
#ifdef CONFIG_AUFS_DEBUG
/* debug only: print the inode number of every pseudo-link */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbinfo;
	struct list_head *head;
	struct pseudo_link *cur;

	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	head = &sbinfo->si_plink.head;
	rcu_read_lock();
	list_for_each_entry_rcu(cur, head, list) {
		AuDbg("%lu\n", cur->inode->i_ino);
	}
	rcu_read_unlock();
}
#endif
20963+
20964+/* is the inode pseudo-linked? */
20965+int au_plink_test(struct inode *inode)
20966+{
20967+ int found;
20968+ struct au_sbinfo *sbinfo;
20969+ struct list_head *plink_list;
20970+ struct pseudo_link *plink;
20971+
20972+ sbinfo = au_sbi(inode->i_sb);
dece6358 20973+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 20974+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 20975+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 20976+
20977+ found = 0;
20978+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
20979+ rcu_read_lock();
20980+ list_for_each_entry_rcu(plink, plink_list, list)
1facf9fc 20981+ if (plink->inode == inode) {
20982+ found = 1;
20983+ break;
20984+ }
4a4d8108 20985+ rcu_read_unlock();
1facf9fc 20986+ return found;
20987+}
20988+
20989+/* ---------------------------------------------------------------------- */
20990+
20991+/*
20992+ * generate a name for plink.
20993+ * the file will be stored under AUFS_WH_PLINKDIR.
20994+ */
20995+/* 20 is max digits length of ulong 64 */
20996+#define PLINK_NAME_LEN ((20 + 1) * 2)
20997+
20998+static int plink_name(char *name, int len, struct inode *inode,
20999+ aufs_bindex_t bindex)
21000+{
21001+ int rlen;
21002+ struct inode *h_inode;
21003+
21004+ h_inode = au_h_iptr(inode, bindex);
21005+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
21006+ return rlen;
21007+}
21008+
7f207e10
AM
/* the arguments of au_do_plink_lkup(), packed for au_wkq_wait() */
struct au_do_plink_lkup_args {
	struct dentry		**errp;		/* where the result is stored */
	struct qstr		*tgtname;
	struct dentry		*h_parent;
	struct au_branch	*br;
};
21015+
21016+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
21017+ struct dentry *h_parent,
21018+ struct au_branch *br)
21019+{
21020+ struct dentry *h_dentry;
21021+ struct mutex *h_mtx;
21022+
21023+ h_mtx = &h_parent->d_inode->i_mutex;
21024+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
21025+ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL);
21026+ mutex_unlock(h_mtx);
21027+ return h_dentry;
21028+}
21029+
21030+static void au_call_do_plink_lkup(void *args)
21031+{
21032+ struct au_do_plink_lkup_args *a = args;
21033+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
21034+}
21035+
1facf9fc 21036+/* lookup the plink-ed @inode under the branch at @bindex */
21037+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
21038+{
21039+ struct dentry *h_dentry, *h_parent;
21040+ struct au_branch *br;
21041+ struct inode *h_dir;
7f207e10 21042+ int wkq_err;
1facf9fc 21043+ char a[PLINK_NAME_LEN];
21044+ struct qstr tgtname = {
21045+ .name = a
21046+ };
21047+
e49829fe
JR
21048+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
21049+
1facf9fc 21050+ br = au_sbr(inode->i_sb, bindex);
21051+ h_parent = br->br_wbr->wbr_plink;
21052+ h_dir = h_parent->d_inode;
21053+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21054+
7f207e10
AM
21055+ if (current_fsuid()) {
21056+ struct au_do_plink_lkup_args args = {
21057+ .errp = &h_dentry,
21058+ .tgtname = &tgtname,
21059+ .h_parent = h_parent,
21060+ .br = br
21061+ };
21062+
21063+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
21064+ if (unlikely(wkq_err))
21065+ h_dentry = ERR_PTR(wkq_err);
21066+ } else
21067+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
21068+
1facf9fc 21069+ return h_dentry;
21070+}
21071+
21072+/* create a pseudo-link */
21073+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
21074+ struct dentry *h_dentry, struct au_branch *br)
21075+{
21076+ int err;
21077+ struct path h_path = {
21078+ .mnt = br->br_mnt
21079+ };
21080+ struct inode *h_dir;
21081+
21082+ h_dir = h_parent->d_inode;
7f207e10 21083+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 21084+again:
1facf9fc 21085+ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
21086+ err = PTR_ERR(h_path.dentry);
21087+ if (IS_ERR(h_path.dentry))
21088+ goto out;
21089+
21090+ err = 0;
21091+ /* wh.plink dir is not monitored */
7f207e10 21092+ /* todo: is it really safe? */
1facf9fc 21093+ if (h_path.dentry->d_inode
21094+ && h_path.dentry->d_inode != h_dentry->d_inode) {
21095+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
21096+ dput(h_path.dentry);
21097+ h_path.dentry = NULL;
21098+ if (!err)
21099+ goto again;
21100+ }
21101+ if (!err && !h_path.dentry->d_inode)
21102+ err = vfsub_link(h_dentry, h_dir, &h_path);
21103+ dput(h_path.dentry);
21104+
4f0767ce 21105+out:
7f207e10 21106+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 21107+ return err;
21108+}
21109+
/* the arguments of do_whplink(), packed for au_wkq_wait() */
struct do_whplink_args {
	int			*errp;		/* where the result is stored */
	struct qstr		*tgt;
	struct dentry		*h_parent;
	struct dentry		*h_dentry;
	struct au_branch	*br;
};
21117+
21118+static void call_do_whplink(void *args)
21119+{
21120+ struct do_whplink_args *a = args;
21121+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
21122+}
21123+
21124+static int whplink(struct dentry *h_dentry, struct inode *inode,
21125+ aufs_bindex_t bindex, struct au_branch *br)
21126+{
21127+ int err, wkq_err;
21128+ struct au_wbr *wbr;
21129+ struct dentry *h_parent;
21130+ struct inode *h_dir;
21131+ char a[PLINK_NAME_LEN];
21132+ struct qstr tgtname = {
21133+ .name = a
21134+ };
21135+
21136+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
21137+ h_parent = wbr->wbr_plink;
21138+ h_dir = h_parent->d_inode;
21139+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21140+
21141+ /* always superio. */
b752ccd1 21142+ if (current_fsuid()) {
1facf9fc 21143+ struct do_whplink_args args = {
21144+ .errp = &err,
21145+ .tgt = &tgtname,
21146+ .h_parent = h_parent,
21147+ .h_dentry = h_dentry,
21148+ .br = br
21149+ };
21150+ wkq_err = au_wkq_wait(call_do_whplink, &args);
21151+ if (unlikely(wkq_err))
21152+ err = wkq_err;
21153+ } else
21154+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 21155+
21156+ return err;
21157+}
21158+
21159+/* free a single plink */
21160+static void do_put_plink(struct pseudo_link *plink, int do_del)
21161+{
1facf9fc 21162+ if (do_del)
21163+ list_del(&plink->list);
4a4d8108
AM
21164+ iput(plink->inode);
21165+ kfree(plink);
21166+}
21167+
21168+static void do_put_plink_rcu(struct rcu_head *rcu)
21169+{
21170+ struct pseudo_link *plink;
21171+
21172+ plink = container_of(rcu, struct pseudo_link, rcu);
21173+ iput(plink->inode);
1facf9fc 21174+ kfree(plink);
21175+}
21176+
21177+/*
21178+ * create a new pseudo-link for @h_dentry on @bindex.
21179+ * the linked inode is held in aufs @inode.
21180+ */
21181+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
21182+ struct dentry *h_dentry)
21183+{
21184+ struct super_block *sb;
21185+ struct au_sbinfo *sbinfo;
21186+ struct list_head *plink_list;
4a4d8108 21187+ struct pseudo_link *plink, *tmp;
1facf9fc 21188+ int found, err, cnt;
21189+
21190+ sb = inode->i_sb;
21191+ sbinfo = au_sbi(sb);
21192+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21193+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21194+
1facf9fc 21195+ cnt = 0;
21196+ found = 0;
21197+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21198+ rcu_read_lock();
21199+ list_for_each_entry_rcu(plink, plink_list, list) {
1facf9fc 21200+ cnt++;
21201+ if (plink->inode == inode) {
21202+ found = 1;
21203+ break;
21204+ }
21205+ }
4a4d8108
AM
21206+ rcu_read_unlock();
21207+ if (found)
1facf9fc 21208+ return;
4a4d8108
AM
21209+
21210+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
21211+ if (tmp)
21212+ tmp->inode = au_igrab(inode);
21213+ else {
21214+ err = -ENOMEM;
21215+ goto out;
1facf9fc 21216+ }
21217+
4a4d8108
AM
21218+ spin_lock(&sbinfo->si_plink.spin);
21219+ list_for_each_entry(plink, plink_list, list) {
21220+ if (plink->inode == inode) {
21221+ found = 1;
21222+ break;
21223+ }
1facf9fc 21224+ }
4a4d8108
AM
21225+ if (!found)
21226+ list_add_rcu(&tmp->list, plink_list);
1facf9fc 21227+ spin_unlock(&sbinfo->si_plink.spin);
4a4d8108
AM
21228+ if (!found) {
21229+ cnt++;
21230+ WARN_ONCE(cnt > AUFS_PLINK_WARN,
21231+ "unexpectedly many pseudo links, %d\n", cnt);
1facf9fc 21232+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
21233+ } else {
21234+ do_put_plink(tmp, 0);
21235+ return;
1facf9fc 21236+ }
21237+
4a4d8108 21238+out:
1facf9fc 21239+ if (unlikely(err)) {
4a4d8108
AM
21240+ pr_warning("err %d, damaged pseudo link.\n", err);
21241+ if (tmp) {
21242+ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink);
21243+ call_rcu(&tmp->rcu, do_put_plink_rcu);
21244+ }
1facf9fc 21245+ }
21246+}
21247+
21248+/* free all plinks of @sb; when @verbose is set, WARN first if the list is not empty. Asserts the superblock write lock. */
e49829fe 21249+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 21250+{
21251+ struct au_sbinfo *sbinfo;
21252+ struct list_head *plink_list;
21253+ struct pseudo_link *plink, *tmp;
21254+
dece6358
AM
21255+ SiMustWriteLock(sb);
21256+
1facf9fc 21257+ sbinfo = au_sbi(sb);
21258+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21259+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21260+
21261+ plink_list = &sbinfo->si_plink.head;
21262+ /* no spin_lock since sbinfo is write-locked */
e49829fe 21263+ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed");
1facf9fc 21264+ list_for_each_entry_safe(plink, tmp, plink_list, list)
21265+ do_put_plink(plink, 0); /* NOTE(review): presumably 0 means "do not unlink from the list" - confirm against do_put_plink() */
21266+ INIT_LIST_HEAD(plink_list); /* reset the head so the list reads as empty afterwards */
21267+}
21268+
e49829fe
JR
21269+void au_plink_clean(struct super_block *sb, int verbose)
21270+{ /* locked wrapper: take the aufs write lock on the root dentry and call au_plink_put() if the PLINK mount option is set */
21271+ struct dentry *root;
21272+
21273+ root = sb->s_root;
21274+ aufs_write_lock(root);
21275+ if (au_opt_test(au_mntflags(sb), PLINK))
21276+ au_plink_put(sb, verbose);
21277+ aufs_write_unlock(root);
21278+}
21279+
1facf9fc 21280+/* free the plinks on a branch specified by @br_id; a plink is released once its inode has no lower inode left */
21281+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
21282+{
21283+ struct au_sbinfo *sbinfo;
21284+ struct list_head *plink_list;
21285+ struct pseudo_link *plink, *tmp;
21286+ struct inode *inode;
21287+ aufs_bindex_t bstart, bend, bindex;
21288+ unsigned char do_put;
21289+
dece6358
AM
21290+ SiMustWriteLock(sb);
21291+
1facf9fc 21292+ sbinfo = au_sbi(sb);
21293+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21294+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21295+
21296+ plink_list = &sbinfo->si_plink.head;
21297+ /* no spin_lock since sbinfo is write-locked */
21298+ list_for_each_entry_safe(plink, tmp, plink_list, list) {
21299+ do_put = 0;
21300+ inode = au_igrab(plink->inode); /* own reference, dropped by iput() at the end of this iteration */
21301+ ii_write_lock_child(inode);
21302+ bstart = au_ibstart(inode);
21303+ bend = au_ibend(inode);
21304+ if (bstart >= 0) {
21305+ for (bindex = bstart; bindex <= bend; bindex++) { /* clear the first lower inode that belongs to @br_id */
21306+ if (!au_h_iptr(inode, bindex)
21307+ || au_ii_br_id(inode, bindex) != br_id)
21308+ continue;
21309+ au_set_h_iptr(inode, bindex, NULL, 0);
21310+ do_put = 1;
21311+ break;
21312+ }
21313+ } else
21314+ do_put_plink(plink, 1); /* negative bstart: release the plink right away */
21315+
dece6358
AM
21316+ if (do_put) { /* something was cleared: release the plink only if no lower inode remains */
21317+ for (bindex = bstart; bindex <= bend; bindex++)
21318+ if (au_h_iptr(inode, bindex)) {
21319+ do_put = 0;
21320+ break;
21321+ }
21322+ if (do_put)
21323+ do_put_plink(plink, 1);
21324+ }
21325+ ii_write_unlock(inode);
21326+ iput(inode);
21327+ }
21328+}
7f207e10
AM
21329diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
21330--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21331+++ linux/fs/aufs/poll.c 2011-08-24 13:30:24.734646739 +0200
dece6358
AM
21332@@ -0,0 +1,56 @@
21333+/*
027c5e7a 21334+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
21335+ *
21336+ * This program, aufs is free software; you can redistribute it and/or modify
21337+ * it under the terms of the GNU General Public License as published by
21338+ * the Free Software Foundation; either version 2 of the License, or
21339+ * (at your option) any later version.
21340+ *
21341+ * This program is distributed in the hope that it will be useful,
21342+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21343+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21344+ * GNU General Public License for more details.
21345+ *
21346+ * You should have received a copy of the GNU General Public License
21347+ * along with this program; if not, write to the Free Software
21348+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21349+ */
21350+
1308ab2a 21351+/*
21352+ * poll operation
21353+ * There is only one filesystem which implements ->poll operation, currently.
21354+ */
21355+
21356+#include "aufs.h"
21357+
21358+unsigned int aufs_poll(struct file *file, poll_table *wait)
21359+{ /* ->poll for aufs files: forwards to the top lower file's ->poll; returns POLLERR when revalidation fails */
21360+ unsigned int mask;
21361+ int err;
21362+ struct file *h_file;
21363+ struct dentry *dentry;
21364+ struct super_block *sb;
21365+
21366+ /* We should pretend an error happened. */
21367+ mask = POLLERR /* | POLLIN | POLLOUT */;
21368+ dentry = file->f_dentry;
21369+ sb = dentry->d_sb;
e49829fe 21370+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); /* return value is not checked here */
1308ab2a 21371+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
21372+ if (unlikely(err))
21373+ goto out; /* mask is still POLLERR */
21374+
21375+ /* it is not an error if h_file has no operation */
21376+ mask = DEFAULT_POLLMASK;
4a4d8108 21377+ h_file = au_hf_top(file);
1308ab2a 21378+ if (h_file->f_op && h_file->f_op->poll)
21379+ mask = h_file->f_op->poll(h_file, wait);
21380+
21381+ di_read_unlock(dentry, AuLock_IR); /* drop the locks the successful revalidation left held */
21382+ fi_read_unlock(file);
21383+
4f0767ce 21384+out:
1308ab2a 21385+ si_read_unlock(sb);
21386+ AuTraceErr((int)mask);
21387+ return mask;
21388+}
7f207e10
AM
21389diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
21390--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
21391+++ linux/fs/aufs/procfs.c 2011-08-24 13:30:24.734646739 +0200
21392@@ -0,0 +1,170 @@
e49829fe 21393+/*
027c5e7a 21394+ * Copyright (C) 2010-2011 Junjiro R. Okajima
e49829fe
JR
21395+ *
21396+ * This program, aufs is free software; you can redistribute it and/or modify
21397+ * it under the terms of the GNU General Public License as published by
21398+ * the Free Software Foundation; either version 2 of the License, or
21399+ * (at your option) any later version.
21400+ *
21401+ * This program is distributed in the hope that it will be useful,
21402+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21403+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21404+ * GNU General Public License for more details.
21405+ *
21406+ * You should have received a copy of the GNU General Public License
21407+ * along with this program; if not, write to the Free Software
21408+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21409+ */
21410+
21411+/*
21412+ * procfs interfaces
21413+ */
21414+
21415+#include <linux/proc_fs.h>
21416+#include "aufs.h"
21417+
21418+static int au_procfs_plm_release(struct inode *inode, struct file *file)
21419+{ /* ->release: if this file was bound to a superblock by an "si=" write, leave maintenance mode and drop the kobject reference */
21420+ struct au_sbinfo *sbinfo;
21421+
21422+ sbinfo = file->private_data;
21423+ if (sbinfo) {
21424+ au_plink_maint_leave(sbinfo);
21425+ kobject_put(&sbinfo->si_kobj); /* pairs with kobject_get() in au_procfs_plm_write_si() */
21426+ }
21427+
21428+ return 0;
21429+}
21430+
21431+static void au_procfs_plm_write_clean(struct file *file)
21432+{ /* handle a "clean" write: drop all pseudo-links of the bound superblock; silently does nothing if no "si=" was accepted yet */
21433+ struct au_sbinfo *sbinfo;
21434+
21435+ sbinfo = file->private_data;
21436+ if (sbinfo)
21437+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
21438+}
21439+
21440+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
21441+{ /* bind @file to the aufs superblock whose sysaufs id is @id and enter plink maintenance mode; -EBUSY if already bound, -EINVAL if no such id */
21442+ int err;
21443+ struct super_block *sb;
21444+ struct au_sbinfo *sbinfo;
21445+
21446+ err = -EBUSY;
21447+ if (unlikely(file->private_data))
21448+ goto out;
21449+
21450+ sb = NULL;
53392da6 21451+ /* don't use au_sbilist_lock() here */
e49829fe
JR
21452+ spin_lock(&au_sbilist.spin);
21453+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
21454+ if (id == sysaufs_si_id(sbinfo)) {
21455+ kobject_get(&sbinfo->si_kobj); /* pin sbinfo before the list lock is dropped */
21456+ sb = sbinfo->si_sb;
21457+ break;
21458+ }
21459+ spin_unlock(&au_sbilist.spin);
21460+
21461+ err = -EINVAL;
21462+ if (unlikely(!sb))
21463+ goto out; /* sbinfo is only meaningful below because the loop ended via break */
21464+
21465+ err = au_plink_maint_enter(sb);
21466+ if (!err)
21467+ /* keep kobject_get() */
21468+ file->private_data = sbinfo;
21469+ else
21470+ kobject_put(&sbinfo->si_kobj);
21471+out:
21472+ return err;
21473+}
21474+
21475+/*
21476+ * Accept a valid "si=xxxx" only; returns @count on success, a negative errno otherwise.
21477+ * Once it is accepted successfully, accept "clean" too.
21478+ */
21479+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
21480+ size_t count, loff_t *ppos)
21481+{
21482+ ssize_t err;
21483+ unsigned long id;
21484+ /* "si=" + hex digits + last newline (allowed) + terminating NUL */
21485+ char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];
21486+
21487+ err = -EACCES;
21488+ if (unlikely(!capable(CAP_SYS_ADMIN)))
21489+ goto out;
21490+
21491+ err = -EINVAL;
21492+ if (unlikely(count >= sizeof(buf))) /* buf[count] below must stay inside buf */
21493+ goto out;
21494+
21495+ err = copy_from_user(buf, ubuf, count);
21496+ if (unlikely(err)) {
21497+ err = -EFAULT;
21498+ goto out;
21499+ }
21500+ buf[count] = 0;
21501+
21502+ err = -EINVAL;
21503+ if (!strcmp("clean", buf)) { /* exact match: "clean" followed by a newline is rejected */
21504+ au_procfs_plm_write_clean(file);
21505+ goto out_success;
21506+ } else if (unlikely(strncmp("si=", buf, 3)))
21507+ goto out;
21508+
21509+ err = strict_strtoul(buf + 3, 16, &id); /* the id is parsed as hexadecimal */
21510+ if (unlikely(err))
21511+ goto out;
21512+
21513+ err = au_procfs_plm_write_si(file, id);
21514+ if (unlikely(err))
21515+ goto out;
21516+
21517+out_success:
21518+ err = count; /* success */
21519+out:
21520+ return err;
21521+}
21522+
21523+static const struct file_operations au_procfs_plm_fop = { /* write-only control file: no .read is provided */
21524+ .write = au_procfs_plm_write,
21525+ .release = au_procfs_plm_release,
21526+ .owner = THIS_MODULE
21527+};
21528+
21529+/* ---------------------------------------------------------------------- */
21530+
21531+static struct proc_dir_entry *au_procfs_dir; /* the AUFS_PLINK_MAINT_DIR directory, set by au_procfs_init() */
21532+
21533+void au_procfs_fin(void)
21534+{ /* remove the maintenance entry first, then its parent directory */
21535+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
21536+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21537+}
21538+
21539+int __init au_procfs_init(void)
21540+{ /* create the procfs directory and the owner-writable maintenance file in it; returns 0 or -ENOMEM */
21541+ int err;
21542+ struct proc_dir_entry *entry;
21543+
21544+ err = -ENOMEM;
21545+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
21546+ if (unlikely(!au_procfs_dir))
21547+ goto out;
21548+
21549+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
21550+ au_procfs_dir, &au_procfs_plm_fop);
21551+ if (unlikely(!entry))
21552+ goto out_dir;
21553+
21554+ err = 0;
21555+ goto out; /* success */
21556+
21557+
21558+out_dir:
21559+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); /* undo proc_mkdir() when the file could not be created */
21560+out:
21561+ return err;
21562+}
7f207e10
AM
21563diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
21564--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21565+++ linux/fs/aufs/rdu.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 21566@@ -0,0 +1,383 @@
1308ab2a 21567+/*
027c5e7a 21568+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 21569+ *
21570+ * This program, aufs is free software; you can redistribute it and/or modify
21571+ * it under the terms of the GNU General Public License as published by
21572+ * the Free Software Foundation; either version 2 of the License, or
21573+ * (at your option) any later version.
21574+ *
21575+ * This program is distributed in the hope that it will be useful,
21576+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21577+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21578+ * GNU General Public License for more details.
21579+ *
21580+ * You should have received a copy of the GNU General Public License
21581+ * along with this program; if not, write to the Free Software
21582+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21583+ */
21584+
21585+/*
21586+ * readdir in userspace.
21587+ */
21588+
b752ccd1 21589+#include <linux/compat.h>
4a4d8108 21590+#include <linux/fs_stack.h>
1308ab2a 21591+#include <linux/security.h>
21592+#include <linux/uaccess.h>
21593+#include <linux/aufs_type.h>
21594+#include "aufs.h"
21595+
21596+/* bits for the flags member of the rdu cookie (rdu->cookie.flags), with test/set/clear helpers */
21597+#define AuRdu_CALLED 1 /* set by au_rdu_fill() on every call, cleared before each vfsub_readdir() */
21598+#define AuRdu_CONT (1 << 1) /* set by au_rdu() after a first successful call; later calls check the generation */
21599+#define AuRdu_FULL (1 << 2) /* set by au_rdu_fill() when the next entry does not fit the user buffer */
21600+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
21601+#define au_fset_rdu(flags, name) \
21602+ do { (flags) |= AuRdu_##name; } while (0)
21603+#define au_fclr_rdu(flags, name) \
21604+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 21605+
21606+struct au_rdu_arg { /* state handed to the au_rdu_fill() readdir callback */
21607+ struct aufs_rdu *rdu; /* the request being served */
21608+ union au_rdu_ent_ul ent; /* current write position in the user buffer */
21609+ unsigned long end; /* end of the user buffer: start address + rdu->sz */
21610+
21611+ struct super_block *sb;
21612+ int err; /* callback status, read back by au_rdu_do() */
21613+};
21614+
21615+static int au_rdu_fill(void *__arg, const char *name, int nlen,
21616+ loff_t offset, u64 h_ino, unsigned int d_type)
21617+{ /* readdir callback: append one entry (header, name, NUL) to the user buffer; returns -EFAULT on a copy fault or when the buffer is full */
21618+ int err, len;
21619+ struct au_rdu_arg *arg = __arg;
21620+ struct aufs_rdu *rdu = arg->rdu;
21621+ struct au_rdu_ent ent;
21622+
21623+ memset(&ent, 0, sizeof(ent)); /* whole struct goes to user space: do not leak stack via wh or padding */
21624+ arg->err = 0;
21625+ au_fset_rdu(rdu->cookie.flags, CALLED);
21626+ len = au_rdu_len(nlen);
21627+ if (arg->ent.ul + len < arg->end) {
21628+ ent.ino = h_ino;
21629+ ent.bindex = rdu->cookie.bindex;
21630+ ent.type = d_type;
21631+ ent.nlen = nlen;
4a4d8108
AM
21632+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
21633+ ent.type = DT_UNKNOWN;
1308ab2a 21634+
21635+ err = -EFAULT;
21636+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
21637+ goto out;
21638+ if (copy_to_user(arg->ent.e->name, name, nlen))
21639+ goto out;
21640+ /* the terminating NUL */
21641+ if (__put_user(0, arg->ent.e->name + nlen))
21642+ goto out;
21643+ err = 0;
21644+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
21645+ arg->ent.ul += len; /* advance to where the next entry will be written */
21646+ rdu->rent++;
21647+ } else {
21648+ err = -EFAULT; /* buffer full: non-zero return stops the walk, FULL tells the caller why */
21649+ au_fset_rdu(rdu->cookie.flags, FULL);
21650+ rdu->full = 1;
21651+ rdu->tail = arg->ent;
21652+ }
21653+
4f0767ce 21654+out:
1308ab2a 21655+ /* AuTraceErr(err); */
21656+ return err;
21657+}
21658+
21659+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
21660+{ /* seek @h_file to the cookie position and call readdir repeatedly until error, no callback invocation, or a full user buffer */
21661+ int err;
21662+ loff_t offset;
21663+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
21664+
21665+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
21666+ err = offset; /* NOTE(review): loff_t narrowed to int - confirm this is only meant to carry a negative errno */
21667+ if (unlikely(offset != cookie->h_pos))
21668+ goto out;
21669+
21670+ err = 0;
21671+ do {
21672+ arg->err = 0;
21673+ au_fclr_rdu(cookie->flags, CALLED); /* au_rdu_fill() sets it again if it gets called */
21674+ /* smp_mb(); */
21675+ err = vfsub_readdir(h_file, au_rdu_fill, arg);
21676+ if (err >= 0)
21677+ err = arg->err;
21678+ } while (!err
21679+ && au_ftest_rdu(cookie->flags, CALLED)
21680+ && !au_ftest_rdu(cookie->flags, FULL));
21681+ cookie->h_pos = h_file->f_pos; /* remember where to resume on this branch */
21682+
4f0767ce 21683+out:
1308ab2a 21684+ AuTraceErr(err);
21685+ return err;
21686+}
21687+
21688+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
21689+{ /* AUFS_CTL_RDU worker: fill the user buffer described by @rdu with entries from each lower directory of @file, resuming at rdu->cookie */
21690+ int err;
21691+ aufs_bindex_t bend;
21692+ struct au_rdu_arg arg;
21693+ struct dentry *dentry;
21694+ struct inode *inode;
21695+ struct file *h_file;
21696+ struct au_rdu_cookie *cookie = &rdu->cookie;
21697+
21698+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
21699+ if (unlikely(err)) {
21700+ err = -EFAULT;
21701+ AuTraceErr(err);
21702+ goto out;
21703+ }
21704+ rdu->rent = 0; /* reset the result fields before filling */
21705+ rdu->tail = rdu->ent;
21706+ rdu->full = 0;
21707+ arg.rdu = rdu;
21708+ arg.ent = rdu->ent;
21709+ arg.end = arg.ent.ul;
21710+ arg.end += rdu->sz;
21711+
21712+ err = -ENOTDIR;
21713+ if (unlikely(!file->f_op || !file->f_op->readdir))
21714+ goto out;
21715+
21716+ err = security_file_permission(file, MAY_READ);
21717+ AuTraceErr(err);
21718+ if (unlikely(err))
21719+ goto out;
21720+
21721+ dentry = file->f_dentry;
21722+ inode = dentry->d_inode;
21723+#if 1
21724+ mutex_lock(&inode->i_mutex);
21725+#else
21726+ err = mutex_lock_killable(&inode->i_mutex);
21727+ AuTraceErr(err);
21728+ if (unlikely(err))
21729+ goto out;
21730+#endif
1308ab2a 21731+
21732+ arg.sb = inode->i_sb;
e49829fe
JR
21733+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
21734+ if (unlikely(err))
21735+ goto out_mtx;
027c5e7a
AM
21736+ err = au_alive_dir(dentry);
21737+ if (unlikely(err))
21738+ goto out_si;
e49829fe 21739+ /* todo: reval? */
1308ab2a 21740+ fi_read_lock(file);
21741+
21742+ err = -EAGAIN; /* a continued request whose generation no longer matches the file's is refused */
21743+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
21744+ && cookie->generation != au_figen(file)))
21745+ goto out_unlock;
21746+
21747+ err = 0;
21748+ if (!rdu->blk) { /* caller gave no block size: use the superblock's si_rdblk, else au_dir_size() */
21749+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
21750+ if (!rdu->blk)
21751+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
21752+ }
21753+ bend = au_fbstart(file); /* bend temporarily holds the start index to clamp the cookie */
21754+ if (cookie->bindex < bend)
21755+ cookie->bindex = bend;
4a4d8108 21756+ bend = au_fbend_dir(file);
1308ab2a 21757+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
21758+ for (; !err && cookie->bindex <= bend;
21759+ cookie->bindex++, cookie->h_pos = 0) { /* each further branch is read from position 0 */
4a4d8108 21760+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 21761+ if (!h_file)
21762+ continue;
21763+
21764+ au_fclr_rdu(cookie->flags, FULL);
21765+ err = au_rdu_do(h_file, &arg);
21766+ AuTraceErr(err);
21767+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
21768+ break; /* break keeps bindex/h_pos so the next call resumes on this branch */
21769+ }
21770+ AuDbg("rent %llu\n", rdu->rent);
21771+
21772+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) { /* first successful call: record the generation for later checks */
21773+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
21774+ au_fset_rdu(cookie->flags, CONT);
21775+ cookie->generation = au_figen(file);
21776+ }
21777+
21778+ ii_read_lock_child(inode);
21779+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
21780+ ii_read_unlock(inode);
21781+
4f0767ce 21782+out_unlock:
1308ab2a 21783+ fi_read_unlock(file);
027c5e7a 21784+out_si:
1308ab2a 21785+ si_read_unlock(arg.sb);
4f0767ce 21786+out_mtx:
1308ab2a 21787+ mutex_unlock(&inode->i_mutex);
4f0767ce 21788+out:
1308ab2a 21789+ AuTraceErr(err);
21790+ return err;
21791+}
21792+
21793+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
21794+{ /* AUFS_CTL_RDU_INO worker: for each of the rdu->nent entries in the user buffer, replace its ino with the one from au_ino()/au_wh_ino() */
21795+ int err;
21796+ ino_t ino;
21797+ unsigned long long nent;
21798+ union au_rdu_ent_ul *u;
21799+ struct au_rdu_ent ent;
21800+ struct super_block *sb;
21801+
21802+ err = 0;
21803+ nent = rdu->nent;
21804+ u = &rdu->ent; /* walks the user buffer; rdu->ent itself is advanced */
21805+ sb = file->f_dentry->d_sb;
21806+ si_read_lock(sb, AuLock_FLUSH);
21807+ while (nent-- > 0) {
1308ab2a 21808+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
21809+ if (!err)
21810+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 21811+ if (unlikely(err)) {
21812+ err = -EFAULT;
21813+ AuTraceErr(err);
21814+ break;
21815+ }
21816+
21817+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
21818+ if (!ent.wh) /* NOTE(review): ent.bindex comes straight from user memory and is not range-checked here - verify the callees validate it */
21819+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
21820+ else
21821+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
21822+ &ino);
21823+ if (unlikely(err)) {
21824+ AuTraceErr(err);
21825+ break;
21826+ }
21827+
21828+ err = __put_user(ino, &u->e->ino);
21829+ if (unlikely(err)) {
21830+ err = -EFAULT;
21831+ AuTraceErr(err);
21832+ break;
21833+ }
21834+ u->ul += au_rdu_len(ent.nlen); /* step to the next entry using the user-supplied name length */
21835+ }
21836+ si_read_unlock(sb);
21837+
21838+ return err;
21839+}
21840+
21841+/* ---------------------------------------------------------------------- */
21842+
21843+static int au_rdu_verify(struct aufs_rdu *rdu)
21844+{ /* returns 0 if the caller's recorded struct size matches the kernel's sizeof(struct aufs_rdu), -EINVAL otherwise */
b752ccd1 21845+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 21846+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 21847+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 21848+ rdu->blk,
21849+ rdu->rent, rdu->shwh, rdu->full,
21850+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
21851+ rdu->cookie.generation);
dece6358 21852+
b752ccd1 21853+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 21854+ return 0;
dece6358 21855+
b752ccd1
AM
21856+ AuDbg("%u:%u\n",
21857+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 21858+ return -EINVAL;
21859+}
21860+
21861+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 21862+{ /* ioctl entry: copy in and verify struct aufs_rdu, then dispatch on @cmd; unknown commands return -EINVAL */
1308ab2a 21863+ long err, e;
21864+ struct aufs_rdu rdu;
21865+ void __user *p = (void __user *)arg;
dece6358 21866+
1308ab2a 21867+ err = copy_from_user(&rdu, p, sizeof(rdu));
21868+ if (unlikely(err)) {
21869+ err = -EFAULT;
21870+ AuTraceErr(err);
21871+ goto out;
21872+ }
21873+ err = au_rdu_verify(&rdu);
dece6358
AM
21874+ if (unlikely(err))
21875+ goto out;
21876+
1308ab2a 21877+ switch (cmd) {
21878+ case AUFS_CTL_RDU:
21879+ err = au_rdu(file, &rdu);
21880+ if (unlikely(err))
21881+ break; /* on failure the updated rdu is not copied back */
dece6358 21882+
1308ab2a 21883+ e = copy_to_user(p, &rdu, sizeof(rdu));
21884+ if (unlikely(e)) {
21885+ err = -EFAULT;
21886+ AuTraceErr(err);
21887+ }
21888+ break;
21889+ case AUFS_CTL_RDU_INO:
21890+ err = au_rdu_ino(file, &rdu); /* rewrites entries in the user buffer; the rdu header is not copied back */
21891+ break;
21892+
21893+ default:
4a4d8108 21894+ /* err = -ENOTTY; */
1308ab2a 21895+ err = -EINVAL;
21896+ }
dece6358 21897+
4f0767ce 21898+out:
1308ab2a 21899+ AuTraceErr(err);
21900+ return err;
1facf9fc 21901+}
b752ccd1
AM
21902+
21903+#ifdef CONFIG_COMPAT
21904+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
21905+{ /* compat variant of au_rdu_ioctl(): converts the entry pointers with compat_ptr()/ptr_to_compat() around the same dispatch */
21906+ long err, e;
21907+ struct aufs_rdu rdu;
21908+ void __user *p = compat_ptr(arg);
21909+
21910+ /* todo: get_user()? */
21911+ err = copy_from_user(&rdu, p, sizeof(rdu));
21912+ if (unlikely(err)) {
21913+ err = -EFAULT;
21914+ AuTraceErr(err);
21915+ goto out;
21916+ }
21917+ rdu.ent.e = compat_ptr(rdu.ent.ul); /* widen the user's compat pointer value to a kernel-usable user pointer */
21918+ err = au_rdu_verify(&rdu);
21919+ if (unlikely(err))
21920+ goto out;
21921+
21922+ switch (cmd) {
21923+ case AUFS_CTL_RDU:
21924+ err = au_rdu(file, &rdu);
21925+ if (unlikely(err))
21926+ break;
21927+
21928+ rdu.ent.ul = ptr_to_compat(rdu.ent.e); /* convert both pointers back before returning the struct */
21929+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
21930+ e = copy_to_user(p, &rdu, sizeof(rdu));
21931+ if (unlikely(e)) {
21932+ err = -EFAULT;
21933+ AuTraceErr(err);
21934+ }
21935+ break;
21936+ case AUFS_CTL_RDU_INO:
21937+ err = au_rdu_ino(file, &rdu);
21938+ break;
21939+
21940+ default:
21941+ /* err = -ENOTTY; */
21942+ err = -EINVAL;
21943+ }
21944+
4f0767ce 21945+out:
b752ccd1
AM
21946+ AuTraceErr(err);
21947+ return err;
21948+}
21949+#endif
7f207e10
AM
21950diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
21951--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
53392da6 21952+++ linux/fs/aufs/rwsem.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 21953@@ -0,0 +1,189 @@
1facf9fc 21954+/*
027c5e7a 21955+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 21956+ *
21957+ * This program, aufs is free software; you can redistribute it and/or modify
21958+ * it under the terms of the GNU General Public License as published by
21959+ * the Free Software Foundation; either version 2 of the License, or
21960+ * (at your option) any later version.
dece6358
AM
21961+ *
21962+ * This program is distributed in the hope that it will be useful,
21963+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21964+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21965+ * GNU General Public License for more details.
21966+ *
21967+ * You should have received a copy of the GNU General Public License
21968+ * along with this program; if not, write to the Free Software
21969+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 21970+ */
21971+
21972+/*
21973+ * simple read-write semaphore wrappers
21974+ */
21975+
21976+#ifndef __AUFS_RWSEM_H__
21977+#define __AUFS_RWSEM_H__
21978+
21979+#ifdef __KERNEL__
21980+
dece6358 21981+#include <linux/rwsem.h>
4a4d8108 21982+#include "debug.h"
dece6358
AM
21983+
21984+struct au_rwsem { /* rw_semaphore wrapper; gains reader/writer counters in debug builds */
21985+ struct rw_semaphore rwsem;
21986+#ifdef CONFIG_AUFS_DEBUG
21987+ /* just for debugging, not almighty counter */
21988+ atomic_t rcnt, wcnt; /* number of read / write holders as tracked by the wrappers below */
21989+#endif
21990+};
21991+
21992+#ifdef CONFIG_AUFS_DEBUG
21993+#define AuDbgCntInit(rw) do { \
21994+ atomic_set(&(rw)->rcnt, 0); \
21995+ atomic_set(&(rw)->wcnt, 0); \
21996+ smp_mb(); /* atomic set */ \
21997+} while (0)
21998+
e49829fe 21999+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 22000+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
e49829fe 22001+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
22002+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
22003+#else /* !CONFIG_AUFS_DEBUG: the counters do not exist, every helper is a no-op */
22004+#define AuDbgCntInit(rw) do {} while (0)
22005+#define AuDbgRcntInc(rw) do {} while (0)
22006+#define AuDbgRcntDec(rw) do {} while (0)
22007+#define AuDbgWcntInc(rw) do {} while (0)
22008+#define AuDbgWcntDec(rw) do {} while (0)
22009+#endif /* CONFIG_AUFS_DEBUG */
22010+
22011+/* lock-state assertions; to debug easier, do not make them inlined functions */
22012+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
22013+/* rwsem_is_locked() is unusable, so the debug counters rcnt/wcnt are consulted instead */
22014+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
22015+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
22016+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
22017+ && atomic_read(&(rw)->wcnt) <= 0)
22018+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
22019+ || atomic_read(&(rw)->wcnt))
22020+
e49829fe
JR
22021+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key) /* assign a lockdep class to the wrapped rwsem */
22022+
dece6358
AM
22023+static inline void au_rw_init(struct au_rwsem *rw)
22024+{ /* zero the debug counters (no-op without CONFIG_AUFS_DEBUG) and initialise the rwsem */
22025+ AuDbgCntInit(rw);
22026+ init_rwsem(&rw->rwsem);
22027+}
22028+
22029+static inline void au_rw_init_wlock(struct au_rwsem *rw)
22030+{ /* initialise @rw and return with it write-locked */
22031+ au_rw_init(rw);
22032+ down_write(&rw->rwsem);
22033+ AuDbgWcntInc(rw);
22034+}
22035+
22036+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
22037+ unsigned int lsc)
22038+{ /* as au_rw_init_wlock(), taking the write lock with lockdep subclass @lsc */
22039+ au_rw_init(rw);
22040+ down_write_nested(&rw->rwsem, lsc);
22041+ AuDbgWcntInc(rw);
22042+}
22043+
22044+static inline void au_rw_read_lock(struct au_rwsem *rw)
22045+{ /* take the read lock, then count the reader (debug builds only) */
22046+ down_read(&rw->rwsem);
22047+ AuDbgRcntInc(rw);
22048+}
22049+
22050+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
22051+{ /* read lock with lockdep subclass @lsc */
22052+ down_read_nested(&rw->rwsem, lsc);
22053+ AuDbgRcntInc(rw);
22054+}
22055+
22056+static inline void au_rw_read_unlock(struct au_rwsem *rw)
22057+{ /* assert a reader is recorded, uncount it, then release the read lock */
22058+ AuRwMustReadLock(rw);
22059+ AuDbgRcntDec(rw);
22060+ up_read(&rw->rwsem);
22061+}
22062+
22063+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
22064+{ /* downgrade a held write lock to a read lock, moving the debug count from wcnt to rcnt first */
22065+ AuRwMustWriteLock(rw);
22066+ AuDbgRcntInc(rw);
22067+ AuDbgWcntDec(rw);
22068+ downgrade_write(&rw->rwsem);
22069+}
22070+
22071+static inline void au_rw_write_lock(struct au_rwsem *rw)
22072+{ /* take the write lock, then count the writer (debug builds only) */
22073+ down_write(&rw->rwsem);
22074+ AuDbgWcntInc(rw);
22075+}
22076+
22077+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
22078+ unsigned int lsc)
22079+{ /* write lock with lockdep subclass @lsc */
22080+ down_write_nested(&rw->rwsem, lsc);
22081+ AuDbgWcntInc(rw);
22082+}
1facf9fc 22083+
dece6358
AM
22084+static inline void au_rw_write_unlock(struct au_rwsem *rw)
22085+{ /* assert a writer is recorded, uncount it, then release the write lock */
22086+ AuRwMustWriteLock(rw);
22087+ AuDbgWcntDec(rw);
22088+ up_write(&rw->rwsem);
22089+}
22090+
22091+/* why is not _nested version defined; returns non-zero when the read lock was acquired */
22092+static inline int au_rw_read_trylock(struct au_rwsem *rw)
22093+{
22094+ int ret = down_read_trylock(&rw->rwsem);
22095+ if (ret)
22096+ AuDbgRcntInc(rw); /* count the reader only on success */
22097+ return ret;
22098+}
22099+
22100+static inline int au_rw_write_trylock(struct au_rwsem *rw)
22101+{ /* non-blocking write lock; returns non-zero when acquired */
22102+ int ret = down_write_trylock(&rw->rwsem);
22103+ if (ret)
22104+ AuDbgWcntInc(rw); /* count the writer only on success */
22105+ return ret;
22106+}
22107+
22108+#undef AuDbgCntInit
22109+#undef AuDbgRcntInc
22110+#undef AuDbgRcntDec
22111+#undef AuDbgWcntInc
22112+#undef AuDbgWcntDec
1facf9fc 22113+
22114+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
22115+static inline void prefix##_read_lock(param) \
dece6358 22116+{ au_rw_read_lock(rwsem); } \
1facf9fc 22117+static inline void prefix##_write_lock(param) \
dece6358 22118+{ au_rw_write_lock(rwsem); } \
1facf9fc 22119+static inline int prefix##_read_trylock(param) \
dece6358 22120+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 22121+static inline int prefix##_write_trylock(param) \
dece6358 22122+{ return au_rw_write_trylock(rwsem); }
1facf9fc 22123+/* why is not _nested version defined */
22124+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 22125+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 22126+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 22127+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 22128+
22129+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
22130+static inline void prefix##_read_unlock(param) \
dece6358 22131+{ au_rw_read_unlock(rwsem); } \
1facf9fc 22132+static inline void prefix##_write_unlock(param) \
dece6358 22133+{ au_rw_write_unlock(rwsem); } \
1facf9fc 22134+static inline void prefix##_downgrade_lock(param) \
dece6358 22135+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 22136+
22137+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
22138+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
22139+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
22140+
22141+#endif /* __KERNEL__ */
22142+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
22143diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
22144--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 22145+++ linux/fs/aufs/sbinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 22146@@ -0,0 +1,344 @@
1facf9fc 22147+/*
027c5e7a 22148+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22149+ *
22150+ * This program, aufs is free software; you can redistribute it and/or modify
22151+ * it under the terms of the GNU General Public License as published by
22152+ * the Free Software Foundation; either version 2 of the License, or
22153+ * (at your option) any later version.
dece6358
AM
22154+ *
22155+ * This program is distributed in the hope that it will be useful,
22156+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22157+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22158+ * GNU General Public License for more details.
22159+ *
22160+ * You should have received a copy of the GNU General Public License
22161+ * along with this program; if not, write to the Free Software
22162+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22163+ */
22164+
22165+/*
22166+ * superblock private data
22167+ */
22168+
e49829fe 22169+#include <linux/jiffies.h>
1facf9fc 22170+#include "aufs.h"
22171+
22172+/*
22173+ * they are necessary regardless sysfs is disabled.
22174+ */
22175+void au_si_free(struct kobject *kobj)
22176+{ /* kobject release callback: free the branches and every allocation owned by the au_sbinfo that embeds @kobj */
22177+ struct au_sbinfo *sbinfo;
b752ccd1 22178+ char *locked __maybe_unused; /* debug only */
1facf9fc 22179+
22180+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
22181+ AuDebugOn(!list_empty(&sbinfo->si_plink.head));
e49829fe 22182+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 22183+
e49829fe 22184+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 22185+ au_br_free(sbinfo);
e49829fe 22186+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
22187+
22188+ AuDebugOn(radix_tree_gang_lookup
22189+ (&sbinfo->au_si_pid.tree, (void **)&locked,
22190+ /*first_index*/PID_MAX_DEFAULT - 1,
22191+ /*max_items: &locked has room for exactly one result pointer*/1));
22192+
1facf9fc 22193+ kfree(sbinfo->si_branch);
b752ccd1 22194+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 22195+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 22196+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 22197+
22198+ kfree(sbinfo);
22199+}
22200+
22201+int au_si_alloc(struct super_block *sb)
22202+{ /* allocate an au_sbinfo with default settings, attach it to @sb->s_fs_info and return with si_rwsem write-locked; 0 or a negative errno */
22203+ int err;
22204+ struct au_sbinfo *sbinfo;
e49829fe 22205+ static struct lock_class_key aufs_si;
1facf9fc 22206+
22207+ err = -ENOMEM;
4a4d8108 22208+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 22209+ if (unlikely(!sbinfo))
22210+ goto out;
22211+
b752ccd1
AM
22212+ BUILD_BUG_ON(sizeof(unsigned long) !=
22213+ sizeof(*sbinfo->au_si_pid.bitmap));
22214+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
22215+ sizeof(*sbinfo->au_si_pid.bitmap),
22216+ GFP_NOFS);
22217+ if (unlikely(!sbinfo->au_si_pid.bitmap))
22218+ goto out_sbinfo;
22219+
1facf9fc 22220+ /* will be reallocated separately */
22221+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
22222+ if (unlikely(!sbinfo->si_branch))
b752ccd1 22223+ goto out_pidmap;
1facf9fc 22224+
1facf9fc 22225+ err = sysaufs_si_init(sbinfo);
22226+ if (unlikely(err))
22227+ goto out_br; /* last step that can fail; everything below is plain initialisation */
22228+
22229+ au_nwt_init(&sbinfo->si_nowait);
dece6358 22230+ au_rw_init_wlock(&sbinfo->si_rwsem); /* stays write-locked on return */
e49829fe 22231+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
22232+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
22233+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
22234+
7f207e10 22235+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
22236+ atomic_long_set(&sbinfo->si_nfiles, 0);
22237+
1facf9fc 22238+ sbinfo->si_bend = -1; /* no branch yet */
1facf9fc 22239+
22240+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
22241+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
22242+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
22243+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 22244+
e49829fe 22245+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 22246+
1facf9fc 22247+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 22248+ sbinfo->si_xino_brid = -1;
22249+ /* leave si_xib_last_pindex and si_xib_next_bit */
22250+
e49829fe 22251+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC); /* the default is multiplied by MSEC_PER_SEC, then stored in jiffies */
1facf9fc 22252+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
22253+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
22254+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
22255+
22256+ au_spl_init(&sbinfo->si_plink);
22257+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 22258+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 22259+
22260+ /* leave other members for sysaufs and si_mnt. */
22261+ sbinfo->si_sb = sb;
22262+ sb->s_fs_info = sbinfo;
b752ccd1 22263+ si_pid_set(sb);
1facf9fc 22264+ au_debug_sbinfo_init(sbinfo);
22265+ return 0; /* success */
22266+
4f0767ce 22267+out_br: /* error unwinding, in reverse order of allocation */
1facf9fc 22268+ kfree(sbinfo->si_branch);
4f0767ce 22269+out_pidmap:
b752ccd1 22270+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 22271+out_sbinfo:
1facf9fc 22272+ kfree(sbinfo);
4f0767ce 22273+out:
1facf9fc 22274+ return err;
22275+}
22276+
22277+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
22278+{
22279+ int err, sz;
22280+ struct au_branch **brp;
22281+
dece6358
AM
22282+ AuRwMustWriteLock(&sbinfo->si_rwsem);
22283+
1facf9fc 22284+ err = -ENOMEM;
22285+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
22286+ if (unlikely(!sz))
22287+ sz = sizeof(*brp);
22288+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
22289+ if (brp) {
22290+ sbinfo->si_branch = brp;
22291+ err = 0;
22292+ }
22293+
22294+ return err;
22295+}
22296+
22297+/* ---------------------------------------------------------------------- */
22298+
22299+unsigned int au_sigen_inc(struct super_block *sb)
22300+{
22301+ unsigned int gen;
22302+
dece6358
AM
22303+ SiMustWriteLock(sb);
22304+
1facf9fc 22305+ gen = ++au_sbi(sb)->si_generation;
22306+ au_update_digen(sb->s_root);
22307+ au_update_iigen(sb->s_root->d_inode);
22308+ sb->s_root->d_inode->i_version++;
22309+ return gen;
22310+}
22311+
22312+aufs_bindex_t au_new_br_id(struct super_block *sb)
22313+{
22314+ aufs_bindex_t br_id;
22315+ int i;
22316+ struct au_sbinfo *sbinfo;
22317+
dece6358
AM
22318+ SiMustWriteLock(sb);
22319+
1facf9fc 22320+ sbinfo = au_sbi(sb);
22321+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
22322+ br_id = ++sbinfo->si_last_br_id;
7f207e10 22323+ AuDebugOn(br_id < 0);
1facf9fc 22324+ if (br_id && au_br_index(sb, br_id) < 0)
22325+ return br_id;
22326+ }
22327+
22328+ return -1;
22329+}
22330+
22331+/* ---------------------------------------------------------------------- */
22332+
e49829fe
JR
22333+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
22334+int si_read_lock(struct super_block *sb, int flags)
22335+{
22336+ int err;
22337+
22338+ err = 0;
22339+ if (au_ftest_lock(flags, FLUSH))
22340+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22341+
22342+ si_noflush_read_lock(sb);
22343+ err = au_plink_maint(sb, flags);
22344+ if (unlikely(err))
22345+ si_read_unlock(sb);
22346+
22347+ return err;
22348+}
22349+
22350+int si_write_lock(struct super_block *sb, int flags)
22351+{
22352+ int err;
22353+
22354+ if (au_ftest_lock(flags, FLUSH))
22355+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22356+
22357+ si_noflush_write_lock(sb);
22358+ err = au_plink_maint(sb, flags);
22359+ if (unlikely(err))
22360+ si_write_unlock(sb);
22361+
22362+ return err;
22363+}
22364+
1facf9fc 22365+/* dentry and super_block lock. call at entry point */
e49829fe 22366+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 22367+{
e49829fe 22368+ int err;
027c5e7a 22369+ struct super_block *sb;
e49829fe 22370+
027c5e7a
AM
22371+ sb = dentry->d_sb;
22372+ err = si_read_lock(sb, flags);
22373+ if (unlikely(err))
22374+ goto out;
22375+
22376+ if (au_ftest_lock(flags, DW))
22377+ di_write_lock_child(dentry);
22378+ else
22379+ di_read_lock_child(dentry, flags);
22380+
22381+ if (au_ftest_lock(flags, GEN)) {
22382+ err = au_digen_test(dentry, au_sigen(sb));
22383+ AuDebugOn(!err && au_dbrange_test(dentry));
22384+ if (unlikely(err))
22385+ aufs_read_unlock(dentry, flags);
e49829fe
JR
22386+ }
22387+
027c5e7a 22388+out:
e49829fe 22389+ return err;
1facf9fc 22390+}
22391+
22392+void aufs_read_unlock(struct dentry *dentry, int flags)
22393+{
22394+ if (au_ftest_lock(flags, DW))
22395+ di_write_unlock(dentry);
22396+ else
22397+ di_read_unlock(dentry, flags);
22398+ si_read_unlock(dentry->d_sb);
22399+}
22400+
22401+void aufs_write_lock(struct dentry *dentry)
22402+{
e49829fe 22403+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 22404+ di_write_lock_child(dentry);
22405+}
22406+
22407+void aufs_write_unlock(struct dentry *dentry)
22408+{
22409+ di_write_unlock(dentry);
22410+ si_write_unlock(dentry->d_sb);
22411+}
22412+
e49829fe 22413+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 22414+{
e49829fe 22415+ int err;
027c5e7a
AM
22416+ unsigned int sigen;
22417+ struct super_block *sb;
e49829fe 22418+
027c5e7a
AM
22419+ sb = d1->d_sb;
22420+ err = si_read_lock(sb, flags);
22421+ if (unlikely(err))
22422+ goto out;
22423+
22424+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
22425+
22426+ if (au_ftest_lock(flags, GEN)) {
22427+ sigen = au_sigen(sb);
22428+ err = au_digen_test(d1, sigen);
22429+ AuDebugOn(!err && au_dbrange_test(d1));
22430+ if (!err) {
22431+ err = au_digen_test(d2, sigen);
22432+ AuDebugOn(!err && au_dbrange_test(d2));
22433+ }
22434+ if (unlikely(err))
22435+ aufs_read_and_write_unlock2(d1, d2);
22436+ }
22437+
22438+out:
e49829fe 22439+ return err;
1facf9fc 22440+}
22441+
22442+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
22443+{
22444+ di_write_unlock2(d1, d2);
22445+ si_read_unlock(d1->d_sb);
22446+}
b752ccd1
AM
22447+
22448+/* ---------------------------------------------------------------------- */
22449+
22450+int si_pid_test_slow(struct super_block *sb)
22451+{
22452+ void *p;
22453+
22454+ rcu_read_lock();
22455+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
22456+ rcu_read_unlock();
22457+
027c5e7a 22458+ return (long)!!p;
b752ccd1
AM
22459+}
22460+
22461+void si_pid_set_slow(struct super_block *sb)
22462+{
22463+ int err;
22464+ struct au_sbinfo *sbinfo;
22465+
22466+ AuDebugOn(si_pid_test_slow(sb));
22467+
22468+ sbinfo = au_sbi(sb);
22469+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
22470+ AuDebugOn(err);
22471+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22472+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 22473+ /*any valid ptr*/sb);
b752ccd1
AM
22474+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
22475+ AuDebugOn(err);
22476+ radix_tree_preload_end();
22477+}
22478+
22479+void si_pid_clr_slow(struct super_block *sb)
22480+{
22481+ void *p;
22482+ struct au_sbinfo *sbinfo;
22483+
22484+ AuDebugOn(!si_pid_test_slow(sb));
22485+
22486+ sbinfo = au_sbi(sb);
22487+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22488+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
22489+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 22490+}
7f207e10
AM
22491diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
22492--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
53392da6 22493+++ linux/fs/aufs/spl.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 22494@@ -0,0 +1,66 @@
1facf9fc 22495+/*
027c5e7a 22496+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22497+ *
22498+ * This program, aufs is free software; you can redistribute it and/or modify
22499+ * it under the terms of the GNU General Public License as published by
22500+ * the Free Software Foundation; either version 2 of the License, or
22501+ * (at your option) any later version.
dece6358
AM
22502+ *
22503+ * This program is distributed in the hope that it will be useful,
22504+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22505+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22506+ * GNU General Public License for more details.
22507+ *
22508+ * You should have received a copy of the GNU General Public License
22509+ * along with this program; if not, write to the Free Software
22510+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22511+ */
22512+
22513+/*
22514+ * simple list protected by a spinlock
22515+ */
22516+
22517+#ifndef __AUFS_SPL_H__
22518+#define __AUFS_SPL_H__
22519+
22520+#ifdef __KERNEL__
22521+
dece6358
AM
22522+#include <linux/spinlock.h>
22523+#include <linux/list.h>
4a4d8108 22524+#include <linux/rculist.h>
1facf9fc 22525+
22526+struct au_splhead {
22527+ spinlock_t spin;
22528+ struct list_head head;
22529+};
22530+
22531+static inline void au_spl_init(struct au_splhead *spl)
22532+{
22533+ spin_lock_init(&spl->spin);
22534+ INIT_LIST_HEAD(&spl->head);
22535+}
22536+
22537+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
22538+{
22539+ spin_lock(&spl->spin);
22540+ list_add(list, &spl->head);
22541+ spin_unlock(&spl->spin);
22542+}
22543+
22544+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
22545+{
22546+ spin_lock(&spl->spin);
22547+ list_del(list);
22548+ spin_unlock(&spl->spin);
22549+}
22550+
4a4d8108
AM
22551+static inline void au_spl_del_rcu(struct list_head *list,
22552+ struct au_splhead *spl)
22553+{
22554+ spin_lock(&spl->spin);
22555+ list_del_rcu(list);
22556+ spin_unlock(&spl->spin);
22557+}
22558+
1facf9fc 22559+#endif /* __KERNEL__ */
22560+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
22561diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
22562--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
53392da6 22563+++ linux/fs/aufs/super.c 2011-08-24 13:30:24.734646739 +0200
2cbb1c4b 22564@@ -0,0 +1,930 @@
1facf9fc 22565+/*
027c5e7a 22566+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22567+ *
22568+ * This program, aufs is free software; you can redistribute it and/or modify
22569+ * it under the terms of the GNU General Public License as published by
22570+ * the Free Software Foundation; either version 2 of the License, or
22571+ * (at your option) any later version.
dece6358
AM
22572+ *
22573+ * This program is distributed in the hope that it will be useful,
22574+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22575+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22576+ * GNU General Public License for more details.
22577+ *
22578+ * You should have received a copy of the GNU General Public License
22579+ * along with this program; if not, write to the Free Software
22580+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22581+ */
22582+
22583+/*
22584+ * mount and super_block operations
22585+ */
22586+
22587+#include <linux/buffer_head.h>
e49829fe 22588+#include <linux/jiffies.h>
dece6358 22589+#include <linux/module.h>
1facf9fc 22590+#include <linux/seq_file.h>
22591+#include <linux/statfs.h>
7f207e10
AM
22592+#include <linux/vmalloc.h>
22593+#include <linux/writeback.h>
1facf9fc 22594+#include "aufs.h"
22595+
22596+/*
22597+ * super_operations
22598+ */
22599+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
22600+{
22601+ struct au_icntnr *c;
22602+
22603+ c = au_cache_alloc_icntnr();
22604+ if (c) {
027c5e7a 22605+ au_icntnr_init(c);
1facf9fc 22606+ c->vfs_inode.i_version = 1; /* sigen(sb); */
22607+ c->iinfo.ii_hinode = NULL;
22608+ return &c->vfs_inode;
22609+ }
22610+ return NULL;
22611+}
22612+
027c5e7a
AM
22613+static void aufs_destroy_inode_cb(struct rcu_head *head)
22614+{
22615+ struct inode *inode = container_of(head, struct inode, i_rcu);
22616+
22617+ INIT_LIST_HEAD(&inode->i_dentry);
22618+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
22619+}
22620+
1facf9fc 22621+static void aufs_destroy_inode(struct inode *inode)
22622+{
22623+ au_iinfo_fin(inode);
027c5e7a 22624+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 22625+}
22626+
22627+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
22628+{
22629+ struct inode *inode;
22630+ int err;
22631+
22632+ inode = iget_locked(sb, ino);
22633+ if (unlikely(!inode)) {
22634+ inode = ERR_PTR(-ENOMEM);
22635+ goto out;
22636+ }
22637+ if (!(inode->i_state & I_NEW))
22638+ goto out;
22639+
22640+ err = au_xigen_new(inode);
22641+ if (!err)
22642+ err = au_iinfo_init(inode);
22643+ if (!err)
22644+ inode->i_version++;
22645+ else {
22646+ iget_failed(inode);
22647+ inode = ERR_PTR(err);
22648+ }
22649+
4f0767ce 22650+out:
1facf9fc 22651+ /* never return NULL */
22652+ AuDebugOn(!inode);
22653+ AuTraceErrPtr(inode);
22654+ return inode;
22655+}
22656+
22657+/* lock free root dinfo */
22658+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
22659+{
22660+ int err;
22661+ aufs_bindex_t bindex, bend;
22662+ struct path path;
4a4d8108 22663+ struct au_hdentry *hdp;
1facf9fc 22664+ struct au_branch *br;
22665+
22666+ err = 0;
22667+ bend = au_sbend(sb);
4a4d8108 22668+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 22669+ for (bindex = 0; !err && bindex <= bend; bindex++) {
22670+ br = au_sbr(sb, bindex);
22671+ path.mnt = br->br_mnt;
4a4d8108 22672+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 22673+ err = au_seq_path(seq, &path);
22674+ if (err > 0)
22675+ err = seq_printf(seq, "=%s",
22676+ au_optstr_br_perm(br->br_perm));
22677+ if (!err && bindex != bend)
22678+ err = seq_putc(seq, ':');
22679+ }
22680+
22681+ return err;
22682+}
22683+
22684+static void au_show_wbr_create(struct seq_file *m, int v,
22685+ struct au_sbinfo *sbinfo)
22686+{
22687+ const char *pat;
22688+
dece6358
AM
22689+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22690+
1facf9fc 22691+ seq_printf(m, ",create=");
22692+ pat = au_optstr_wbr_create(v);
22693+ switch (v) {
22694+ case AuWbrCreate_TDP:
22695+ case AuWbrCreate_RR:
22696+ case AuWbrCreate_MFS:
22697+ case AuWbrCreate_PMFS:
22698+ seq_printf(m, pat);
22699+ break;
22700+ case AuWbrCreate_MFSV:
22701+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
22702+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22703+ / MSEC_PER_SEC);
1facf9fc 22704+ break;
22705+ case AuWbrCreate_PMFSV:
22706+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
22707+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22708+ / MSEC_PER_SEC);
1facf9fc 22709+ break;
22710+ case AuWbrCreate_MFSRR:
22711+ seq_printf(m, /*pat*/"mfsrr:%llu",
22712+ sbinfo->si_wbr_mfs.mfsrr_watermark);
22713+ break;
22714+ case AuWbrCreate_MFSRRV:
22715+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
22716+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
22717+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22718+ / MSEC_PER_SEC);
1facf9fc 22719+ break;
22720+ }
22721+}
22722+
22723+static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
22724+{
22725+#ifdef CONFIG_SYSFS
22726+ return 0;
22727+#else
22728+ int err;
22729+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
22730+ aufs_bindex_t bindex, brid;
22731+ struct super_block *sb;
22732+ struct qstr *name;
22733+ struct file *f;
22734+ struct dentry *d, *h_root;
4a4d8108 22735+ struct au_hdentry *hdp;
1facf9fc 22736+
dece6358
AM
22737+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22738+
1facf9fc 22739+ err = 0;
22740+ sb = mnt->mnt_sb;
22741+ f = au_sbi(sb)->si_xib;
22742+ if (!f)
22743+ goto out;
22744+
22745+ /* stop printing the default xino path on the first writable branch */
22746+ h_root = NULL;
22747+ brid = au_xino_brid(sb);
22748+ if (brid >= 0) {
22749+ bindex = au_br_index(sb, brid);
4a4d8108
AM
22750+ hdp = au_di(sb->s_root)->di_hdentry;
22751+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 22752+ }
22753+ d = f->f_dentry;
22754+ name = &d->d_name;
22755+ /* safe ->d_parent because the file is unlinked */
22756+ if (d->d_parent == h_root
22757+ && name->len == len
22758+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
22759+ goto out;
22760+
22761+ seq_puts(seq, ",xino=");
22762+ err = au_xino_path(seq, f);
22763+
4f0767ce 22764+out:
1facf9fc 22765+ return err;
22766+#endif
22767+}
22768+
22769+/* seq_file will re-call me in case of too long string */
22770+static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
22771+{
027c5e7a 22772+ int err;
1facf9fc 22773+ unsigned int mnt_flags, v;
22774+ struct super_block *sb;
22775+ struct au_sbinfo *sbinfo;
22776+
22777+#define AuBool(name, str) do { \
22778+ v = au_opt_test(mnt_flags, name); \
22779+ if (v != au_opt_test(AuOpt_Def, name)) \
22780+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
22781+} while (0)
22782+
22783+#define AuStr(name, str) do { \
22784+ v = mnt_flags & AuOptMask_##name; \
22785+ if (v != (AuOpt_Def & AuOptMask_##name)) \
22786+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
22787+} while (0)
22788+
22789+#define AuUInt(name, str, val) do { \
22790+ if (val != AUFS_##name##_DEF) \
22791+ seq_printf(m, "," #str "=%u", val); \
22792+} while (0)
22793+
22794+ /* lock free root dinfo */
22795+ sb = mnt->mnt_sb;
22796+ si_noflush_read_lock(sb);
22797+ sbinfo = au_sbi(sb);
22798+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
22799+
22800+ mnt_flags = au_mntflags(sb);
22801+ if (au_opt_test(mnt_flags, XINO)) {
22802+ err = au_show_xino(m, mnt);
22803+ if (unlikely(err))
22804+ goto out;
22805+ } else
22806+ seq_puts(m, ",noxino");
22807+
22808+ AuBool(TRUNC_XINO, trunc_xino);
22809+ AuStr(UDBA, udba);
dece6358 22810+ AuBool(SHWH, shwh);
1facf9fc 22811+ AuBool(PLINK, plink);
4a4d8108 22812+ AuBool(DIO, dio);
1facf9fc 22813+ /* AuBool(DIRPERM1, dirperm1); */
22814+ /* AuBool(REFROF, refrof); */
22815+
22816+ v = sbinfo->si_wbr_create;
22817+ if (v != AuWbrCreate_Def)
22818+ au_show_wbr_create(m, v, sbinfo);
22819+
22820+ v = sbinfo->si_wbr_copyup;
22821+ if (v != AuWbrCopyup_Def)
22822+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
22823+
22824+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
22825+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
22826+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
22827+
22828+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
22829+
027c5e7a
AM
22830+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
22831+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 22832+
22833+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
22834+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
22835+
22836+ AuBool(SUM, sum);
22837+ /* AuBool(SUM_W, wsum); */
22838+ AuBool(WARN_PERM, warn_perm);
22839+ AuBool(VERBOSE, verbose);
22840+
4f0767ce 22841+out:
1facf9fc 22842+ /* be sure to print "br:" last */
22843+ if (!sysaufs_brs) {
22844+ seq_puts(m, ",br:");
22845+ au_show_brs(m, sb);
22846+ }
22847+ si_read_unlock(sb);
22848+ return 0;
22849+
1facf9fc 22850+#undef AuBool
22851+#undef AuStr
4a4d8108 22852+#undef AuUInt
1facf9fc 22853+}
22854+
22855+/* ---------------------------------------------------------------------- */
22856+
22857+/* sum mode which returns the summation for statfs(2) */
22858+
/*
 * Saturating addition: returns a + b, or ULLONG_MAX when the sum wraps.
 * A wrapped unsigned sum is always smaller than either operand, so
 * "sum >= a" means no overflow; this includes b == 0, where sum == a.
 */
static u64 au_add_till_max(u64 a, u64 b)
{
	u64 sum;

	sum = a + b;
	if (sum >= a)
		return sum;
	return ULLONG_MAX;
}
22869+
22870+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
22871+{
22872+ int err;
22873+ u64 blocks, bfree, bavail, files, ffree;
22874+ aufs_bindex_t bend, bindex, i;
22875+ unsigned char shared;
7f207e10 22876+ struct path h_path;
1facf9fc 22877+ struct super_block *h_sb;
22878+
22879+ blocks = 0;
22880+ bfree = 0;
22881+ bavail = 0;
22882+ files = 0;
22883+ ffree = 0;
22884+
22885+ err = 0;
22886+ bend = au_sbend(sb);
22887+ for (bindex = bend; bindex >= 0; bindex--) {
7f207e10
AM
22888+ h_path.mnt = au_sbr_mnt(sb, bindex);
22889+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 22890+ shared = 0;
22891+ for (i = bindex + 1; !shared && i <= bend; i++)
22892+ shared = (au_sbr_sb(sb, i) == h_sb);
22893+ if (shared)
22894+ continue;
22895+
22896+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
22897+ h_path.dentry = h_path.mnt->mnt_root;
22898+ err = vfs_statfs(&h_path, buf);
1facf9fc 22899+ if (unlikely(err))
22900+ goto out;
22901+
22902+ blocks = au_add_till_max(blocks, buf->f_blocks);
22903+ bfree = au_add_till_max(bfree, buf->f_bfree);
22904+ bavail = au_add_till_max(bavail, buf->f_bavail);
22905+ files = au_add_till_max(files, buf->f_files);
22906+ ffree = au_add_till_max(ffree, buf->f_ffree);
22907+ }
22908+
22909+ buf->f_blocks = blocks;
22910+ buf->f_bfree = bfree;
22911+ buf->f_bavail = bavail;
22912+ buf->f_files = files;
22913+ buf->f_ffree = ffree;
22914+
4f0767ce 22915+out:
1facf9fc 22916+ return err;
22917+}
22918+
22919+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
22920+{
22921+ int err;
7f207e10 22922+ struct path h_path;
1facf9fc 22923+ struct super_block *sb;
22924+
22925+ /* lock free root dinfo */
22926+ sb = dentry->d_sb;
22927+ si_noflush_read_lock(sb);
7f207e10 22928+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 22929+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
22930+ h_path.mnt = au_sbr_mnt(sb, 0);
22931+ h_path.dentry = h_path.mnt->mnt_root;
22932+ err = vfs_statfs(&h_path, buf);
22933+ } else
1facf9fc 22934+ err = au_statfs_sum(sb, buf);
22935+ si_read_unlock(sb);
22936+
22937+ if (!err) {
22938+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 22939+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 22940+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
22941+ }
22942+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
22943+
22944+ return err;
22945+}
22946+
22947+/* ---------------------------------------------------------------------- */
22948+
1facf9fc 22949+/* final actions when unmounting a file system */
22950+static void aufs_put_super(struct super_block *sb)
22951+{
22952+ struct au_sbinfo *sbinfo;
22953+
22954+ sbinfo = au_sbi(sb);
22955+ if (!sbinfo)
22956+ return;
22957+
1facf9fc 22958+ dbgaufs_si_fin(sbinfo);
22959+ kobject_put(&sbinfo->si_kobj);
22960+}
22961+
22962+/* ---------------------------------------------------------------------- */
22963+
7f207e10
AM
/*
 * Free an array obtained from au_array_alloc(), picking vfree() or kfree()
 * according to where the memory lives.  NULL is accepted.
 */
void au_array_free(void *array)
{
	if (!array)
		return;

	if (is_vmalloc_addr(array))
		vfree(array);
	else
		kfree(array);
}
22973+
22974+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
22975+{
22976+ void *array;
22977+ unsigned long long n;
22978+
22979+ array = NULL;
22980+ n = 0;
22981+ if (!*hint)
22982+ goto out;
22983+
22984+ if (*hint > ULLONG_MAX / sizeof(array)) {
22985+ array = ERR_PTR(-EMFILE);
22986+ pr_err("hint %llu\n", *hint);
22987+ goto out;
22988+ }
22989+
22990+ array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
22991+ if (unlikely(!array))
22992+ array = vmalloc(sizeof(array) * *hint);
22993+ if (unlikely(!array)) {
22994+ array = ERR_PTR(-ENOMEM);
22995+ goto out;
22996+ }
22997+
22998+ n = cb(array, *hint, arg);
22999+ AuDebugOn(n > *hint);
23000+
23001+out:
23002+ *hint = n;
23003+ return array;
23004+}
23005+
23006+static unsigned long long au_iarray_cb(void *a,
23007+ unsigned long long max __maybe_unused,
23008+ void *arg)
23009+{
23010+ unsigned long long n;
23011+ struct inode **p, *inode;
23012+ struct list_head *head;
23013+
23014+ n = 0;
23015+ p = a;
23016+ head = arg;
2cbb1c4b 23017+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
23018+ list_for_each_entry(inode, head, i_sb_list) {
23019+ if (!is_bad_inode(inode)
23020+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
23021+ spin_lock(&inode->i_lock);
23022+ if (atomic_read(&inode->i_count)) {
23023+ au_igrab(inode);
23024+ *p++ = inode;
23025+ n++;
23026+ AuDebugOn(n > max);
23027+ }
23028+ spin_unlock(&inode->i_lock);
7f207e10
AM
23029+ }
23030+ }
2cbb1c4b 23031+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
23032+
23033+ return n;
23034+}
23035+
23036+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
23037+{
23038+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
23039+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
23040+}
23041+
/* iput() the first @max inodes of @a, then free the array itself. */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max)
		iput(a[idx++]);

	au_array_free(a);
}
23050+
23051+/* ---------------------------------------------------------------------- */
23052+
1facf9fc 23053+/*
23054+ * refresh dentry and inode at remount time.
23055+ */
027c5e7a
AM
23056+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
23057+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
23058+ struct dentry *parent)
1facf9fc 23059+{
23060+ int err;
1facf9fc 23061+
23062+ di_write_lock_child(dentry);
1facf9fc 23063+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
23064+ err = au_refresh_dentry(dentry, parent);
23065+ if (!err && dir_flags)
23066+ au_hn_reset(dentry->d_inode, dir_flags);
1facf9fc 23067+ di_read_unlock(parent, AuLock_IR);
1facf9fc 23068+ di_write_unlock(dentry);
23069+
23070+ return err;
23071+}
23072+
027c5e7a
AM
23073+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
23074+ struct au_sbinfo *sbinfo,
23075+ const unsigned int dir_flags)
1facf9fc 23076+{
027c5e7a
AM
23077+ int err;
23078+ struct dentry *parent;
23079+ struct inode *inode;
23080+
23081+ err = 0;
23082+ parent = dget_parent(dentry);
23083+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
23084+ inode = dentry->d_inode;
23085+ if (inode) {
23086+ if (!S_ISDIR(inode->i_mode))
23087+ err = au_do_refresh(dentry, /*dir_flags*/0,
23088+ parent);
23089+ else {
23090+ err = au_do_refresh(dentry, dir_flags, parent);
23091+ if (unlikely(err))
23092+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
23093+ }
23094+ } else
23095+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
23096+ AuDbgDentry(dentry);
23097+ }
23098+ dput(parent);
23099+
23100+ AuTraceErr(err);
23101+ return err;
1facf9fc 23102+}
23103+
027c5e7a 23104+static int au_refresh_d(struct super_block *sb)
1facf9fc 23105+{
23106+ int err, i, j, ndentry, e;
027c5e7a 23107+ unsigned int sigen;
1facf9fc 23108+ struct au_dcsub_pages dpages;
23109+ struct au_dpage *dpage;
027c5e7a
AM
23110+ struct dentry **dentries, *d;
23111+ struct au_sbinfo *sbinfo;
23112+ struct dentry *root = sb->s_root;
23113+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
1facf9fc 23114+
027c5e7a
AM
23115+ err = au_dpages_init(&dpages, GFP_NOFS);
23116+ if (unlikely(err))
1facf9fc 23117+ goto out;
027c5e7a
AM
23118+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
23119+ if (unlikely(err))
1facf9fc 23120+ goto out_dpages;
1facf9fc 23121+
027c5e7a
AM
23122+ sigen = au_sigen(sb);
23123+ sbinfo = au_sbi(sb);
23124+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 23125+ dpage = dpages.dpages + i;
23126+ dentries = dpage->dentries;
23127+ ndentry = dpage->ndentry;
027c5e7a 23128+ for (j = 0; j < ndentry; j++) {
1facf9fc 23129+ d = dentries[j];
027c5e7a
AM
23130+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
23131+ if (unlikely(e && !err))
23132+ err = e;
23133+ /* go on even err */
1facf9fc 23134+ }
23135+ }
23136+
4f0767ce 23137+out_dpages:
1facf9fc 23138+ au_dpages_free(&dpages);
4f0767ce 23139+out:
1facf9fc 23140+ return err;
23141+}
23142+
027c5e7a 23143+static int au_refresh_i(struct super_block *sb)
1facf9fc 23144+{
027c5e7a
AM
23145+ int err, e;
23146+ unsigned int sigen;
23147+ unsigned long long max, ull;
23148+ struct inode *inode, **array;
1facf9fc 23149+
027c5e7a
AM
23150+ array = au_iarray_alloc(sb, &max);
23151+ err = PTR_ERR(array);
23152+ if (IS_ERR(array))
23153+ goto out;
1facf9fc 23154+
23155+ err = 0;
027c5e7a
AM
23156+ sigen = au_sigen(sb);
23157+ for (ull = 0; ull < max; ull++) {
23158+ inode = array[ull];
23159+ if (au_iigen(inode) != sigen) {
1facf9fc 23160+ ii_write_lock_child(inode);
027c5e7a 23161+ e = au_refresh_hinode_self(inode);
1facf9fc 23162+ ii_write_unlock(inode);
23163+ if (unlikely(e)) {
027c5e7a 23164+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 23165+ if (!err)
23166+ err = e;
23167+ /* go on even if err */
23168+ }
23169+ }
1facf9fc 23170+ }
23171+
027c5e7a 23172+ au_iarray_free(array, max);
1facf9fc 23173+
4f0767ce 23174+out:
1facf9fc 23175+ return err;
23176+}
23177+
027c5e7a 23178+static void au_remount_refresh(struct super_block *sb)
1facf9fc 23179+{
027c5e7a
AM
23180+ int err, e;
23181+ unsigned int udba;
23182+ aufs_bindex_t bindex, bend;
1facf9fc 23183+ struct dentry *root;
23184+ struct inode *inode;
027c5e7a 23185+ struct au_branch *br;
1facf9fc 23186+
23187+ au_sigen_inc(sb);
027c5e7a 23188+ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
1facf9fc 23189+
23190+ root = sb->s_root;
23191+ DiMustNoWaiters(root);
23192+ inode = root->d_inode;
23193+ IiMustNoWaiters(inode);
1facf9fc 23194+
027c5e7a
AM
23195+ udba = au_opt_udba(sb);
23196+ bend = au_sbend(sb);
23197+ for (bindex = 0; bindex <= bend; bindex++) {
23198+ br = au_sbr(sb, bindex);
23199+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 23200+ if (unlikely(err))
027c5e7a
AM
23201+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
23202+ bindex, err);
23203+ /* go on even if err */
1facf9fc 23204+ }
027c5e7a 23205+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 23206+
027c5e7a
AM
23207+ di_write_unlock(root);
23208+ err = au_refresh_d(sb);
23209+ e = au_refresh_i(sb);
23210+ if (unlikely(e && !err))
23211+ err = e;
1facf9fc 23212+ /* aufs_write_lock() calls ..._child() */
23213+ di_write_lock_child(root);
027c5e7a
AM
23214+
23215+ au_cpup_attr_all(inode, /*force*/1);
23216+
23217+ if (unlikely(err))
23218+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 23219+}
23220+
23221+/* stop extra interpretation of errno in mount(8), and strange error messages */
23222+static int cvt_err(int err)
23223+{
23224+ AuTraceErr(err);
23225+
23226+ switch (err) {
23227+ case -ENOENT:
23228+ case -ENOTDIR:
23229+ case -EEXIST:
23230+ case -EIO:
23231+ err = -EINVAL;
23232+ }
23233+ return err;
23234+}
23235+
23236+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
23237+{
4a4d8108
AM
23238+ int err, do_dx;
23239+ unsigned int mntflags;
1facf9fc 23240+ struct au_opts opts;
23241+ struct dentry *root;
23242+ struct inode *inode;
23243+ struct au_sbinfo *sbinfo;
23244+
23245+ err = 0;
23246+ root = sb->s_root;
23247+ if (!data || !*data) {
e49829fe
JR
23248+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23249+ if (!err) {
23250+ di_write_lock_child(root);
23251+ err = au_opts_verify(sb, *flags, /*pending*/0);
23252+ aufs_write_unlock(root);
23253+ }
1facf9fc 23254+ goto out;
23255+ }
23256+
23257+ err = -ENOMEM;
23258+ memset(&opts, 0, sizeof(opts));
23259+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23260+ if (unlikely(!opts.opt))
23261+ goto out;
23262+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23263+ opts.flags = AuOpts_REMOUNT;
23264+ opts.sb_flags = *flags;
23265+
23266+ /* parse it before aufs lock */
23267+ err = au_opts_parse(sb, data, &opts);
23268+ if (unlikely(err))
23269+ goto out_opts;
23270+
23271+ sbinfo = au_sbi(sb);
23272+ inode = root->d_inode;
23273+ mutex_lock(&inode->i_mutex);
e49829fe
JR
23274+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23275+ if (unlikely(err))
23276+ goto out_mtx;
23277+ di_write_lock_child(root);
1facf9fc 23278+
23279+ /* au_opts_remount() may return an error */
23280+ err = au_opts_remount(sb, &opts);
23281+ au_opts_free(&opts);
23282+
027c5e7a
AM
23283+ if (au_ftest_opts(opts.flags, REFRESH))
23284+ au_remount_refresh(sb);
1facf9fc 23285+
4a4d8108
AM
23286+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
23287+ mntflags = au_mntflags(sb);
23288+ do_dx = !!au_opt_test(mntflags, DIO);
23289+ au_dy_arefresh(do_dx);
23290+ }
23291+
1facf9fc 23292+ aufs_write_unlock(root);
953406b4 23293+
e49829fe
JR
23294+out_mtx:
23295+ mutex_unlock(&inode->i_mutex);
4f0767ce 23296+out_opts:
1facf9fc 23297+ free_page((unsigned long)opts.opt);
4f0767ce 23298+out:
1facf9fc 23299+ err = cvt_err(err);
23300+ AuTraceErr(err);
23301+ return err;
23302+}
23303+
4a4d8108 23304+static const struct super_operations aufs_sop = {
1facf9fc 23305+ .alloc_inode = aufs_alloc_inode,
23306+ .destroy_inode = aufs_destroy_inode,
b752ccd1 23307+ /* always deleting, no clearing */
1facf9fc 23308+ .drop_inode = generic_delete_inode,
23309+ .show_options = aufs_show_options,
23310+ .statfs = aufs_statfs,
23311+ .put_super = aufs_put_super,
23312+ .remount_fs = aufs_remount_fs
23313+};
23314+
23315+/* ---------------------------------------------------------------------- */
23316+
23317+static int alloc_root(struct super_block *sb)
23318+{
23319+ int err;
23320+ struct inode *inode;
23321+ struct dentry *root;
23322+
23323+ /* build the root dir inode/dentry of sb; returns 0 or an error code */
23324+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
23325+ err = PTR_ERR(inode);
23326+ if (IS_ERR(inode))
23327+ goto out;
23328+
23329+ inode->i_op = &aufs_dir_iop;
23330+ inode->i_fop = &aufs_dir_fop;
23331+ inode->i_mode = S_IFDIR;
23332+ inode->i_nlink = 2;
23333+ unlock_new_inode(inode);
23334+ err = -ENOMEM; /* err held PTR_ERR() of a valid inode, which is not an errno */
23335+ root = d_alloc_root(inode);
23336+ if (unlikely(!root))
23337+ goto out_iput;
23338+ err = PTR_ERR(root);
23339+ if (IS_ERR(root))
23340+ goto out_iput;
23341+
4a4d8108 23342+ err = au_di_init(root);
1facf9fc 23343+ if (!err) {
23344+ sb->s_root = root;
23345+ return 0; /* success */
23346+ }
23347+ dput(root);
23348+ goto out; /* do not iput */
23349+
4f0767ce 23350+out_iput:
1facf9fc 23351+ iget_failed(inode);
4f0767ce 23352+out:
1facf9fc 23353+ return err;
23354+
23355+}
23356+
23357+static int aufs_fill_super(struct super_block *sb, void *raw_data,
23358+ int silent __maybe_unused)
23359+{
23360+ int err;
23361+ struct au_opts opts;
23362+ struct dentry *root;
23363+ struct inode *inode;
23364+ char *arg = raw_data;
23365+
23366+ if (unlikely(!arg || !*arg)) {
23367+ err = -EINVAL;
4a4d8108 23368+ pr_err("no arg\n");
1facf9fc 23369+ goto out;
23370+ }
23371+
23372+ err = -ENOMEM;
23373+ memset(&opts, 0, sizeof(opts));
23374+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23375+ if (unlikely(!opts.opt))
23376+ goto out;
23377+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23378+ opts.sb_flags = sb->s_flags;
23379+
23380+ err = au_si_alloc(sb);
23381+ if (unlikely(err))
23382+ goto out_opts;
23383+
23384+ /* all timestamps always follow the ones on the branch */
23385+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23386+ sb->s_op = &aufs_sop;
027c5e7a 23387+ sb->s_d_op = &aufs_dop;
1facf9fc 23388+ sb->s_magic = AUFS_SUPER_MAGIC;
23389+ sb->s_maxbytes = 0;
23390+ au_export_init(sb);
23391+
23392+ err = alloc_root(sb);
23393+ if (unlikely(err)) {
23394+ si_write_unlock(sb);
23395+ goto out_info;
23396+ }
23397+ root = sb->s_root;
23398+ inode = root->d_inode;
23399+
23400+ /*
23401+ * actually we can parse options regardless of the aufs lock here.
23402+ * but at remount time, parsing must be done before aufs lock.
23403+ * so we follow the same rule.
23404+ */
23405+ ii_write_lock_parent(inode);
23406+ aufs_write_unlock(root);
23407+ err = au_opts_parse(sb, arg, &opts);
23408+ if (unlikely(err))
23409+ goto out_root;
23410+
23411+ /* lock vfs_inode first, then aufs. */
23412+ mutex_lock(&inode->i_mutex);
1facf9fc 23413+ aufs_write_lock(root);
23414+ err = au_opts_mount(sb, &opts);
23415+ au_opts_free(&opts);
1facf9fc 23416+ aufs_write_unlock(root);
23417+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
23418+ if (!err)
23419+ goto out_opts; /* success */
1facf9fc 23420+
4f0767ce 23421+out_root:
1facf9fc 23422+ dput(root);
23423+ sb->s_root = NULL;
4f0767ce 23424+out_info:
2cbb1c4b 23425+ dbgaufs_si_fin(au_sbi(sb));
1facf9fc 23426+ kobject_put(&au_sbi(sb)->si_kobj);
23427+ sb->s_fs_info = NULL;
4f0767ce 23428+out_opts:
1facf9fc 23429+ free_page((unsigned long)opts.opt);
4f0767ce 23430+out:
1facf9fc 23431+ AuTraceErr(err);
23432+ err = cvt_err(err);
23433+ AuTraceErr(err);
23434+ return err;
23435+}
23436+
23437+/* ---------------------------------------------------------------------- */
23438+
027c5e7a
AM
23439+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
23440+ const char *dev_name __maybe_unused,
23441+ void *raw_data)
1facf9fc 23442+{
027c5e7a 23443+ struct dentry *root;
1facf9fc 23444+ struct super_block *sb;
23445+
23446+ /* all timestamps always follow the ones on the branch */
23447+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
23448+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
23449+ if (IS_ERR(root))
23450+ goto out;
23451+
23452+ sb = root->d_sb;
23453+ si_write_lock(sb, !AuLock_FLUSH);
23454+ sysaufs_brs_add(sb, 0);
23455+ si_write_unlock(sb);
23456+ au_sbilist_add(sb);
23457+
23458+out:
23459+ return root;
1facf9fc 23460+}
23461+
e49829fe
JR
23462+static void aufs_kill_sb(struct super_block *sb)
23463+{
23464+ struct au_sbinfo *sbinfo;
23465+
23466+ sbinfo = au_sbi(sb);
23467+ if (sbinfo) {
23468+ au_sbilist_del(sb);
23469+ aufs_write_lock(sb->s_root);
23470+ if (sbinfo->si_wbr_create_ops->fin)
23471+ sbinfo->si_wbr_create_ops->fin(sb);
23472+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
23473+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
027c5e7a 23474+ au_remount_refresh(sb);
e49829fe
JR
23475+ }
23476+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
23477+ au_plink_put(sb, /*verbose*/1);
23478+ au_xino_clr(sb);
23479+ aufs_write_unlock(sb->s_root);
e49829fe
JR
23480+ au_nwt_flush(&sbinfo->si_nowait);
23481+ }
23482+ generic_shutdown_super(sb);
23483+}
23484+
1facf9fc 23485+struct file_system_type aufs_fs_type = {
23486+ .name = AUFS_FSTYPE,
23487+ .fs_flags =
23488+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */
23489+ | FS_REVAL_DOT, /* for NFS branch and udba */
027c5e7a 23490+ .mount = aufs_mount,
e49829fe 23491+ .kill_sb = aufs_kill_sb,
1facf9fc 23492+ /* no need to __module_get() and module_put(). */
23493+ .owner = THIS_MODULE,
23494+};
7f207e10
AM
23495diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
23496--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
23497+++ linux/fs/aufs/super.h 2011-08-24 13:30:24.734646739 +0200
23498@@ -0,0 +1,547 @@
1facf9fc 23499+/*
027c5e7a 23500+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 23501+ *
23502+ * This program, aufs is free software; you can redistribute it and/or modify
23503+ * it under the terms of the GNU General Public License as published by
23504+ * the Free Software Foundation; either version 2 of the License, or
23505+ * (at your option) any later version.
dece6358
AM
23506+ *
23507+ * This program is distributed in the hope that it will be useful,
23508+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23509+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23510+ * GNU General Public License for more details.
23511+ *
23512+ * You should have received a copy of the GNU General Public License
23513+ * along with this program; if not, write to the Free Software
23514+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 23515+ */
23516+
23517+/*
23518+ * super_block operations
23519+ */
23520+
23521+#ifndef __AUFS_SUPER_H__
23522+#define __AUFS_SUPER_H__
23523+
23524+#ifdef __KERNEL__
23525+
23526+#include <linux/fs.h>
1facf9fc 23527+#include <linux/aufs_type.h>
23528+#include "rwsem.h"
23529+#include "spl.h"
23530+#include "wkq.h"
23531+
23532+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
23533+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
23534+ loff_t *);
23535+
23536+/* policies to select one among multiple writable branches */
23537+struct au_wbr_copyup_operations {
23538+ int (*copyup)(struct dentry *dentry);
23539+};
23540+
23541+struct au_wbr_create_operations {
23542+ int (*create)(struct dentry *dentry, int isdir);
23543+ int (*init)(struct super_block *sb);
23544+ int (*fin)(struct super_block *sb);
23545+};
23546+
23547+struct au_wbr_mfs {
23548+ struct mutex mfs_lock; /* protect this structure */
23549+ unsigned long mfs_jiffy;
23550+ unsigned long mfs_expire;
23551+ aufs_bindex_t mfs_bindex;
23552+
23553+ unsigned long long mfsrr_bytes;
23554+ unsigned long long mfsrr_watermark;
23555+};
23556+
1facf9fc 23557+struct au_branch;
23558+struct au_sbinfo {
23559+ /* nowait tasks in the system-wide workqueue */
23560+ struct au_nowait_tasks si_nowait;
23561+
b752ccd1
AM
23562+ /*
23563+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
23564+ * rwsem for au_sbinfo is necessary.
23565+ */
dece6358 23566+ struct au_rwsem si_rwsem;
1facf9fc 23567+
b752ccd1
AM
23568+ /* prevent recursive locking in deleting inode */
23569+ struct {
23570+ unsigned long *bitmap;
23571+ spinlock_t tree_lock;
23572+ struct radix_tree_root tree;
23573+ } au_si_pid;
23574+
7f207e10
AM
23575+ /*
23576+ * dirty approach to protect sb->s_inodes and ->s_files from remount.
23577+ */
23578+ atomic_long_t si_ninodes, si_nfiles;
23579+
1facf9fc 23580+ /* branch management */
23581+ unsigned int si_generation;
23582+
23583+ /* see above flags */
23584+ unsigned char au_si_status;
23585+
23586+ aufs_bindex_t si_bend;
7f207e10
AM
23587+
23588+ /* dirty trick to keep br_id plus */
23589+ unsigned int si_last_br_id :
23590+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 23591+ struct au_branch **si_branch;
23592+
23593+ /* policy to select a writable branch */
23594+ unsigned char si_wbr_copyup;
23595+ unsigned char si_wbr_create;
23596+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
23597+ struct au_wbr_create_operations *si_wbr_create_ops;
23598+
23599+ /* round robin */
23600+ atomic_t si_wbr_rr_next;
23601+
23602+ /* most free space */
23603+ struct au_wbr_mfs si_wbr_mfs;
23604+
23605+ /* mount flags */
23606+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
23607+ unsigned int si_mntflags;
23608+
23609+ /* external inode number (bitmap and translation table) */
23610+ au_readf_t si_xread;
23611+ au_writef_t si_xwrite;
23612+ struct file *si_xib;
23613+ struct mutex si_xib_mtx; /* protect xib members */
23614+ unsigned long *si_xib_buf;
23615+ unsigned long si_xib_last_pindex;
23616+ int si_xib_next_bit;
23617+ aufs_bindex_t si_xino_brid;
23618+ /* reserved for future use */
23619+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
23620+
23621+#ifdef CONFIG_AUFS_EXPORT
23622+ /* i_generation */
23623+ struct file *si_xigen;
23624+ atomic_t si_xigen_next;
23625+#endif
23626+
23627+ /* vdir parameters */
e49829fe 23628+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 23629+ unsigned int si_rdblk; /* deblk size */
23630+ unsigned int si_rdhash; /* hash size */
23631+
23632+ /*
23633+ * If the number of whiteouts is larger than si_dirwh, leave all of
23634+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
23635+ * future fsck.aufs or kernel thread will remove them later.
23636+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
23637+ */
23638+ unsigned int si_dirwh;
23639+
23640+ /*
23641+ * rename(2) a directory with all children.
23642+ */
23643+ /* reserved for future use */
23644+ /* int si_rendir; */
23645+
23646+ /* pseudo_link list */
23647+ struct au_splhead si_plink;
23648+ wait_queue_head_t si_plink_wq;
4a4d8108 23649+ spinlock_t si_plink_maint_lock;
e49829fe 23650+ pid_t si_plink_maint_pid;
1facf9fc 23651+
23652+ /*
23653+ * sysfs and lifetime management.
23654+ * this is not a small structure and it may be a waste of memory in case
23655+ * sysfs is disabled, particularly when many aufs-es are mounted.
23656+ * but using sysfs is majority.
23657+ */
23658+ struct kobject si_kobj;
23659+#ifdef CONFIG_DEBUG_FS
23660+ struct dentry *si_dbgaufs, *si_dbgaufs_xib;
23661+#ifdef CONFIG_AUFS_EXPORT
23662+ struct dentry *si_dbgaufs_xigen;
23663+#endif
23664+#endif
23665+
e49829fe
JR
23666+#ifdef CONFIG_AUFS_SBILIST
23667+ struct list_head si_list;
23668+#endif
23669+
1facf9fc 23670+ /* dirty, necessary for unmounting, sysfs and sysrq */
23671+ struct super_block *si_sb;
23672+};
23673+
dece6358
AM
23674+/* sbinfo status flags */
23675+/*
23676+ * set true when refresh_dirs() failed at remount time.
23677+ * then try refreshing dirs at access time again.
23678+ * if it is false, refreshing dirs at access time is unnecessary
23679+ */
027c5e7a 23680+#define AuSi_FAILED_REFRESH_DIR 1
dece6358
AM
23681+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
23682+ unsigned int flag)
23683+{
23684+ AuRwMustAnyLock(&sbi->si_rwsem);
23685+ return sbi->au_si_status & flag;
23686+}
23687+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
23688+#define au_fset_si(sbinfo, name) do { \
23689+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23690+ (sbinfo)->au_si_status |= AuSi_##name; \
23691+} while (0)
23692+#define au_fclr_si(sbinfo, name) do { \
23693+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23694+ (sbinfo)->au_si_status &= ~AuSi_##name; \
23695+} while (0)
23696+
1facf9fc 23697+/* ---------------------------------------------------------------------- */
23698+
23699+/* policy to select one among writable branches */
4a4d8108
AM
23700+#define AuWbrCopyup(sbinfo, ...) \
23701+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
23702+#define AuWbrCreate(sbinfo, ...) \
23703+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 23704+
23705+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
23706+#define AuLock_DW 1 /* write-lock dentry */
23707+#define AuLock_IR (1 << 1) /* read-lock inode */
23708+#define AuLock_IW (1 << 2) /* write-lock inode */
23709+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
23710+#define AuLock_DIR (1 << 4) /* target is a dir */
e49829fe
JR
23711+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
23712+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 23713+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 23714+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
23715+#define au_fset_lock(flags, name) \
23716+ do { (flags) |= AuLock_##name; } while (0)
23717+#define au_fclr_lock(flags, name) \
23718+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 23719+
23720+/* ---------------------------------------------------------------------- */
23721+
23722+/* super.c */
23723+extern struct file_system_type aufs_fs_type;
23724+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
23725+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
23726+ void *arg);
23727+void au_array_free(void *array);
23728+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
23729+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
23730+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 23731+
23732+/* sbinfo.c */
23733+void au_si_free(struct kobject *kobj);
23734+int au_si_alloc(struct super_block *sb);
23735+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
23736+
23737+unsigned int au_sigen_inc(struct super_block *sb);
23738+aufs_bindex_t au_new_br_id(struct super_block *sb);
23739+
e49829fe
JR
23740+int si_read_lock(struct super_block *sb, int flags);
23741+int si_write_lock(struct super_block *sb, int flags);
23742+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 23743+void aufs_read_unlock(struct dentry *dentry, int flags);
23744+void aufs_write_lock(struct dentry *dentry);
23745+void aufs_write_unlock(struct dentry *dentry);
e49829fe 23746+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 23747+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
23748+
b752ccd1
AM
23749+int si_pid_test_slow(struct super_block *sb);
23750+void si_pid_set_slow(struct super_block *sb);
23751+void si_pid_clr_slow(struct super_block *sb);
23752+
1facf9fc 23753+/* wbr_policy.c */
23754+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
23755+extern struct au_wbr_create_operations au_wbr_create_ops[];
23756+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
23757+
23758+/* ---------------------------------------------------------------------- */
23759+
23760+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
23761+{
23762+ return sb->s_fs_info;
23763+}
23764+
23765+/* ---------------------------------------------------------------------- */
23766+
23767+#ifdef CONFIG_AUFS_EXPORT
23768+void au_export_init(struct super_block *sb);
23769+
b752ccd1 23770+static inline int au_test_nfsd(void)
1facf9fc 23771+{
b752ccd1
AM
23772+ struct task_struct *tsk = current;
23773+
23774+ return (tsk->flags & PF_KTHREAD)
23775+ && !strcmp(tsk->comm, "nfsd");
1facf9fc 23776+}
23777+
b752ccd1 23778+void au_xigen_inc(struct inode *inode);
1facf9fc 23779+int au_xigen_new(struct inode *inode);
23780+int au_xigen_set(struct super_block *sb, struct file *base);
23781+void au_xigen_clr(struct super_block *sb);
23782+
23783+static inline int au_busy_or_stale(void)
23784+{
b752ccd1 23785+ if (!au_test_nfsd())
1facf9fc 23786+ return -EBUSY;
23787+ return -ESTALE;
23788+}
23789+#else
4a4d8108 23790+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1
AM
23791+AuStubInt0(au_test_nfsd, void)
23792+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
23793+AuStubInt0(au_xigen_new, struct inode *inode)
23794+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
23795+AuStubVoid(au_xigen_clr, struct super_block *sb)
1facf9fc 23796+static inline int au_busy_or_stale(void)
23797+{
23798+ return -EBUSY;
23799+}
23800+#endif /* CONFIG_AUFS_EXPORT */
23801+
23802+/* ---------------------------------------------------------------------- */
23803+
e49829fe
JR
23804+#ifdef CONFIG_AUFS_SBILIST
23805+/* module.c */
23806+extern struct au_splhead au_sbilist;
23807+
23808+static inline void au_sbilist_init(void)
23809+{
23810+ au_spl_init(&au_sbilist);
23811+}
23812+
23813+static inline void au_sbilist_add(struct super_block *sb)
23814+{
23815+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
23816+}
23817+
23818+static inline void au_sbilist_del(struct super_block *sb)
23819+{
23820+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
23821+}
53392da6
AM
23822+
23823+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
23824+static inline void au_sbilist_lock(void)
23825+{
23826+ spin_lock(&au_sbilist.spin);
23827+}
23828+
23829+static inline void au_sbilist_unlock(void)
23830+{
23831+ spin_unlock(&au_sbilist.spin);
23832+}
23833+#define AuGFP_SBILIST GFP_ATOMIC
23834+#else
23835+AuStubVoid(au_sbilist_lock, void)
23836+AuStubVoid(au_sbilist_unlock, void)
23837+#define AuGFP_SBILIST GFP_NOFS
23838+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
23839+#else
23840+AuStubVoid(au_sbilist_init, void)
23841+AuStubVoid(au_sbilist_add, struct super_block*)
23842+AuStubVoid(au_sbilist_del, struct super_block*)
53392da6
AM
23843+AuStubVoid(au_sbilist_lock, void)
23844+AuStubVoid(au_sbilist_unlock, void)
23845+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
23846+#endif
23847+
23848+/* ---------------------------------------------------------------------- */
23849+
1facf9fc 23850+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
23851+{
dece6358
AM
23852+ /*
23853+ * This function is a dynamic '__init' function actually,
23854+ * so the tiny check for si_rwsem is unnecessary.
23855+ */
23856+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 23857+#ifdef CONFIG_DEBUG_FS
23858+ sbinfo->si_dbgaufs = NULL;
23859+ sbinfo->si_dbgaufs_xib = NULL;
23860+#ifdef CONFIG_AUFS_EXPORT
23861+ sbinfo->si_dbgaufs_xigen = NULL;
23862+#endif
23863+#endif
23864+}
23865+
23866+/* ---------------------------------------------------------------------- */
23867+
b752ccd1
AM
23868+static inline pid_t si_pid_bit(void)
23869+{
23870+ /* the origin of pid is 1, but the bitmap's is 0 */
23871+ return current->pid - 1;
23872+}
23873+
23874+static inline int si_pid_test(struct super_block *sb)
23875+{
23876+ pid_t bit = si_pid_bit();
23877+ if (bit < PID_MAX_DEFAULT)
23878+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23879+ else
23880+ return si_pid_test_slow(sb);
23881+}
23882+
23883+static inline void si_pid_set(struct super_block *sb)
23884+{
23885+ pid_t bit = si_pid_bit();
23886+ if (bit < PID_MAX_DEFAULT) {
23887+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
23888+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23889+ /* smp_mb(); */
23890+ } else
23891+ si_pid_set_slow(sb);
23892+}
23893+
23894+static inline void si_pid_clr(struct super_block *sb)
23895+{
23896+ pid_t bit = si_pid_bit();
23897+ if (bit < PID_MAX_DEFAULT) {
23898+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
23899+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23900+ /* smp_mb(); */
23901+ } else
23902+ si_pid_clr_slow(sb);
23903+}
23904+
23905+/* ---------------------------------------------------------------------- */
23906+
1facf9fc 23907+/* lock superblock. mainly for entry point functions */
23908+/*
b752ccd1
AM
23909+ * __si_read_lock, __si_write_lock,
23910+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 23911+ */
b752ccd1 23912+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 23913+
dece6358
AM
23914+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
23915+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
23916+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
23917+
b752ccd1
AM
23918+static inline void si_noflush_read_lock(struct super_block *sb)
23919+{
23920+ __si_read_lock(sb);
23921+ si_pid_set(sb);
23922+}
23923+
23924+static inline int si_noflush_read_trylock(struct super_block *sb)
23925+{
23926+ int locked = __si_read_trylock(sb);
23927+ if (locked)
23928+ si_pid_set(sb);
23929+ return locked;
23930+}
23931+
23932+static inline void si_noflush_write_lock(struct super_block *sb)
23933+{
23934+ __si_write_lock(sb);
23935+ si_pid_set(sb);
23936+}
23937+
23938+static inline int si_noflush_write_trylock(struct super_block *sb)
23939+{
23940+ int locked = __si_write_trylock(sb);
23941+ if (locked)
23942+ si_pid_set(sb);
23943+ return locked;
23944+}
23945+
e49829fe 23946+#if 0 /* unused */
1facf9fc 23947+static inline int si_read_trylock(struct super_block *sb, int flags)
23948+{
23949+ if (au_ftest_lock(flags, FLUSH))
23950+ au_nwt_flush(&au_sbi(sb)->si_nowait);
23951+ return si_noflush_read_trylock(sb);
23952+}
e49829fe 23953+#endif
1facf9fc 23954+
b752ccd1
AM
23955+static inline void si_read_unlock(struct super_block *sb)
23956+{
23957+ si_pid_clr(sb);
23958+ __si_read_unlock(sb);
23959+}
23960+
b752ccd1 23961+#if 0 /* unused */
1facf9fc 23962+static inline int si_write_trylock(struct super_block *sb, int flags)
23963+{
23964+ if (au_ftest_lock(flags, FLUSH))
23965+ au_nwt_flush(&au_sbi(sb)->si_nowait);
23966+ return si_noflush_write_trylock(sb);
23967+}
b752ccd1
AM
23968+#endif
23969+
23970+static inline void si_write_unlock(struct super_block *sb)
23971+{
23972+ si_pid_clr(sb);
23973+ __si_write_unlock(sb);
23974+}
23975+
23976+#if 0 /* unused */
23977+static inline void si_downgrade_lock(struct super_block *sb)
23978+{
23979+ __si_downgrade_lock(sb);
23980+}
23981+#endif
1facf9fc 23982+
23983+/* ---------------------------------------------------------------------- */
23984+
23985+static inline aufs_bindex_t au_sbend(struct super_block *sb)
23986+{
dece6358 23987+ SiMustAnyLock(sb);
1facf9fc 23988+ return au_sbi(sb)->si_bend;
23989+}
23990+
23991+static inline unsigned int au_mntflags(struct super_block *sb)
23992+{
dece6358 23993+ SiMustAnyLock(sb);
1facf9fc 23994+ return au_sbi(sb)->si_mntflags;
23995+}
23996+
23997+static inline unsigned int au_sigen(struct super_block *sb)
23998+{
dece6358 23999+ SiMustAnyLock(sb);
1facf9fc 24000+ return au_sbi(sb)->si_generation;
24001+}
24002+
7f207e10
AM
24003+static inline void au_ninodes_inc(struct super_block *sb)
24004+{
24005+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
24006+}
24007+
24008+static inline void au_ninodes_dec(struct super_block *sb)
24009+{
24010+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
24011+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
24012+}
24013+
24014+static inline void au_nfiles_inc(struct super_block *sb)
24015+{
24016+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
24017+}
24018+
24019+static inline void au_nfiles_dec(struct super_block *sb)
24020+{
24021+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
24022+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
24023+}
24024+
1facf9fc 24025+static inline struct au_branch *au_sbr(struct super_block *sb,
24026+ aufs_bindex_t bindex)
24027+{
dece6358 24028+ SiMustAnyLock(sb);
1facf9fc 24029+ return au_sbi(sb)->si_branch[0 + bindex];
24030+}
24031+
24032+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
24033+{
dece6358 24034+ SiMustWriteLock(sb);
1facf9fc 24035+ au_sbi(sb)->si_xino_brid = brid;
24036+}
24037+
24038+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
24039+{
dece6358 24040+ SiMustAnyLock(sb);
1facf9fc 24041+ return au_sbi(sb)->si_xino_brid;
24042+}
24043+
24044+#endif /* __KERNEL__ */
24045+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
24046diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
24047--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24048+++ linux/fs/aufs/sysaufs.c 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24049@@ -0,0 +1,107 @@
1facf9fc 24050+/*
027c5e7a 24051+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24052+ *
24053+ * This program, aufs is free software; you can redistribute it and/or modify
24054+ * it under the terms of the GNU General Public License as published by
24055+ * the Free Software Foundation; either version 2 of the License, or
24056+ * (at your option) any later version.
dece6358
AM
24057+ *
24058+ * This program is distributed in the hope that it will be useful,
24059+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24060+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24061+ * GNU General Public License for more details.
24062+ *
24063+ * You should have received a copy of the GNU General Public License
24064+ * along with this program; if not, write to the Free Software
24065+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24066+ */
24067+
24068+/*
24069+ * sysfs interface and lifetime management
24070+ * they are necessary regardless of whether sysfs is disabled.
24071+ */
24072+
24073+#include <linux/fs.h>
24074+#include <linux/random.h>
24075+#include <linux/sysfs.h>
24076+#include "aufs.h"
24077+
24078+unsigned long sysaufs_si_mask;
e49829fe 24079+struct kset *sysaufs_kset;
1facf9fc 24080+
24081+#define AuSiAttr(_name) { \
24082+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
24083+ .show = sysaufs_si_##_name, \
24084+}
24085+
24086+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
24087+struct attribute *sysaufs_si_attrs[] = {
24088+ &sysaufs_si_attr_xi_path.attr,
24089+ NULL,
24090+};
24091+
4a4d8108 24092+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 24093+ .show = sysaufs_si_show
24094+};
24095+
24096+static struct kobj_type au_sbi_ktype = {
24097+ .release = au_si_free,
24098+ .sysfs_ops = &au_sbi_ops,
24099+ .default_attrs = sysaufs_si_attrs
24100+};
24101+
24102+/* ---------------------------------------------------------------------- */
24103+
24104+int sysaufs_si_init(struct au_sbinfo *sbinfo)
24105+{
24106+ int err;
24107+
e49829fe 24108+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 24109+ /* cf. sysaufs_name() */
24110+ err = kobject_init_and_add
e49829fe 24111+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 24112+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
24113+
24114+ dbgaufs_si_null(sbinfo);
24115+ if (!err) {
24116+ err = dbgaufs_si_init(sbinfo);
24117+ if (unlikely(err))
24118+ kobject_put(&sbinfo->si_kobj);
24119+ }
24120+ return err;
24121+}
24122+
24123+void sysaufs_fin(void)
24124+{
24125+ dbgaufs_fin();
e49829fe
JR
24126+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
24127+ kset_unregister(sysaufs_kset);
1facf9fc 24128+}
24129+
24130+int __init sysaufs_init(void)
24131+{
24132+ int err;
24133+
24134+ do {
24135+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
24136+ } while (!sysaufs_si_mask);
24137+
4a4d8108 24138+ err = -EINVAL;
e49829fe
JR
24139+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
24140+ if (unlikely(!sysaufs_kset))
4a4d8108 24141+ goto out;
e49829fe
JR
24142+ err = PTR_ERR(sysaufs_kset);
24143+ if (IS_ERR(sysaufs_kset))
1facf9fc 24144+ goto out;
e49829fe 24145+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 24146+ if (unlikely(err)) {
e49829fe 24147+ kset_unregister(sysaufs_kset);
1facf9fc 24148+ goto out;
24149+ }
24150+
24151+ err = dbgaufs_init();
24152+ if (unlikely(err))
24153+ sysaufs_fin();
4f0767ce 24154+out:
1facf9fc 24155+ return err;
24156+}
7f207e10
AM
24157diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
24158--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 24159+++ linux/fs/aufs/sysaufs.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24160@@ -0,0 +1,105 @@
1facf9fc 24161+/*
027c5e7a 24162+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24163+ *
24164+ * This program, aufs is free software; you can redistribute it and/or modify
24165+ * it under the terms of the GNU General Public License as published by
24166+ * the Free Software Foundation; either version 2 of the License, or
24167+ * (at your option) any later version.
dece6358
AM
24168+ *
24169+ * This program is distributed in the hope that it will be useful,
24170+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24171+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24172+ * GNU General Public License for more details.
24173+ *
24174+ * You should have received a copy of the GNU General Public License
24175+ * along with this program; if not, write to the Free Software
24176+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24177+ */
24178+
24179+/*
24180+ * sysfs interface and mount lifetime management
24181+ */
24182+
24183+#ifndef __SYSAUFS_H__
24184+#define __SYSAUFS_H__
24185+
24186+#ifdef __KERNEL__
24187+
1facf9fc 24188+#include <linux/sysfs.h>
24189+#include <linux/aufs_type.h>
24190+#include "module.h"
24191+
dece6358
AM
24192+struct super_block;
24193+struct au_sbinfo;
24194+
1facf9fc 24195+struct sysaufs_si_attr {
24196+ struct attribute attr;
24197+ int (*show)(struct seq_file *seq, struct super_block *sb);
24198+};
24199+
24200+/* ---------------------------------------------------------------------- */
24201+
24202+/* sysaufs.c */
24203+extern unsigned long sysaufs_si_mask;
e49829fe 24204+extern struct kset *sysaufs_kset;
1facf9fc 24205+extern struct attribute *sysaufs_si_attrs[];
24206+int sysaufs_si_init(struct au_sbinfo *sbinfo);
24207+int __init sysaufs_init(void);
24208+void sysaufs_fin(void);
24209+
24210+/* ---------------------------------------------------------------------- */
24211+
24212+/* some people don't like to show a pointer in the kernel */
24213+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
24214+{
24215+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
24216+}
24217+
24218+#define SysaufsSiNamePrefix "si_"
24219+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
24220+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
24221+{
24222+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
24223+ sysaufs_si_id(sbinfo));
24224+}
24225+
24226+struct au_branch;
24227+#ifdef CONFIG_SYSFS
24228+/* sysfs.c */
24229+extern struct attribute_group *sysaufs_attr_group;
24230+
24231+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
24232+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24233+ char *buf);
24234+
24235+void sysaufs_br_init(struct au_branch *br);
24236+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
24237+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
24238+
24239+#define sysaufs_brs_init() do {} while (0)
24240+
24241+#else
24242+#define sysaufs_attr_group NULL
24243+
4a4d8108 24244+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
1facf9fc 24245+
24246+static inline
24247+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24248+ char *buf)
24249+{
24250+ return 0;
24251+}
24252+
4a4d8108
AM
24253+AuStubVoid(sysaufs_br_init, struct au_branch *br)
24254+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
24255+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 24256+
24257+static inline void sysaufs_brs_init(void)
24258+{
24259+ sysaufs_brs = 0;
24260+}
24261+
24262+#endif /* CONFIG_SYSFS */
24263+
24264+#endif /* __KERNEL__ */
24265+#endif /* __SYSAUFS_H__ */
7f207e10
AM
24266diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
24267--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24268+++ linux/fs/aufs/sysfs.c 2011-08-24 13:30:24.734646739 +0200
953406b4 24269@@ -0,0 +1,250 @@
1facf9fc 24270+/*
027c5e7a 24271+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24272+ *
24273+ * This program, aufs is free software; you can redistribute it and/or modify
24274+ * it under the terms of the GNU General Public License as published by
24275+ * the Free Software Foundation; either version 2 of the License, or
24276+ * (at your option) any later version.
dece6358
AM
24277+ *
24278+ * This program is distributed in the hope that it will be useful,
24279+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24280+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24281+ * GNU General Public License for more details.
24282+ *
24283+ * You should have received a copy of the GNU General Public License
24284+ * along with this program; if not, write to the Free Software
24285+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24286+ */
24287+
24288+/*
24289+ * sysfs interface
24290+ */
24291+
24292+#include <linux/fs.h>
dece6358 24293+#include <linux/module.h>
1facf9fc 24294+#include <linux/seq_file.h>
24295+#include <linux/sysfs.h>
24296+#include "aufs.h"
24297+
4a4d8108
AM
24298+#ifdef CONFIG_AUFS_FS_MODULE
24299+/* this entry violates the "one line per file" policy of sysfs */
24300+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
24301+ char *buf)
24302+{
24303+ ssize_t err;
24304+ static char *conf =
24305+/* this file is generated at compiling */
24306+#include "conf.str"
24307+ ;
24308+
24309+ err = snprintf(buf, PAGE_SIZE, conf);
24310+ if (unlikely(err >= PAGE_SIZE))
24311+ err = -EFBIG;
24312+ return err;
24313+}
24314+
24315+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
24316+#endif
24317+
1facf9fc 24318+static struct attribute *au_attr[] = {
4a4d8108
AM
24319+#ifdef CONFIG_AUFS_FS_MODULE
24320+ &au_config_attr.attr,
24321+#endif
1facf9fc 24322+ NULL, /* need to NULL terminate the list of attributes */
24323+};
24324+
24325+static struct attribute_group sysaufs_attr_group_body = {
24326+ .attrs = au_attr
24327+};
24328+
24329+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
24330+
24331+/* ---------------------------------------------------------------------- */
24332+
24333+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
24334+{
24335+ int err;
24336+
dece6358
AM
24337+ SiMustAnyLock(sb);
24338+
1facf9fc 24339+ err = 0;
24340+ if (au_opt_test(au_mntflags(sb), XINO)) {
24341+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
24342+ seq_putc(seq, '\n');
24343+ }
24344+ return err;
24345+}
24346+
24347+/*
24348+ * the lifetime of branch is independent from the entry under sysfs.
24349+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
24350+ * unlinked.
24351+ */
24352+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
24353+ aufs_bindex_t bindex)
24354+{
24355+ struct path path;
24356+ struct dentry *root;
24357+ struct au_branch *br;
24358+
24359+ AuDbg("b%d\n", bindex);
24360+
24361+ root = sb->s_root;
24362+ di_read_lock_parent(root, !AuLock_IR);
24363+ br = au_sbr(sb, bindex);
24364+ path.mnt = br->br_mnt;
24365+ path.dentry = au_h_dptr(root, bindex);
24366+ au_seq_path(seq, &path);
24367+ di_read_unlock(root, !AuLock_IR);
24368+ seq_printf(seq, "=%s\n", au_optstr_br_perm(br->br_perm));
24369+ return 0;
24370+}
24371+
24372+/* ---------------------------------------------------------------------- */
24373+
24374+static struct seq_file *au_seq(char *p, ssize_t len)
24375+{
24376+ struct seq_file *seq;
24377+
24378+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
24379+ if (seq) {
24380+ /* mutex_init(&seq.lock); */
24381+ seq->buf = p;
24382+ seq->size = len;
24383+ return seq; /* success */
24384+ }
24385+
24386+ seq = ERR_PTR(-ENOMEM);
24387+ return seq;
24388+}
24389+
24390+#define SysaufsBr_PREFIX "br"
24391+
24392+/* todo: file size may exceed PAGE_SIZE */
24393+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 24394+ char *buf)
1facf9fc 24395+{
24396+ ssize_t err;
24397+ long l;
24398+ aufs_bindex_t bend;
24399+ struct au_sbinfo *sbinfo;
24400+ struct super_block *sb;
24401+ struct seq_file *seq;
24402+ char *name;
24403+ struct attribute **cattr;
24404+
24405+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
24406+ sb = sbinfo->si_sb;
1308ab2a 24407+
24408+ /*
24409+ * prevent a race condition between sysfs and aufs.
24410+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
24411+ * prohibits maintaining the sysfs entries.
24412+ * here we acquire read lock after sysfs_get_active_two().
24413+ * on the other hand, the remount process may maintain the sysfs/aufs
24414+ * entries after acquiring write lock.
24415+ * it can cause a deadlock.
24416+ * simply we gave up processing read here.
24417+ */
24418+ err = -EBUSY;
24419+ if (unlikely(!si_noflush_read_trylock(sb)))
24420+ goto out;
1facf9fc 24421+
24422+ seq = au_seq(buf, PAGE_SIZE);
24423+ err = PTR_ERR(seq);
24424+ if (IS_ERR(seq))
1308ab2a 24425+ goto out_unlock;
1facf9fc 24426+
24427+ name = (void *)attr->name;
24428+ cattr = sysaufs_si_attrs;
24429+ while (*cattr) {
24430+ if (!strcmp(name, (*cattr)->name)) {
24431+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
24432+ ->show(seq, sb);
24433+ goto out_seq;
24434+ }
24435+ cattr++;
24436+ }
24437+
24438+ bend = au_sbend(sb);
24439+ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
24440+ name += sizeof(SysaufsBr_PREFIX) - 1;
24441+ err = strict_strtol(name, 10, &l);
24442+ if (!err) {
24443+ if (l <= bend)
24444+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
24445+ else
24446+ err = -ENOENT;
24447+ }
24448+ goto out_seq;
24449+ }
24450+ BUG();
24451+
4f0767ce 24452+out_seq:
1facf9fc 24453+ if (!err) {
24454+ err = seq->count;
24455+ /* sysfs limit */
24456+ if (unlikely(err == PAGE_SIZE))
24457+ err = -EFBIG;
24458+ }
24459+ kfree(seq);
4f0767ce 24460+out_unlock:
1facf9fc 24461+ si_read_unlock(sb);
4f0767ce 24462+out:
1facf9fc 24463+ return err;
24464+}
24465+
24466+/* ---------------------------------------------------------------------- */
24467+
24468+void sysaufs_br_init(struct au_branch *br)
24469+{
4a4d8108
AM
24470+ struct attribute *attr = &br->br_attr;
24471+
24472+ sysfs_attr_init(attr);
24473+ attr->name = br->br_name;
24474+ attr->mode = S_IRUGO;
1facf9fc 24475+}
24476+
24477+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
24478+{
24479+ struct au_branch *br;
24480+ struct kobject *kobj;
24481+ aufs_bindex_t bend;
24482+
24483+ dbgaufs_brs_del(sb, bindex);
24484+
24485+ if (!sysaufs_brs)
24486+ return;
24487+
24488+ kobj = &au_sbi(sb)->si_kobj;
24489+ bend = au_sbend(sb);
24490+ for (; bindex <= bend; bindex++) {
24491+ br = au_sbr(sb, bindex);
24492+ sysfs_remove_file(kobj, &br->br_attr);
24493+ }
24494+}
24495+
24496+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
24497+{
24498+ int err;
24499+ aufs_bindex_t bend;
24500+ struct kobject *kobj;
24501+ struct au_branch *br;
24502+
24503+ dbgaufs_brs_add(sb, bindex);
24504+
24505+ if (!sysaufs_brs)
24506+ return;
24507+
24508+ kobj = &au_sbi(sb)->si_kobj;
24509+ bend = au_sbend(sb);
24510+ for (; bindex <= bend; bindex++) {
24511+ br = au_sbr(sb, bindex);
24512+ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
24513+ "%d", bindex);
24514+ err = sysfs_create_file(kobj, &br->br_attr);
24515+ if (unlikely(err))
4a4d8108
AM
24516+ pr_warning("failed %s under sysfs(%d)\n",
24517+ br->br_name, err);
1facf9fc 24518+ }
24519+}
7f207e10
AM
24520diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
24521--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24522+++ linux/fs/aufs/sysrq.c 2011-08-24 13:30:24.734646739 +0200
2cbb1c4b 24523@@ -0,0 +1,151 @@
1facf9fc 24524+/*
027c5e7a 24525+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24526+ *
24527+ * This program, aufs is free software; you can redistribute it and/or modify
24528+ * it under the terms of the GNU General Public License as published by
24529+ * the Free Software Foundation; either version 2 of the License, or
24530+ * (at your option) any later version.
dece6358
AM
24531+ *
24532+ * This program is distributed in the hope that it will be useful,
24533+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24534+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24535+ * GNU General Public License for more details.
24536+ *
24537+ * You should have received a copy of the GNU General Public License
24538+ * along with this program; if not, write to the Free Software
24539+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24540+ */
24541+
24542+/*
24543+ * magic sysrq handler
24544+ */
24545+
24546+#include <linux/fs.h>
24547+#include <linux/module.h>
24548+#include <linux/moduleparam.h>
24549+/* #include <linux/sysrq.h> */
027c5e7a 24550+#include <linux/writeback.h>
1facf9fc 24551+#include "aufs.h"
24552+
24553+/* ---------------------------------------------------------------------- */
24554+
24555+static void sysrq_sb(struct super_block *sb)
24556+{
24557+ char *plevel;
24558+ struct au_sbinfo *sbinfo;
24559+ struct file *file;
24560+
24561+ plevel = au_plevel;
24562+ au_plevel = KERN_WARNING;
1facf9fc 24563+
24564+ sbinfo = au_sbi(sb);
4a4d8108
AM
24565+ /* since we define pr_fmt, call printk directly */
24566+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
24567+ printk(KERN_WARNING AUFS_NAME ": superblock\n");
1facf9fc 24568+ au_dpri_sb(sb);
027c5e7a
AM
24569+
24570+#if 0
4a4d8108 24571+ printk(KERN_WARNING AUFS_NAME ": root dentry\n");
1facf9fc 24572+ au_dpri_dentry(sb->s_root);
4a4d8108 24573+ printk(KERN_WARNING AUFS_NAME ": root inode\n");
1facf9fc 24574+ au_dpri_inode(sb->s_root->d_inode);
027c5e7a
AM
24575+#endif
24576+
1facf9fc 24577+#if 0
027c5e7a
AM
24578+ do {
24579+ int err, i, j, ndentry;
24580+ struct au_dcsub_pages dpages;
24581+ struct au_dpage *dpage;
24582+
24583+ err = au_dpages_init(&dpages, GFP_ATOMIC);
24584+ if (unlikely(err))
24585+ break;
24586+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
24587+ if (!err)
24588+ for (i = 0; i < dpages.ndpage; i++) {
24589+ dpage = dpages.dpages + i;
24590+ ndentry = dpage->ndentry;
24591+ for (j = 0; j < ndentry; j++)
24592+ au_dpri_dentry(dpage->dentries[j]);
24593+ }
24594+ au_dpages_free(&dpages);
24595+ } while (0);
24596+#endif
24597+
24598+#if 1
24599+ {
24600+ struct inode *i;
24601+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n");
2cbb1c4b
JR
24602+ spin_lock(&inode_sb_list_lock);
24603+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
24604+ spin_lock(&i->i_lock);
027c5e7a
AM
24605+ if (1 || list_empty(&i->i_dentry))
24606+ au_dpri_inode(i);
2cbb1c4b
JR
24607+ spin_unlock(&i->i_lock);
24608+ }
24609+ spin_unlock(&inode_sb_list_lock);
027c5e7a 24610+ }
1facf9fc 24611+#endif
4a4d8108 24612+ printk(KERN_WARNING AUFS_NAME ": files\n");
0c5527e5
AM
24613+ lg_global_lock(files_lglock);
24614+ do_file_list_for_each_entry(sb, file) {
4a4d8108
AM
24615+ umode_t mode;
24616+ mode = file->f_dentry->d_inode->i_mode;
24617+ if (!special_file(mode) || au_special_file(mode))
1facf9fc 24618+ au_dpri_file(file);
0c5527e5
AM
24619+ } while_file_list_for_each_entry;
24620+ lg_global_unlock(files_lglock);
e49829fe 24621+ printk(KERN_WARNING AUFS_NAME ": done\n");
1facf9fc 24622+
24623+ au_plevel = plevel;
1facf9fc 24624+}
24625+
24626+/* ---------------------------------------------------------------------- */
24627+
24628+/* module parameter */
24629+static char *aufs_sysrq_key = "a";
24630+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
24631+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
24632+
0c5527e5 24633+static void au_sysrq(int key __maybe_unused)
1facf9fc 24634+{
1facf9fc 24635+ struct au_sbinfo *sbinfo;
24636+
027c5e7a 24637+ lockdep_off();
53392da6 24638+ au_sbilist_lock();
e49829fe 24639+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 24640+ sysrq_sb(sbinfo->si_sb);
53392da6 24641+ au_sbilist_unlock();
027c5e7a 24642+ lockdep_on();
1facf9fc 24643+}
24644+
24645+static struct sysrq_key_op au_sysrq_op = {
24646+ .handler = au_sysrq,
24647+ .help_msg = "Aufs",
24648+ .action_msg = "Aufs",
24649+ .enable_mask = SYSRQ_ENABLE_DUMP
24650+};
24651+
24652+/* ---------------------------------------------------------------------- */
24653+
24654+int __init au_sysrq_init(void)
24655+{
24656+ int err;
24657+ char key;
24658+
24659+ err = -1;
24660+ key = *aufs_sysrq_key;
24661+ if ('a' <= key && key <= 'z')
24662+ err = register_sysrq_key(key, &au_sysrq_op);
24663+ if (unlikely(err))
4a4d8108 24664+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 24665+ return err;
24666+}
24667+
24668+void au_sysrq_fin(void)
24669+{
24670+ int err;
24671+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
24672+ if (unlikely(err))
4a4d8108 24673+ pr_err("err %d (ignored)\n", err);
1facf9fc 24674+}
7f207e10
AM
24675diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
24676--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24677+++ linux/fs/aufs/vdir.c 2011-08-24 13:30:24.734646739 +0200
7f207e10 24678@@ -0,0 +1,886 @@
1facf9fc 24679+/*
027c5e7a 24680+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24681+ *
24682+ * This program, aufs is free software; you can redistribute it and/or modify
24683+ * it under the terms of the GNU General Public License as published by
24684+ * the Free Software Foundation; either version 2 of the License, or
24685+ * (at your option) any later version.
dece6358
AM
24686+ *
24687+ * This program is distributed in the hope that it will be useful,
24688+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24689+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24690+ * GNU General Public License for more details.
24691+ *
24692+ * You should have received a copy of the GNU General Public License
24693+ * along with this program; if not, write to the Free Software
24694+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24695+ */
24696+
24697+/*
24698+ * virtual or vertical directory
24699+ */
24700+
dece6358 24701+#include <linux/hash.h>
1facf9fc 24702+#include "aufs.h"
24703+
dece6358 24704+static unsigned int calc_size(int nlen)
1facf9fc 24705+{
dece6358 24706+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 24707+}
24708+
24709+static int set_deblk_end(union au_vdir_deblk_p *p,
24710+ union au_vdir_deblk_p *deblk_end)
24711+{
24712+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
24713+ p->de->de_str.len = 0;
24714+ /* smp_mb(); */
24715+ return 0;
24716+ }
24717+ return -1; /* error */
24718+}
24719+
24720+/* returns true or false */
24721+static int is_deblk_end(union au_vdir_deblk_p *p,
24722+ union au_vdir_deblk_p *deblk_end)
24723+{
24724+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
24725+ return !p->de->de_str.len;
24726+ return 1;
24727+}
24728+
24729+static unsigned char *last_deblk(struct au_vdir *vdir)
24730+{
24731+ return vdir->vd_deblk[vdir->vd_nblk - 1];
24732+}
24733+
24734+/* ---------------------------------------------------------------------- */
24735+
1308ab2a 24736+/* estimate the appropriate size for name hash table */
24737+unsigned int au_rdhash_est(loff_t sz)
24738+{
24739+ unsigned int n;
24740+
24741+ n = UINT_MAX;
24742+ sz >>= 10;
24743+ if (sz < n)
24744+ n = sz;
24745+ if (sz < AUFS_RDHASH_DEF)
24746+ n = AUFS_RDHASH_DEF;
4a4d8108 24747+ /* pr_info("n %u\n", n); */
1308ab2a 24748+ return n;
24749+}
24750+
1facf9fc 24751+/*
24752+ * the allocated memory has to be freed by
dece6358 24753+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 24754+ */
dece6358 24755+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 24756+{
1facf9fc 24757+ struct hlist_head *head;
dece6358 24758+ unsigned int u;
1facf9fc 24759+
dece6358
AM
24760+ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp);
24761+ if (head) {
24762+ nhash->nh_num = num_hash;
24763+ nhash->nh_head = head;
24764+ for (u = 0; u < num_hash; u++)
1facf9fc 24765+ INIT_HLIST_HEAD(head++);
dece6358 24766+ return 0; /* success */
1facf9fc 24767+ }
1facf9fc 24768+
dece6358 24769+ return -ENOMEM;
1facf9fc 24770+}
24771+
dece6358
AM
24772+static void nhash_count(struct hlist_head *head)
24773+{
24774+#if 0
24775+ unsigned long n;
24776+ struct hlist_node *pos;
24777+
24778+ n = 0;
24779+ hlist_for_each(pos, head)
24780+ n++;
4a4d8108 24781+ pr_info("%lu\n", n);
dece6358
AM
24782+#endif
24783+}
24784+
24785+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 24786+{
1facf9fc 24787+ struct au_vdir_wh *tpos;
24788+ struct hlist_node *pos, *node;
24789+
dece6358
AM
24790+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
24791+ /* hlist_del(pos); */
24792+ kfree(tpos);
1facf9fc 24793+ }
24794+}
24795+
dece6358 24796+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 24797+{
dece6358
AM
24798+ struct au_vdir_dehstr *tpos;
24799+ struct hlist_node *pos, *node;
1facf9fc 24800+
dece6358
AM
24801+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
24802+ /* hlist_del(pos); */
4a4d8108 24803+ au_cache_free_vdir_dehstr(tpos);
1facf9fc 24804+ }
1facf9fc 24805+}
24806+
dece6358
AM
24807+static void au_nhash_do_free(struct au_nhash *nhash,
24808+ void (*free)(struct hlist_head *head))
1facf9fc 24809+{
1308ab2a 24810+ unsigned int n;
1facf9fc 24811+ struct hlist_head *head;
1facf9fc 24812+
dece6358 24813+ n = nhash->nh_num;
1308ab2a 24814+ if (!n)
24815+ return;
24816+
dece6358 24817+ head = nhash->nh_head;
1308ab2a 24818+ while (n-- > 0) {
dece6358
AM
24819+ nhash_count(head);
24820+ free(head++);
1facf9fc 24821+ }
dece6358 24822+ kfree(nhash->nh_head);
1facf9fc 24823+}
24824+
dece6358 24825+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 24826+{
dece6358
AM
24827+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
24828+}
1facf9fc 24829+
dece6358
AM
24830+static void au_nhash_de_free(struct au_nhash *delist)
24831+{
24832+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 24833+}
24834+
24835+/* ---------------------------------------------------------------------- */
24836+
24837+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
24838+ int limit)
24839+{
24840+ int num;
24841+ unsigned int u, n;
24842+ struct hlist_head *head;
24843+ struct au_vdir_wh *tpos;
24844+ struct hlist_node *pos;
24845+
24846+ num = 0;
24847+ n = whlist->nh_num;
24848+ head = whlist->nh_head;
1308ab2a 24849+ for (u = 0; u < n; u++, head++)
1facf9fc 24850+ hlist_for_each_entry(tpos, pos, head, wh_hash)
24851+ if (tpos->wh_bindex == btgt && ++num > limit)
24852+ return 1;
1facf9fc 24853+ return 0;
24854+}
24855+
24856+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 24857+ unsigned char *name,
1facf9fc 24858+ unsigned int len)
24859+{
dece6358
AM
24860+ unsigned int v;
24861+ /* const unsigned int magic_bit = 12; */
24862+
1308ab2a 24863+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
24864+
dece6358
AM
24865+ v = 0;
24866+ while (len--)
24867+ v += *name++;
24868+ /* v = hash_long(v, magic_bit); */
24869+ v %= nhash->nh_num;
24870+ return nhash->nh_head + v;
24871+}
24872+
24873+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
24874+ int nlen)
24875+{
24876+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 24877+}
24878+
24879+/* returns found or not */
dece6358 24880+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 24881+{
24882+ struct hlist_head *head;
24883+ struct au_vdir_wh *tpos;
24884+ struct hlist_node *pos;
24885+ struct au_vdir_destr *str;
24886+
dece6358 24887+ head = au_name_hash(whlist, name, nlen);
1facf9fc 24888+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
24889+ str = &tpos->wh_str;
24890+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
24891+ if (au_nhash_test_name(str, name, nlen))
24892+ return 1;
24893+ }
24894+ return 0;
24895+}
24896+
24897+/* returns found(true) or not */
24898+static int test_known(struct au_nhash *delist, char *name, int nlen)
24899+{
24900+ struct hlist_head *head;
24901+ struct au_vdir_dehstr *tpos;
24902+ struct hlist_node *pos;
24903+ struct au_vdir_destr *str;
24904+
24905+ head = au_name_hash(delist, name, nlen);
24906+ hlist_for_each_entry(tpos, pos, head, hash) {
24907+ str = tpos->str;
24908+ AuDbg("%.*s\n", str->len, str->name);
24909+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 24910+ return 1;
24911+ }
24912+ return 0;
24913+}
24914+
dece6358
AM
24915+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
24916+ unsigned char d_type)
24917+{
24918+#ifdef CONFIG_AUFS_SHWH
24919+ wh->wh_ino = ino;
24920+ wh->wh_type = d_type;
24921+#endif
24922+}
24923+
24924+/* ---------------------------------------------------------------------- */
24925+
24926+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
24927+ unsigned int d_type, aufs_bindex_t bindex,
24928+ unsigned char shwh)
1facf9fc 24929+{
24930+ int err;
24931+ struct au_vdir_destr *str;
24932+ struct au_vdir_wh *wh;
24933+
dece6358 24934+ AuDbg("%.*s\n", nlen, name);
1308ab2a 24935+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
24936+
1facf9fc 24937+ err = -ENOMEM;
dece6358 24938+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 24939+ if (unlikely(!wh))
24940+ goto out;
24941+
24942+ err = 0;
24943+ wh->wh_bindex = bindex;
dece6358
AM
24944+ if (shwh)
24945+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 24946+ str = &wh->wh_str;
dece6358
AM
24947+ str->len = nlen;
24948+ memcpy(str->name, name, nlen);
24949+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 24950+ /* smp_mb(); */
24951+
4f0767ce 24952+out:
1facf9fc 24953+ return err;
24954+}
24955+
1facf9fc 24956+static int append_deblk(struct au_vdir *vdir)
24957+{
24958+ int err;
dece6358 24959+ unsigned long ul;
1facf9fc 24960+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
24961+ union au_vdir_deblk_p p, deblk_end;
24962+ unsigned char **o;
24963+
24964+ err = -ENOMEM;
dece6358
AM
24965+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
24966+ GFP_NOFS);
1facf9fc 24967+ if (unlikely(!o))
24968+ goto out;
24969+
24970+ vdir->vd_deblk = o;
24971+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
24972+ if (p.deblk) {
24973+ ul = vdir->vd_nblk++;
24974+ vdir->vd_deblk[ul] = p.deblk;
24975+ vdir->vd_last.ul = ul;
24976+ vdir->vd_last.p.deblk = p.deblk;
24977+ deblk_end.deblk = p.deblk + deblk_sz;
24978+ err = set_deblk_end(&p, &deblk_end);
24979+ }
24980+
4f0767ce 24981+out:
1facf9fc 24982+ return err;
24983+}
24984+
dece6358
AM
24985+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
24986+ unsigned int d_type, struct au_nhash *delist)
24987+{
24988+ int err;
24989+ unsigned int sz;
24990+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
24991+ union au_vdir_deblk_p p, *room, deblk_end;
24992+ struct au_vdir_dehstr *dehstr;
24993+
24994+ p.deblk = last_deblk(vdir);
24995+ deblk_end.deblk = p.deblk + deblk_sz;
24996+ room = &vdir->vd_last.p;
24997+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
24998+ || !is_deblk_end(room, &deblk_end));
24999+
25000+ sz = calc_size(nlen);
25001+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
25002+ err = append_deblk(vdir);
25003+ if (unlikely(err))
25004+ goto out;
25005+
25006+ p.deblk = last_deblk(vdir);
25007+ deblk_end.deblk = p.deblk + deblk_sz;
25008+ /* smp_mb(); */
25009+ AuDebugOn(room->deblk != p.deblk);
25010+ }
25011+
25012+ err = -ENOMEM;
4a4d8108 25013+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
25014+ if (unlikely(!dehstr))
25015+ goto out;
25016+
25017+ dehstr->str = &room->de->de_str;
25018+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
25019+ room->de->de_ino = ino;
25020+ room->de->de_type = d_type;
25021+ room->de->de_str.len = nlen;
25022+ memcpy(room->de->de_str.name, name, nlen);
25023+
25024+ err = 0;
25025+ room->deblk += sz;
25026+ if (unlikely(set_deblk_end(room, &deblk_end)))
25027+ err = append_deblk(vdir);
25028+ /* smp_mb(); */
25029+
4f0767ce 25030+out:
dece6358
AM
25031+ return err;
25032+}
25033+
25034+/* ---------------------------------------------------------------------- */
25035+
25036+void au_vdir_free(struct au_vdir *vdir)
25037+{
25038+ unsigned char **deblk;
25039+
25040+ deblk = vdir->vd_deblk;
25041+ while (vdir->vd_nblk--)
25042+ kfree(*deblk++);
25043+ kfree(vdir->vd_deblk);
25044+ au_cache_free_vdir(vdir);
25045+}
25046+
1308ab2a 25047+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 25048+{
25049+ struct au_vdir *vdir;
1308ab2a 25050+ struct super_block *sb;
1facf9fc 25051+ int err;
25052+
1308ab2a 25053+ sb = file->f_dentry->d_sb;
dece6358
AM
25054+ SiMustAnyLock(sb);
25055+
1facf9fc 25056+ err = -ENOMEM;
25057+ vdir = au_cache_alloc_vdir();
25058+ if (unlikely(!vdir))
25059+ goto out;
25060+
25061+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
25062+ if (unlikely(!vdir->vd_deblk))
25063+ goto out_free;
25064+
25065+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 25066+ if (!vdir->vd_deblk_sz) {
25067+ /* estimate the appropriate size for deblk */
25068+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 25069+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 25070+ }
1facf9fc 25071+ vdir->vd_nblk = 0;
25072+ vdir->vd_version = 0;
25073+ vdir->vd_jiffy = 0;
25074+ err = append_deblk(vdir);
25075+ if (!err)
25076+ return vdir; /* success */
25077+
25078+ kfree(vdir->vd_deblk);
25079+
4f0767ce 25080+out_free:
1facf9fc 25081+ au_cache_free_vdir(vdir);
4f0767ce 25082+out:
1facf9fc 25083+ vdir = ERR_PTR(err);
25084+ return vdir;
25085+}
25086+
25087+static int reinit_vdir(struct au_vdir *vdir)
25088+{
25089+ int err;
25090+ union au_vdir_deblk_p p, deblk_end;
25091+
25092+ while (vdir->vd_nblk > 1) {
25093+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
25094+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
25095+ vdir->vd_nblk--;
25096+ }
25097+ p.deblk = vdir->vd_deblk[0];
25098+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
25099+ err = set_deblk_end(&p, &deblk_end);
25100+ /* keep vd_deblk_sz */
25101+ vdir->vd_last.ul = 0;
25102+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25103+ vdir->vd_version = 0;
25104+ vdir->vd_jiffy = 0;
25105+ /* smp_mb(); */
25106+ return err;
25107+}
25108+
25109+/* ---------------------------------------------------------------------- */
25110+
1facf9fc 25111+#define AuFillVdir_CALLED 1
25112+#define AuFillVdir_WHABLE (1 << 1)
dece6358 25113+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 25114+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
25115+#define au_fset_fillvdir(flags, name) \
25116+ do { (flags) |= AuFillVdir_##name; } while (0)
25117+#define au_fclr_fillvdir(flags, name) \
25118+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 25119+
dece6358
AM
25120+#ifndef CONFIG_AUFS_SHWH
25121+#undef AuFillVdir_SHWH
25122+#define AuFillVdir_SHWH 0
25123+#endif
25124+
1facf9fc 25125+struct fillvdir_arg {
25126+ struct file *file;
25127+ struct au_vdir *vdir;
dece6358
AM
25128+ struct au_nhash delist;
25129+ struct au_nhash whlist;
1facf9fc 25130+ aufs_bindex_t bindex;
25131+ unsigned int flags;
25132+ int err;
25133+};
25134+
dece6358 25135+static int fillvdir(void *__arg, const char *__name, int nlen,
1facf9fc 25136+ loff_t offset __maybe_unused, u64 h_ino,
25137+ unsigned int d_type)
25138+{
25139+ struct fillvdir_arg *arg = __arg;
25140+ char *name = (void *)__name;
25141+ struct super_block *sb;
1facf9fc 25142+ ino_t ino;
dece6358 25143+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 25144+
1facf9fc 25145+ arg->err = 0;
dece6358 25146+ sb = arg->file->f_dentry->d_sb;
1facf9fc 25147+ au_fset_fillvdir(arg->flags, CALLED);
25148+ /* smp_mb(); */
dece6358 25149+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 25150+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
25151+ if (test_known(&arg->delist, name, nlen)
25152+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
25153+ goto out; /* already exists or whiteouted */
1facf9fc 25154+
25155+ sb = arg->file->f_dentry->d_sb;
dece6358 25156+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
25157+ if (!arg->err) {
25158+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25159+ d_type = DT_UNKNOWN;
dece6358
AM
25160+ arg->err = append_de(arg->vdir, name, nlen, ino,
25161+ d_type, &arg->delist);
4a4d8108 25162+ }
1facf9fc 25163+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
25164+ name += AUFS_WH_PFX_LEN;
dece6358
AM
25165+ nlen -= AUFS_WH_PFX_LEN;
25166+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
25167+ goto out; /* already whiteouted */
1facf9fc 25168+
dece6358
AM
25169+ if (shwh)
25170+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
25171+ &ino);
4a4d8108
AM
25172+ if (!arg->err) {
25173+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
25174+ d_type = DT_UNKNOWN;
1facf9fc 25175+ arg->err = au_nhash_append_wh
dece6358
AM
25176+ (&arg->whlist, name, nlen, ino, d_type,
25177+ arg->bindex, shwh);
4a4d8108 25178+ }
1facf9fc 25179+ }
25180+
4f0767ce 25181+out:
1facf9fc 25182+ if (!arg->err)
25183+ arg->vdir->vd_jiffy = jiffies;
25184+ /* smp_mb(); */
25185+ AuTraceErr(arg->err);
25186+ return arg->err;
25187+}
25188+
dece6358
AM
/*
 * Append every whiteout recorded in @whlist to @vdir as a directory entry
 * whose name carries the AUFS_WH_PFX prefix.
 *
 * Without CONFIG_AUFS_SHWH this is a no-op returning 0.
 *
 * Returns 0 on success, -ENOMEM when the temporary name buffer cannot be
 * obtained, or the first error returned by append_de().
 */
static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
			  struct au_nhash *whlist, struct au_nhash *delist)
{
#ifdef CONFIG_AUFS_SHWH
	int err;
	unsigned int nh, u;
	struct hlist_head *head;
	struct au_vdir_wh *tpos;
	struct hlist_node *pos, *n;
	char *p, *o;
	struct au_vdir_destr *destr;

	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));

	err = -ENOMEM;
	o = p = __getname_gfp(GFP_NOFS);
	if (unlikely(!p))
		goto out;

	err = 0;
	nh = whlist->nh_num;
	/* "o" keeps the start of the buffer, "p" points just past the prefix */
	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	p += AUFS_WH_PFX_LEN;
	/*
	 * The "break" below only leaves the inner list walk, so the bucket
	 * loop must test err itself; otherwise a later successful append_de()
	 * would overwrite the failure and it would be reported as success.
	 */
	for (u = 0; !err && u < nh; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
			destr = &tpos->wh_str;
			memcpy(p, destr->name, destr->len);
			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
					tpos->wh_ino, tpos->wh_type, delist);
			if (unlikely(err))
				break;
		}
	}

	__putname(o);

out:
	AuTraceErr(err);
	return err;
#else
	return 0;
#endif
}
25233+
1facf9fc 25234+static int au_do_read_vdir(struct fillvdir_arg *arg)
25235+{
25236+ int err;
dece6358 25237+ unsigned int rdhash;
1facf9fc 25238+ loff_t offset;
dece6358
AM
25239+ aufs_bindex_t bend, bindex, bstart;
25240+ unsigned char shwh;
1facf9fc 25241+ struct file *hf, *file;
25242+ struct super_block *sb;
25243+
1facf9fc 25244+ file = arg->file;
25245+ sb = file->f_dentry->d_sb;
dece6358
AM
25246+ SiMustAnyLock(sb);
25247+
25248+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 25249+ if (!rdhash)
25250+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
25251+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
25252+ if (unlikely(err))
1facf9fc 25253+ goto out;
dece6358
AM
25254+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
25255+ if (unlikely(err))
1facf9fc 25256+ goto out_delist;
25257+
25258+ err = 0;
25259+ arg->flags = 0;
dece6358
AM
25260+ shwh = 0;
25261+ if (au_opt_test(au_mntflags(sb), SHWH)) {
25262+ shwh = 1;
25263+ au_fset_fillvdir(arg->flags, SHWH);
25264+ }
25265+ bstart = au_fbstart(file);
4a4d8108 25266+ bend = au_fbend_dir(file);
dece6358 25267+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 25268+ hf = au_hf_dir(file, bindex);
1facf9fc 25269+ if (!hf)
25270+ continue;
25271+
25272+ offset = vfsub_llseek(hf, 0, SEEK_SET);
25273+ err = offset;
25274+ if (unlikely(offset))
25275+ break;
25276+
25277+ arg->bindex = bindex;
25278+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
25279+ if (shwh
25280+ || (bindex != bend
25281+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 25282+ au_fset_fillvdir(arg->flags, WHABLE);
25283+ do {
25284+ arg->err = 0;
25285+ au_fclr_fillvdir(arg->flags, CALLED);
25286+ /* smp_mb(); */
25287+ err = vfsub_readdir(hf, fillvdir, arg);
25288+ if (err >= 0)
25289+ err = arg->err;
25290+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
25291+ }
dece6358
AM
25292+
25293+ if (!err && shwh)
25294+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
25295+
25296+ au_nhash_wh_free(&arg->whlist);
1facf9fc 25297+
4f0767ce 25298+out_delist:
dece6358 25299+ au_nhash_de_free(&arg->delist);
4f0767ce 25300+out:
1facf9fc 25301+ return err;
25302+}
25303+
25304+static int read_vdir(struct file *file, int may_read)
25305+{
25306+ int err;
25307+ unsigned long expire;
25308+ unsigned char do_read;
25309+ struct fillvdir_arg arg;
25310+ struct inode *inode;
25311+ struct au_vdir *vdir, *allocated;
25312+
25313+ err = 0;
25314+ inode = file->f_dentry->d_inode;
25315+ IMustLock(inode);
dece6358
AM
25316+ SiMustAnyLock(inode->i_sb);
25317+
1facf9fc 25318+ allocated = NULL;
25319+ do_read = 0;
25320+ expire = au_sbi(inode->i_sb)->si_rdcache;
25321+ vdir = au_ivdir(inode);
25322+ if (!vdir) {
25323+ do_read = 1;
1308ab2a 25324+ vdir = alloc_vdir(file);
1facf9fc 25325+ err = PTR_ERR(vdir);
25326+ if (IS_ERR(vdir))
25327+ goto out;
25328+ err = 0;
25329+ allocated = vdir;
25330+ } else if (may_read
25331+ && (inode->i_version != vdir->vd_version
25332+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
25333+ do_read = 1;
25334+ err = reinit_vdir(vdir);
25335+ if (unlikely(err))
25336+ goto out;
25337+ }
25338+
25339+ if (!do_read)
25340+ return 0; /* success */
25341+
25342+ arg.file = file;
25343+ arg.vdir = vdir;
25344+ err = au_do_read_vdir(&arg);
25345+ if (!err) {
25346+ /* file->f_pos = 0; */
25347+ vdir->vd_version = inode->i_version;
25348+ vdir->vd_last.ul = 0;
25349+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25350+ if (allocated)
25351+ au_set_ivdir(inode, allocated);
25352+ } else if (allocated)
25353+ au_vdir_free(allocated);
25354+
4f0767ce 25355+out:
1facf9fc 25356+ return err;
25357+}
25358+
25359+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
25360+{
25361+ int err, rerr;
25362+ unsigned long ul, n;
25363+ const unsigned int deblk_sz = src->vd_deblk_sz;
25364+
25365+ AuDebugOn(tgt->vd_nblk != 1);
25366+
25367+ err = -ENOMEM;
25368+ if (tgt->vd_nblk < src->vd_nblk) {
25369+ unsigned char **p;
25370+
dece6358
AM
25371+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
25372+ GFP_NOFS);
1facf9fc 25373+ if (unlikely(!p))
25374+ goto out;
25375+ tgt->vd_deblk = p;
25376+ }
25377+
1308ab2a 25378+ if (tgt->vd_deblk_sz != deblk_sz) {
25379+ unsigned char *p;
25380+
25381+ tgt->vd_deblk_sz = deblk_sz;
25382+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
25383+ if (unlikely(!p))
25384+ goto out;
25385+ tgt->vd_deblk[0] = p;
25386+ }
1facf9fc 25387+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 25388+ tgt->vd_version = src->vd_version;
25389+ tgt->vd_jiffy = src->vd_jiffy;
25390+
25391+ n = src->vd_nblk;
25392+ for (ul = 1; ul < n; ul++) {
dece6358
AM
25393+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
25394+ GFP_NOFS);
25395+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 25396+ goto out;
1308ab2a 25397+ tgt->vd_nblk++;
1facf9fc 25398+ }
1308ab2a 25399+ tgt->vd_nblk = n;
25400+ tgt->vd_last.ul = tgt->vd_last.ul;
25401+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
25402+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
25403+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 25404+ /* smp_mb(); */
25405+ return 0; /* success */
25406+
4f0767ce 25407+out:
1facf9fc 25408+ rerr = reinit_vdir(tgt);
25409+ BUG_ON(rerr);
25410+ return err;
25411+}
25412+
25413+int au_vdir_init(struct file *file)
25414+{
25415+ int err;
25416+ struct inode *inode;
25417+ struct au_vdir *vdir_cache, *allocated;
25418+
25419+ err = read_vdir(file, !file->f_pos);
25420+ if (unlikely(err))
25421+ goto out;
25422+
25423+ allocated = NULL;
25424+ vdir_cache = au_fvdir_cache(file);
25425+ if (!vdir_cache) {
1308ab2a 25426+ vdir_cache = alloc_vdir(file);
1facf9fc 25427+ err = PTR_ERR(vdir_cache);
25428+ if (IS_ERR(vdir_cache))
25429+ goto out;
25430+ allocated = vdir_cache;
25431+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
25432+ err = reinit_vdir(vdir_cache);
25433+ if (unlikely(err))
25434+ goto out;
25435+ } else
25436+ return 0; /* success */
25437+
25438+ inode = file->f_dentry->d_inode;
25439+ err = copy_vdir(vdir_cache, au_ivdir(inode));
25440+ if (!err) {
25441+ file->f_version = inode->i_version;
25442+ if (allocated)
25443+ au_set_fvdir_cache(file, allocated);
25444+ } else if (allocated)
25445+ au_vdir_free(allocated);
25446+
4f0767ce 25447+out:
1facf9fc 25448+ return err;
25449+}
25450+
25451+static loff_t calc_offset(struct au_vdir *vdir)
25452+{
25453+ loff_t offset;
25454+ union au_vdir_deblk_p p;
25455+
25456+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
25457+ offset = vdir->vd_last.p.deblk - p.deblk;
25458+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
25459+ return offset;
25460+}
25461+
25462+/* returns true or false */
25463+static int seek_vdir(struct file *file)
25464+{
25465+ int valid;
25466+ unsigned int deblk_sz;
25467+ unsigned long ul, n;
25468+ loff_t offset;
25469+ union au_vdir_deblk_p p, deblk_end;
25470+ struct au_vdir *vdir_cache;
25471+
25472+ valid = 1;
25473+ vdir_cache = au_fvdir_cache(file);
25474+ offset = calc_offset(vdir_cache);
25475+ AuDbg("offset %lld\n", offset);
25476+ if (file->f_pos == offset)
25477+ goto out;
25478+
25479+ vdir_cache->vd_last.ul = 0;
25480+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
25481+ if (!file->f_pos)
25482+ goto out;
25483+
25484+ valid = 0;
25485+ deblk_sz = vdir_cache->vd_deblk_sz;
25486+ ul = div64_u64(file->f_pos, deblk_sz);
25487+ AuDbg("ul %lu\n", ul);
25488+ if (ul >= vdir_cache->vd_nblk)
25489+ goto out;
25490+
25491+ n = vdir_cache->vd_nblk;
25492+ for (; ul < n; ul++) {
25493+ p.deblk = vdir_cache->vd_deblk[ul];
25494+ deblk_end.deblk = p.deblk + deblk_sz;
25495+ offset = ul;
25496+ offset *= deblk_sz;
25497+ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
25498+ unsigned int l;
25499+
25500+ l = calc_size(p.de->de_str.len);
25501+ offset += l;
25502+ p.deblk += l;
25503+ }
25504+ if (!is_deblk_end(&p, &deblk_end)) {
25505+ valid = 1;
25506+ vdir_cache->vd_last.ul = ul;
25507+ vdir_cache->vd_last.p = p;
25508+ break;
25509+ }
25510+ }
25511+
4f0767ce 25512+out:
1facf9fc 25513+ /* smp_mb(); */
25514+ AuTraceErr(!valid);
25515+ return valid;
25516+}
25517+
25518+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
25519+{
25520+ int err;
25521+ unsigned int l, deblk_sz;
25522+ union au_vdir_deblk_p deblk_end;
25523+ struct au_vdir *vdir_cache;
25524+ struct au_vdir_de *de;
25525+
25526+ vdir_cache = au_fvdir_cache(file);
25527+ if (!seek_vdir(file))
25528+ return 0;
25529+
25530+ deblk_sz = vdir_cache->vd_deblk_sz;
25531+ while (1) {
25532+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25533+ deblk_end.deblk += deblk_sz;
25534+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
25535+ de = vdir_cache->vd_last.p.de;
25536+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
25537+ de->de_str.len, de->de_str.name, file->f_pos,
25538+ (unsigned long)de->de_ino, de->de_type);
25539+ err = filldir(dirent, de->de_str.name, de->de_str.len,
25540+ file->f_pos, de->de_ino, de->de_type);
25541+ if (unlikely(err)) {
25542+ AuTraceErr(err);
25543+ /* todo: ignore the error caused by udba? */
25544+ /* return err; */
25545+ return 0;
25546+ }
25547+
25548+ l = calc_size(de->de_str.len);
25549+ vdir_cache->vd_last.p.deblk += l;
25550+ file->f_pos += l;
25551+ }
25552+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
25553+ vdir_cache->vd_last.ul++;
25554+ vdir_cache->vd_last.p.deblk
25555+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25556+ file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
25557+ continue;
25558+ }
25559+ break;
25560+ }
25561+
25562+ /* smp_mb(); */
25563+ return 0;
25564+}
7f207e10
AM
25565diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
25566--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
25567+++ linux/fs/aufs/vfsub.c 2011-08-24 13:30:24.734646739 +0200
25568@@ -0,0 +1,836 @@
1facf9fc 25569+/*
027c5e7a 25570+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 25571+ *
25572+ * This program, aufs is free software; you can redistribute it and/or modify
25573+ * it under the terms of the GNU General Public License as published by
25574+ * the Free Software Foundation; either version 2 of the License, or
25575+ * (at your option) any later version.
dece6358
AM
25576+ *
25577+ * This program is distributed in the hope that it will be useful,
25578+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25579+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25580+ * GNU General Public License for more details.
25581+ *
25582+ * You should have received a copy of the GNU General Public License
25583+ * along with this program; if not, write to the Free Software
25584+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 25585+ */
25586+
25587+/*
25588+ * sub-routines for VFS
25589+ */
25590+
4a4d8108 25591+#include <linux/file.h>
1308ab2a 25592+#include <linux/ima.h>
dece6358
AM
25593+#include <linux/namei.h>
25594+#include <linux/security.h>
25595+#include <linux/splice.h>
1facf9fc 25596+#include <linux/uaccess.h>
25597+#include "aufs.h"
25598+
25599+int vfsub_update_h_iattr(struct path *h_path, int *did)
25600+{
25601+ int err;
25602+ struct kstat st;
25603+ struct super_block *h_sb;
25604+
25605+ /* for remote fs, leave work for its getattr or d_revalidate */
25606+ /* for bad i_attr fs, handle them in aufs_getattr() */
25607+ /* still some fs may acquire i_mutex. we need to skip them */
25608+ err = 0;
25609+ if (!did)
25610+ did = &err;
25611+ h_sb = h_path->dentry->d_sb;
25612+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
25613+ if (*did)
25614+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
25615+
25616+ return err;
25617+}
25618+
25619+/* ---------------------------------------------------------------------- */
25620+
4a4d8108 25621+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 25622+{
25623+ struct file *file;
25624+
4a4d8108 25625+ path_get(path);
0c5527e5 25626+ file = dentry_open(path->dentry, path->mnt,
2cbb1c4b 25627+ flags /* | __FMODE_NONOTIFY */,
7f207e10 25628+ current_cred());
2cbb1c4b
JR
25629+ if (!IS_ERR_OR_NULL(file)
25630+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
25631+ i_readcount_inc(path->dentry->d_inode);
4a4d8108 25632+
1308ab2a 25633+ return file;
25634+}
25635+
1facf9fc 25636+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
25637+{
25638+ struct file *file;
25639+
2cbb1c4b 25640+ lockdep_off();
7f207e10 25641+ file = filp_open(path,
2cbb1c4b 25642+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 25643+ mode);
2cbb1c4b 25644+ lockdep_on();
1facf9fc 25645+ if (IS_ERR(file))
25646+ goto out;
25647+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
25648+
4f0767ce 25649+out:
1facf9fc 25650+ return file;
25651+}
25652+
25653+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
25654+{
25655+ int err;
25656+
1facf9fc 25657+ err = kern_path(name, flags, path);
1facf9fc 25658+ if (!err && path->dentry->d_inode)
25659+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
25660+ return err;
25661+}
25662+
25663+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
25664+ int len)
25665+{
25666+ struct path path = {
25667+ .mnt = NULL
25668+ };
25669+
1308ab2a 25670+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 25671+ IMustLock(parent->d_inode);
25672+
25673+ path.dentry = lookup_one_len(name, parent, len);
25674+ if (IS_ERR(path.dentry))
25675+ goto out;
25676+ if (path.dentry->d_inode)
25677+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25678+
4f0767ce 25679+out:
4a4d8108 25680+ AuTraceErrPtr(path.dentry);
1facf9fc 25681+ return path.dentry;
25682+}
25683+
25684+struct dentry *vfsub_lookup_hash(struct nameidata *nd)
25685+{
25686+ struct path path = {
25687+ .mnt = nd->path.mnt
25688+ };
25689+
25690+ IMustLock(nd->path.dentry->d_inode);
25691+
25692+ path.dentry = lookup_hash(nd);
4a4d8108
AM
25693+ if (IS_ERR(path.dentry))
25694+ goto out;
25695+ if (path.dentry->d_inode)
1facf9fc 25696+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25697+
4f0767ce 25698+out:
4a4d8108 25699+ AuTraceErrPtr(path.dentry);
1facf9fc 25700+ return path.dentry;
25701+}
25702+
2cbb1c4b
JR
25703+/*
25704+ * this is "VFS:__lookup_one_len()" which was removed and merged into
25705+ * VFS:lookup_one_len() by the commit.
25706+ * 6a96ba5 2011-03-14 kill __lookup_one_len()
25707+ * this function should always be equivalent to the corresponding part in
25708+ * VFS:lookup_one_len().
25709+ */
25710+int vfsub_name_hash(const char *name, struct qstr *this, int len)
25711+{
25712+ unsigned long hash;
25713+ unsigned int c;
25714+
25715+ this->name = name;
25716+ this->len = len;
25717+ if (!len)
25718+ return -EACCES;
25719+
25720+ hash = init_name_hash();
25721+ while (len--) {
25722+ c = *(const unsigned char *)name++;
25723+ if (c == '/' || c == '\0')
25724+ return -EACCES;
25725+ hash = partial_name_hash(c, hash);
25726+ }
25727+ this->hash = end_name_hash(hash);
25728+ return 0;
25729+}
25730+
1facf9fc 25731+/* ---------------------------------------------------------------------- */
25732+
/*
 * lock_rename() on @d1 and @d2 with lockdep switched off, then
 * au_hn_suspend() on @hdir1 and, when different, on @hdir2.
 * Returns whatever lock_rename() returned.
 */
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
				 struct dentry *d2, struct au_hinode *hdir2)
{
	struct dentry *res;

	lockdep_off();
	res = lock_rename(d1, d2);
	lockdep_on();

	au_hn_suspend(hdir1);
	if (hdir2 != hdir1)
		au_hn_suspend(hdir2);

	return res;
}
25747+
/*
 * Counterpart of vfsub_lock_rename(): au_hn_resume() on @hdir1 and, when
 * different, on @hdir2, then unlock_rename() with lockdep switched off.
 */
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
			 struct dentry *d2, struct au_hinode *hdir2)
{
	au_hn_resume(hdir1);
	if (hdir2 != hdir1)
		au_hn_resume(hdir2);

	lockdep_off();
	unlock_rename(d1, d2);
	lockdep_on();
}
25758+
25759+/* ---------------------------------------------------------------------- */
25760+
25761+int vfsub_create(struct inode *dir, struct path *path, int mode)
25762+{
25763+ int err;
25764+ struct dentry *d;
25765+
25766+ IMustLock(dir);
25767+
25768+ d = path->dentry;
25769+ path->dentry = d->d_parent;
b752ccd1 25770+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 25771+ path->dentry = d;
25772+ if (unlikely(err))
25773+ goto out;
25774+
25775+ if (au_test_fs_null_nd(dir->i_sb))
25776+ err = vfs_create(dir, path->dentry, mode, NULL);
25777+ else {
25778+ struct nameidata h_nd;
25779+
25780+ memset(&h_nd, 0, sizeof(h_nd));
25781+ h_nd.flags = LOOKUP_CREATE;
25782+ h_nd.intent.open.flags = O_CREAT
25783+ | vfsub_fmode_to_uint(FMODE_READ);
25784+ h_nd.intent.open.create_mode = mode;
25785+ h_nd.path.dentry = path->dentry->d_parent;
25786+ h_nd.path.mnt = path->mnt;
25787+ path_get(&h_nd.path);
25788+ err = vfs_create(dir, path->dentry, mode, &h_nd);
25789+ path_put(&h_nd.path);
25790+ }
25791+
25792+ if (!err) {
25793+ struct path tmp = *path;
25794+ int did;
25795+
25796+ vfsub_update_h_iattr(&tmp, &did);
25797+ if (did) {
25798+ tmp.dentry = path->dentry->d_parent;
25799+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25800+ }
25801+ /*ignore*/
25802+ }
25803+
4f0767ce 25804+out:
1facf9fc 25805+ return err;
25806+}
25807+
25808+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
25809+{
25810+ int err;
25811+ struct dentry *d;
25812+
25813+ IMustLock(dir);
25814+
25815+ d = path->dentry;
25816+ path->dentry = d->d_parent;
b752ccd1 25817+ err = security_path_symlink(path, d, symname);
1facf9fc 25818+ path->dentry = d;
25819+ if (unlikely(err))
25820+ goto out;
25821+
25822+ err = vfs_symlink(dir, path->dentry, symname);
25823+ if (!err) {
25824+ struct path tmp = *path;
25825+ int did;
25826+
25827+ vfsub_update_h_iattr(&tmp, &did);
25828+ if (did) {
25829+ tmp.dentry = path->dentry->d_parent;
25830+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25831+ }
25832+ /*ignore*/
25833+ }
25834+
4f0767ce 25835+out:
1facf9fc 25836+ return err;
25837+}
25838+
25839+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
25840+{
25841+ int err;
25842+ struct dentry *d;
25843+
25844+ IMustLock(dir);
25845+
25846+ d = path->dentry;
25847+ path->dentry = d->d_parent;
027c5e7a 25848+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 25849+ path->dentry = d;
25850+ if (unlikely(err))
25851+ goto out;
25852+
25853+ err = vfs_mknod(dir, path->dentry, mode, dev);
25854+ if (!err) {
25855+ struct path tmp = *path;
25856+ int did;
25857+
25858+ vfsub_update_h_iattr(&tmp, &did);
25859+ if (did) {
25860+ tmp.dentry = path->dentry->d_parent;
25861+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25862+ }
25863+ /*ignore*/
25864+ }
25865+
4f0767ce 25866+out:
1facf9fc 25867+ return err;
25868+}
25869+
25870+static int au_test_nlink(struct inode *inode)
25871+{
25872+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
25873+
25874+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
25875+ || inode->i_nlink < link_max)
25876+ return 0;
25877+ return -EMLINK;
25878+}
25879+
25880+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
25881+{
25882+ int err;
25883+ struct dentry *d;
25884+
25885+ IMustLock(dir);
25886+
25887+ err = au_test_nlink(src_dentry->d_inode);
25888+ if (unlikely(err))
25889+ return err;
25890+
25891+ d = path->dentry;
25892+ path->dentry = d->d_parent;
b752ccd1 25893+ err = security_path_link(src_dentry, path, d);
1facf9fc 25894+ path->dentry = d;
25895+ if (unlikely(err))
25896+ goto out;
25897+
2cbb1c4b 25898+ lockdep_off();
1facf9fc 25899+ err = vfs_link(src_dentry, dir, path->dentry);
2cbb1c4b 25900+ lockdep_on();
1facf9fc 25901+ if (!err) {
25902+ struct path tmp = *path;
25903+ int did;
25904+
25905+ /* fuse has different memory inode for the same inumber */
25906+ vfsub_update_h_iattr(&tmp, &did);
25907+ if (did) {
25908+ tmp.dentry = path->dentry->d_parent;
25909+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25910+ tmp.dentry = src_dentry;
25911+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25912+ }
25913+ /*ignore*/
25914+ }
25915+
4f0767ce 25916+out:
1facf9fc 25917+ return err;
25918+}
25919+
25920+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
25921+ struct inode *dir, struct path *path)
25922+{
25923+ int err;
25924+ struct path tmp = {
25925+ .mnt = path->mnt
25926+ };
25927+ struct dentry *d;
25928+
25929+ IMustLock(dir);
25930+ IMustLock(src_dir);
25931+
25932+ d = path->dentry;
25933+ path->dentry = d->d_parent;
25934+ tmp.dentry = src_dentry->d_parent;
b752ccd1 25935+ err = security_path_rename(&tmp, src_dentry, path, d);
1facf9fc 25936+ path->dentry = d;
25937+ if (unlikely(err))
25938+ goto out;
25939+
2cbb1c4b 25940+ lockdep_off();
1facf9fc 25941+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
2cbb1c4b 25942+ lockdep_on();
1facf9fc 25943+ if (!err) {
25944+ int did;
25945+
25946+ tmp.dentry = d->d_parent;
25947+ vfsub_update_h_iattr(&tmp, &did);
25948+ if (did) {
25949+ tmp.dentry = src_dentry;
25950+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25951+ tmp.dentry = src_dentry->d_parent;
25952+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25953+ }
25954+ /*ignore*/
25955+ }
25956+
4f0767ce 25957+out:
1facf9fc 25958+ return err;
25959+}
25960+
25961+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
25962+{
25963+ int err;
25964+ struct dentry *d;
25965+
25966+ IMustLock(dir);
25967+
25968+ d = path->dentry;
25969+ path->dentry = d->d_parent;
b752ccd1 25970+ err = security_path_mkdir(path, d, mode);
1facf9fc 25971+ path->dentry = d;
25972+ if (unlikely(err))
25973+ goto out;
25974+
25975+ err = vfs_mkdir(dir, path->dentry, mode);
25976+ if (!err) {
25977+ struct path tmp = *path;
25978+ int did;
25979+
25980+ vfsub_update_h_iattr(&tmp, &did);
25981+ if (did) {
25982+ tmp.dentry = path->dentry->d_parent;
25983+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25984+ }
25985+ /*ignore*/
25986+ }
25987+
4f0767ce 25988+out:
1facf9fc 25989+ return err;
25990+}
25991+
25992+int vfsub_rmdir(struct inode *dir, struct path *path)
25993+{
25994+ int err;
25995+ struct dentry *d;
25996+
25997+ IMustLock(dir);
25998+
25999+ d = path->dentry;
26000+ path->dentry = d->d_parent;
b752ccd1 26001+ err = security_path_rmdir(path, d);
1facf9fc 26002+ path->dentry = d;
26003+ if (unlikely(err))
26004+ goto out;
26005+
2cbb1c4b 26006+ lockdep_off();
1facf9fc 26007+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 26008+ lockdep_on();
1facf9fc 26009+ if (!err) {
26010+ struct path tmp = {
26011+ .dentry = path->dentry->d_parent,
26012+ .mnt = path->mnt
26013+ };
26014+
26015+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26016+ }
26017+
4f0767ce 26018+out:
1facf9fc 26019+ return err;
26020+}
26021+
26022+/* ---------------------------------------------------------------------- */
26023+
26024+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26025+ loff_t *ppos)
26026+{
26027+ ssize_t err;
26028+
2cbb1c4b 26029+ lockdep_off();
1facf9fc 26030+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 26031+ lockdep_on();
1facf9fc 26032+ if (err >= 0)
26033+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26034+ return err;
26035+}
26036+
26037+/* todo: kernel_read()? */
26038+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26039+ loff_t *ppos)
26040+{
26041+ ssize_t err;
26042+ mm_segment_t oldfs;
b752ccd1
AM
26043+ union {
26044+ void *k;
26045+ char __user *u;
26046+ } buf;
1facf9fc 26047+
b752ccd1 26048+ buf.k = kbuf;
1facf9fc 26049+ oldfs = get_fs();
26050+ set_fs(KERNEL_DS);
b752ccd1 26051+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 26052+ set_fs(oldfs);
26053+ return err;
26054+}
26055+
26056+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26057+ loff_t *ppos)
26058+{
26059+ ssize_t err;
26060+
2cbb1c4b 26061+ lockdep_off();
1facf9fc 26062+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 26063+ lockdep_on();
1facf9fc 26064+ if (err >= 0)
26065+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26066+ return err;
26067+}
26068+
26069+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
26070+{
26071+ ssize_t err;
26072+ mm_segment_t oldfs;
b752ccd1
AM
26073+ union {
26074+ void *k;
26075+ const char __user *u;
26076+ } buf;
1facf9fc 26077+
b752ccd1 26078+ buf.k = kbuf;
1facf9fc 26079+ oldfs = get_fs();
26080+ set_fs(KERNEL_DS);
b752ccd1 26081+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 26082+ set_fs(oldfs);
26083+ return err;
26084+}
26085+
4a4d8108
AM
26086+int vfsub_flush(struct file *file, fl_owner_t id)
26087+{
26088+ int err;
26089+
26090+ err = 0;
26091+ if (file->f_op && file->f_op->flush) {
2cbb1c4b
JR
26092+ if (!au_test_nfs(file->f_dentry->d_sb))
26093+ err = file->f_op->flush(file, id);
26094+ else {
26095+ lockdep_off();
26096+ err = file->f_op->flush(file, id);
26097+ lockdep_on();
26098+ }
4a4d8108
AM
26099+ if (!err)
26100+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
26101+ /*ignore*/
26102+ }
26103+ return err;
26104+}
26105+
1facf9fc 26106+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
26107+{
26108+ int err;
26109+
2cbb1c4b 26110+ lockdep_off();
1facf9fc 26111+ err = vfs_readdir(file, filldir, arg);
2cbb1c4b 26112+ lockdep_on();
1facf9fc 26113+ if (err >= 0)
26114+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26115+ return err;
26116+}
26117+
26118+long vfsub_splice_to(struct file *in, loff_t *ppos,
26119+ struct pipe_inode_info *pipe, size_t len,
26120+ unsigned int flags)
26121+{
26122+ long err;
26123+
2cbb1c4b 26124+ lockdep_off();
0fc653ad 26125+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 26126+ lockdep_on();
4a4d8108 26127+ file_accessed(in);
1facf9fc 26128+ if (err >= 0)
26129+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
26130+ return err;
26131+}
26132+
26133+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26134+ loff_t *ppos, size_t len, unsigned int flags)
26135+{
26136+ long err;
26137+
2cbb1c4b 26138+ lockdep_off();
0fc653ad 26139+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 26140+ lockdep_on();
1facf9fc 26141+ if (err >= 0)
26142+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
26143+ return err;
26144+}
26145+
53392da6
AM
26146+int vfsub_fsync(struct file *file, struct path *path, int datasync)
26147+{
26148+ int err;
26149+
26150+ /* file can be NULL */
26151+ lockdep_off();
26152+ err = vfs_fsync(file, datasync);
26153+ lockdep_on();
26154+ if (!err) {
26155+ if (!path) {
26156+ AuDebugOn(!file);
26157+ path = &file->f_path;
26158+ }
26159+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
26160+ }
26161+ return err;
26162+}
26163+
1facf9fc 26164+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
26165+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26166+ struct file *h_file)
26167+{
26168+ int err;
26169+ struct inode *h_inode;
26170+
26171+ h_inode = h_path->dentry->d_inode;
26172+ if (!h_file) {
26173+ err = mnt_want_write(h_path->mnt);
26174+ if (err)
26175+ goto out;
26176+ err = inode_permission(h_inode, MAY_WRITE);
26177+ if (err)
26178+ goto out_mnt;
26179+ err = get_write_access(h_inode);
26180+ if (err)
26181+ goto out_mnt;
4a4d8108 26182+ err = break_lease(h_inode, O_WRONLY);
1facf9fc 26183+ if (err)
26184+ goto out_inode;
26185+ }
26186+
26187+ err = locks_verify_truncate(h_inode, h_file, length);
26188+ if (!err)
953406b4 26189+ err = security_path_truncate(h_path);
2cbb1c4b
JR
26190+ if (!err) {
26191+ lockdep_off();
1facf9fc 26192+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
26193+ lockdep_on();
26194+ }
1facf9fc 26195+
4f0767ce 26196+out_inode:
1facf9fc 26197+ if (!h_file)
26198+ put_write_access(h_inode);
4f0767ce 26199+out_mnt:
1facf9fc 26200+ if (!h_file)
26201+ mnt_drop_write(h_path->mnt);
4f0767ce 26202+out:
1facf9fc 26203+ return err;
26204+}
26205+
26206+/* ---------------------------------------------------------------------- */
26207+
/* arguments of vfsub_mkdir() plus the place to store its result */
struct au_vfsub_mkdir_args {
	int *errp;
	struct inode *dir;
	struct path *path;
	int mode;
};

/* au_wkq_wait() callback: unpack @args and run vfsub_mkdir() */
static void au_call_vfsub_mkdir(void *args)
{
	struct au_vfsub_mkdir_args *mkdir_args = args;

	*mkdir_args->errp = vfsub_mkdir(mkdir_args->dir, mkdir_args->path,
					mkdir_args->mode);
}
26220+
26221+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
26222+{
26223+ int err, do_sio, wkq_err;
26224+
26225+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26226+ if (!do_sio)
26227+ err = vfsub_mkdir(dir, path, mode);
26228+ else {
26229+ struct au_vfsub_mkdir_args args = {
26230+ .errp = &err,
26231+ .dir = dir,
26232+ .path = path,
26233+ .mode = mode
26234+ };
26235+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
26236+ if (unlikely(wkq_err))
26237+ err = wkq_err;
26238+ }
26239+
26240+ return err;
26241+}
26242+
/* arguments of vfsub_rmdir() plus the place to store its result */
struct au_vfsub_rmdir_args {
	int *errp;
	struct inode *dir;
	struct path *path;
};

/* au_wkq_wait() callback: unpack @args and run vfsub_rmdir() */
static void au_call_vfsub_rmdir(void *args)
{
	struct au_vfsub_rmdir_args *rmdir_args = args;

	*rmdir_args->errp = vfsub_rmdir(rmdir_args->dir, rmdir_args->path);
}
26254+
26255+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
26256+{
26257+ int err, do_sio, wkq_err;
26258+
26259+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26260+ if (!do_sio)
26261+ err = vfsub_rmdir(dir, path);
26262+ else {
26263+ struct au_vfsub_rmdir_args args = {
26264+ .errp = &err,
26265+ .dir = dir,
26266+ .path = path
26267+ };
26268+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
26269+ if (unlikely(wkq_err))
26270+ err = wkq_err;
26271+ }
26272+
26273+ return err;
26274+}
26275+
26276+/* ---------------------------------------------------------------------- */
26277+
26278+struct notify_change_args {
26279+ int *errp;
26280+ struct path *path;
26281+ struct iattr *ia;
26282+};
26283+
26284+static void call_notify_change(void *args)
26285+{
26286+ struct notify_change_args *a = args;
26287+ struct inode *h_inode;
26288+
26289+ h_inode = a->path->dentry->d_inode;
26290+ IMustLock(h_inode);
26291+
26292+ *a->errp = -EPERM;
26293+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
1facf9fc 26294+ *a->errp = notify_change(a->path->dentry, a->ia);
1facf9fc 26295+ if (!*a->errp)
26296+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
26297+ }
26298+ AuTraceErr(*a->errp);
26299+}
26300+
26301+int vfsub_notify_change(struct path *path, struct iattr *ia)
26302+{
26303+ int err;
26304+ struct notify_change_args args = {
26305+ .errp = &err,
26306+ .path = path,
26307+ .ia = ia
26308+ };
26309+
26310+ call_notify_change(&args);
26311+
26312+ return err;
26313+}
26314+
26315+int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
26316+{
26317+ int err, wkq_err;
26318+ struct notify_change_args args = {
26319+ .errp = &err,
26320+ .path = path,
26321+ .ia = ia
26322+ };
26323+
26324+ wkq_err = au_wkq_wait(call_notify_change, &args);
26325+ if (unlikely(wkq_err))
26326+ err = wkq_err;
26327+
26328+ return err;
26329+}
26330+
26331+/* ---------------------------------------------------------------------- */
26332+
26333+struct unlink_args {
26334+ int *errp;
26335+ struct inode *dir;
26336+ struct path *path;
26337+};
26338+
26339+static void call_unlink(void *args)
26340+{
26341+ struct unlink_args *a = args;
26342+ struct dentry *d = a->path->dentry;
26343+ struct inode *h_inode;
26344+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
027c5e7a 26345+ && d->d_count == 1);
1facf9fc 26346+
26347+ IMustLock(a->dir);
26348+
26349+ a->path->dentry = d->d_parent;
26350+ *a->errp = security_path_unlink(a->path, d);
26351+ a->path->dentry = d;
26352+ if (unlikely(*a->errp))
26353+ return;
26354+
26355+ if (!stop_sillyrename)
26356+ dget(d);
26357+ h_inode = d->d_inode;
26358+ if (h_inode)
027c5e7a 26359+ ihold(h_inode);
1facf9fc 26360+
2cbb1c4b 26361+ lockdep_off();
1facf9fc 26362+ *a->errp = vfs_unlink(a->dir, d);
2cbb1c4b 26363+ lockdep_on();
1facf9fc 26364+ if (!*a->errp) {
26365+ struct path tmp = {
26366+ .dentry = d->d_parent,
26367+ .mnt = a->path->mnt
26368+ };
26369+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26370+ }
26371+
26372+ if (!stop_sillyrename)
26373+ dput(d);
26374+ if (h_inode)
26375+ iput(h_inode);
26376+
26377+ AuTraceErr(*a->errp);
26378+}
26379+
26380+/*
26381+ * @dir: must be locked.
26382+ * @path: target path; @force: if non-zero, the unlink runs via au_wkq_wait().
26383+ */
26384+int vfsub_unlink(struct inode *dir, struct path *path, int force)
26385+{
26386+ int err;
26387+ struct unlink_args args = {
26388+ .errp = &err,
26389+ .dir = dir,
26390+ .path = path
26391+ };
26392+
26393+ if (!force)
26394+ call_unlink(&args);
26395+ else {
26396+ int wkq_err;
26397+
26398+ wkq_err = au_wkq_wait(call_unlink, &args);
26399+ if (unlikely(wkq_err))
26400+ err = wkq_err;
26401+ }
26402+
26403+ return err;
26404+}
7f207e10
AM
26405diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
26406--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
26407+++ linux/fs/aufs/vfsub.h 2011-08-24 13:30:24.734646739 +0200
26408@@ -0,0 +1,232 @@
1facf9fc 26409+/*
027c5e7a 26410+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26411+ *
26412+ * This program, aufs is free software; you can redistribute it and/or modify
26413+ * it under the terms of the GNU General Public License as published by
26414+ * the Free Software Foundation; either version 2 of the License, or
26415+ * (at your option) any later version.
dece6358
AM
26416+ *
26417+ * This program is distributed in the hope that it will be useful,
26418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26420+ * GNU General Public License for more details.
26421+ *
26422+ * You should have received a copy of the GNU General Public License
26423+ * along with this program; if not, write to the Free Software
26424+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26425+ */
26426+
26427+/*
26428+ * sub-routines for VFS
26429+ */
26430+
26431+#ifndef __AUFS_VFSUB_H__
26432+#define __AUFS_VFSUB_H__
26433+
26434+#ifdef __KERNEL__
26435+
26436+#include <linux/fs.h>
0c5527e5 26437+#include <linux/lglock.h>
7f207e10 26438+#include "debug.h"
1facf9fc 26439+
7f207e10 26440+/* copied from linux/fs/internal.h */
2cbb1c4b 26441+/* todo: BAD approach!! */
0c5527e5 26442+DECLARE_BRLOCK(vfsmount_lock);
0c5527e5 26443+extern void file_sb_list_del(struct file *f);
2cbb1c4b 26444+extern spinlock_t inode_sb_list_lock;
0c5527e5 26445+
7f207e10
AM
26446+/* copied from linux/fs/file_table.c */
26447+DECLARE_LGLOCK(files_lglock);
0c5527e5
AM
26448+#ifdef CONFIG_SMP
26449+/*
26450+ * These macros iterate all files on all CPUs for a given superblock.
26451+ * files_lglock must be held globally.
26452+ */
26453+#define do_file_list_for_each_entry(__sb, __file) \
26454+{ \
26455+ int i; \
26456+ for_each_possible_cpu(i) { \
26457+ struct list_head *list; \
26458+ list = per_cpu_ptr((__sb)->s_files, i); \
26459+ list_for_each_entry((__file), list, f_u.fu_list)
26460+
26461+#define while_file_list_for_each_entry \
26462+ } \
26463+}
26464+
26465+#else
26466+
26467+#define do_file_list_for_each_entry(__sb, __file) \
26468+{ \
26469+ struct list_head *list; \
26470+ list = &(__sb)->s_files; /* use the macro argument, not a caller-scope 'sb' */ \
26471+ list_for_each_entry((__file), list, f_u.fu_list)
26472+
26473+#define while_file_list_for_each_entry \
26474+}
7f207e10
AM
26475+#endif
26476+
26477+/* ---------------------------------------------------------------------- */
1facf9fc 26478+
26479+/* lock subclass for lower inode */
26480+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
26481+/* reduce? gave up. */
26482+enum {
26483+ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
26484+ AuLsc_I_PARENT, /* lower inode, parent first */
26485+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 26486+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 26487+ AuLsc_I_CHILD,
26488+ AuLsc_I_CHILD2,
26489+ AuLsc_I_End
26490+};
26491+
26492+/* to make debugging easier, keep these as macros rather than inline functions */
26493+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
26494+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
26495+
26496+/* ---------------------------------------------------------------------- */
26497+
7f207e10
AM
26498+static inline void vfsub_drop_nlink(struct inode *inode)
26499+{
26500+ AuDebugOn(!inode->i_nlink);
26501+ drop_nlink(inode);
26502+}
26503+
027c5e7a
AM
26504+static inline void vfsub_dead_dir(struct inode *inode)
26505+{
26506+ AuDebugOn(!S_ISDIR(inode->i_mode));
26507+ inode->i_flags |= S_DEAD;
26508+ clear_nlink(inode);
26509+}
26510+
7f207e10
AM
26511+/* ---------------------------------------------------------------------- */
26512+
26513+int vfsub_update_h_iattr(struct path *h_path, int *did);
26514+struct file *vfsub_dentry_open(struct path *path, int flags);
26515+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
1facf9fc 26516+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
26517+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
26518+ int len);
26519+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
2cbb1c4b 26520+int vfsub_name_hash(const char *name, struct qstr *this, int len);
1facf9fc 26521+
26522+/* ---------------------------------------------------------------------- */
26523+
26524+struct au_hinode;
26525+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
26526+ struct dentry *d2, struct au_hinode *hdir2);
26527+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
26528+ struct dentry *d2, struct au_hinode *hdir2);
26529+
26530+int vfsub_create(struct inode *dir, struct path *path, int mode);
26531+int vfsub_symlink(struct inode *dir, struct path *path,
26532+ const char *symname);
26533+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
26534+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
26535+ struct path *path);
26536+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
26537+ struct inode *hdir, struct path *path);
26538+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
26539+int vfsub_rmdir(struct inode *dir, struct path *path);
26540+
26541+/* ---------------------------------------------------------------------- */
26542+
26543+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26544+ loff_t *ppos);
26545+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26546+ loff_t *ppos);
26547+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26548+ loff_t *ppos);
26549+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
26550+ loff_t *ppos);
4a4d8108 26551+int vfsub_flush(struct file *file, fl_owner_t id);
1facf9fc 26552+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
26553+
4a4d8108
AM
26554+static inline unsigned int vfsub_file_flags(struct file *file)
26555+{
26556+ unsigned int flags;
26557+
26558+ spin_lock(&file->f_lock);
26559+ flags = file->f_flags;
26560+ spin_unlock(&file->f_lock);
26561+
26562+ return flags;
26563+}
1308ab2a 26564+
1facf9fc 26565+static inline void vfsub_file_accessed(struct file *h_file)
26566+{
26567+ file_accessed(h_file);
26568+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
26569+}
26570+
26571+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
26572+ struct dentry *h_dentry)
26573+{
26574+ struct path h_path = {
26575+ .dentry = h_dentry,
26576+ .mnt = h_mnt
26577+ };
26578+ touch_atime(h_mnt, h_dentry);
26579+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
26580+}
26581+
4a4d8108
AM
26582+long vfsub_splice_to(struct file *in, loff_t *ppos,
26583+ struct pipe_inode_info *pipe, size_t len,
26584+ unsigned int flags);
26585+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26586+ loff_t *ppos, size_t len, unsigned int flags);
26587+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26588+ struct file *h_file);
53392da6 26589+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 26590+
1facf9fc 26591+/* ---------------------------------------------------------------------- */
26592+
26593+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
26594+{
26595+ loff_t err;
26596+
2cbb1c4b 26597+ lockdep_off();
1facf9fc 26598+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 26599+ lockdep_on();
1facf9fc 26600+ return err;
26601+}
26602+
26603+/* ---------------------------------------------------------------------- */
26604+
26605+/* dirty workaround for strict type of fmode_t */
26606+union vfsub_fmu {
26607+ fmode_t fm;
26608+ unsigned int ui;
26609+};
26610+
26611+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
26612+{
26613+ union vfsub_fmu u = {
26614+ .fm = fm
26615+ };
26616+
26617+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui));
26618+
26619+ return u.ui;
26620+}
26621+
26622+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
26623+{
26624+ union vfsub_fmu u = {
26625+ .ui = ui
26626+ };
26627+
26628+ return u.fm;
26629+}
26630+
4a4d8108
AM
26631+/* ---------------------------------------------------------------------- */
26632+
26633+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
26634+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
26635+int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
26636+int vfsub_notify_change(struct path *path, struct iattr *ia);
26637+int vfsub_unlink(struct inode *dir, struct path *path, int force);
26638+
1facf9fc 26639+#endif /* __KERNEL__ */
26640+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
26641diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
26642--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
53392da6 26643+++ linux/fs/aufs/wbr_policy.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 26644@@ -0,0 +1,700 @@
1facf9fc 26645+/*
027c5e7a 26646+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26647+ *
26648+ * This program, aufs is free software; you can redistribute it and/or modify
26649+ * it under the terms of the GNU General Public License as published by
26650+ * the Free Software Foundation; either version 2 of the License, or
26651+ * (at your option) any later version.
dece6358
AM
26652+ *
26653+ * This program is distributed in the hope that it will be useful,
26654+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26655+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26656+ * GNU General Public License for more details.
26657+ *
26658+ * You should have received a copy of the GNU General Public License
26659+ * along with this program; if not, write to the Free Software
26660+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26661+ */
26662+
26663+/*
26664+ * policies for selecting one among multiple writable branches
26665+ */
26666+
26667+#include <linux/statfs.h>
26668+#include "aufs.h"
26669+
26670+/* subset of cpup_attr() */
26671+static noinline_for_stack
26672+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
26673+{
26674+ int err, sbits;
26675+ struct iattr ia;
26676+ struct inode *h_isrc;
26677+
26678+ h_isrc = h_src->d_inode;
26679+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
26680+ ia.ia_mode = h_isrc->i_mode;
26681+ ia.ia_uid = h_isrc->i_uid;
26682+ ia.ia_gid = h_isrc->i_gid;
26683+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
26684+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
26685+ err = vfsub_sio_notify_change(h_path, &ia);
26686+
26687+ /* is this nfs only? */
26688+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
26689+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
26690+ ia.ia_mode = h_isrc->i_mode;
26691+ err = vfsub_sio_notify_change(h_path, &ia);
26692+ }
26693+
26694+ return err;
26695+}
26696+
26697+#define AuCpdown_PARENT_OPQ 1
26698+#define AuCpdown_WHED (1 << 1)
26699+#define AuCpdown_MADE_DIR (1 << 2)
26700+#define AuCpdown_DIROPQ (1 << 3)
26701+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
26702+#define au_fset_cpdown(flags, name) \
26703+ do { (flags) |= AuCpdown_##name; } while (0)
26704+#define au_fclr_cpdown(flags, name) \
26705+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 26706+
26707+struct au_cpdown_dir_args {
26708+ struct dentry *parent;
26709+ unsigned int flags;
26710+};
26711+
26712+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
26713+ struct au_cpdown_dir_args *a)
26714+{
26715+ int err;
26716+ struct dentry *opq_dentry;
26717+
26718+ opq_dentry = au_diropq_create(dentry, bdst);
26719+ err = PTR_ERR(opq_dentry);
26720+ if (IS_ERR(opq_dentry))
26721+ goto out;
26722+ dput(opq_dentry);
26723+ au_fset_cpdown(a->flags, DIROPQ);
26724+
4f0767ce 26725+out:
1facf9fc 26726+ return err;
26727+}
26728+
26729+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
26730+ struct inode *dir, aufs_bindex_t bdst)
26731+{
26732+ int err;
26733+ struct path h_path;
26734+ struct au_branch *br;
26735+
26736+ br = au_sbr(dentry->d_sb, bdst);
26737+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
26738+ err = PTR_ERR(h_path.dentry);
26739+ if (IS_ERR(h_path.dentry))
26740+ goto out;
26741+
26742+ err = 0;
26743+ if (h_path.dentry->d_inode) {
26744+ h_path.mnt = br->br_mnt;
26745+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
26746+ dentry);
26747+ }
26748+ dput(h_path.dentry);
26749+
4f0767ce 26750+out:
1facf9fc 26751+ return err;
26752+}
26753+
26754+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
26755+ struct dentry *h_parent, void *arg)
26756+{
26757+ int err, rerr;
4a4d8108 26758+ aufs_bindex_t bopq, bstart;
1facf9fc 26759+ struct path h_path;
26760+ struct dentry *parent;
26761+ struct inode *h_dir, *h_inode, *inode, *dir;
26762+ struct au_cpdown_dir_args *args = arg;
26763+
26764+ bstart = au_dbstart(dentry);
26765+ /* dentry is di-locked */
26766+ parent = dget_parent(dentry);
26767+ dir = parent->d_inode;
26768+ h_dir = h_parent->d_inode;
26769+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
26770+ IMustLock(h_dir);
26771+
26772+ err = au_lkup_neg(dentry, bdst);
26773+ if (unlikely(err < 0))
26774+ goto out;
26775+ h_path.dentry = au_h_dptr(dentry, bdst);
26776+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
26777+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
26778+ S_IRWXU | S_IRUGO | S_IXUGO);
26779+ if (unlikely(err))
26780+ goto out_put;
26781+ au_fset_cpdown(args->flags, MADE_DIR);
26782+
1facf9fc 26783+ bopq = au_dbdiropq(dentry);
26784+ au_fclr_cpdown(args->flags, WHED);
26785+ au_fclr_cpdown(args->flags, DIROPQ);
26786+ if (au_dbwh(dentry) == bdst)
26787+ au_fset_cpdown(args->flags, WHED);
26788+ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
26789+ au_fset_cpdown(args->flags, PARENT_OPQ);
1facf9fc 26790+ h_inode = h_path.dentry->d_inode;
26791+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26792+ if (au_ftest_cpdown(args->flags, WHED)) {
26793+ err = au_cpdown_dir_opq(dentry, bdst, args);
26794+ if (unlikely(err)) {
26795+ mutex_unlock(&h_inode->i_mutex);
26796+ goto out_dir;
26797+ }
26798+ }
26799+
26800+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
26801+ mutex_unlock(&h_inode->i_mutex);
26802+ if (unlikely(err))
26803+ goto out_opq;
26804+
26805+ if (au_ftest_cpdown(args->flags, WHED)) {
26806+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
26807+ if (unlikely(err))
26808+ goto out_opq;
26809+ }
26810+
26811+ inode = dentry->d_inode;
26812+ if (au_ibend(inode) < bdst)
26813+ au_set_ibend(inode, bdst);
26814+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
26815+ au_hi_flags(inode, /*isdir*/1));
26816+ goto out; /* success */
26817+
26818+ /* revert */
4f0767ce 26819+out_opq:
1facf9fc 26820+ if (au_ftest_cpdown(args->flags, DIROPQ)) {
26821+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26822+ rerr = au_diropq_remove(dentry, bdst);
26823+ mutex_unlock(&h_inode->i_mutex);
26824+ if (unlikely(rerr)) {
26825+ AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
26826+ AuDLNPair(dentry), bdst, rerr);
26827+ err = -EIO;
26828+ goto out;
26829+ }
26830+ }
4f0767ce 26831+out_dir:
1facf9fc 26832+ if (au_ftest_cpdown(args->flags, MADE_DIR)) {
26833+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
26834+ if (unlikely(rerr)) {
26835+ AuIOErr("failed removing %.*s b%d (%d)\n",
26836+ AuDLNPair(dentry), bdst, rerr);
26837+ err = -EIO;
26838+ }
26839+ }
4f0767ce 26840+out_put:
1facf9fc 26841+ au_set_h_dptr(dentry, bdst, NULL);
26842+ if (au_dbend(dentry) == bdst)
26843+ au_update_dbend(dentry);
4f0767ce 26844+out:
1facf9fc 26845+ dput(parent);
26846+ return err;
26847+}
26848+
26849+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
26850+{
26851+ int err;
26852+ struct au_cpdown_dir_args args = {
26853+ .parent = dget_parent(dentry),
26854+ .flags = 0
26855+ };
26856+
26857+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
26858+ dput(args.parent);
26859+
26860+ return err;
26861+}
26862+
26863+/* ---------------------------------------------------------------------- */
26864+
26865+/* policies for create */
26866+
4a4d8108
AM
26867+static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
26868+{
26869+ int err, i, j, ndentry;
26870+ aufs_bindex_t bopq;
26871+ struct au_dcsub_pages dpages;
26872+ struct au_dpage *dpage;
26873+ struct dentry **dentries, *parent, *d;
26874+
26875+ err = au_dpages_init(&dpages, GFP_NOFS);
26876+ if (unlikely(err))
26877+ goto out;
26878+ parent = dget_parent(dentry);
027c5e7a 26879+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
26880+ if (unlikely(err))
26881+ goto out_free;
26882+
26883+ err = bindex;
26884+ for (i = 0; i < dpages.ndpage; i++) {
26885+ dpage = dpages.dpages + i;
26886+ dentries = dpage->dentries;
26887+ ndentry = dpage->ndentry;
26888+ for (j = 0; j < ndentry; j++) {
26889+ d = dentries[j];
26890+ di_read_lock_parent2(d, !AuLock_IR);
26891+ bopq = au_dbdiropq(d);
26892+ di_read_unlock(d, !AuLock_IR);
26893+ if (bopq >= 0 && bopq < err)
26894+ err = bopq;
26895+ }
26896+ }
26897+
26898+out_free:
26899+ dput(parent);
26900+ au_dpages_free(&dpages);
26901+out:
26902+ return err;
26903+}
26904+
1facf9fc 26905+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
26906+{
26907+ for (; bindex >= 0; bindex--)
26908+ if (!au_br_rdonly(au_sbr(sb, bindex)))
26909+ return bindex;
26910+ return -EROFS;
26911+}
26912+
26913+/* top down parent */
26914+static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
26915+{
26916+ int err;
26917+ aufs_bindex_t bstart, bindex;
26918+ struct super_block *sb;
26919+ struct dentry *parent, *h_parent;
26920+
26921+ sb = dentry->d_sb;
26922+ bstart = au_dbstart(dentry);
26923+ err = bstart;
26924+ if (!au_br_rdonly(au_sbr(sb, bstart)))
26925+ goto out;
26926+
26927+ err = -EROFS;
26928+ parent = dget_parent(dentry);
26929+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
26930+ h_parent = au_h_dptr(parent, bindex);
26931+ if (!h_parent || !h_parent->d_inode)
26932+ continue;
26933+
26934+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
26935+ err = bindex;
26936+ break;
26937+ }
26938+ }
26939+ dput(parent);
26940+
26941+ /* bottom up here */
4a4d8108 26942+ if (unlikely(err < 0)) {
1facf9fc 26943+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
26944+ if (err >= 0)
26945+ err = au_wbr_nonopq(dentry, err);
26946+ }
1facf9fc 26947+
4f0767ce 26948+out:
1facf9fc 26949+ AuDbg("b%d\n", err);
26950+ return err;
26951+}
26952+
26953+/* ---------------------------------------------------------------------- */
26954+
26955+/* an exception for the policies other than tdp */
26956+static int au_wbr_create_exp(struct dentry *dentry)
26957+{
26958+ int err;
26959+ aufs_bindex_t bwh, bdiropq;
26960+ struct dentry *parent;
26961+
26962+ err = -1;
26963+ bwh = au_dbwh(dentry);
26964+ parent = dget_parent(dentry);
26965+ bdiropq = au_dbdiropq(parent);
26966+ if (bwh >= 0) {
26967+ if (bdiropq >= 0)
26968+ err = min(bdiropq, bwh);
26969+ else
26970+ err = bwh;
26971+ AuDbg("%d\n", err);
26972+ } else if (bdiropq >= 0) {
26973+ err = bdiropq;
26974+ AuDbg("%d\n", err);
26975+ }
26976+ dput(parent);
26977+
4a4d8108
AM
26978+ if (err >= 0)
26979+ err = au_wbr_nonopq(dentry, err);
26980+
1facf9fc 26981+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
26982+ err = -1;
26983+
26984+ AuDbg("%d\n", err);
26985+ return err;
26986+}
26987+
26988+/* ---------------------------------------------------------------------- */
26989+
26990+/* round robin */
26991+static int au_wbr_create_init_rr(struct super_block *sb)
26992+{
26993+ int err;
26994+
26995+ err = au_wbr_bu(sb, au_sbend(sb));
26996+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 26997+ /* smp_mb(); */
1facf9fc 26998+
26999+ AuDbg("b%d\n", err);
27000+ return err;
27001+}
27002+
27003+static int au_wbr_create_rr(struct dentry *dentry, int isdir)
27004+{
27005+ int err, nbr;
27006+ unsigned int u;
27007+ aufs_bindex_t bindex, bend;
27008+ struct super_block *sb;
27009+ atomic_t *next;
27010+
27011+ err = au_wbr_create_exp(dentry);
27012+ if (err >= 0)
27013+ goto out;
27014+
27015+ sb = dentry->d_sb;
27016+ next = &au_sbi(sb)->si_wbr_rr_next;
27017+ bend = au_sbend(sb);
27018+ nbr = bend + 1;
27019+ for (bindex = 0; bindex <= bend; bindex++) {
27020+ if (!isdir) {
27021+ err = atomic_dec_return(next) + 1;
27022+ /* modulo for 0 is meaningless */
27023+ if (unlikely(!err))
27024+ err = atomic_dec_return(next) + 1;
27025+ } else
27026+ err = atomic_read(next);
27027+ AuDbg("%d\n", err);
27028+ u = err;
27029+ err = u % nbr;
27030+ AuDbg("%d\n", err);
27031+ if (!au_br_rdonly(au_sbr(sb, err)))
27032+ break;
27033+ err = -EROFS;
27034+ }
27035+
4a4d8108
AM
27036+ if (err >= 0)
27037+ err = au_wbr_nonopq(dentry, err);
27038+
4f0767ce 27039+out:
1facf9fc 27040+ AuDbg("%d\n", err);
27041+ return err;
27042+}
27043+
27044+/* ---------------------------------------------------------------------- */
27045+
27046+/* most free space */
27047+static void au_mfs(struct dentry *dentry)
27048+{
27049+ struct super_block *sb;
27050+ struct au_branch *br;
27051+ struct au_wbr_mfs *mfs;
27052+ aufs_bindex_t bindex, bend;
27053+ int err;
27054+ unsigned long long b, bavail;
7f207e10 27055+ struct path h_path;
1facf9fc 27056+ /* reduce the stack usage */
27057+ struct kstatfs *st;
27058+
27059+ st = kmalloc(sizeof(*st), GFP_NOFS);
27060+ if (unlikely(!st)) {
27061+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
27062+ return;
27063+ }
27064+
27065+ bavail = 0;
27066+ sb = dentry->d_sb;
27067+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 27068+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 27069+ mfs->mfs_bindex = -EROFS;
27070+ mfs->mfsrr_bytes = 0;
27071+ bend = au_sbend(sb);
27072+ for (bindex = 0; bindex <= bend; bindex++) {
27073+ br = au_sbr(sb, bindex);
27074+ if (au_br_rdonly(br))
27075+ continue;
27076+
27077+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27078+ h_path.mnt = br->br_mnt;
27079+ h_path.dentry = h_path.mnt->mnt_root;
27080+ err = vfs_statfs(&h_path, st);
1facf9fc 27081+ if (unlikely(err)) {
27082+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
27083+ continue;
27084+ }
27085+
27086+ /* when the available size is equal, select the lower one */
27087+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
27088+ || sizeof(b) < sizeof(st->f_bsize));
27089+ b = st->f_bavail * st->f_bsize;
27090+ br->br_wbr->wbr_bytes = b;
27091+ if (b >= bavail) {
27092+ bavail = b;
27093+ mfs->mfs_bindex = bindex;
27094+ mfs->mfs_jiffy = jiffies;
27095+ }
27096+ }
27097+
27098+ mfs->mfsrr_bytes = bavail;
27099+ AuDbg("b%d\n", mfs->mfs_bindex);
27100+ kfree(st);
27101+}
27102+
27103+static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
27104+{
27105+ int err;
27106+ struct super_block *sb;
27107+ struct au_wbr_mfs *mfs;
27108+
27109+ err = au_wbr_create_exp(dentry); /* the policy exception takes priority when it yields a branch */
27110+ if (err >= 0)
27111+ goto out;
27112+
27113+ sb = dentry->d_sb;
27114+ mfs = &au_sbi(sb)->si_wbr_mfs;
27115+ mutex_lock(&mfs->mfs_lock);
27116+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) /* cached choice expired, */
27117+ || mfs->mfs_bindex < 0 /* or no branch chosen yet, */
27118+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) /* or the chosen branch is read-only */
27119+ au_mfs(dentry);
27120+ err = mfs->mfs_bindex; /* read under mfs_lock: au_mfs() resets it to -EROFS while rescanning */
27121+ mutex_unlock(&mfs->mfs_lock);
27122+
4a4d8108
AM
27123+ if (err >= 0)
27124+ err = au_wbr_nonopq(dentry, err);
27125+
4f0767ce 27126+out:
1facf9fc 27127+ AuDbg("b%d\n", err);
27128+ return err; /* branch index, or a negative errno */
27129+}
27130+
27131+static int au_wbr_create_init_mfs(struct super_block *sb)
27132+{
27133+ struct au_wbr_mfs *mfs;
27134+
27135+ mfs = &au_sbi(sb)->si_wbr_mfs;
27136+ mutex_init(&mfs->mfs_lock);
27137+ mfs->mfs_jiffy = 0;
27138+ mfs->mfs_bindex = -EROFS;
27139+
27140+ return 0;
27141+}
27142+
27143+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
27144+{
27145+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
27146+ return 0;
27147+}
27148+
27149+/* ---------------------------------------------------------------------- */
27150+
27151+/* most free space and then round robin */
27152+static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
27153+{
27154+ int err;
27155+ struct au_wbr_mfs *mfs;
27156+
27157+ err = au_wbr_create_mfs(dentry, isdir);
27158+ if (err >= 0) {
27159+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 27160+ mutex_lock(&mfs->mfs_lock);
1facf9fc 27161+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
27162+ err = au_wbr_create_rr(dentry, isdir);
dece6358 27163+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 27164+ }
27165+
27166+ AuDbg("b%d\n", err);
27167+ return err;
27168+}
27169+
27170+static int au_wbr_create_init_mfsrr(struct super_block *sb)
27171+{
27172+ int err;
27173+
27174+ au_wbr_create_init_mfs(sb); /* ignore */
27175+ err = au_wbr_create_init_rr(sb);
27176+
27177+ return err;
27178+}
27179+
27180+/* ---------------------------------------------------------------------- */
27181+
27182+/* top down parent and most free space */
27183+static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
27184+{
27185+ int err, e2;
27186+ unsigned long long b;
27187+ aufs_bindex_t bindex, bstart, bend;
27188+ struct super_block *sb;
27189+ struct dentry *parent, *h_parent;
27190+ struct au_branch *br;
27191+
27192+ err = au_wbr_create_tdp(dentry, isdir);
27193+ if (unlikely(err < 0))
27194+ goto out;
27195+ parent = dget_parent(dentry);
27196+ bstart = au_dbstart(parent);
27197+ bend = au_dbtaildir(parent);
27198+ if (bstart == bend)
27199+ goto out_parent; /* success */
27200+
27201+ e2 = au_wbr_create_mfs(dentry, isdir);
27202+ if (e2 < 0)
27203+ goto out_parent; /* success */
27204+
27205+ /* when the available size is equal, select upper one */
27206+ sb = dentry->d_sb;
27207+ br = au_sbr(sb, err);
27208+ b = br->br_wbr->wbr_bytes;
27209+ AuDbg("b%d, %llu\n", err, b);
27210+
27211+ for (bindex = bstart; bindex <= bend; bindex++) {
27212+ h_parent = au_h_dptr(parent, bindex);
27213+ if (!h_parent || !h_parent->d_inode)
27214+ continue;
27215+
27216+ br = au_sbr(sb, bindex);
27217+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
27218+ b = br->br_wbr->wbr_bytes;
27219+ err = bindex;
27220+ AuDbg("b%d, %llu\n", err, b);
27221+ }
27222+ }
27223+
4a4d8108
AM
27224+ if (err >= 0)
27225+ err = au_wbr_nonopq(dentry, err);
27226+
4f0767ce 27227+out_parent:
1facf9fc 27228+ dput(parent);
4f0767ce 27229+out:
1facf9fc 27230+ AuDbg("b%d\n", err);
27231+ return err;
27232+}
27233+
27234+/* ---------------------------------------------------------------------- */
27235+
27236+/* policies for copyup */
27237+
27238+/* top down parent */
27239+static int au_wbr_copyup_tdp(struct dentry *dentry)
27240+{
27241+ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
27242+}
27243+
27244+/* bottom up parent */
27245+static int au_wbr_copyup_bup(struct dentry *dentry)
27246+{
27247+ int err;
27248+ aufs_bindex_t bindex, bstart;
27249+ struct dentry *parent, *h_parent;
27250+ struct super_block *sb;
27251+
27252+ err = -EROFS;
27253+ sb = dentry->d_sb;
27254+ parent = dget_parent(dentry);
27255+ bstart = au_dbstart(parent);
27256+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
27257+ h_parent = au_h_dptr(parent, bindex);
27258+ if (!h_parent || !h_parent->d_inode)
27259+ continue;
27260+
27261+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27262+ err = bindex;
27263+ break;
27264+ }
27265+ }
27266+ dput(parent);
27267+
27268+ /* bottom up here */
27269+ if (unlikely(err < 0))
27270+ err = au_wbr_bu(sb, bstart - 1);
27271+
27272+ AuDbg("b%d\n", err);
27273+ return err;
27274+}
27275+
27276+/* bottom up */
27277+static int au_wbr_copyup_bu(struct dentry *dentry) /* copyup policy "bottom up" */
27278+{
27279+ int err;
4a4d8108 27280+ aufs_bindex_t bstart;
1facf9fc 27281+
4a4d8108
AM
27282+ bstart = au_dbstart(dentry);
27283+ err = au_wbr_bu(dentry->d_sb, bstart); /* first non-readonly branch scanning from bstart down to 0, else -EROFS */
27284+ AuDbg("b%d\n", err);
27285+ if (err > bstart) /* NOTE(review): au_wbr_bu() never returns an index above its start, so this looks unreachable -- confirm intent */
27286+ err = au_wbr_nonopq(dentry, err);
1facf9fc 27287+
27288+ AuDbg("b%d\n", err);
27289+ return err;
27290+}
27291+
27292+/* ---------------------------------------------------------------------- */
27293+
27294+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
27295+ [AuWbrCopyup_TDP] = {
27296+ .copyup = au_wbr_copyup_tdp
27297+ },
27298+ [AuWbrCopyup_BUP] = {
27299+ .copyup = au_wbr_copyup_bup
27300+ },
27301+ [AuWbrCopyup_BU] = {
27302+ .copyup = au_wbr_copyup_bu
27303+ }
27304+};
27305+
27306+struct au_wbr_create_operations au_wbr_create_ops[] = {
27307+ [AuWbrCreate_TDP] = {
27308+ .create = au_wbr_create_tdp
27309+ },
27310+ [AuWbrCreate_RR] = {
27311+ .create = au_wbr_create_rr,
27312+ .init = au_wbr_create_init_rr
27313+ },
27314+ [AuWbrCreate_MFS] = {
27315+ .create = au_wbr_create_mfs,
27316+ .init = au_wbr_create_init_mfs,
27317+ .fin = au_wbr_create_fin_mfs
27318+ },
27319+ [AuWbrCreate_MFSV] = {
27320+ .create = au_wbr_create_mfs,
27321+ .init = au_wbr_create_init_mfs,
27322+ .fin = au_wbr_create_fin_mfs
27323+ },
27324+ [AuWbrCreate_MFSRR] = {
27325+ .create = au_wbr_create_mfsrr,
27326+ .init = au_wbr_create_init_mfsrr,
27327+ .fin = au_wbr_create_fin_mfs
27328+ },
27329+ [AuWbrCreate_MFSRRV] = {
27330+ .create = au_wbr_create_mfsrr,
27331+ .init = au_wbr_create_init_mfsrr,
27332+ .fin = au_wbr_create_fin_mfs
27333+ },
27334+ [AuWbrCreate_PMFS] = {
27335+ .create = au_wbr_create_pmfs,
27336+ .init = au_wbr_create_init_mfs,
27337+ .fin = au_wbr_create_fin_mfs
27338+ },
27339+ [AuWbrCreate_PMFSV] = {
27340+ .create = au_wbr_create_pmfs,
27341+ .init = au_wbr_create_init_mfs,
27342+ .fin = au_wbr_create_fin_mfs
27343+ }
27344+};
7f207e10
AM
27345diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
27346--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
53392da6 27347+++ linux/fs/aufs/whout.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 27348@@ -0,0 +1,1062 @@
1facf9fc 27349+/*
027c5e7a 27350+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 27351+ *
27352+ * This program, aufs is free software; you can redistribute it and/or modify
27353+ * it under the terms of the GNU General Public License as published by
27354+ * the Free Software Foundation; either version 2 of the License, or
27355+ * (at your option) any later version.
dece6358
AM
27356+ *
27357+ * This program is distributed in the hope that it will be useful,
27358+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27359+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27360+ * GNU General Public License for more details.
27361+ *
27362+ * You should have received a copy of the GNU General Public License
27363+ * along with this program; if not, write to the Free Software
27364+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 27365+ */
27366+
27367+/*
27368+ * whiteout for logical deletion and opaque directory
27369+ */
27370+
27371+#include <linux/fs.h>
27372+#include "aufs.h"
27373+
27374+#define WH_MASK S_IRUGO
27375+
27376+/*
27377+ * If a directory contains this file, then it is opaque. We start with the
27378+ * .wh. flag so that it is blocked by lookup.
27379+ */
27380+static struct qstr diropq_name = {
27381+ .name = AUFS_WH_DIROPQ,
27382+ .len = sizeof(AUFS_WH_DIROPQ) - 1
27383+};
27384+
27385+/*
27386+ * generate whiteout name, which is NOT NUL-terminated.
27387+ * @wh: whiteout qstr to be filled in
27388+ * @name: original d_name (both ->name and ->len are used)
27389+ * returns zero on success, -ENAMETOOLONG or -ENOMEM otherwise.
27390+ * on success, wh->name is kmalloc-ed and should be freed by the caller
27391+ * with kfree().
27392+ */
27393+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
27394+{
27395+ char *p;
27396+
27397+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
27398+ return -ENAMETOOLONG;
27399+
27400+ wh->len = name->len + AUFS_WH_PFX_LEN;
27401+ p = kmalloc(wh->len, GFP_NOFS);
27402+ wh->name = p;
27403+ if (p) {
27404+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
27405+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
27406+ /* smp_mb(); */
27407+ return 0;
27408+ }
27409+ return -ENOMEM;
27410+}
27411+
27412+/* ---------------------------------------------------------------------- */
27413+
27414+/*
27415+ * test if the @wh_name exists under @h_parent.
27416+ * @try_sio specifies whether super-io is necessary.
27417+ */
27418+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
27419+ struct au_branch *br, int try_sio)
27420+{
27421+ int err;
27422+ struct dentry *wh_dentry;
1facf9fc 27423+
1facf9fc 27424+ if (!try_sio)
27425+ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
27426+ else
27427+ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
27428+ err = PTR_ERR(wh_dentry);
27429+ if (IS_ERR(wh_dentry))
27430+ goto out;
27431+
27432+ err = 0;
27433+ if (!wh_dentry->d_inode)
27434+ goto out_wh; /* success */
27435+
27436+ err = 1;
27437+ if (S_ISREG(wh_dentry->d_inode->i_mode))
27438+ goto out_wh; /* success */
27439+
27440+ err = -EIO;
27441+ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
27442+ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
27443+
4f0767ce 27444+out_wh:
1facf9fc 27445+ dput(wh_dentry);
4f0767ce 27446+out:
1facf9fc 27447+ return err;
27448+}
27449+
27450+/*
27451+ * test whether the directory @h_dentry is opaque or not.
27452+ */
27453+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
27454+{
27455+ int err;
27456+ struct inode *h_dir;
27457+
27458+ h_dir = h_dentry->d_inode;
27459+ err = au_wh_test(h_dentry, &diropq_name, br,
27460+ au_test_h_perm_sio(h_dir, MAY_EXEC));
27461+ return err;
27462+}
27463+
27464+/*
27465+ * returns a negative dentry whose name is unique and temporary.
27466+ */
27467+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
27468+ struct qstr *prefix)
27469+{
1facf9fc 27470+ struct dentry *dentry;
27471+ int i;
027c5e7a 27472+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 27473+ *name, *p;
027c5e7a 27474+ /* strict atomic_t is unnecessary here */
1facf9fc 27475+ static unsigned short cnt;
27476+ struct qstr qs;
27477+
4a4d8108
AM
27478+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
27479+
1facf9fc 27480+ name = defname;
027c5e7a
AM
27481+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
27482+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 27483+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 27484+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 27485+ goto out;
27486+ dentry = ERR_PTR(-ENOMEM);
27487+ name = kmalloc(qs.len + 1, GFP_NOFS);
27488+ if (unlikely(!name))
27489+ goto out;
27490+ }
27491+
27492+ /* doubly whiteout-ed */
27493+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
27494+ p = name + AUFS_WH_PFX_LEN * 2;
27495+ memcpy(p, prefix->name, prefix->len);
27496+ p += prefix->len;
27497+ *p++ = '.';
4a4d8108 27498+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 27499+
27500+ qs.name = name;
27501+ for (i = 0; i < 3; i++) {
b752ccd1 27502+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
1facf9fc 27503+ dentry = au_sio_lkup_one(&qs, h_parent, br);
27504+ if (IS_ERR(dentry) || !dentry->d_inode)
27505+ goto out_name;
27506+ dput(dentry);
27507+ }
4a4d8108 27508+ /* pr_warning("could not get random name\n"); */
1facf9fc 27509+ dentry = ERR_PTR(-EEXIST);
27510+ AuDbg("%.*s\n", AuLNPair(&qs));
27511+ BUG();
27512+
4f0767ce 27513+out_name:
1facf9fc 27514+ if (name != defname)
27515+ kfree(name);
4f0767ce 27516+out:
4a4d8108 27517+ AuTraceErrPtr(dentry);
1facf9fc 27518+ return dentry;
1facf9fc 27519+}
27520+
27521+/*
27522+ * rename the @h_dentry on @br to the whiteouted temporary name.
27523+ */
27524+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
27525+{
27526+ int err;
27527+ struct path h_path = {
27528+ .mnt = br->br_mnt
27529+ };
27530+ struct inode *h_dir;
27531+ struct dentry *h_parent;
27532+
27533+ h_parent = h_dentry->d_parent; /* dir inode is locked */
27534+ h_dir = h_parent->d_inode;
27535+ IMustLock(h_dir);
27536+
27537+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
27538+ err = PTR_ERR(h_path.dentry);
27539+ if (IS_ERR(h_path.dentry))
27540+ goto out;
27541+
27542+ /* under the same dir, no need to lock_rename() */
27543+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
27544+ AuTraceErr(err);
27545+ dput(h_path.dentry);
27546+
4f0767ce 27547+out:
4a4d8108 27548+ AuTraceErr(err);
1facf9fc 27549+ return err;
27550+}
27551+
27552+/* ---------------------------------------------------------------------- */
27553+/*
27554+ * functions for removing a whiteout
27555+ */
27556+
27557+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
27558+{
27559+ int force;
27560+
27561+ /*
27562+ * forces superio when the dir has a sticky bit.
27563+ * this may be a violation of unix fs semantics.
27564+ */
27565+ force = (h_dir->i_mode & S_ISVTX)
27566+ && h_path->dentry->d_inode->i_uid != current_fsuid();
27567+ return vfsub_unlink(h_dir, h_path, force);
27568+}
27569+
27570+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
27571+ struct dentry *dentry)
27572+{
27573+ int err;
27574+
27575+ err = do_unlink_wh(h_dir, h_path);
27576+ if (!err && dentry)
27577+ au_set_dbwh(dentry, -1);
27578+
27579+ return err;
27580+}
27581+
27582+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
27583+ struct au_branch *br)
27584+{
27585+ int err;
27586+ struct path h_path = {
27587+ .mnt = br->br_mnt
27588+ };
27589+
27590+ err = 0;
27591+ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
27592+ if (IS_ERR(h_path.dentry))
27593+ err = PTR_ERR(h_path.dentry);
27594+ else {
27595+ if (h_path.dentry->d_inode
27596+ && S_ISREG(h_path.dentry->d_inode->i_mode))
27597+ err = do_unlink_wh(h_parent->d_inode, &h_path);
27598+ dput(h_path.dentry);
27599+ }
27600+
27601+ return err;
27602+}
27603+
27604+/* ---------------------------------------------------------------------- */
27605+/*
27606+ * initialize/clean whiteout for a branch
27607+ */
27608+
27609+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
27610+ const int isdir)
27611+{
27612+ int err;
27613+
27614+ if (!whpath->dentry->d_inode)
27615+ return;
27616+
27617+ err = mnt_want_write(whpath->mnt);
27618+ if (!err) {
27619+ if (isdir)
27620+ err = vfsub_rmdir(h_dir, whpath);
27621+ else
27622+ err = vfsub_unlink(h_dir, whpath, /*force*/0);
27623+ mnt_drop_write(whpath->mnt);
27624+ }
27625+ if (unlikely(err))
4a4d8108
AM
27626+ pr_warning("failed removing %.*s (%d), ignored.\n",
27627+ AuDLNPair(whpath->dentry), err);
1facf9fc 27628+}
27629+
27630+static int test_linkable(struct dentry *h_root)
27631+{
27632+ struct inode *h_dir = h_root->d_inode;
27633+
27634+ if (h_dir->i_op->link)
27635+ return 0;
27636+
4a4d8108
AM
27637+ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
27638+ AuDLNPair(h_root), au_sbtype(h_root->d_sb));
1facf9fc 27639+ return -ENOSYS;
27640+}
27641+
27642+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
27643+static int au_whdir(struct inode *h_dir, struct path *path)
27644+{
27645+ int err;
27646+
27647+ err = -EEXIST;
27648+ if (!path->dentry->d_inode) {
27649+ int mode = S_IRWXU;
27650+
27651+ if (au_test_nfs(path->dentry->d_sb))
27652+ mode |= S_IXUGO;
27653+ err = mnt_want_write(path->mnt);
27654+ if (!err) {
27655+ err = vfsub_mkdir(h_dir, path, mode);
27656+ mnt_drop_write(path->mnt);
27657+ }
27658+ } else if (S_ISDIR(path->dentry->d_inode->i_mode))
27659+ err = 0;
27660+ else
4a4d8108 27661+ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
1facf9fc 27662+
27663+ return err;
27664+}
27665+
27666+struct au_wh_base {
27667+ const struct qstr *name;
27668+ struct dentry *dentry;
27669+};
27670+
27671+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
27672+ struct path *h_path)
27673+{
27674+ h_path->dentry = base[AuBrWh_BASE].dentry;
27675+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27676+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27677+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27678+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27679+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27680+}
27681+
27682+/*
27683+ * returns tri-state,
27684+ * minus: error, caller should print the message
27685+ * zero: success
27686+ * plus: error, caller should NOT print the message
27687+ */
27688+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
27689+ int do_plink, struct au_wh_base base[],
27690+ struct path *h_path)
27691+{
27692+ int err;
27693+ struct inode *h_dir;
27694+
27695+ h_dir = h_root->d_inode;
27696+ h_path->dentry = base[AuBrWh_BASE].dentry;
27697+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27698+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27699+ if (do_plink) {
27700+ err = test_linkable(h_root);
27701+ if (unlikely(err)) {
27702+ err = 1;
27703+ goto out;
27704+ }
27705+
27706+ err = au_whdir(h_dir, h_path);
27707+ if (unlikely(err))
27708+ goto out;
27709+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27710+ } else
27711+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27712+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27713+ err = au_whdir(h_dir, h_path);
27714+ if (unlikely(err))
27715+ goto out;
27716+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27717+
4f0767ce 27718+out:
1facf9fc 27719+ return err;
27720+}
27721+
27722+/*
27723+ * for the moment, aufs supports the branch filesystem which does not support
27724+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
27725+ * copyup failed. finally, such filesystem will not be used as the writable
27726+ * branch.
27727+ *
27728+ * returns tri-state, see above.
27729+ */
27730+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
27731+ int do_plink, struct au_wh_base base[],
27732+ struct path *h_path)
27733+{
27734+ int err;
27735+ struct inode *h_dir;
27736+
1308ab2a 27737+ WbrWhMustWriteLock(wbr);
27738+
1facf9fc 27739+ err = test_linkable(h_root);
27740+ if (unlikely(err)) {
27741+ err = 1;
27742+ goto out;
27743+ }
27744+
27745+ /*
27746+ * todo: should this create be done in /sbin/mount.aufs helper?
27747+ */
27748+ err = -EEXIST;
27749+ h_dir = h_root->d_inode;
27750+ if (!base[AuBrWh_BASE].dentry->d_inode) {
27751+ err = mnt_want_write(h_path->mnt);
27752+ if (!err) {
27753+ h_path->dentry = base[AuBrWh_BASE].dentry;
27754+ err = vfsub_create(h_dir, h_path, WH_MASK);
27755+ mnt_drop_write(h_path->mnt);
27756+ }
27757+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
27758+ err = 0;
27759+ else
4a4d8108
AM
27760+ pr_err("unknown %.*s/%.*s exists\n",
27761+ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
1facf9fc 27762+ if (unlikely(err))
27763+ goto out;
27764+
27765+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27766+ if (do_plink) {
27767+ err = au_whdir(h_dir, h_path);
27768+ if (unlikely(err))
27769+ goto out;
27770+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27771+ } else
27772+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27773+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
27774+
27775+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27776+ err = au_whdir(h_dir, h_path);
27777+ if (unlikely(err))
27778+ goto out;
27779+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27780+
4f0767ce 27781+out:
1facf9fc 27782+ return err;
27783+}
27784+
27785+/*
27786+ * initialize the whiteout base file/dir for @br.
27787+ */
27788+int au_wh_init(struct dentry *h_root, struct au_branch *br,
27789+ struct super_block *sb)
27790+{
27791+ int err, i;
27792+ const unsigned char do_plink
27793+ = !!au_opt_test(au_mntflags(sb), PLINK);
27794+ struct path path = {
27795+ .mnt = br->br_mnt
27796+ };
27797+ struct inode *h_dir;
27798+ struct au_wbr *wbr = br->br_wbr;
27799+ static const struct qstr base_name[] = {
27800+ [AuBrWh_BASE] = {
27801+ .name = AUFS_BASE_NAME,
27802+ .len = sizeof(AUFS_BASE_NAME) - 1
27803+ },
27804+ [AuBrWh_PLINK] = {
27805+ .name = AUFS_PLINKDIR_NAME,
27806+ .len = sizeof(AUFS_PLINKDIR_NAME) - 1
27807+ },
27808+ [AuBrWh_ORPH] = {
27809+ .name = AUFS_ORPHDIR_NAME,
27810+ .len = sizeof(AUFS_ORPHDIR_NAME) - 1
27811+ }
27812+ };
27813+ struct au_wh_base base[] = {
27814+ [AuBrWh_BASE] = {
27815+ .name = base_name + AuBrWh_BASE,
27816+ .dentry = NULL
27817+ },
27818+ [AuBrWh_PLINK] = {
27819+ .name = base_name + AuBrWh_PLINK,
27820+ .dentry = NULL
27821+ },
27822+ [AuBrWh_ORPH] = {
27823+ .name = base_name + AuBrWh_ORPH,
27824+ .dentry = NULL
27825+ }
27826+ };
27827+
1308ab2a 27828+ if (wbr)
27829+ WbrWhMustWriteLock(wbr);
1facf9fc 27830+
1facf9fc 27831+ for (i = 0; i < AuBrWh_Last; i++) {
27832+ /* doubly whiteouted */
27833+ struct dentry *d;
27834+
27835+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
27836+ err = PTR_ERR(d);
27837+ if (IS_ERR(d))
27838+ goto out;
27839+
27840+ base[i].dentry = d;
27841+ AuDebugOn(wbr
27842+ && wbr->wbr_wh[i]
27843+ && wbr->wbr_wh[i] != base[i].dentry);
27844+ }
27845+
27846+ if (wbr)
27847+ for (i = 0; i < AuBrWh_Last; i++) {
27848+ dput(wbr->wbr_wh[i]);
27849+ wbr->wbr_wh[i] = NULL;
27850+ }
27851+
27852+ err = 0;
1facf9fc 27853+ switch (br->br_perm) {
27854+ case AuBrPerm_RO:
27855+ case AuBrPerm_ROWH:
27856+ case AuBrPerm_RR:
27857+ case AuBrPerm_RRWH:
4a4d8108 27858+ h_dir = h_root->d_inode;
1facf9fc 27859+ au_wh_init_ro(h_dir, base, &path);
27860+ break;
27861+
27862+ case AuBrPerm_RWNoLinkWH:
27863+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
27864+ if (err > 0)
27865+ goto out;
27866+ else if (err)
27867+ goto out_err;
27868+ break;
27869+
27870+ case AuBrPerm_RW:
27871+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
27872+ if (err > 0)
27873+ goto out;
27874+ else if (err)
27875+ goto out_err;
27876+ break;
27877+
27878+ default:
27879+ BUG();
27880+ }
27881+ goto out; /* success */
27882+
4f0767ce 27883+out_err:
4a4d8108
AM
27884+ pr_err("an error(%d) on the writable branch %.*s(%s)\n",
27885+ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
4f0767ce 27886+out:
1facf9fc 27887+ for (i = 0; i < AuBrWh_Last; i++)
27888+ dput(base[i].dentry);
27889+ return err;
27890+}
27891+
27892+/* ---------------------------------------------------------------------- */
27893+/*
27894+ * whiteouts are all hard-linked usually.
27895+ * when its link count reaches a ceiling, we create a new whiteout base
27896+ * asynchronously.
27897+ */
27898+
27899+struct reinit_br_wh {
27900+ struct super_block *sb;
27901+ struct au_branch *br;
27902+};
27903+
27904+static void reinit_br_wh(void *arg)
27905+{
27906+ int err;
27907+ aufs_bindex_t bindex;
27908+ struct path h_path;
27909+ struct reinit_br_wh *a = arg;
27910+ struct au_wbr *wbr;
27911+ struct inode *dir;
27912+ struct dentry *h_root;
27913+ struct au_hinode *hdir;
27914+
27915+ err = 0;
27916+ wbr = a->br->br_wbr;
27917+ /* big aufs lock */
27918+ si_noflush_write_lock(a->sb);
27919+ if (!au_br_writable(a->br->br_perm))
27920+ goto out;
27921+ bindex = au_br_index(a->sb, a->br->br_id);
27922+ if (unlikely(bindex < 0))
27923+ goto out;
27924+
1308ab2a 27925+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 27926+ dir = a->sb->s_root->d_inode;
1facf9fc 27927+ hdir = au_hi(dir, bindex);
27928+ h_root = au_h_dptr(a->sb->s_root, bindex);
27929+
4a4d8108 27930+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 27931+ wbr_wh_write_lock(wbr);
27932+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
27933+ h_root, a->br);
27934+ if (!err) {
27935+ err = mnt_want_write(a->br->br_mnt);
27936+ if (!err) {
27937+ h_path.dentry = wbr->wbr_whbase;
27938+ h_path.mnt = a->br->br_mnt;
27939+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
27940+ mnt_drop_write(a->br->br_mnt);
27941+ }
27942+ } else {
4a4d8108
AM
27943+ pr_warning("%.*s is moved, ignored\n",
27944+ AuDLNPair(wbr->wbr_whbase));
1facf9fc 27945+ err = 0;
27946+ }
27947+ dput(wbr->wbr_whbase);
27948+ wbr->wbr_whbase = NULL;
27949+ if (!err)
27950+ err = au_wh_init(h_root, a->br, a->sb);
27951+ wbr_wh_write_unlock(wbr);
4a4d8108 27952+ au_hn_imtx_unlock(hdir);
1308ab2a 27953+ di_read_unlock(a->sb->s_root, AuLock_IR);
1facf9fc 27954+
4f0767ce 27955+out:
1facf9fc 27956+ if (wbr)
27957+ atomic_dec(&wbr->wbr_wh_running);
27958+ atomic_dec(&a->br->br_count);
1facf9fc 27959+ si_write_unlock(a->sb);
027c5e7a 27960+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 27961+ kfree(arg);
27962+ if (unlikely(err))
27963+ AuIOErr("err %d\n", err);
27964+}
27965+
27966+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
27967+{
27968+ int do_dec, wkq_err;
27969+ struct reinit_br_wh *arg;
27970+
27971+ do_dec = 1;
27972+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
27973+ goto out;
27974+
27975+ /* ignore ENOMEM */
27976+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
27977+ if (arg) {
27978+ /*
27979+ * dec(wh_running), kfree(arg) and dec(br_count)
27980+ * in reinit function
27981+ */
27982+ arg->sb = sb;
27983+ arg->br = br;
27984+ atomic_inc(&br->br_count);
53392da6 27985+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 27986+ if (unlikely(wkq_err)) {
27987+ atomic_dec(&br->br_wbr->wbr_wh_running);
27988+ atomic_dec(&br->br_count);
27989+ kfree(arg);
27990+ }
27991+ do_dec = 0;
27992+ }
27993+
4f0767ce 27994+out:
1facf9fc 27995+ if (do_dec)
27996+ atomic_dec(&br->br_wbr->wbr_wh_running);
27997+}
27998+
27999+/* ---------------------------------------------------------------------- */
28000+
28001+/*
28002+ * create the whiteout @wh.
28003+ */
28004+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
28005+ struct dentry *wh)
28006+{
28007+ int err;
28008+ struct path h_path = {
28009+ .dentry = wh
28010+ };
28011+ struct au_branch *br;
28012+ struct au_wbr *wbr;
28013+ struct dentry *h_parent;
28014+ struct inode *h_dir;
28015+
28016+ h_parent = wh->d_parent; /* dir inode is locked */
28017+ h_dir = h_parent->d_inode;
28018+ IMustLock(h_dir);
28019+
28020+ br = au_sbr(sb, bindex);
28021+ h_path.mnt = br->br_mnt;
28022+ wbr = br->br_wbr;
28023+ wbr_wh_read_lock(wbr);
28024+ if (wbr->wbr_whbase) {
28025+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
28026+ if (!err || err != -EMLINK)
28027+ goto out;
28028+
28029+ /* link count full. re-initialize wbr_whbase. */
28030+ kick_reinit_br_wh(sb, br);
28031+ }
28032+
28033+ /* return this error in this context */
28034+ err = vfsub_create(h_dir, &h_path, WH_MASK);
28035+
4f0767ce 28036+out:
1facf9fc 28037+ wbr_wh_read_unlock(wbr);
28038+ return err;
28039+}
28040+
28041+/* ---------------------------------------------------------------------- */
28042+
28043+/*
28044+ * create or remove the diropq.
28045+ */
28046+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
28047+ unsigned int flags)
28048+{
28049+ struct dentry *opq_dentry, *h_dentry;
28050+ struct super_block *sb;
28051+ struct au_branch *br;
28052+ int err;
28053+
28054+ sb = dentry->d_sb;
28055+ br = au_sbr(sb, bindex);
28056+ h_dentry = au_h_dptr(dentry, bindex);
28057+ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
28058+ if (IS_ERR(opq_dentry))
28059+ goto out;
28060+
28061+ if (au_ftest_diropq(flags, CREATE)) {
28062+ err = link_or_create_wh(sb, bindex, opq_dentry);
28063+ if (!err) {
28064+ au_set_dbdiropq(dentry, bindex);
28065+ goto out; /* success */
28066+ }
28067+ } else {
28068+ struct path tmp = {
28069+ .dentry = opq_dentry,
28070+ .mnt = br->br_mnt
28071+ };
28072+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
28073+ if (!err)
28074+ au_set_dbdiropq(dentry, -1);
28075+ }
28076+ dput(opq_dentry);
28077+ opq_dentry = ERR_PTR(err);
28078+
4f0767ce 28079+out:
1facf9fc 28080+ return opq_dentry;
28081+}
28082+
28083+struct do_diropq_args {
28084+ struct dentry **errp;
28085+ struct dentry *dentry;
28086+ aufs_bindex_t bindex;
28087+ unsigned int flags;
28088+};
28089+
28090+static void call_do_diropq(void *args)
28091+{
28092+ struct do_diropq_args *a = args;
28093+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
28094+}
28095+
28096+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28097+ unsigned int flags)
28098+{
28099+ struct dentry *diropq, *h_dentry;
28100+
28101+ h_dentry = au_h_dptr(dentry, bindex);
28102+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
28103+ diropq = do_diropq(dentry, bindex, flags);
28104+ else {
28105+ int wkq_err;
28106+ struct do_diropq_args args = {
28107+ .errp = &diropq,
28108+ .dentry = dentry,
28109+ .bindex = bindex,
28110+ .flags = flags
28111+ };
28112+
28113+ wkq_err = au_wkq_wait(call_do_diropq, &args);
28114+ if (unlikely(wkq_err))
28115+ diropq = ERR_PTR(wkq_err);
28116+ }
28117+
28118+ return diropq;
28119+}
28120+
28121+/* ---------------------------------------------------------------------- */
28122+
28123+/*
28124+ * lookup whiteout dentry.
28125+ * @h_parent: lower parent dentry which must exist and be locked
28126+ * @base_name: name of dentry which will be whiteouted
28127+ * returns dentry for whiteout.
28128+ */
28129+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28130+ struct au_branch *br)
28131+{
28132+ int err;
28133+ struct qstr wh_name;
28134+ struct dentry *wh_dentry;
28135+
28136+ err = au_wh_name_alloc(&wh_name, base_name);
28137+ wh_dentry = ERR_PTR(err);
28138+ if (!err) {
28139+ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
28140+ kfree(wh_name.name);
28141+ }
28142+ return wh_dentry;
28143+}
28144+
28145+/*
28146+ * link/create a whiteout for @dentry on @bindex.
28147+ */
28148+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28149+ struct dentry *h_parent)
28150+{
28151+ struct dentry *wh_dentry;
28152+ struct super_block *sb;
28153+ int err;
28154+
28155+ sb = dentry->d_sb;
28156+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
28157+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
28158+ err = link_or_create_wh(sb, bindex, wh_dentry);
28159+ if (!err)
28160+ au_set_dbwh(dentry, bindex);
28161+ else {
28162+ dput(wh_dentry);
28163+ wh_dentry = ERR_PTR(err);
28164+ }
28165+ }
28166+
28167+ return wh_dentry;
28168+}
28169+
28170+/* ---------------------------------------------------------------------- */
28171+
28172+/* Delete all whiteouts in this directory on branch bindex. */
28173+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
28174+ aufs_bindex_t bindex, struct au_branch *br)
28175+{
28176+ int err;
28177+ unsigned long ul, n;
28178+ struct qstr wh_name;
28179+ char *p;
28180+ struct hlist_head *head;
28181+ struct au_vdir_wh *tpos;
28182+ struct hlist_node *pos;
28183+ struct au_vdir_destr *str;
28184+
28185+ err = -ENOMEM;
4a4d8108 28186+ p = __getname_gfp(GFP_NOFS);
1facf9fc 28187+ wh_name.name = p;
28188+ if (unlikely(!wh_name.name))
28189+ goto out;
28190+
28191+ err = 0;
28192+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
28193+ p += AUFS_WH_PFX_LEN;
28194+ n = whlist->nh_num;
28195+ head = whlist->nh_head;
28196+ for (ul = 0; !err && ul < n; ul++, head++) {
28197+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
28198+ if (tpos->wh_bindex != bindex)
28199+ continue;
28200+
28201+ str = &tpos->wh_str;
28202+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
28203+ memcpy(p, str->name, str->len);
28204+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
28205+ err = unlink_wh_name(h_dentry, &wh_name, br);
28206+ if (!err)
28207+ continue;
28208+ break;
28209+ }
28210+ AuIOErr("whiteout name too long %.*s\n",
28211+ str->len, str->name);
28212+ err = -EIO;
28213+ break;
28214+ }
28215+ }
28216+ __putname(wh_name.name);
28217+
4f0767ce 28218+out:
1facf9fc 28219+ return err;
28220+}
28221+
28222+struct del_wh_children_args {
28223+ int *errp;
28224+ struct dentry *h_dentry;
1308ab2a 28225+ struct au_nhash *whlist;
1facf9fc 28226+ aufs_bindex_t bindex;
28227+ struct au_branch *br;
28228+};
28229+
28230+static void call_del_wh_children(void *args)
28231+{
28232+ struct del_wh_children_args *a = args;
1308ab2a 28233+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 28234+}
28235+
28236+/* ---------------------------------------------------------------------- */
28237+
28238+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
28239+{
28240+ struct au_whtmp_rmdir *whtmp;
dece6358 28241+ int err;
1308ab2a 28242+ unsigned int rdhash;
dece6358
AM
28243+
28244+ SiMustAnyLock(sb);
1facf9fc 28245+
28246+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
28247+ if (unlikely(!whtmp)) {
28248+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 28249+ goto out;
dece6358 28250+ }
1facf9fc 28251+
28252+ whtmp->dir = NULL;
027c5e7a 28253+ whtmp->br = NULL;
1facf9fc 28254+ whtmp->wh_dentry = NULL;
1308ab2a 28255+ /* no estimation for dir size */
28256+ rdhash = au_sbi(sb)->si_rdhash;
28257+ if (!rdhash)
28258+ rdhash = AUFS_RDHASH_DEF;
28259+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
28260+ if (unlikely(err)) {
28261+ kfree(whtmp);
28262+ whtmp = ERR_PTR(err);
28263+ }
dece6358 28264+
4f0767ce 28265+out:
dece6358 28266+ return whtmp;
1facf9fc 28267+}
28268+
28269+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
28270+{
027c5e7a
AM
28271+ if (whtmp->br)
28272+ atomic_dec(&whtmp->br->br_count);
1facf9fc 28273+ dput(whtmp->wh_dentry);
28274+ iput(whtmp->dir);
dece6358 28275+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 28276+ kfree(whtmp);
28277+}
28278+
28279+/*
28280+ * rmdir the whiteouted temporary named dir @h_dentry.
28281+ * @whlist: whiteouted children.
28282+ */
28283+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28284+ struct dentry *wh_dentry, struct au_nhash *whlist)
28285+{
28286+ int err;
28287+ struct path h_tmp;
28288+ struct inode *wh_inode, *h_dir;
28289+ struct au_branch *br;
28290+
28291+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
28292+ IMustLock(h_dir);
28293+
28294+ br = au_sbr(dir->i_sb, bindex);
28295+ wh_inode = wh_dentry->d_inode;
28296+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
28297+
28298+ /*
28299+ * someone else might change some whiteouts while we were sleeping.
28300+ * it means this whlist may have an obsoleted entry.
28301+ */
28302+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
28303+ err = del_wh_children(wh_dentry, whlist, bindex, br);
28304+ else {
28305+ int wkq_err;
28306+ struct del_wh_children_args args = {
28307+ .errp = &err,
28308+ .h_dentry = wh_dentry,
1308ab2a 28309+ .whlist = whlist,
1facf9fc 28310+ .bindex = bindex,
28311+ .br = br
28312+ };
28313+
28314+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
28315+ if (unlikely(wkq_err))
28316+ err = wkq_err;
28317+ }
28318+ mutex_unlock(&wh_inode->i_mutex);
28319+
28320+ if (!err) {
28321+ h_tmp.dentry = wh_dentry;
28322+ h_tmp.mnt = br->br_mnt;
28323+ err = vfsub_rmdir(h_dir, &h_tmp);
1facf9fc 28324+ }
28325+
28326+ if (!err) {
28327+ if (au_ibstart(dir) == bindex) {
7f207e10 28328+ /* todo: dir->i_mutex is necessary */
1facf9fc 28329+ au_cpup_attr_timesizes(dir);
7f207e10 28330+ vfsub_drop_nlink(dir);
1facf9fc 28331+ }
28332+ return 0; /* success */
28333+ }
28334+
4a4d8108
AM
28335+ pr_warning("failed removing %.*s(%d), ignored\n",
28336+ AuDLNPair(wh_dentry), err);
1facf9fc 28337+ return err;
28338+}
28339+
28340+static void call_rmdir_whtmp(void *args)
28341+{
28342+ int err;
e49829fe 28343+ aufs_bindex_t bindex;
1facf9fc 28344+ struct au_whtmp_rmdir *a = args;
28345+ struct super_block *sb;
28346+ struct dentry *h_parent;
28347+ struct inode *h_dir;
1facf9fc 28348+ struct au_hinode *hdir;
28349+
28350+ /* rmdir by nfsd may cause deadlock with this i_mutex */
28351+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 28352+ err = -EROFS;
1facf9fc 28353+ sb = a->dir->i_sb;
e49829fe
JR
28354+ si_read_lock(sb, !AuLock_FLUSH);
28355+ if (!au_br_writable(a->br->br_perm))
28356+ goto out;
28357+ bindex = au_br_index(sb, a->br->br_id);
28358+ if (unlikely(bindex < 0))
1facf9fc 28359+ goto out;
28360+
28361+ err = -EIO;
1facf9fc 28362+ ii_write_lock_parent(a->dir);
28363+ h_parent = dget_parent(a->wh_dentry);
28364+ h_dir = h_parent->d_inode;
e49829fe 28365+ hdir = au_hi(a->dir, bindex);
4a4d8108 28366+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
28367+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
28368+ a->br);
1facf9fc 28369+ if (!err) {
e49829fe 28370+ err = mnt_want_write(a->br->br_mnt);
1facf9fc 28371+ if (!err) {
e49829fe 28372+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
dece6358 28373+ &a->whlist);
e49829fe 28374+ mnt_drop_write(a->br->br_mnt);
1facf9fc 28375+ }
28376+ }
4a4d8108 28377+ au_hn_imtx_unlock(hdir);
1facf9fc 28378+ dput(h_parent);
28379+ ii_write_unlock(a->dir);
28380+
4f0767ce 28381+out:
1facf9fc 28382+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 28383+ au_whtmp_rmdir_free(a);
027c5e7a
AM
28384+ si_read_unlock(sb);
28385+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28386+ if (unlikely(err))
28387+ AuIOErr("err %d\n", err);
28388+}
28389+
28390+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28391+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
28392+{
28393+ int wkq_err;
e49829fe 28394+ struct super_block *sb;
1facf9fc 28395+
28396+ IMustLock(dir);
28397+
28398+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 28399+ sb = dir->i_sb;
1facf9fc 28400+ args->dir = au_igrab(dir);
e49829fe
JR
28401+ args->br = au_sbr(sb, bindex);
28402+ atomic_inc(&args->br->br_count);
1facf9fc 28403+ args->wh_dentry = dget(wh_dentry);
53392da6 28404+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 28405+ if (unlikely(wkq_err)) {
4a4d8108
AM
28406+ pr_warning("rmdir error %.*s (%d), ignored\n",
28407+ AuDLNPair(wh_dentry), wkq_err);
1facf9fc 28408+ au_whtmp_rmdir_free(args);
28409+ }
28410+}
7f207e10
AM
28411diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
28412--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
53392da6 28413+++ linux/fs/aufs/whout.h 2011-08-24 13:30:24.734646739 +0200
7f207e10 28414@@ -0,0 +1,89 @@
1facf9fc 28415+/*
027c5e7a 28416+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28417+ *
28418+ * This program, aufs is free software; you can redistribute it and/or modify
28419+ * it under the terms of the GNU General Public License as published by
28420+ * the Free Software Foundation; either version 2 of the License, or
28421+ * (at your option) any later version.
dece6358
AM
28422+ *
28423+ * This program is distributed in the hope that it will be useful,
28424+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28425+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28426+ * GNU General Public License for more details.
28427+ *
28428+ * You should have received a copy of the GNU General Public License
28429+ * along with this program; if not, write to the Free Software
28430+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28431+ */
28432+
28433+/*
28434+ * whiteout for logical deletion and opaque directory
28435+ */
28436+
28437+#ifndef __AUFS_WHOUT_H__
28438+#define __AUFS_WHOUT_H__
28439+
28440+#ifdef __KERNEL__
28441+
1facf9fc 28442+#include <linux/aufs_type.h>
28443+#include "dir.h"
28444+
28445+/* whout.c */
28446+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
28447+struct au_branch;
28448+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
28449+ struct au_branch *br, int try_sio);
28450+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
28451+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
28452+ struct qstr *prefix);
28453+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
28454+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
28455+ struct dentry *dentry);
28456+int au_wh_init(struct dentry *h_parent, struct au_branch *br,
28457+ struct super_block *sb);
28458+
28459+/* diropq flags */
28460+#define AuDiropq_CREATE 1
28461+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
28462+#define au_fset_diropq(flags, name) \
28463+ do { (flags) |= AuDiropq_##name; } while (0)
28464+#define au_fclr_diropq(flags, name) \
28465+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 28466+
28467+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28468+ unsigned int flags);
28469+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28470+ struct au_branch *br);
28471+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28472+ struct dentry *h_parent);
28473+
28474+/* real rmdir for the whiteout-ed dir */
28475+struct au_whtmp_rmdir {
28476+ struct inode *dir;
e49829fe 28477+ struct au_branch *br;
1facf9fc 28478+ struct dentry *wh_dentry;
dece6358 28479+ struct au_nhash whlist;
1facf9fc 28480+};
28481+
28482+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
28483+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
28484+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28485+ struct dentry *wh_dentry, struct au_nhash *whlist);
28486+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28487+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
28488+
28489+/* ---------------------------------------------------------------------- */
28490+
28491+static inline struct dentry *au_diropq_create(struct dentry *dentry,
28492+ aufs_bindex_t bindex)
28493+{
28494+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
28495+}
28496+
28497+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
28498+{
28499+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
28500+}
28501+
28502+#endif /* __KERNEL__ */
28503+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
28504diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
28505--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28506+++ linux/fs/aufs/wkq.c 2011-08-24 13:30:24.734646739 +0200
28507@@ -0,0 +1,244 @@
1facf9fc 28508+/*
027c5e7a 28509+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28510+ *
28511+ * This program, aufs is free software; you can redistribute it and/or modify
28512+ * it under the terms of the GNU General Public License as published by
28513+ * the Free Software Foundation; either version 2 of the License, or
28514+ * (at your option) any later version.
dece6358
AM
28515+ *
28516+ * This program is distributed in the hope that it will be useful,
28517+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28518+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28519+ * GNU General Public License for more details.
28520+ *
28521+ * You should have received a copy of the GNU General Public License
28522+ * along with this program; if not, write to the Free Software
28523+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28524+ */
28525+
28526+/*
28527+ * workqueue for asynchronous/super-io operations
28528+ * todo: try new credential scheme
28529+ */
28530+
dece6358 28531+#include <linux/module.h>
1facf9fc 28532+#include "aufs.h"
28533+
b752ccd1
AM
28534+/* internal workqueue named AUFS_WKQ_NAME and AUFS_WKQ_PRE_NAME */
28535+enum {
28536+ AuWkq_INORMAL,
28537+ AuWkq_IPRE
28538+};
28539+
28540+static struct {
28541+ char *name;
28542+ struct workqueue_struct *wkq;
28543+} au_wkq[] = {
28544+ [AuWkq_INORMAL] = {
28545+ .name = AUFS_WKQ_NAME
28546+ },
28547+ [AuWkq_IPRE] = {
28548+ .name = AUFS_WKQ_PRE_NAME
28549+ }
28550+};
1facf9fc 28551+
28552+struct au_wkinfo {
28553+ struct work_struct wk;
7f207e10 28554+ struct kobject *kobj;
1facf9fc 28555+
28556+ unsigned int flags; /* see wkq.h */
28557+
28558+ au_wkq_func_t func;
28559+ void *args;
28560+
1facf9fc 28561+ struct completion *comp;
28562+};
28563+
28564+/* ---------------------------------------------------------------------- */
28565+
1facf9fc 28566+static void wkq_func(struct work_struct *wk)
28567+{
28568+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
28569+
7f207e10
AM
28570+ AuDebugOn(current_fsuid());
28571+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
28572+
1facf9fc 28573+ wkinfo->func(wkinfo->args);
1facf9fc 28574+ if (au_ftest_wkq(wkinfo->flags, WAIT))
28575+ complete(wkinfo->comp);
28576+ else {
7f207e10 28577+ kobject_put(wkinfo->kobj);
1facf9fc 28578+ module_put(THIS_MODULE);
28579+ kfree(wkinfo);
28580+ }
28581+}
28582+
28583+/*
28584+ * Since struct completion is large, try allocating it dynamically.
28585+ */
28586+#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
28587+#define AuWkqCompDeclare(name) struct completion *comp = NULL
28588+
28589+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28590+{
28591+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
28592+ if (*comp) {
28593+ init_completion(*comp);
28594+ wkinfo->comp = *comp;
28595+ return 0;
28596+ }
28597+ return -ENOMEM;
28598+}
28599+
28600+static void au_wkq_comp_free(struct completion *comp)
28601+{
28602+ kfree(comp);
28603+}
28604+
28605+#else
28606+
28607+/* no braces */
28608+#define AuWkqCompDeclare(name) \
28609+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
28610+ struct completion *comp = &_ ## name
28611+
28612+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28613+{
28614+ wkinfo->comp = *comp;
28615+ return 0;
28616+}
28617+
28618+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
28619+{
28620+ /* empty */
28621+}
28622+#endif /* 4KSTACKS */
28623+
53392da6 28624+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 28625+{
b752ccd1
AM
28626+ struct workqueue_struct *wkq;
28627+
53392da6
AM
28628+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
28629+ if (au_wkq_test()) {
28630+ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n");
28631+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
28632+ }
28633+ } else
28634+ au_dbg_verify_kthread();
28635+
28636+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 28637+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
b752ccd1 28638+ wkq = au_wkq[AuWkq_INORMAL].wkq;
53392da6 28639+ if (au_ftest_wkq(wkinfo->flags, PRE))
b752ccd1
AM
28640+ wkq = au_wkq[AuWkq_IPRE].wkq;
28641+ queue_work(wkq, &wkinfo->wk);
4a4d8108
AM
28642+ } else {
28643+ INIT_WORK(&wkinfo->wk, wkq_func);
28644+ schedule_work(&wkinfo->wk);
28645+ }
1facf9fc 28646+}
28647+
7f207e10
AM
28648+/*
28649+ * Be careful. It is easy to make deadlock happen.
28650+ * processA: lock, wkq and wait
28651+ * processB: wkq and wait, lock in wkq
28652+ * --> deadlock
28653+ */
b752ccd1 28654+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 28655+{
28656+ int err;
28657+ AuWkqCompDeclare(comp);
28658+ struct au_wkinfo wkinfo = {
b752ccd1 28659+ .flags = flags,
1facf9fc 28660+ .func = func,
28661+ .args = args
28662+ };
28663+
28664+ err = au_wkq_comp_alloc(&wkinfo, &comp);
28665+ if (!err) {
53392da6 28666+ au_wkq_run(&wkinfo);
1facf9fc 28667+ /* no timeout, no interrupt */
28668+ wait_for_completion(wkinfo.comp);
28669+ au_wkq_comp_free(comp);
4a4d8108 28670+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 28671+ }
28672+
28673+ return err;
28674+
28675+}
28676+
027c5e7a
AM
28677+/*
28678+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
28679+ * problem in a concurrent umounting.
28680+ */
53392da6
AM
28681+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28682+ unsigned int flags)
1facf9fc 28683+{
28684+ int err;
28685+ struct au_wkinfo *wkinfo;
28686+
28687+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
28688+
28689+ /*
28690+ * wkq_func() must free this wkinfo.
28691+ * it highly depends upon the implementation of workqueue.
28692+ */
28693+ err = 0;
28694+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
28695+ if (wkinfo) {
7f207e10 28696+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 28697+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 28698+ wkinfo->func = func;
28699+ wkinfo->args = args;
28700+ wkinfo->comp = NULL;
7f207e10 28701+ kobject_get(wkinfo->kobj);
1facf9fc 28702+ __module_get(THIS_MODULE);
28703+
53392da6 28704+ au_wkq_run(wkinfo);
1facf9fc 28705+ } else {
28706+ err = -ENOMEM;
e49829fe 28707+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28708+ }
28709+
28710+ return err;
28711+}
28712+
28713+/* ---------------------------------------------------------------------- */
28714+
28715+void au_nwt_init(struct au_nowait_tasks *nwt)
28716+{
28717+ atomic_set(&nwt->nw_len, 0);
4a4d8108 28718+ /* smp_mb(); */ /* atomic_set */
1facf9fc 28719+ init_waitqueue_head(&nwt->nw_wq);
28720+}
28721+
28722+void au_wkq_fin(void)
28723+{
b752ccd1
AM
28724+ int i;
28725+
28726+ for (i = 0; i < ARRAY_SIZE(au_wkq); i++)
28727+ if (au_wkq[i].wkq)
28728+ destroy_workqueue(au_wkq[i].wkq);
1facf9fc 28729+}
28730+
28731+int __init au_wkq_init(void)
28732+{
b752ccd1
AM
28733+ int err, i;
28734+
28735+ err = 0;
28736+ for (i = 0; !err && i < ARRAY_SIZE(au_wkq); i++) {
7f207e10
AM
28737+ BUILD_BUG_ON(!WQ_RESCUER);
28738+ au_wkq[i].wkq = alloc_workqueue(au_wkq[i].name, !WQ_RESCUER,
28739+ WQ_DFL_ACTIVE);
b752ccd1
AM
28740+ if (IS_ERR(au_wkq[i].wkq))
28741+ err = PTR_ERR(au_wkq[i].wkq);
28742+ else if (!au_wkq[i].wkq)
28743+ err = -ENOMEM;
28744+ if (unlikely(err))
28745+ au_wkq[i].wkq = NULL;
28746+ }
7f207e10 28747+ if (unlikely(err))
b752ccd1
AM
28748+ au_wkq_fin();
28749+
28750+ return err;
1facf9fc 28751+}
7f207e10
AM
28752diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
28753--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28754+++ linux/fs/aufs/wkq.h 2011-08-24 13:30:24.737979976 +0200
28755@@ -0,0 +1,101 @@
1facf9fc 28756+/*
027c5e7a 28757+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28758+ *
28759+ * This program, aufs is free software; you can redistribute it and/or modify
28760+ * it under the terms of the GNU General Public License as published by
28761+ * the Free Software Foundation; either version 2 of the License, or
28762+ * (at your option) any later version.
dece6358
AM
28763+ *
28764+ * This program is distributed in the hope that it will be useful,
28765+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28766+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28767+ * GNU General Public License for more details.
28768+ *
28769+ * You should have received a copy of the GNU General Public License
28770+ * along with this program; if not, write to the Free Software
28771+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28772+ */
28773+
28774+/*
28775+ * workqueue for asynchronous/super-io operations
28776+ * todo: try new credentials management scheme
28777+ */
28778+
28779+#ifndef __AUFS_WKQ_H__
28780+#define __AUFS_WKQ_H__
28781+
28782+#ifdef __KERNEL__
28783+
1facf9fc 28784+#include <linux/sched.h>
dece6358 28785+#include <linux/wait.h>
1facf9fc 28786+#include <linux/aufs_type.h>
28787+
dece6358
AM
28788+struct super_block;
28789+
1facf9fc 28790+/* ---------------------------------------------------------------------- */
28791+
28792+/*
28793+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
28794+ */
28795+struct au_nowait_tasks {
28796+ atomic_t nw_len;
28797+ wait_queue_head_t nw_wq;
28798+};
28799+
28800+/* ---------------------------------------------------------------------- */
28801+
28802+typedef void (*au_wkq_func_t)(void *args);
28803+
28804+/* wkq flags */
28805+#define AuWkq_WAIT 1
b752ccd1 28806+#define AuWkq_PRE (1 << 1)
53392da6
AM
28807+#ifdef CONFIG_AUFS_HNOTIFY
28808+#define AuWkq_NEST (1 << 2)
28809+#else
28810+#define AuWkq_NEST 0
28811+#endif
1facf9fc 28812+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
28813+#define au_fset_wkq(flags, name) \
28814+ do { (flags) |= AuWkq_##name; } while (0)
28815+#define au_fclr_wkq(flags, name) \
28816+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 28817+
28818+/* wkq.c */
b752ccd1 28819+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
28820+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28821+ unsigned int flags);
1facf9fc 28822+void au_nwt_init(struct au_nowait_tasks *nwt);
28823+int __init au_wkq_init(void);
28824+void au_wkq_fin(void);
28825+
28826+/* ---------------------------------------------------------------------- */
28827+
53392da6
AM
28828+static inline int au_wkq_test(void)
28829+{
28830+ return current->flags & PF_WQ_WORKER;
28831+}
28832+
b752ccd1
AM
28833+static inline int au_wkq_wait_pre(au_wkq_func_t func, void *args)
28834+{
28835+ return au_wkq_do_wait(AuWkq_WAIT | AuWkq_PRE, func, args);
28836+}
28837+
28838+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 28839+{
b752ccd1 28840+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 28841+}
28842+
28843+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
28844+{
e49829fe 28845+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 28846+ wake_up_all(&nwt->nw_wq);
28847+}
28848+
28849+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
28850+{
28851+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
28852+ return 0;
28853+}
28854+
28855+#endif /* __KERNEL__ */
28856+#endif /* __AUFS_WKQ_H__ */
7f207e10
AM
28857diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
28858--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
53392da6 28859+++ linux/fs/aufs/xino.c 2011-08-24 13:30:24.737979976 +0200
7f207e10 28860@@ -0,0 +1,1265 @@
1facf9fc 28861+/*
027c5e7a 28862+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28863+ *
28864+ * This program, aufs is free software; you can redistribute it and/or modify
28865+ * it under the terms of the GNU General Public License as published by
28866+ * the Free Software Foundation; either version 2 of the License, or
28867+ * (at your option) any later version.
dece6358
AM
28868+ *
28869+ * This program is distributed in the hope that it will be useful,
28870+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28871+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28872+ * GNU General Public License for more details.
28873+ *
28874+ * You should have received a copy of the GNU General Public License
28875+ * along with this program; if not, write to the Free Software
28876+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28877+ */
28878+
28879+/*
28880+ * external inode number translation table and bitmap
28881+ */
28882+
dece6358 28883+#include <linux/file.h>
1facf9fc 28884+#include <linux/seq_file.h>
28885+#include <linux/uaccess.h>
28886+#include "aufs.h"
28887+
b752ccd1 28888+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 28889+ loff_t *pos)
28890+{
28891+ ssize_t err;
28892+ mm_segment_t oldfs;
b752ccd1
AM
28893+ union {
28894+ void *k;
28895+ char __user *u;
28896+ } buf;
1facf9fc 28897+
b752ccd1 28898+ buf.k = kbuf;
1facf9fc 28899+ oldfs = get_fs();
28900+ set_fs(KERNEL_DS);
28901+ do {
28902+ /* todo: signal_pending? */
b752ccd1 28903+ err = func(file, buf.u, size, pos);
1facf9fc 28904+ } while (err == -EAGAIN || err == -EINTR);
28905+ set_fs(oldfs);
28906+
28907+#if 0 /* reserved for future use */
28908+ if (err > 0)
28909+ fsnotify_access(file->f_dentry);
28910+#endif
28911+
28912+ return err;
28913+}
28914+
28915+/* ---------------------------------------------------------------------- */
28916+
b752ccd1 28917+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf,
1facf9fc 28918+ size_t size, loff_t *pos)
28919+{
28920+ ssize_t err;
28921+ mm_segment_t oldfs;
b752ccd1
AM
28922+ union {
28923+ void *k;
28924+ const char __user *u;
28925+ } buf;
1facf9fc 28926+
b752ccd1 28927+ buf.k = kbuf;
1facf9fc 28928+ oldfs = get_fs();
28929+ set_fs(KERNEL_DS);
1facf9fc 28930+ do {
28931+ /* todo: signal_pending? */
b752ccd1 28932+ err = func(file, buf.u, size, pos);
1facf9fc 28933+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 28934+ set_fs(oldfs);
28935+
28936+#if 0 /* reserved for future use */
28937+ if (err > 0)
28938+ fsnotify_modify(file->f_dentry);
28939+#endif
28940+
28941+ return err;
28942+}
28943+
28944+struct do_xino_fwrite_args {
28945+ ssize_t *errp;
28946+ au_writef_t func;
28947+ struct file *file;
28948+ void *buf;
28949+ size_t size;
28950+ loff_t *pos;
28951+};
28952+
28953+static void call_do_xino_fwrite(void *args)
28954+{
28955+ struct do_xino_fwrite_args *a = args;
28956+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
28957+}
28958+
28959+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
28960+ loff_t *pos)
28961+{
28962+ ssize_t err;
28963+
28964+ /* todo: signal block and no wkq? */
b752ccd1
AM
28965+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
28966+ lockdep_off();
28967+ err = do_xino_fwrite(func, file, buf, size, pos);
28968+ lockdep_on();
28969+ } else {
28970+ /*
28971+ * it breaks RLIMIT_FSIZE and normal user's limit,
28972+ * users should care about quota and real 'filesystem full.'
28973+ */
1facf9fc 28974+ int wkq_err;
28975+ struct do_xino_fwrite_args args = {
28976+ .errp = &err,
28977+ .func = func,
28978+ .file = file,
28979+ .buf = buf,
28980+ .size = size,
28981+ .pos = pos
28982+ };
28983+
28984+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
28985+ if (unlikely(wkq_err))
28986+ err = wkq_err;
b752ccd1 28987+ }
1facf9fc 28988+
28989+ return err;
28990+}
28991+
28992+/* ---------------------------------------------------------------------- */
28993+
28994+/*
28995+ * create a new xinofile at the same place/path as @base_file.
28996+ */
28997+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
28998+{
28999+ struct file *file;
4a4d8108 29000+ struct dentry *base, *parent;
1facf9fc 29001+ struct inode *dir;
29002+ struct qstr *name;
1308ab2a 29003+ struct path path;
4a4d8108 29004+ int err;
1facf9fc 29005+
29006+ base = base_file->f_dentry;
29007+ parent = base->d_parent; /* dir inode is locked */
29008+ dir = parent->d_inode;
29009+ IMustLock(dir);
29010+
29011+ file = ERR_PTR(-EINVAL);
29012+ name = &base->d_name;
4a4d8108
AM
29013+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
29014+ if (IS_ERR(path.dentry)) {
29015+ file = (void *)path.dentry;
29016+ pr_err("%.*s lookup err %ld\n",
29017+ AuLNPair(name), PTR_ERR(path.dentry));
1facf9fc 29018+ goto out;
29019+ }
29020+
29021+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 29022+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 29023+ if (unlikely(err)) {
29024+ file = ERR_PTR(err);
4a4d8108 29025+ pr_err("%.*s create err %d\n", AuLNPair(name), err);
1facf9fc 29026+ goto out_dput;
29027+ }
29028+
1308ab2a 29029+ path.mnt = base_file->f_vfsmnt;
4a4d8108 29030+ file = vfsub_dentry_open(&path,
7f207e10 29031+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29032+ /* | __FMODE_NONOTIFY */);
1facf9fc 29033+ if (IS_ERR(file)) {
4a4d8108 29034+ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
1facf9fc 29035+ goto out_dput;
29036+ }
29037+
29038+ err = vfsub_unlink(dir, &file->f_path, /*force*/0);
29039+ if (unlikely(err)) {
4a4d8108 29040+ pr_err("%.*s unlink err %d\n", AuLNPair(name), err);
1facf9fc 29041+ goto out_fput;
29042+ }
29043+
29044+ if (copy_src) {
29045+ /* no one can touch copy_src xino */
29046+ err = au_copy_file(file, copy_src,
29047+ i_size_read(copy_src->f_dentry->d_inode));
29048+ if (unlikely(err)) {
4a4d8108 29049+ pr_err("%.*s copy err %d\n", AuLNPair(name), err);
1facf9fc 29050+ goto out_fput;
29051+ }
29052+ }
29053+ goto out_dput; /* success */
29054+
4f0767ce 29055+out_fput:
1facf9fc 29056+ fput(file);
29057+ file = ERR_PTR(err);
4f0767ce 29058+out_dput:
4a4d8108 29059+ dput(path.dentry);
4f0767ce 29060+out:
1facf9fc 29061+ return file;
29062+}
29063+
29064+struct au_xino_lock_dir {
29065+ struct au_hinode *hdir;
29066+ struct dentry *parent;
29067+ struct mutex *mtx;
29068+};
29069+
29070+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
29071+ struct au_xino_lock_dir *ldir)
29072+{
29073+ aufs_bindex_t brid, bindex;
29074+
29075+ ldir->hdir = NULL;
29076+ bindex = -1;
29077+ brid = au_xino_brid(sb);
29078+ if (brid >= 0)
29079+ bindex = au_br_index(sb, brid);
29080+ if (bindex >= 0) {
29081+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 29082+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 29083+ } else {
29084+ ldir->parent = dget_parent(xino->f_dentry);
29085+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
29086+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
29087+ }
29088+}
29089+
29090+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
29091+{
29092+ if (ldir->hdir)
4a4d8108 29093+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 29094+ else {
29095+ mutex_unlock(ldir->mtx);
29096+ dput(ldir->parent);
29097+ }
29098+}
29099+
29100+/* ---------------------------------------------------------------------- */
29101+
29102+/* truncate xino files asynchronously */
29103+
29104+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
29105+{
29106+ int err;
29107+ aufs_bindex_t bi, bend;
29108+ struct au_branch *br;
29109+ struct file *new_xino, *file;
29110+ struct super_block *h_sb;
29111+ struct au_xino_lock_dir ldir;
29112+
29113+ err = -EINVAL;
29114+ bend = au_sbend(sb);
29115+ if (unlikely(bindex < 0 || bend < bindex))
29116+ goto out;
29117+ br = au_sbr(sb, bindex);
29118+ file = br->br_xino.xi_file;
29119+ if (!file)
29120+ goto out;
29121+
29122+ au_xino_lock_dir(sb, file, &ldir);
29123+ /* mnt_want_write() is unnecessary here */
29124+ new_xino = au_xino_create2(file, file);
29125+ au_xino_unlock_dir(&ldir);
29126+ err = PTR_ERR(new_xino);
29127+ if (IS_ERR(new_xino))
29128+ goto out;
29129+ err = 0;
29130+ fput(file);
29131+ br->br_xino.xi_file = new_xino;
29132+
29133+ h_sb = br->br_mnt->mnt_sb;
29134+ for (bi = 0; bi <= bend; bi++) {
29135+ if (unlikely(bi == bindex))
29136+ continue;
29137+ br = au_sbr(sb, bi);
29138+ if (br->br_mnt->mnt_sb != h_sb)
29139+ continue;
29140+
29141+ fput(br->br_xino.xi_file);
29142+ br->br_xino.xi_file = new_xino;
29143+ get_file(new_xino);
29144+ }
29145+
4f0767ce 29146+out:
1facf9fc 29147+ return err;
29148+}
29149+
29150+struct xino_do_trunc_args {
29151+ struct super_block *sb;
29152+ struct au_branch *br;
29153+};
29154+
29155+static void xino_do_trunc(void *_args)
29156+{
29157+ struct xino_do_trunc_args *args = _args;
29158+ struct super_block *sb;
29159+ struct au_branch *br;
29160+ struct inode *dir;
29161+ int err;
29162+ aufs_bindex_t bindex;
29163+
29164+ err = 0;
29165+ sb = args->sb;
29166+ dir = sb->s_root->d_inode;
29167+ br = args->br;
29168+
29169+ si_noflush_write_lock(sb);
29170+ ii_read_lock_parent(dir);
29171+ bindex = au_br_index(sb, br->br_id);
29172+ err = au_xino_trunc(sb, bindex);
dece6358
AM
29173+ if (!err
29174+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
1facf9fc 29175+ >= br->br_xino_upper)
29176+ br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
29177+
1facf9fc 29178+ ii_read_unlock(dir);
29179+ if (unlikely(err))
4a4d8108 29180+ pr_warning("err b%d, (%d)\n", bindex, err);
1facf9fc 29181+ atomic_dec(&br->br_xino_running);
29182+ atomic_dec(&br->br_count);
1facf9fc 29183+ si_write_unlock(sb);
027c5e7a 29184+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 29185+ kfree(args);
29186+}
29187+/* queue an asynchronous truncation of @br's xino file once it reaches br_xino_upper */
29188+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
29189+{
29190+ struct xino_do_trunc_args *args;
29191+ int wkq_err;
29192+
29193+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
29194+ < br->br_xino_upper)
29195+ return; /* still below the watermark */
29196+
29197+ if (atomic_inc_return(&br->br_xino_running) > 1)
29198+ goto out; /* another request is already in flight; undo our increment */
29199+
29200+ /* the si lock is taken and args is kfree()d in xino_do_trunc() */
29201+ args = kmalloc(sizeof(*args), GFP_NOFS);
29202+ if (unlikely(!args)) {
29203+ AuErr1("no memory\n");
29204+ goto out_args;
29205+ }
29206+
e49829fe 29207+ atomic_inc(&br->br_count);
1facf9fc 29208+ args->sb = sb;
29209+ args->br = br;
53392da6 29210+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 29211+ if (!wkq_err)
29212+ return; /* success, xino_do_trunc() now owns args and the counters */
29213+
4a4d8108 29214+ pr_err("wkq %d\n", wkq_err);
e49829fe 29215+ atomic_dec(&br->br_count);
1facf9fc 29216+
4f0767ce 29217+out_args:
1facf9fc 29218+ kfree(args);
4f0767ce 29219+out:
e49829fe 29220+ atomic_dec(&br->br_xino_running);
1facf9fc 29221+}
29222+
29223+/* ---------------------------------------------------------------------- */
29224+
29225+static int au_xino_do_write(au_writef_t write, struct file *file,
29226+ ino_t h_ino, ino_t ino)
29227+{
29228+ loff_t pos;
29229+ ssize_t sz;
29230+
29231+ pos = h_ino;
29232+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
29233+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29234+ return -EFBIG;
29235+ }
29236+ pos *= sizeof(ino);
29237+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
29238+ if (sz == sizeof(ino))
29239+ return 0; /* success */
29240+
29241+ AuIOErr("write failed (%zd)\n", sz);
29242+ return -EIO;
29243+}
29244+
29245+/*
29246+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
29247+ * at the position of @h_ino.
29248+ * even if @ino is zero, it is written to the xinofile and means no entry.
29249+ * if the size of the xino file on a specific filesystem exceeds the watermark,
29250+ * try truncating it.
29251+ */
29252+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29253+ ino_t ino)
29254+{
29255+ int err;
29256+ unsigned int mnt_flags;
29257+ struct au_branch *br;
29258+
29259+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
29260+ || ((loff_t)-1) > 0);
dece6358 29261+ SiMustAnyLock(sb);
1facf9fc 29262+
29263+ mnt_flags = au_mntflags(sb);
29264+ if (!au_opt_test(mnt_flags, XINO))
29265+ return 0;
29266+
29267+ br = au_sbr(sb, bindex);
29268+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29269+ h_ino, ino);
29270+ if (!err) {
29271+ if (au_opt_test(mnt_flags, TRUNC_XINO)
29272+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29273+ xino_try_trunc(sb, br);
29274+ return 0; /* success */
29275+ }
29276+
29277+ AuIOErr("write failed (%d)\n", err);
29278+ return -EIO;
29279+}
29280+
29281+/* ---------------------------------------------------------------------- */
29282+
29283+/* aufs inode number bitmap */
29284+
29285+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
29286+static ino_t xib_calc_ino(unsigned long pindex, int bit)
29287+{
29288+ ino_t ino;
29289+
29290+ AuDebugOn(bit < 0 || page_bits <= bit);
29291+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
29292+ return ino;
29293+}
29294+
29295+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
29296+{
29297+ AuDebugOn(ino < AUFS_FIRST_INO);
29298+ ino -= AUFS_FIRST_INO;
29299+ *pindex = ino / page_bits;
29300+ *bit = ino % page_bits;
29301+}
29302+/* make page @pindex of the xib file the one held in si_xib_buf; 0 or a negative errno */
29303+static int xib_pindex(struct super_block *sb, unsigned long pindex)
29304+{
29305+ int err;
29306+ loff_t pos;
29307+ ssize_t sz;
29308+ struct au_sbinfo *sbinfo;
29309+ struct file *xib;
29310+ unsigned long *p;
29311+
29312+ sbinfo = au_sbi(sb);
29313+ MtxMustLock(&sbinfo->si_xib_mtx);
29314+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
29315+ || !au_opt_test(sbinfo->si_mntflags, XINO));
29316+
29317+ if (pindex == sbinfo->si_xib_last_pindex)
29318+ return 0; /* the requested page is already in the buffer */
29319+ /* write the currently buffered page back to its own position first */
29320+ xib = sbinfo->si_xib;
29321+ p = sbinfo->si_xib_buf;
29322+ pos = sbinfo->si_xib_last_pindex;
29323+ pos *= PAGE_SIZE;
29324+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29325+ if (unlikely(sz != PAGE_SIZE))
29326+ goto out;
29327+ /* then read page @pindex, or write a zero-filled one if the file is too short */
29328+ pos = pindex;
29329+ pos *= PAGE_SIZE;
29330+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
29331+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
29332+ else {
29333+ memset(p, 0, PAGE_SIZE);
29334+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29335+ }
29336+ if (sz == PAGE_SIZE) {
29337+ sbinfo->si_xib_last_pindex = pindex;
29338+ return 0; /* success */
29339+ }
29340+
4f0767ce 29341+out: /* reached on a short or failed read as well, despite the message below */
b752ccd1
AM
29342+ AuIOErr1("write failed (%zd)\n", sz);
29343+ err = sz;
29344+ if (sz >= 0) /* a short transfer is reported as -EIO */
29345+ err = -EIO;
29346+ return err;
29347+}
29348+
29349+/* ---------------------------------------------------------------------- */
29350+
29351+static void au_xib_clear_bit(struct inode *inode)
29352+{
29353+ int err, bit;
29354+ unsigned long pindex;
29355+ struct super_block *sb;
29356+ struct au_sbinfo *sbinfo;
29357+
29358+ AuDebugOn(inode->i_nlink);
29359+
29360+ sb = inode->i_sb;
29361+ xib_calc_bit(inode->i_ino, &pindex, &bit);
29362+ AuDebugOn(page_bits <= bit);
29363+ sbinfo = au_sbi(sb);
29364+ mutex_lock(&sbinfo->si_xib_mtx);
29365+ err = xib_pindex(sb, pindex);
29366+ if (!err) {
29367+ clear_bit(bit, sbinfo->si_xib_buf);
29368+ sbinfo->si_xib_next_bit = bit;
29369+ }
29370+ mutex_unlock(&sbinfo->si_xib_mtx);
29371+}
29372+
29373+/* for s_op->delete_inode() */
29374+void au_xino_delete_inode(struct inode *inode, const int unlinked)
29375+{
29376+ int err;
29377+ unsigned int mnt_flags;
29378+ aufs_bindex_t bindex, bend, bi;
29379+ unsigned char try_trunc;
29380+ struct au_iinfo *iinfo;
29381+ struct super_block *sb;
29382+ struct au_hinode *hi;
29383+ struct inode *h_inode;
29384+ struct au_branch *br;
29385+ au_writef_t xwrite;
29386+
29387+ sb = inode->i_sb;
29388+ mnt_flags = au_mntflags(sb);
29389+ if (!au_opt_test(mnt_flags, XINO)
29390+ || inode->i_ino == AUFS_ROOT_INO)
29391+ return;
29392+
29393+ if (unlinked) {
29394+ au_xigen_inc(inode);
29395+ au_xib_clear_bit(inode);
29396+ }
29397+
29398+ iinfo = au_ii(inode);
29399+ if (!iinfo)
29400+ return;
1facf9fc 29401+
b752ccd1
AM
29402+ bindex = iinfo->ii_bstart;
29403+ if (bindex < 0)
29404+ return;
1facf9fc 29405+
b752ccd1
AM
29406+ xwrite = au_sbi(sb)->si_xwrite;
29407+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
29408+ hi = iinfo->ii_hinode + bindex;
29409+ bend = iinfo->ii_bend;
29410+ for (; bindex <= bend; bindex++, hi++) {
29411+ h_inode = hi->hi_inode;
29412+ if (!h_inode
29413+ || (!unlinked && h_inode->i_nlink))
29414+ continue;
1facf9fc 29415+
b752ccd1
AM
29416+ /* inode may not be revalidated */
29417+ bi = au_br_index(sb, hi->hi_id);
29418+ if (bi < 0)
29419+ continue;
1facf9fc 29420+
b752ccd1
AM
29421+ br = au_sbr(sb, bi);
29422+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
29423+ h_inode->i_ino, /*ino*/0);
29424+ if (!err && try_trunc
29425+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29426+ xino_try_trunc(sb, br);
1facf9fc 29427+ }
1facf9fc 29428+}
29429+
29430+/* get an unused inode number from bitmap */
29431+ino_t au_xino_new_ino(struct super_block *sb)
29432+{
29433+ ino_t ino;
29434+ unsigned long *p, pindex, ul, pend;
29435+ struct au_sbinfo *sbinfo;
29436+ struct file *file;
29437+ int free_bit, err;
29438+
29439+ if (!au_opt_test(au_mntflags(sb), XINO))
29440+ return iunique(sb, AUFS_FIRST_INO);
29441+
29442+ sbinfo = au_sbi(sb);
29443+ mutex_lock(&sbinfo->si_xib_mtx);
29444+ p = sbinfo->si_xib_buf;
29445+ free_bit = sbinfo->si_xib_next_bit;
29446+ if (free_bit < page_bits && !test_bit(free_bit, p))
29447+ goto out; /* success */
29448+ free_bit = find_first_zero_bit(p, page_bits);
29449+ if (free_bit < page_bits)
29450+ goto out; /* success */
29451+
29452+ pindex = sbinfo->si_xib_last_pindex;
29453+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
29454+ err = xib_pindex(sb, ul);
29455+ if (unlikely(err))
29456+ goto out_err;
29457+ free_bit = find_first_zero_bit(p, page_bits);
29458+ if (free_bit < page_bits)
29459+ goto out; /* success */
29460+ }
29461+
29462+ file = sbinfo->si_xib;
29463+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
29464+ for (ul = pindex + 1; ul <= pend; ul++) {
29465+ err = xib_pindex(sb, ul);
29466+ if (unlikely(err))
29467+ goto out_err;
29468+ free_bit = find_first_zero_bit(p, page_bits);
29469+ if (free_bit < page_bits)
29470+ goto out; /* success */
29471+ }
29472+ BUG();
29473+
4f0767ce 29474+out:
1facf9fc 29475+ set_bit(free_bit, p);
7f207e10 29476+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 29477+ pindex = sbinfo->si_xib_last_pindex;
29478+ mutex_unlock(&sbinfo->si_xib_mtx);
29479+ ino = xib_calc_ino(pindex, free_bit);
29480+ AuDbg("i%lu\n", (unsigned long)ino);
29481+ return ino;
4f0767ce 29482+out_err:
1facf9fc 29483+ mutex_unlock(&sbinfo->si_xib_mtx);
29484+ AuDbg("i0\n");
29485+ return 0;
29486+}
29487+
29488+/*
29489+ * read @ino from xinofile for the specified branch{@sb, @bindex}
29490+ * at the position of @h_ino.
29491+ * if no entry is stored for @h_ino, *@ino is left as 0 and 0 is returned.
29492+ */
29493+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29494+ ino_t *ino)
29495+{
29496+ int err;
29497+ ssize_t sz;
29498+ loff_t pos;
29499+ struct file *file;
29500+ struct au_sbinfo *sbinfo;
29501+
29502+ *ino = 0;
29503+ if (!au_opt_test(au_mntflags(sb), XINO))
29504+ return 0; /* no xino */
29505+
29506+ err = 0;
29507+ sbinfo = au_sbi(sb);
29508+ pos = h_ino;
29509+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
29510+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29511+ return -EFBIG;
29512+ }
29513+ pos *= sizeof(*ino);
29514+
29515+ file = au_sbr(sb, bindex)->br_xino.xi_file;
29516+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
29517+ return 0; /* no ino */
29518+
29519+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
29520+ if (sz == sizeof(*ino))
29521+ return 0; /* success */
29522+
29523+ err = sz;
29524+ if (unlikely(sz >= 0)) {
29525+ err = -EIO;
29526+ AuIOErr("xino read error (%zd)\n", sz);
29527+ }
29528+
29529+ return err;
29530+}
29531+
29532+/* ---------------------------------------------------------------------- */
29533+
29534+/* create and set a new xino file */
29535+
29536+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
29537+{
29538+ struct file *file;
29539+ struct dentry *h_parent, *d;
29540+ struct inode *h_dir;
29541+ int err;
29542+
29543+ /*
29544+ * at mount-time, and the xino file is the default path,
4a4d8108 29545+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 29546+ * when a user specified the xino, we cannot get au_hdir to be ignored.
29547+ */
7f207e10 29548+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29549+ /* | __FMODE_NONOTIFY */,
1facf9fc 29550+ S_IRUGO | S_IWUGO);
29551+ if (IS_ERR(file)) {
29552+ if (!silent)
4a4d8108 29553+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 29554+ return file;
29555+ }
29556+
29557+ /* keep file count */
29558+ h_parent = dget_parent(file->f_dentry);
29559+ h_dir = h_parent->d_inode;
29560+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
29561+ /* mnt_want_write() is unnecessary here */
29562+ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
29563+ mutex_unlock(&h_dir->i_mutex);
29564+ dput(h_parent);
29565+ if (unlikely(err)) {
29566+ if (!silent)
4a4d8108 29567+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 29568+ goto out;
29569+ }
29570+
29571+ err = -EINVAL;
29572+ d = file->f_dentry;
29573+ if (unlikely(sb == d->d_sb)) {
29574+ if (!silent)
4a4d8108 29575+ pr_err("%s must be outside\n", fname);
1facf9fc 29576+ goto out;
29577+ }
29578+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
29579+ if (!silent)
4a4d8108
AM
29580+ pr_err("xino doesn't support %s(%s)\n",
29581+ fname, au_sbtype(d->d_sb));
1facf9fc 29582+ goto out;
29583+ }
29584+ return file; /* success */
29585+
4f0767ce 29586+out:
1facf9fc 29587+ fput(file);
29588+ file = ERR_PTR(err);
29589+ return file;
29590+}
29591+
29592+/*
29593+ * find another branch who is on the same filesystem of the specified
29594+ * branch{@btgt}. search until @bend.
29595+ */
29596+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
29597+ aufs_bindex_t bend)
29598+{
29599+ aufs_bindex_t bindex;
29600+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
29601+
29602+ for (bindex = 0; bindex < btgt; bindex++)
29603+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29604+ return bindex;
29605+ for (bindex++; bindex <= bend; bindex++)
29606+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29607+ return bindex;
29608+ return -1;
29609+}
29610+
29611+/* ---------------------------------------------------------------------- */
29612+
29613+/*
29614+ * initialize the xinofile for the specified branch @br
29615+ * at the place/path where @base_file indicates.
29616+ * test whether another branch is on the same filesystem or not,
29617+ * if @do_test is true.
29618+ */
29619+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
29620+ struct file *base_file, int do_test)
29621+{
29622+ int err;
29623+ ino_t ino;
29624+ aufs_bindex_t bend, bindex;
29625+ struct au_branch *shared_br, *b;
29626+ struct file *file;
29627+ struct super_block *tgt_sb;
29628+
29629+ shared_br = NULL;
29630+ bend = au_sbend(sb);
29631+ if (do_test) {
29632+ tgt_sb = br->br_mnt->mnt_sb;
29633+ for (bindex = 0; bindex <= bend; bindex++) {
29634+ b = au_sbr(sb, bindex);
29635+ if (tgt_sb == b->br_mnt->mnt_sb) {
29636+ shared_br = b;
29637+ break;
29638+ }
29639+ }
29640+ }
29641+
29642+ if (!shared_br || !shared_br->br_xino.xi_file) {
29643+ struct au_xino_lock_dir ldir;
29644+
29645+ au_xino_lock_dir(sb, base_file, &ldir);
29646+ /* mnt_want_write() is unnecessary here */
29647+ file = au_xino_create2(base_file, NULL);
29648+ au_xino_unlock_dir(&ldir);
29649+ err = PTR_ERR(file);
29650+ if (IS_ERR(file))
29651+ goto out;
29652+ br->br_xino.xi_file = file;
29653+ } else {
29654+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
29655+ get_file(br->br_xino.xi_file);
29656+ }
29657+
29658+ ino = AUFS_ROOT_INO;
29659+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29660+ h_ino, ino);
b752ccd1
AM
29661+ if (unlikely(err)) {
29662+ fput(br->br_xino.xi_file);
29663+ br->br_xino.xi_file = NULL;
29664+ }
1facf9fc 29665+
4f0767ce 29666+out:
1facf9fc 29667+ return err;
29668+}
29669+
29670+/* ---------------------------------------------------------------------- */
29671+
29672+/* truncate a xino bitmap file */
29673+
29674+/* todo: slow */
29675+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
29676+{
29677+ int err, bit;
29678+ ssize_t sz;
29679+ unsigned long pindex;
29680+ loff_t pos, pend;
29681+ struct au_sbinfo *sbinfo;
29682+ au_readf_t func;
29683+ ino_t *ino;
29684+ unsigned long *p;
29685+
29686+ err = 0;
29687+ sbinfo = au_sbi(sb);
dece6358 29688+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 29689+ p = sbinfo->si_xib_buf;
29690+ func = sbinfo->si_xread;
29691+ pend = i_size_read(file->f_dentry->d_inode);
29692+ pos = 0;
29693+ while (pos < pend) {
29694+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
29695+ err = sz;
29696+ if (unlikely(sz <= 0))
29697+ goto out;
29698+
29699+ err = 0;
29700+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
29701+ if (unlikely(*ino < AUFS_FIRST_INO))
29702+ continue;
29703+
29704+ xib_calc_bit(*ino, &pindex, &bit);
29705+ AuDebugOn(page_bits <= bit);
29706+ err = xib_pindex(sb, pindex);
29707+ if (!err)
29708+ set_bit(bit, p);
29709+ else
29710+ goto out;
29711+ }
29712+ }
29713+
4f0767ce 29714+out:
1facf9fc 29715+ return err;
29716+}
29717+
29718+static int xib_restore(struct super_block *sb)
29719+{
29720+ int err;
29721+ aufs_bindex_t bindex, bend;
29722+ void *page;
29723+
29724+ err = -ENOMEM;
29725+ page = (void *)__get_free_page(GFP_NOFS);
29726+ if (unlikely(!page))
29727+ goto out;
29728+
29729+ err = 0;
29730+ bend = au_sbend(sb);
29731+ for (bindex = 0; !err && bindex <= bend; bindex++)
29732+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
29733+ err = do_xib_restore
29734+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
29735+ else
29736+ AuDbg("b%d\n", bindex);
29737+ free_page((unsigned long)page);
29738+
4f0767ce 29739+out:
1facf9fc 29740+ return err;
29741+}
29742+
29743+int au_xib_trunc(struct super_block *sb)
29744+{
29745+ int err;
29746+ ssize_t sz;
29747+ loff_t pos;
29748+ struct au_xino_lock_dir ldir;
29749+ struct au_sbinfo *sbinfo;
29750+ unsigned long *p;
29751+ struct file *file;
29752+
dece6358
AM
29753+ SiMustWriteLock(sb);
29754+
1facf9fc 29755+ err = 0;
29756+ sbinfo = au_sbi(sb);
29757+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
29758+ goto out;
29759+
29760+ file = sbinfo->si_xib;
29761+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
29762+ goto out;
29763+
29764+ au_xino_lock_dir(sb, file, &ldir);
29765+ /* mnt_want_write() is unnecessary here */
29766+ file = au_xino_create2(sbinfo->si_xib, NULL);
29767+ au_xino_unlock_dir(&ldir);
29768+ err = PTR_ERR(file);
29769+ if (IS_ERR(file))
29770+ goto out;
29771+ fput(sbinfo->si_xib);
29772+ sbinfo->si_xib = file;
29773+
29774+ p = sbinfo->si_xib_buf;
29775+ memset(p, 0, PAGE_SIZE);
29776+ pos = 0;
29777+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
29778+ if (unlikely(sz != PAGE_SIZE)) {
29779+ err = sz;
29780+ AuIOErr("err %d\n", err);
29781+ if (sz >= 0)
29782+ err = -EIO;
29783+ goto out;
29784+ }
29785+
29786+ mutex_lock(&sbinfo->si_xib_mtx);
29787+ /* mnt_want_write() is unnecessary here */
29788+ err = xib_restore(sb);
29789+ mutex_unlock(&sbinfo->si_xib_mtx);
29790+
29791+out:
29792+ return err;
29793+}
29794+
29795+/* ---------------------------------------------------------------------- */
29796+
29797+/*
29798+ * xino mount option handlers
29799+ */
29800+static au_readf_t find_readf(struct file *h_file)
29801+{
29802+ const struct file_operations *fop = h_file->f_op;
29803+
29804+ if (fop) {
29805+ if (fop->read)
29806+ return fop->read;
29807+ if (fop->aio_read)
29808+ return do_sync_read;
29809+ }
29810+ return ERR_PTR(-ENOSYS);
29811+}
29812+
29813+static au_writef_t find_writef(struct file *h_file)
29814+{
29815+ const struct file_operations *fop = h_file->f_op;
29816+
29817+ if (fop) {
29818+ if (fop->write)
29819+ return fop->write;
29820+ if (fop->aio_write)
29821+ return do_sync_write;
29822+ }
29823+ return ERR_PTR(-ENOSYS);
29824+}
29825+
29826+/* xino bitmap */
29827+static void xino_clear_xib(struct super_block *sb)
29828+{
29829+ struct au_sbinfo *sbinfo;
29830+
dece6358
AM
29831+ SiMustWriteLock(sb);
29832+
1facf9fc 29833+ sbinfo = au_sbi(sb);
29834+ sbinfo->si_xread = NULL;
29835+ sbinfo->si_xwrite = NULL;
29836+ if (sbinfo->si_xib)
29837+ fput(sbinfo->si_xib);
29838+ sbinfo->si_xib = NULL;
29839+ free_page((unsigned long)sbinfo->si_xib_buf);
29840+ sbinfo->si_xib_buf = NULL;
29841+}
29842+
29843+static int au_xino_set_xib(struct super_block *sb, struct file *base)
29844+{
29845+ int err;
29846+ loff_t pos;
29847+ struct au_sbinfo *sbinfo;
29848+ struct file *file;
29849+
dece6358
AM
29850+ SiMustWriteLock(sb);
29851+
1facf9fc 29852+ sbinfo = au_sbi(sb);
29853+ file = au_xino_create2(base, sbinfo->si_xib);
29854+ err = PTR_ERR(file);
29855+ if (IS_ERR(file))
29856+ goto out;
29857+ if (sbinfo->si_xib)
29858+ fput(sbinfo->si_xib);
29859+ sbinfo->si_xib = file;
29860+ sbinfo->si_xread = find_readf(file);
29861+ sbinfo->si_xwrite = find_writef(file);
29862+
29863+ err = -ENOMEM;
29864+ if (!sbinfo->si_xib_buf)
29865+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
29866+ if (unlikely(!sbinfo->si_xib_buf))
29867+ goto out_unset;
29868+
29869+ sbinfo->si_xib_last_pindex = 0;
29870+ sbinfo->si_xib_next_bit = 0;
29871+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
29872+ pos = 0;
29873+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
29874+ PAGE_SIZE, &pos);
29875+ if (unlikely(err != PAGE_SIZE))
29876+ goto out_free;
29877+ }
29878+ err = 0;
29879+ goto out; /* success */
29880+
4f0767ce 29881+out_free:
1facf9fc 29882+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
29883+ sbinfo->si_xib_buf = NULL;
29884+ if (err >= 0)
29885+ err = -EIO;
4f0767ce 29886+out_unset:
b752ccd1
AM
29887+ fput(sbinfo->si_xib);
29888+ sbinfo->si_xib = NULL;
29889+ sbinfo->si_xread = NULL;
29890+ sbinfo->si_xwrite = NULL;
4f0767ce 29891+out:
b752ccd1 29892+ return err;
1facf9fc 29893+}
29894+
b752ccd1
AM
29895+/* xino for each branch */
29896+static void xino_clear_br(struct super_block *sb)
29897+{
29898+ aufs_bindex_t bindex, bend;
29899+ struct au_branch *br;
1facf9fc 29900+
b752ccd1
AM
29901+ bend = au_sbend(sb);
29902+ for (bindex = 0; bindex <= bend; bindex++) {
29903+ br = au_sbr(sb, bindex);
29904+ if (!br || !br->br_xino.xi_file)
29905+ continue;
29906+
29907+ fput(br->br_xino.xi_file);
29908+ br->br_xino.xi_file = NULL;
29909+ }
29910+}
29911+
29912+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 29913+{
29914+ int err;
b752ccd1
AM
29915+ ino_t ino;
29916+ aufs_bindex_t bindex, bend, bshared;
29917+ struct {
29918+ struct file *old, *new;
29919+ } *fpair, *p;
29920+ struct au_branch *br;
29921+ struct inode *inode;
29922+ au_writef_t writef;
1facf9fc 29923+
b752ccd1
AM
29924+ SiMustWriteLock(sb);
29925+
29926+ err = -ENOMEM;
29927+ bend = au_sbend(sb);
29928+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
29929+ if (unlikely(!fpair))
1facf9fc 29930+ goto out;
29931+
b752ccd1
AM
29932+ inode = sb->s_root->d_inode;
29933+ ino = AUFS_ROOT_INO;
29934+ writef = au_sbi(sb)->si_xwrite;
29935+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
29936+ br = au_sbr(sb, bindex);
29937+ bshared = is_sb_shared(sb, bindex, bindex - 1);
29938+ if (bshared >= 0) {
29939+ /* shared xino */
29940+ *p = fpair[bshared];
29941+ get_file(p->new);
29942+ }
29943+
29944+ if (!p->new) {
29945+ /* new xino */
29946+ p->old = br->br_xino.xi_file;
29947+ p->new = au_xino_create2(base, br->br_xino.xi_file);
29948+ err = PTR_ERR(p->new);
29949+ if (IS_ERR(p->new)) {
29950+ p->new = NULL;
29951+ goto out_pair;
29952+ }
29953+ }
29954+
29955+ err = au_xino_do_write(writef, p->new,
29956+ au_h_iptr(inode, bindex)->i_ino, ino);
29957+ if (unlikely(err))
29958+ goto out_pair;
29959+ }
29960+
29961+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
29962+ br = au_sbr(sb, bindex);
29963+ if (br->br_xino.xi_file)
29964+ fput(br->br_xino.xi_file);
29965+ get_file(p->new);
29966+ br->br_xino.xi_file = p->new;
29967+ }
1facf9fc 29968+
4f0767ce 29969+out_pair:
b752ccd1
AM
29970+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
29971+ if (p->new)
29972+ fput(p->new);
29973+ else
29974+ break;
29975+ kfree(fpair);
4f0767ce 29976+out:
1facf9fc 29977+ return err;
29978+}
b752ccd1
AM
29979+
29980+void au_xino_clr(struct super_block *sb)
29981+{
29982+ struct au_sbinfo *sbinfo;
29983+
29984+ au_xigen_clr(sb);
29985+ xino_clear_xib(sb);
29986+ xino_clear_br(sb);
29987+ sbinfo = au_sbi(sb);
29988+ /* lvalue, do not call au_mntflags() */
29989+ au_opt_clr(sbinfo->si_mntflags, XINO);
29990+}
29991+
29992+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
29993+{
29994+ int err, skip;
29995+ struct dentry *parent, *cur_parent;
29996+ struct qstr *dname, *cur_name;
29997+ struct file *cur_xino;
29998+ struct inode *dir;
29999+ struct au_sbinfo *sbinfo;
30000+
30001+ SiMustWriteLock(sb);
30002+
30003+ err = 0;
30004+ sbinfo = au_sbi(sb);
30005+ parent = dget_parent(xino->file->f_dentry);
30006+ if (remount) {
30007+ skip = 0;
30008+ dname = &xino->file->f_dentry->d_name;
30009+ cur_xino = sbinfo->si_xib;
30010+ if (cur_xino) {
30011+ cur_parent = dget_parent(cur_xino->f_dentry);
30012+ cur_name = &cur_xino->f_dentry->d_name;
30013+ skip = (cur_parent == parent
30014+ && dname->len == cur_name->len
30015+ && !memcmp(dname->name, cur_name->name,
30016+ dname->len));
30017+ dput(cur_parent);
30018+ }
30019+ if (skip)
30020+ goto out;
30021+ }
30022+
30023+ au_opt_set(sbinfo->si_mntflags, XINO);
30024+ dir = parent->d_inode;
30025+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
30026+ /* mnt_want_write() is unnecessary here */
30027+ err = au_xino_set_xib(sb, xino->file);
30028+ if (!err)
30029+ err = au_xigen_set(sb, xino->file);
30030+ if (!err)
30031+ err = au_xino_set_br(sb, xino->file);
30032+ mutex_unlock(&dir->i_mutex);
30033+ if (!err)
30034+ goto out; /* success */
30035+
30036+ /* reset all */
30037+ AuIOErr("failed creating xino(%d).\n", err);
30038+
4f0767ce 30039+out:
b752ccd1
AM
30040+ dput(parent);
30041+ return err;
30042+}
30043+
30044+/* ---------------------------------------------------------------------- */
30045+
30046+/*
30047+ * create a xinofile at the default place/path.
30048+ */
30049+struct file *au_xino_def(struct super_block *sb)
30050+{
30051+ struct file *file;
30052+ char *page, *p;
30053+ struct au_branch *br;
30054+ struct super_block *h_sb;
30055+ struct path path;
30056+ aufs_bindex_t bend, bindex, bwr;
30057+
30058+ br = NULL;
30059+ bend = au_sbend(sb);
30060+ bwr = -1;
30061+ for (bindex = 0; bindex <= bend; bindex++) {
30062+ br = au_sbr(sb, bindex);
30063+ if (au_br_writable(br->br_perm)
30064+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
30065+ bwr = bindex;
30066+ break;
30067+ }
30068+ }
30069+
7f207e10
AM
30070+ if (bwr >= 0) {
30071+ file = ERR_PTR(-ENOMEM);
30072+ page = __getname_gfp(GFP_NOFS);
30073+ if (unlikely(!page))
30074+ goto out;
30075+ path.mnt = br->br_mnt;
30076+ path.dentry = au_h_dptr(sb->s_root, bwr);
30077+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
30078+ file = (void *)p;
30079+ if (!IS_ERR(p)) {
30080+ strcat(p, "/" AUFS_XINO_FNAME);
30081+ AuDbg("%s\n", p);
30082+ file = au_xino_create(sb, p, /*silent*/0);
30083+ if (!IS_ERR(file))
30084+ au_xino_brid_set(sb, br->br_id);
30085+ }
30086+ __putname(page);
30087+ } else {
30088+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
30089+ if (IS_ERR(file))
30090+ goto out;
30091+ h_sb = file->f_dentry->d_sb;
30092+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
30093+ pr_err("xino doesn't support %s(%s)\n",
30094+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
30095+ fput(file);
30096+ file = ERR_PTR(-EINVAL);
30097+ }
30098+ if (!IS_ERR(file))
30099+ au_xino_brid_set(sb, -1);
30100+ }
0c5527e5 30101+
7f207e10
AM
30102+out:
30103+ return file;
30104+}
30105+
30106+/* ---------------------------------------------------------------------- */
30107+
30108+int au_xino_path(struct seq_file *seq, struct file *file)
30109+{
30110+ int err;
30111+
30112+ err = au_seq_path(seq, &file->f_path);
30113+ if (unlikely(err < 0))
30114+ goto out;
30115+
30116+ err = 0;
30117+#define Deleted "\\040(deleted)"
30118+ seq->count -= sizeof(Deleted) - 1;
30119+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
30120+ sizeof(Deleted) - 1));
30121+#undef Deleted
30122+
30123+out:
30124+ return err;
30125+}
30126diff -urN /usr/share/empty/include/linux/aufs_type.h linux/include/linux/aufs_type.h
30127--- /usr/share/empty/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
30128+++ linux/include/linux/aufs_type.h 2011-08-24 13:30:24.737979976 +0200
30129@@ -0,0 +1,211 @@
7f207e10 30130+/*
027c5e7a 30131+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
30132+ *
30133+ * This program, aufs is free software; you can redistribute it and/or modify
30134+ * it under the terms of the GNU General Public License as published by
30135+ * the Free Software Foundation; either version 2 of the License, or
30136+ * (at your option) any later version.
30137+ *
30138+ * This program is distributed in the hope that it will be useful,
30139+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30140+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30141+ * GNU General Public License for more details.
30142+ *
30143+ * You should have received a copy of the GNU General Public License
30144+ * along with this program; if not, write to the Free Software
30145+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30146+ */
30147+
30148+#ifndef __AUFS_TYPE_H__
30149+#define __AUFS_TYPE_H__
30150+
30151+#include <linux/ioctl.h>
30152+#include <linux/kernel.h>
30153+#include <linux/limits.h>
30154+#include <linux/types.h>
30155+
53392da6 30156+#define AUFS_VERSION "3.0-20110822"
7f207e10
AM
30157+
30158+/* todo? move this to linux-2.6.19/include/magic.h */
30159+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
30160+
30161+/* ---------------------------------------------------------------------- */
30162+
30163+#ifdef CONFIG_AUFS_BRANCH_MAX_127
30164+typedef __s8 aufs_bindex_t;
30165+#define AUFS_BRANCH_MAX 127
30166+#else
30167+typedef __s16 aufs_bindex_t;
30168+#ifdef CONFIG_AUFS_BRANCH_MAX_511
30169+#define AUFS_BRANCH_MAX 511
30170+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
30171+#define AUFS_BRANCH_MAX 1023
30172+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
30173+#define AUFS_BRANCH_MAX 32767
30174+#endif
30175+#endif
30176+
30177+#ifdef __KERNEL__
30178+#ifndef AUFS_BRANCH_MAX
30179+#error unknown CONFIG_AUFS_BRANCH_MAX value
30180+#endif
30181+#endif /* __KERNEL__ */
30182+
30183+/* ---------------------------------------------------------------------- */
30184+
30185+#define AUFS_NAME "aufs"
30186+#define AUFS_FSTYPE AUFS_NAME
30187+
30188+#define AUFS_ROOT_INO 2
30189+#define AUFS_FIRST_INO 11
30190+
30191+#define AUFS_WH_PFX ".wh."
30192+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
30193+#define AUFS_WH_TMP_LEN 4
30194+/* a limit for rmdir/rename a dir */
30195+#define AUFS_MAX_NAMELEN (NAME_MAX \
30196+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
30197+ - 1 /* dot */\
30198+ - AUFS_WH_TMP_LEN) /* hex */
30199+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
30200+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
30201+#define AUFS_XINO_TRUNC_INIT 64 /* blocks */
30202+#define AUFS_XINO_TRUNC_STEP 4 /* blocks */
30203+#define AUFS_DIRWH_DEF 3
30204+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 30205+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
30206+#define AUFS_RDBLK_DEF 512 /* bytes */
30207+#define AUFS_RDHASH_DEF 32
30208+#define AUFS_WKQ_NAME AUFS_NAME "d"
30209+#define AUFS_WKQ_PRE_NAME AUFS_WKQ_NAME "_pre"
027c5e7a
AM
30210+#define AUFS_MFS_DEF_SEC 30 /* seconds */
30211+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
7f207e10
AM
30212+#define AUFS_PLINK_WARN 100 /* number of plinks */
30213+
30214+/* pseudo-link maintenance under /proc */
30215+#define AUFS_PLINK_MAINT_NAME "plink_maint"
30216+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
30217+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
30218+
30219+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
30220+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
30221+
30222+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
30223+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
30224+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
30225+
30226+/* doubly whiteouted */
30227+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
30228+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
30229+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
30230+
30231+/* branch permission */
30232+#define AUFS_BRPERM_RW "rw"
30233+#define AUFS_BRPERM_RO "ro"
30234+#define AUFS_BRPERM_RR "rr"
30235+#define AUFS_BRPERM_WH "wh"
30236+#define AUFS_BRPERM_NLWH "nolwh"
30237+#define AUFS_BRPERM_ROWH AUFS_BRPERM_RO "+" AUFS_BRPERM_WH
30238+#define AUFS_BRPERM_RRWH AUFS_BRPERM_RR "+" AUFS_BRPERM_WH
30239+#define AUFS_BRPERM_RWNLWH AUFS_BRPERM_RW "+" AUFS_BRPERM_NLWH
30240+
30241+/* ---------------------------------------------------------------------- */
30242+
30243+/* ioctl */
30244+enum {
30245+ /* readdir in userspace */
30246+ AuCtl_RDU,
30247+ AuCtl_RDU_INO,
30248+
30249+ /* pathconf wrapper */
027c5e7a
AM
30250+ AuCtl_WBR_FD,
30251+
30252+ /* busy inode */
30253+ AuCtl_IBUSY
7f207e10
AM
30254+};
30255+
30256+/* borrowed from linux/include/linux/kernel.h */
30257+#ifndef ALIGN
30258+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
30259+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
30260+#endif
30261+
30262+/* borrowed from linux/include/linux/compiler-gcc3.h */
30263+#ifndef __aligned
30264+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
30265+#endif
30266+
30267+#ifdef __KERNEL__
30268+#ifndef __packed
7f207e10
AM
30269+#define __packed __attribute__((packed))
30270+#endif
53392da6 30271+#endif
7f207e10
AM
30272+
30273+struct au_rdu_cookie {
30274+ __u64 h_pos;
30275+ __s16 bindex;
30276+ __u8 flags;
30277+ __u8 pad;
30278+ __u32 generation;
30279+} __aligned(8);
30280+
30281+struct au_rdu_ent {
30282+ __u64 ino;
30283+ __s16 bindex;
30284+ __u8 type;
30285+ __u8 nlen;
30286+ __u8 wh;
30287+ char name[0];
30288+} __aligned(8);
30289+
30290+static inline int au_rdu_len(int nlen)
30291+{
30292+ /* include the terminating NULL */
30293+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
30294+ sizeof(__u64));
30295+}
30296+
30297+union au_rdu_ent_ul {
30298+ struct au_rdu_ent __user *e;
30299+ __u64 ul;
30300+};
30301+
30302+enum {
30303+ AufsCtlRduV_SZ,
30304+ AufsCtlRduV_End
30305+};
30306+
30307+struct aufs_rdu {
30308+ /* input */
30309+ union {
30310+ __u64 sz; /* AuCtl_RDU */
30311+ __u64 nent; /* AuCtl_RDU_INO */
30312+ };
30313+ union au_rdu_ent_ul ent;
30314+ __u16 verify[AufsCtlRduV_End];
30315+
30316+ /* input/output */
30317+ __u32 blk;
30318+
30319+ /* output */
30320+ union au_rdu_ent_ul tail;
30321+ /* number of entries which were added in a single call */
30322+ __u64 rent;
30323+ __u8 full;
30324+ __u8 shwh;
30325+
30326+ struct au_rdu_cookie cookie;
30327+} __aligned(8);
30328+
027c5e7a
AM
30329+struct aufs_ibusy {
30330+ __u64 ino, h_ino;
30331+ __s16 bindex;
30332+} __aligned(8);
30333+
7f207e10
AM
30334+#define AuCtlType 'A'
30335+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
30336+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
30337+#define AUFS_CTL_WBR_FD _IO(AuCtlType, AuCtl_WBR_FD)
027c5e7a 30338+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
7f207e10
AM
30339+
30340+#endif /* __AUFS_TYPE_H__ */
87a755f4 30341
This page took 4.184979 seconds and 4 git commands to generate.