]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs3.patch
- update for 3.2
[packages/kernel.git] / kernel-aufs3.patch
CommitLineData
1e00d052 1aufs3.x-rcN kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
1e00d052 4index 9fe0b34..c4311f8 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
1e00d052 7@@ -215,6 +215,7 @@ source "fs/pstore/Kconfig"
7f207e10
AM
8 source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
1e00d052 16index afc1096..5c5ac76 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
1e00d052 19@@ -123,3 +123,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
7f207e10
AM
20 obj-$(CONFIG_EXOFS_FS) += exofs/
21 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 22 obj-$(CONFIG_PSTORE) += pstore/
2cbb1c4b 23+obj-$(CONFIG_AUFS_FS) += aufs/
7f207e10 24diff --git a/include/linux/Kbuild b/include/linux/Kbuild
1e00d052 25index 619b565..29f386b 100644
7f207e10
AM
26--- a/include/linux/Kbuild
27+++ b/include/linux/Kbuild
2cbb1c4b 28@@ -65,6 +65,7 @@ header-y += atmppp.h
7f207e10
AM
29 header-y += atmsap.h
30 header-y += atmsvc.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs.h
34 header-y += auto_fs4.h
35 header-y += auxvec.h
1e00d052 36aufs3.x-rcN base patch
7f207e10
AM
37
38diff --git a/fs/namei.c b/fs/namei.c
1e00d052 39index 0b3138d..8edad02 100644
7f207e10
AM
40--- a/fs/namei.c
41+++ b/fs/namei.c
1e00d052 42@@ -1748,7 +1748,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
7f207e10
AM
43 * needs parent already locked. Doesn't follow mounts.
44 * SMP-safe.
45 */
46-static struct dentry *lookup_hash(struct nameidata *nd)
47+struct dentry *lookup_hash(struct nameidata *nd)
48 {
7f207e10
AM
49 return __lookup_hash(&nd->last, nd->path.dentry, nd);
50 }
7f207e10 51diff --git a/fs/splice.c b/fs/splice.c
1e00d052 52index fa2defa..e3569b0 100644
7f207e10
AM
53--- a/fs/splice.c
54+++ b/fs/splice.c
2cbb1c4b 55@@ -1085,8 +1085,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
56 /*
57 * Attempt to initiate a splice from pipe to file.
58 */
59-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
60- loff_t *ppos, size_t len, unsigned int flags)
61+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
62+ loff_t *ppos, size_t len, unsigned int flags)
63 {
64 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
65 loff_t *, size_t, unsigned int);
2cbb1c4b 66@@ -1113,9 +1113,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
67 /*
68 * Attempt to initiate a splice from a file to a pipe.
69 */
70-static long do_splice_to(struct file *in, loff_t *ppos,
71- struct pipe_inode_info *pipe, size_t len,
72- unsigned int flags)
73+long do_splice_to(struct file *in, loff_t *ppos,
74+ struct pipe_inode_info *pipe, size_t len,
75+ unsigned int flags)
76 {
77 ssize_t (*splice_read)(struct file *, loff_t *,
78 struct pipe_inode_info *, size_t, unsigned int);
79diff --git a/include/linux/namei.h b/include/linux/namei.h
1e00d052 80index 409328d..40afdc0 100644
7f207e10
AM
81--- a/include/linux/namei.h
82+++ b/include/linux/namei.h
1e00d052 83@@ -84,6 +84,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
7f207e10
AM
84 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
85 int (*open)(struct inode *, struct file *));
86
87+extern struct dentry *lookup_hash(struct nameidata *nd);
7f207e10
AM
88 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
89
027c5e7a 90 extern int follow_down_one(struct path *);
1e00d052
AM
91diff --git a/include/linux/splice.h b/include/linux/splice.h
92index 26e5b61..3ffef2f 100644
93--- a/include/linux/splice.h
94+++ b/include/linux/splice.h
95@@ -91,4 +91,10 @@ extern void splice_shrink_spd(struct pipe_inode_info *,
4b3da204
AM
96 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
97
98 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
99+
100+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
101+ loff_t *ppos, size_t len, unsigned int flags);
102+extern long do_splice_to(struct file *in, loff_t *ppos,
103+ struct pipe_inode_info *pipe, size_t len,
104+ unsigned int flags);
105 #endif
106aufs3.x-rcN standalone patch
7f207e10
AM
107
108diff --git a/fs/file_table.c b/fs/file_table.c
1e00d052 109index c322794..2aad244 100644
7f207e10
AM
110--- a/fs/file_table.c
111+++ b/fs/file_table.c
2cbb1c4b 112@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file)
7f207e10
AM
113 }
114 }
115
116+EXPORT_SYMBOL(file_sb_list_del);
1facf9fc 117+
7f207e10
AM
118 #ifdef CONFIG_SMP
119
120 /*
1e00d052
AM
121diff --git a/fs/inode.c b/fs/inode.c
122index ec79246..46ac6f9 100644
123--- a/fs/inode.c
124+++ b/fs/inode.c
125@@ -65,6 +65,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
4b3da204 126 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
2cbb1c4b
JR
127
128 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
2cbb1c4b 129+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
130
131 /*
4b3da204 132 * Empty aops. Can be used for the cases where the user does not
7f207e10 133diff --git a/fs/namei.c b/fs/namei.c
1e00d052 134index 8edad02..50e8718 100644
7f207e10
AM
135--- a/fs/namei.c
136+++ b/fs/namei.c
1e00d052 137@@ -1752,6 +1752,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
027c5e7a 138 {
7f207e10
AM
139 return __lookup_hash(&nd->last, nd->path.dentry, nd);
140 }
141+EXPORT_SYMBOL(lookup_hash);
142
7f207e10
AM
143 /**
144 * lookup_one_len - filesystem helper to lookup single pathname component
145diff --git a/fs/namespace.c b/fs/namespace.c
1e00d052 146index b4febb2..598a308 100644
7f207e10
AM
147--- a/fs/namespace.c
148+++ b/fs/namespace.c
2cbb1c4b 149@@ -1508,6 +1508,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
150 }
151 return 0;
152 }
153+EXPORT_SYMBOL(iterate_mounts);
154
155 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
156 {
157diff --git a/fs/notify/group.c b/fs/notify/group.c
1e00d052 158index 63fc294..6f4adca 100644
7f207e10
AM
159--- a/fs/notify/group.c
160+++ b/fs/notify/group.c
161@@ -22,6 +22,7 @@
162 #include <linux/srcu.h>
163 #include <linux/rculist.h>
164 #include <linux/wait.h>
165+#include <linux/module.h>
166
167 #include <linux/fsnotify_backend.h>
168 #include "fsnotify.h"
169@@ -70,6 +71,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
170 if (atomic_dec_and_test(&group->refcnt))
171 fsnotify_destroy_group(group);
172 }
173+EXPORT_SYMBOL(fsnotify_put_group);
174
175 /*
176 * Create a new fsnotify_group and hold a reference for the group returned.
177@@ -102,3 +104,4 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
178
179 return group;
180 }
181+EXPORT_SYMBOL(fsnotify_alloc_group);
182diff --git a/fs/notify/mark.c b/fs/notify/mark.c
1e00d052 183index e14587d..be6533b 100644
7f207e10
AM
184--- a/fs/notify/mark.c
185+++ b/fs/notify/mark.c
2cbb1c4b 186@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10
AM
187 if (atomic_dec_and_test(&mark->refcnt))
188 mark->free_mark(mark);
189 }
190+EXPORT_SYMBOL(fsnotify_put_mark);
191
192 /*
193 * Any time a mark is getting freed we end up here.
2cbb1c4b 194@@ -189,6 +190,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
7f207e10
AM
195 if (unlikely(atomic_dec_and_test(&group->num_marks)))
196 fsnotify_final_destroy_group(group);
197 }
198+EXPORT_SYMBOL(fsnotify_destroy_mark);
199
200 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
201 {
2cbb1c4b 202@@ -276,6 +278,7 @@ err:
7f207e10
AM
203
204 return ret;
205 }
206+EXPORT_SYMBOL(fsnotify_add_mark);
207
208 /*
209 * clear any marks in a group in which mark->flags & flags is true
2cbb1c4b 210@@ -331,6 +334,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
211 atomic_set(&mark->refcnt, 1);
212 mark->free_mark = free_mark;
213 }
214+EXPORT_SYMBOL(fsnotify_init_mark);
215
216 static int fsnotify_mark_destroy(void *ignored)
217 {
218diff --git a/fs/open.c b/fs/open.c
1e00d052 219index f711921..d742fc0 100644
7f207e10
AM
220--- a/fs/open.c
221+++ b/fs/open.c
222@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
223 mutex_unlock(&dentry->d_inode->i_mutex);
224 return ret;
225 }
226+EXPORT_SYMBOL(do_truncate);
227
228 static long do_sys_truncate(const char __user *pathname, loff_t length)
229 {
230diff --git a/fs/splice.c b/fs/splice.c
1e00d052 231index e3569b0..9dc07b7 100644
7f207e10
AM
232--- a/fs/splice.c
233+++ b/fs/splice.c
2cbb1c4b 234@@ -1109,6 +1109,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
235
236 return splice_write(pipe, out, ppos, len, flags);
237 }
238+EXPORT_SYMBOL(do_splice_from);
239
240 /*
241 * Attempt to initiate a splice from a file to a pipe.
2cbb1c4b 242@@ -1135,6 +1136,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
243
244 return splice_read(in, ppos, pipe, len, flags);
245 }
246+EXPORT_SYMBOL(do_splice_to);
247
248 /**
249 * splice_direct_to_actor - splices data directly between two non-pipes
250diff --git a/security/commoncap.c b/security/commoncap.c
2cbb1c4b 251index a93b3b7..024282c 100644
7f207e10
AM
252--- a/security/commoncap.c
253+++ b/security/commoncap.c
1e00d052 254@@ -971,3 +971,4 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
94337f0d 255 }
7f207e10
AM
256 return ret;
257 }
258+EXPORT_SYMBOL(cap_file_mmap);
259diff --git a/security/device_cgroup.c b/security/device_cgroup.c
1e00d052 260index 4450fbe..2c437e5 100644
7f207e10
AM
261--- a/security/device_cgroup.c
262+++ b/security/device_cgroup.c
1e00d052 263@@ -500,6 +500,7 @@ found:
7f207e10
AM
264
265 return -EPERM;
266 }
2cbb1c4b 267+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
268
269 int devcgroup_inode_mknod(int mode, dev_t dev)
270 {
271diff --git a/security/security.c b/security/security.c
1e00d052 272index d9e1533..466ee5c 100644
7f207e10
AM
273--- a/security/security.c
274+++ b/security/security.c
2cbb1c4b 275@@ -373,6 +373,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
276 return 0;
277 return security_ops->path_rmdir(dir, dentry);
278 }
279+EXPORT_SYMBOL(security_path_rmdir);
280
281 int security_path_unlink(struct path *dir, struct dentry *dentry)
282 {
2cbb1c4b 283@@ -389,6 +390,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
284 return 0;
285 return security_ops->path_symlink(dir, dentry, old_name);
286 }
287+EXPORT_SYMBOL(security_path_symlink);
288
289 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
290 struct dentry *new_dentry)
2cbb1c4b 291@@ -397,6 +399,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
292 return 0;
293 return security_ops->path_link(old_dentry, new_dir, new_dentry);
294 }
295+EXPORT_SYMBOL(security_path_link);
296
297 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
298 struct path *new_dir, struct dentry *new_dentry)
2cbb1c4b 299@@ -415,6 +418,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
300 return 0;
301 return security_ops->path_truncate(path);
302 }
303+EXPORT_SYMBOL(security_path_truncate);
304
305 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
306 mode_t mode)
2cbb1c4b 307@@ -423,6 +427,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
7f207e10
AM
308 return 0;
309 return security_ops->path_chmod(dentry, mnt, mode);
310 }
311+EXPORT_SYMBOL(security_path_chmod);
312
313 int security_path_chown(struct path *path, uid_t uid, gid_t gid)
314 {
2cbb1c4b 315@@ -430,6 +435,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
7f207e10
AM
316 return 0;
317 return security_ops->path_chown(path, uid, gid);
318 }
319+EXPORT_SYMBOL(security_path_chown);
320
321 int security_path_chroot(struct path *path)
322 {
2cbb1c4b 323@@ -506,6 +512,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
324 return 0;
325 return security_ops->inode_readlink(dentry);
326 }
327+EXPORT_SYMBOL(security_inode_readlink);
328
329 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
330 {
2cbb1c4b 331@@ -520,6 +527,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 332 return 0;
1e00d052 333 return security_ops->inode_permission(inode, mask);
7f207e10
AM
334 }
335+EXPORT_SYMBOL(security_inode_permission);
336
1e00d052 337 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 338 {
1e00d052 339@@ -619,6 +627,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
340
341 return fsnotify_perm(file, mask);
342 }
343+EXPORT_SYMBOL(security_file_permission);
344
345 int security_file_alloc(struct file *file)
346 {
1e00d052 347@@ -646,6 +655,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
7f207e10
AM
348 return ret;
349 return ima_file_mmap(file, prot);
350 }
351+EXPORT_SYMBOL(security_file_mmap);
352
353 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
354 unsigned long prot)
355diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
356--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 357+++ linux/Documentation/ABI/testing/debugfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
358@@ -0,0 +1,37 @@
359+What: /debug/aufs/si_<id>/
360+Date: March 2009
361+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
362+Description:
363+ Under /debug/aufs, a directory named si_<id> is created
364+ per aufs mount, where <id> is a unique id generated
365+ internally.
1facf9fc 366+
7f207e10
AM
367+What: /debug/aufs/si_<id>/xib
368+Date: March 2009
369+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
370+Description:
371+ It shows the consumed blocks by xib (External Inode Number
372+ Bitmap), its block size and file size.
373+ When the aufs mount option 'noxino' is specified, it
374+ will be empty. About XINO files, see the aufs manual.
375+
376+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
377+Date: March 2009
378+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
379+Description:
380+ It shows the consumed blocks by xino (External Inode Number
381+ Translation Table), its link count, block size and file
382+ size.
383+ When the aufs mount option 'noxino' is specified, it
384+ will be empty. About XINO files, see the aufs manual.
385+
386+What: /debug/aufs/si_<id>/xigen
387+Date: March 2009
388+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
389+Description:
390+ It shows the consumed blocks by xigen (External Inode
391+ Generation Table), its block size and file size.
392+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
393+ be created.
394+ When the aufs mount option 'noxino' is specified, it
395+ will be empty. About XINO files, see the aufs manual.
396diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
397--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 398+++ linux/Documentation/ABI/testing/sysfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
399@@ -0,0 +1,24 @@
400+What: /sys/fs/aufs/si_<id>/
401+Date: March 2009
402+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
403+Description:
404+ Under /sys/fs/aufs, a directory named si_<id> is created
405+ per aufs mount, where <id> is a unique id generated
406+ internally.
407+
408+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
409+Date: March 2009
410+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
411+Description:
412+ It shows the absolute path of a member directory (which
413+ is called branch) in aufs, and its permission.
414+
415+What: /sys/fs/aufs/si_<id>/xi_path
416+Date: March 2009
417+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
418+Description:
419+ It shows the absolute path of XINO (External Inode Number
420+ Bitmap, Translation Table and Generation Table) file
421+ even if it is the default path.
422+ When the aufs mount option 'noxino' is specified, it
423+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
424diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
425--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
426+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2011-08-24 13:30:24.727980364 +0200
427@@ -0,0 +1,162 @@
428+
429+# Copyright (C) 2005-2011 Junjiro R. Okajima
430+#
431+# This program is free software; you can redistribute it and/or modify
432+# it under the terms of the GNU General Public License as published by
433+# the Free Software Foundation; either version 2 of the License, or
434+# (at your option) any later version.
435+#
436+# This program is distributed in the hope that it will be useful,
437+# but WITHOUT ANY WARRANTY; without even the implied warranty of
438+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
439+# GNU General Public License for more details.
440+#
441+# You should have received a copy of the GNU General Public License
442+# along with this program; if not, write to the Free Software
443+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
444+
445+Introduction
446+----------------------------------------
447+
448+aufs [ei ju: ef es] | [a u f s]
449+1. abbrev. for "advanced multi-layered unification filesystem".
450+2. abbrev. for "another unionfs".
451+3. abbrev. for "auf das" in German which means "on the" in English.
452+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
453+ But "Filesystem aufs Filesystem" is hard to understand.
454+
455+AUFS is a filesystem with features:
456+- multi layered stackable unification filesystem, the member directory
457+ is called as a branch.
458+- branch permission and attribute, 'readonly', 'real-readonly',
459+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
460+ combination.
461+- internal "file copy-on-write".
462+- logical deletion, whiteout.
463+- dynamic branch manipulation, adding, deleting and changing permission.
464+- allow bypassing aufs, user's direct branch access.
465+- external inode number translation table and bitmap which maintains the
466+ persistent aufs inode number.
467+- seekable directory, including NFS readdir.
468+- file mapping, mmap and sharing pages.
469+- pseudo-link, hardlink over branches.
470+- loopback mounted filesystem as a branch.
471+- several policies to select one among multiple writable branches.
472+- revert a single systemcall when an error occurs in aufs.
473+- and more...
474+
475+
476+Multi Layered Stackable Unification Filesystem
477+----------------------------------------------------------------------
478+Most people already know what it is.
479+It is a filesystem which unifies several directories and provides a
480+merged single directory. When users access a file, the access will be
481+passed/re-directed/converted (sorry, I am not sure which English word is
482+correct) to the real file on the member filesystem. The member
483+filesystem is called 'lower filesystem' or 'branch' and has a mode
484+'readonly' and 'readwrite.' And the deletion for a file on the lower
485+readonly branch is handled by creating 'whiteout' on the upper writable
486+branch.
487+
488+On LKML, there have been discussions about UnionMount (Jan Blunck,
489+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
490+different approaches to implement the merged-view.
491+The former tries putting it into VFS, and the latter implements as a
492+separate filesystem.
493+(If I misunderstand about these implementations, please let me know and
494+I shall correct it. Because it is a long time ago when I read their
495+source files last time).
496+
497+UnionMount's approach will be able to be small, but may be hard to share
498+branches between several UnionMount since the whiteout in it is
499+implemented in the inode on branch filesystem and always
500+shared. According to Bharata's post, readdir does not seem to be
501+finished yet.
502+There are several missing features known in this implementation such as
503+- for users, the inode number may change silently. eg. copy-up.
504+- link(2) may break by copy-up.
505+- read(2) may get an obsoleted filedata (fstat(2) too).
506+- fcntl(F_SETLK) may be broken by copy-up.
507+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
508+ open(O_RDWR).
509+
510+Unionfs has a longer history. When I started implementing a stacking filesystem
511+(Aug 2005), it already existed. It has virtual super_block, inode,
512+dentry and file objects and they have an array pointing lower same kind
513+objects. After contributing many patches for Unionfs, I re-started my
514+project AUFS (Jun 2006).
515+
516+In AUFS, the structure of the filesystem resembles Unionfs, but I
517+implemented my own ideas, approaches and enhancements and it became
518+a totally different one.
519+
520+Comparing DM snapshot and fs based implementation
521+- the number of bytes to be copied between devices is much smaller.
522+- the type of filesystem must be one and only.
523+- the fs must be writable, no readonly fs, even for the lower original
524+ device. so the compression fs will not be usable. but if we use
525+ loopback mount, we may address this issue.
526+ for instance,
527+ mount /cdrom/squashfs.img /sq
528+ losetup /sq/ext2.img
529+ losetup /somewhere/cow
530+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
531+- it will be difficult (or needs more operations) to extract the
532+ difference between the original device and COW.
533+- DM snapshot-merge may help a lot when users try merging. in the
534+ fs-layer union, users will use rsync(1).
535+
536+
537+Several characters/aspects of aufs
538+----------------------------------------------------------------------
539+
540+Aufs has several characters or aspects.
541+1. a filesystem, callee of VFS helper
542+2. sub-VFS, caller of VFS helper for branches
543+3. a virtual filesystem which maintains persistent inode number
544+4. reader/writer of files on branches, like an application
545+
546+1. Callee of VFS Helper
547+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
548+unlink(2) from an application reaches sys_unlink() kernel function and
549+then vfs_unlink() is called. vfs_unlink() is one of the VFS helpers and it
550+calls filesystem specific unlink operation. Actually aufs implements the
551+unlink operation but it behaves like a redirector.
552+
553+2. Caller of VFS Helper for Branches
554+aufs_unlink() passes the unlink request to the branch filesystem as if
555+it were called from VFS. So the called unlink operation of the branch
556+filesystem acts as usual. As a caller of VFS helper, aufs should handle
557+every necessary pre/post operation for the branch filesystem.
558+- acquire the lock for the parent dir on a branch
559+- lookup in a branch
560+- revalidate dentry on a branch
561+- mnt_want_write() for a branch
562+- vfs_unlink() for a branch
563+- mnt_drop_write() for a branch
564+- release the lock on a branch
565+
566+3. Persistent Inode Number
567+One of the most important issues for a filesystem is to maintain inode
568+numbers. This is particularly important to support exporting a
569+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
570+backend block device of its own. But some storage is necessary to
571+maintain inode number. It may be a large space and may not suit to keep
572+in memory. Aufs rents some space from its first writable branch
573+filesystem (by default) and creates file(s) on it. These files are
574+created by aufs internally and removed soon (currently) keeping opened.
575+Note: Because these files are removed, they are totally gone after
576+ unmounting aufs. It means the inode numbers are not persistent
577+ across unmount or reboot. I have a plan to make them really
578+ persistent which will be important for aufs on NFS server.
579+
580+4. Read/Write Files Internally (copy-on-write)
581+Because a branch can be readonly, when you write a file on it, aufs will
582+"copy-up" it to the upper writable branch internally. And then write the
583+originally requested thing to the file. Generally kernel doesn't
584+open/read/write file actively. In aufs, even a single write may cause an
585+internal "file copy". This behaviour is very similar to cp(1) command.
586+
587+Some people may think it is better to pass such work to user space
588+helper, instead of doing in kernel space. Actually I am still thinking
589+about it. But currently I have implemented it in kernel space.
590diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
591--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
592+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2011-08-24 13:30:24.727980364 +0200
593@@ -0,0 +1,226 @@
594+
595+# Copyright (C) 2005-2011 Junjiro R. Okajima
596+#
597+# This program is free software; you can redistribute it and/or modify
598+# it under the terms of the GNU General Public License as published by
599+# the Free Software Foundation; either version 2 of the License, or
600+# (at your option) any later version.
601+#
602+# This program is distributed in the hope that it will be useful,
603+# but WITHOUT ANY WARRANTY; without even the implied warranty of
604+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
605+# GNU General Public License for more details.
606+#
607+# You should have received a copy of the GNU General Public License
608+# along with this program; if not, write to the Free Software
609+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
610+
611+Basic Aufs Internal Structure
612+
613+Superblock/Inode/Dentry/File Objects
614+----------------------------------------------------------------------
615+Like an ordinary filesystem, aufs has its own
616+superblock/inode/dentry/file objects. All these objects have a
617+dynamically allocated array and store the same kind of pointers to the
618+lower filesystem, branch.
619+For example, when you build a union with one readwrite branch and one
620+readonly, mounted /au, /rw and /ro respectively.
621+- /au = /rw + /ro
622+- /ro/fileA exists but /rw/fileA does not
623+
624+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
625+pointers are stored in an aufs dentry. The array in aufs dentry will be,
626+- [0] = NULL
627+- [1] = /ro/fileA
628+
629+This style of an array is essentially the same for the aufs
630+superblock/inode/dentry/file objects.
631+
632+Because aufs supports manipulating branches, ie. add/delete/change
633+dynamically, these objects have their own generation. When branches are
634+changed, the generation in aufs superblock is incremented. And the
635+generation in another object is compared when it is accessed.
636+When the generation in another object is obsolete, aufs refreshes the
637+internal array.
638+
639+
640+Superblock
641+----------------------------------------------------------------------
642+Additionally aufs superblock has some data for policies to select one
643+among multiple writable branches, XIB files, pseudo-links and kobject.
644+See below in detail.
645+About the policies which support copy-down of a directory, see policy.txt
646+too.
647+
648+
649+Branch and XINO(External Inode Number Translation Table)
650+----------------------------------------------------------------------
651+Every branch has its own xino (external inode number translation table)
652+file. The xino file is created and unlinked by aufs internally. When two
653+members of a union exist on the same filesystem, they share the single
654+xino file.
655+The struct of a xino file is simple, just a sequence of aufs inode
656+numbers which is indexed by the lower inode number.
657+In the above sample, assume the inode number of /ro/fileA is i111 and
658+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
659+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
660+
661+When the inode numbers are not contiguous, the xino file will be sparse
662+which has a hole in it and doesn't consume as much disk space as it
663+might appear. If your branch filesystem consumes disk space for such
664+holes, then you should specify 'xino=' option at mounting aufs.
665+
666+Also a writable branch has three kinds of "whiteout bases". All these
667+exist when the branch is joined to aufs and the names are
668+whiteout-ed doubly, so that users will never see their names in aufs
669+hierarchy.
670+1. a regular file which will be linked to all whiteouts.
671+2. a directory to store a pseudo-link.
672+3. a directory to store an "orphan-ed" file temporarily.
673+
674+1. Whiteout Base
675+ When you remove a file on a readonly branch, aufs handles it as a
676+ logical deletion and creates a whiteout on the upper writable branch
677+ as a hardlink of this file in order not to consume inode on the
678+ writable branch.
679+2. Pseudo-link Dir
680+ See below, Pseudo-link.
681+3. Step-Parent Dir
682+ When "fileC" exists on the lower readonly branch only and it is
683+ opened and removed with its parent dir, and then user writes
684+ something into it, then aufs copies-up fileC to this
685+ directory. Because there is no other dir to store fileC. After
686+ creating a file under this dir, the file is unlinked.
687+
688+Because aufs supports manipulating branches, ie. add/delete/change
689+dynamically, a branch has its own id. When the branch order changes, aufs
690+finds the new index by searching the branch id.
691+
692+
693+Pseudo-link
694+----------------------------------------------------------------------
695+Assume "fileA" exists on the lower readonly branch only and it is
696+hardlinked to "fileB" on the branch. When you write something to fileA,
697+aufs copies-up it to the upper writable branch. Additionally aufs
698+creates a hardlink under the Pseudo-link Directory of the writable
699+branch. The inode of a pseudo-link is kept in aufs super_block as a
700+simple list. If fileB is read after unlinking fileA, aufs returns
701+filedata from the pseudo-link instead of the lower readonly
702+branch. Because the pseudo-link is based upon the inode, to keep the
703+inode number by xino (see above) is important.
704+
705+All the hardlinks under the Pseudo-link Directory of the writable branch
706+should be restored in a proper location later. Aufs provides a utility
707+to do this. The userspace helpers are executed at remounting and unmounting
708+aufs by default.
709+While this utility is running, it puts aufs into the pseudo-link
710+maintenance mode. In this mode, only the process which began the
711+maintenance mode (and its child processes) is allowed to operate in
712+aufs. Some other processes which are not related to the pseudo-link will
713+be allowed to run too, but the rest have to return an error or wait
714+until the maintenance mode ends. If a process already acquires an inode
715+mutex (in VFS), it has to return an error.
716+
717+
718+XIB(external inode number bitmap)
719+----------------------------------------------------------------------
720+In addition to the xino file per branch, aufs has an external inode number
721+bitmap in a superblock object. It is also a file, like a xino file.
722+It is a simple bitmap to mark whether the aufs inode number is in-use or
723+not.
724+To reduce the file I/O, aufs prepares a single memory page to cache xib.
725+
726+Aufs implements a feature to truncate/refresh both of xino and xib to
727+reduce the number of consumed disk blocks for these files.
728+
729+
730+Virtual or Vertical Dir, and Readdir in Userspace
731+----------------------------------------------------------------------
732+In order to support multiple layers (branches), aufs readdir operation
733+constructs a virtual dir block on memory. For readdir, aufs calls
734+vfs_readdir() internally for each dir on branches, merges their entries
735+with eliminating the whiteout-ed ones, and sets it to file (dir)
736+object. So the file object has its entry list until it is closed. The
737+entry list will be updated when the file position is zero and becomes
738+old. This decision is made in aufs automatically.
739+
740+The dynamically allocated memory block for the name of entries has a
741+unit of 512 bytes (by default) and stores the names contiguously (no
742+padding). Another block for each entry is handled by kmem_cache too.
743+While building dir blocks, aufs creates a hash list and judges whether
744+the entry is whiteouted by its upper branch or already listed.
745+The merged result is cached in the corresponding inode object and
746+maintained by a customizable life-time option.
747+
748+Some people may call it can be a security hole or invite DoS attack
749+since the opened and once readdir-ed dir (file object) holds its entry
750+list and becomes a pressure for system memory. But I'd say it is similar
751+to files under /proc or /sys. The virtual files in them also holds a
752+memory page (generally) while they are opened. When an idea to reduce
753+memory for them is introduced, it will be applied to aufs too.
754+For those who really hate this situation, I've developed readdir(3)
755+library which operates this merging in userspace. You just need to set
756+LD_PRELOAD environment variable, and aufs will not consume any memory in
757+kernel space for readdir(3).
758+
759+
760+Workqueue
761+----------------------------------------------------------------------
762+Aufs sometimes requires privilege access to a branch. For instance,
763+in copy-up/down operation. When a user process is going to make changes
764+to a file which exists in the lower readonly branch only, and the mode
765+of one of ancestor directories may not be writable by a user
766+process. Here aufs copies up the file with its ancestors and they may
767+require privilege to set its owner/group/mode/etc.
768+This is a typical case of an application character of aufs (see
769+Introduction).
770+
771+Aufs uses workqueue synchronously for this case. It creates its own
772+workqueue. The workqueue is a kernel thread and has privilege. Aufs
773+passes the request to call mkdir or write (for example), and wait for
774+its completion. This approach solves a problem of a signal handler
775+simply.
776+If aufs didn't adopt the workqueue and changed the privilege of the
777+process, and if the mkdir/write call arises SIGXFSZ or other signal,
778+then the user process might gain a privilege or the generated core file
779+was owned by a superuser.
780+
781+Also aufs uses the system global workqueue ("events" kernel thread) too
782+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
783+whiteout base and etc. This is unrelated to a privilege.
784+Most of aufs operation tries acquiring a rw_semaphore for aufs
785+superblock at the beginning, at the same time waits for the completion
786+of all queued asynchronous tasks.
787+
788+
789+Whiteout
790+----------------------------------------------------------------------
791+The whiteout in aufs is very similar to Unionfs's. That is represented
792+by its filename. UnionMount takes an approach of a file mode, but I am
793+afraid several utilities (find(1) or something) will have to support it.
794+
795+Basically the whiteout represents "logical deletion" which stops aufs from
796+looking up further, but it also represents "dir is opaque" which also stops
797+lookup.
798+
799+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
800+In order to make several functions in a single systemcall to be
801+revertible, aufs adopts an approach to rename a directory to a temporary
802+unique whiteouted name.
803+For example, in rename(2) dir where the target dir already existed, aufs
804+renames the target dir to a temporary unique whiteouted name before the
805+actual rename on a branch and then handles other actions (make it opaque,
806+update the attributes, etc). If an error happens in these actions, aufs
807+simply renames the whiteouted name back and returns an error. If all are
808+succeeded, aufs registers a function to remove the whiteouted unique
809+temporary name completely and asynchronously to the system global
810+workqueue.
811+
812+
813+Copy-up
814+----------------------------------------------------------------------
815+It is a well-known feature or concept.
816+When a user modifies a file on a readonly branch, aufs performs "copy-up"
817+internally and makes the change to the new file on the upper writable branch.
818+When the trigger systemcall does not update the timestamps of the parent
819+dir, aufs reverts it after copy-up.
820diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
821--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
822+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2011-08-24 13:30:24.727980364 +0200
823@@ -0,0 +1,106 @@
824+
825+# Copyright (C) 2005-2011 Junjiro R. Okajima
826+#
827+# This program is free software; you can redistribute it and/or modify
828+# it under the terms of the GNU General Public License as published by
829+# the Free Software Foundation; either version 2 of the License, or
830+# (at your option) any later version.
831+#
832+# This program is distributed in the hope that it will be useful,
833+# but WITHOUT ANY WARRANTY; without even the implied warranty of
834+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
835+# GNU General Public License for more details.
836+#
837+# You should have received a copy of the GNU General Public License
838+# along with this program; if not, write to the Free Software
839+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
840+
841+Lookup in a Branch
842+----------------------------------------------------------------------
843+Since aufs has a character of sub-VFS (see Introduction), it operates
844+lookup for branches as VFS does. It may be a heavy work. Generally
845+speaking struct nameidata is a bigger structure and includes many
846+information. But almost all lookup operation in aufs is the simplest
847+case, ie. lookup only an entry directly connected to its parent. Digging
848+down the directory hierarchy is unnecessary.
849+
850+VFS has a function lookup_one_len() for that use, but it is not usable
851+for a branch filesystem which requires struct nameidata. So aufs
852+implements a simple lookup wrapper function. When a branch filesystem
853+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
854+a simplest nameidata and calls lookup_hash().
855+Here aufs applies "a principle in NFSD", ie. if the filesystem supports
856+NFS-export, then it has to support NULL as a nameidata parameter for
857+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
858+aufs tests if ->s_export_op in the branch is NULL or not.
859+
860+When a branch is a remote filesystem, aufs basically trusts its
861+->d_revalidate(), also aufs forces the hardest revalidate tests for
862+them.
863+For d_revalidate, aufs implements three levels of revalidate tests. See
864+"Revalidate Dentry and UDBA" in detail.
865+
866+
867+Loopback Mount
868+----------------------------------------------------------------------
869+Basically aufs supports any type of filesystem and block device for a
870+branch (actually there are some exceptions). But it is prohibited to add
871+a loopback mounted one whose backend file exists in a filesystem which is
872+already added to aufs. The reason is to protect aufs from a recursive
873+lookup. If it was allowed, the aufs lookup operation might re-enter a
874+lookup for the loopback mounted branch in the same context, and will
875+cause a deadlock.
876+
877+
878+Revalidate Dentry and UDBA (User's Direct Branch Access)
879+----------------------------------------------------------------------
880+Generally VFS helpers re-validate a dentry as a part of lookup.
881+0. digging down the directory hierarchy.
882+1. lock the parent dir by its i_mutex.
883+2. lookup the final (child) entry.
884+3. revalidate it.
885+4. call the actual operation (create, unlink, etc.)
886+5. unlock the parent dir
887+
888+If the filesystem implements its ->d_revalidate() (step 3), then it is
889+called. Actually aufs implements it and checks the dentry on a branch is
890+still valid.
891+But it is not enough. Because aufs has to release the lock for the
892+parent dir on a branch at the end of ->lookup() (step 2) and
893+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
894+held by VFS.
895+If the file on a branch is changed directly, eg. bypassing aufs, after
896+aufs released the lock, then the subsequent operation may cause
897+some unpleasant result.
898+
899+This situation is a result of VFS architecture, ->lookup() and
900+->d_revalidate() is separated. But I never say it is wrong. It is a good
901+design from VFS's point of view. It is just not suitable for sub-VFS
902+character in aufs.
903+
904+Aufs supports such case by three level of revalidation which is
905+selectable by user.
906+1. Simple Revalidate
907+ Addition to the native flow in VFS's, confirm the child-parent
908+ relationship on the branch just after locking the parent dir on the
909+ branch in the "actual operation" (step 4). When this validation
910+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
911+ checks the validation of the dentry on branches.
912+2. Monitor Changes Internally by Inotify/Fsnotify
913+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
914+ the dentry on the branch, and returns EBUSY if it finds different
915+ dentry.
916+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
917+ during it is in cache. When the event is notified, aufs registers a
918+ function to kernel 'events' thread by schedule_work(). And the
919+ function sets some special status to the cached aufs dentry and inode
920+ private data. If they are not cached, then aufs has nothing to
921+ do. When the same file is accessed through aufs (step 0-3) later,
922+ aufs will detect the status and refresh all necessary data.
923+ In this mode, aufs has to ignore the event which is fired by aufs
924+ itself.
925+3. No Extra Validation
926+ This is the simplest test and doesn't add any additional revalidation
927+ test, and skips the revalidation in step 4. It is useful and improves
928+ aufs performance when system surely hide the aufs branches from user,
929+ by over-mounting something (or another method).
930diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
931--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
932+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2011-08-24 13:30:24.727980364 +0200
933@@ -0,0 +1,76 @@
934+
935+# Copyright (C) 2005-2011 Junjiro R. Okajima
936+#
937+# This program is free software; you can redistribute it and/or modify
938+# it under the terms of the GNU General Public License as published by
939+# the Free Software Foundation; either version 2 of the License, or
940+# (at your option) any later version.
941+#
942+# This program is distributed in the hope that it will be useful,
943+# but WITHOUT ANY WARRANTY; without even the implied warranty of
944+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
945+# GNU General Public License for more details.
946+#
947+# You should have received a copy of the GNU General Public License
948+# along with this program; if not, write to the Free Software
949+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
950+
951+Branch Manipulation
952+
953+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
954+and changing its permission/attribute, there are a lot of works to do.
955+
956+
957+Add a Branch
958+----------------------------------------------------------------------
959+o Confirm the adding dir exists outside of aufs, including loopback
960+ mount.
961+- and other various attributes...
962+o Initialize the xino file and whiteout bases if necessary.
963+ See struct.txt.
964+
965+o Check the owner/group/mode of the directory
966+ When the owner/group/mode of the adding directory differs from the
967+ existing branch, aufs issues a warning because it may impose a
968+ security risk.
969+ For example, when a upper writable branch has a world writable empty
970+ top directory, a malicious user can create any files on the writable
971+ branch directly, like copy-up and modify manually. If something like
972+ /etc/{passwd,shadow} exists on the lower readonly branch but not on the upper
973+ writable branch, and the writable branch is world-writable, then a
974+ malicious guy may create /etc/passwd on the writable branch directly
975+ and the infected file will be valid in aufs.
976+ I am afraid it can be a security issue, but nothing to do except
977+ producing a warning.
978+
979+
980+Delete a Branch
981+----------------------------------------------------------------------
982+o Confirm the deleting branch is not busy
983+ To be general, there is one merit to adopt "remount" interface to
984+ manipulate branches. It is to discard caches. At deleting a branch,
985+ aufs checks the still cached (and connected) dentries and inodes. If
986+ there are any, then they are all in-use. An inode without its
987+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
988+
989+ For the cached one, aufs checks whether the same named entry exists on
990+ other branches.
991+ If the cached one is a directory, because aufs provides a merged view
992+ to users, as long as one dir is left on any branch aufs can show the
993+ dir to users. In this case, the branch can be removed from aufs.
994+ Otherwise aufs rejects deleting the branch.
995+
996+ If any file on the deleting branch is opened by aufs, then aufs
997+ rejects deleting.
998+
999+
1000+Modify the Permission of a Branch
1001+----------------------------------------------------------------------
1002+o Re-initialize or remove the xino file and whiteout bases if necessary.
1003+ See struct.txt.
1004+
1005+o rw --> ro: Confirm the modifying branch is not busy
1006+ Aufs rejects the request if any of these conditions are true.
1007+ - a file on the branch is mmap-ed.
1008+ - a regular file on the branch is opened for write and there is no
1009+ same named entry on the upper branch.
1010diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1011--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1012+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2011-08-24 13:30:24.727980364 +0200
1013@@ -0,0 +1,65 @@
1014+
1015+# Copyright (C) 2005-2011 Junjiro R. Okajima
1016+#
1017+# This program is free software; you can redistribute it and/or modify
1018+# it under the terms of the GNU General Public License as published by
1019+# the Free Software Foundation; either version 2 of the License, or
1020+# (at your option) any later version.
1021+#
1022+# This program is distributed in the hope that it will be useful,
1023+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1024+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1025+# GNU General Public License for more details.
1026+#
1027+# You should have received a copy of the GNU General Public License
1028+# along with this program; if not, write to the Free Software
1029+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1030+
1031+Policies to Select One among Multiple Writable Branches
1032+----------------------------------------------------------------------
1033+When the number of writable branch is more than one, aufs has to decide
1034+the target branch for file creation or copy-up. By default, the highest
1035+writable branch which has the parent (or ancestor) dir of the target
1036+file is chosen (top-down-parent policy).
1037+By user's request, aufs implements some other policies to select the
1038+writable branch, for file creation two policies, round-robin and
1039+most-free-space policies. For copy-up three policies, top-down-parent,
1040+bottom-up-parent and bottom-up policies.
1041+
1042+As expected, the round-robin policy selects the branch in circular. When
1043+you have two writable branches and creates 10 new files, 5 files will be
1044+created for each branch. mkdir(2) systemcall is an exception. When you
1045+create 10 new directories, all will be created on the same branch.
1046+And the most-free-space policy selects the one which has most free
1047+space among the writable branches. The amount of free space will be
1048+checked by aufs internally, and users can specify its time interval.
1049+
1050+The policies for copy-up are simpler,
1051+top-down-parent is equivalent to the same named one in create policy,
1052+bottom-up-parent selects the writable branch where the parent dir
1053+exists and the nearest upper one from the copyup-source,
1054+bottom-up selects the nearest upper writable branch from the
1055+copyup-source, regardless the existence of the parent dir.
1056+
1057+There are some rules or exceptions to apply these policies.
1058+- If there is a readonly branch above the policy-selected branch and
1059+ the parent dir is marked as opaque (a variation of whiteout), or the
1060+ target (creating) file is whiteout-ed on the upper readonly branch,
1061+ then the result of the policy is ignored and the target file will be
1062+ created on the nearest upper writable branch than the readonly branch.
1063+- If there is a writable branch above the policy-selected branch and
1064+ the parent dir is marked as opaque or the target file is whiteouted
1065+ on the branch, then the result of the policy is ignored and the target
1066+ file will be created on the highest one among the upper writable
1067+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1068+ it as usual.
1069+- link(2) and rename(2) systemcalls are exceptions in every policy.
1070+ They try selecting the branch where the source exists as possible
1071+ since copyup a large file will take long time. If it can't be,
1072+ ie. the branch where the source exists is readonly, then they will
1073+ follow the copyup policy.
1074+- There is an exception for rename(2) when the target exists.
1075+ If the rename target exists, aufs compares the index of the branches
1076+ where the source and the target exists and selects the higher
1077+ one. If the selected branch is readonly, then aufs follows the
1078+ copyup policy.
1079diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1080--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
1081+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2011-08-24 13:30:24.727980364 +0200
1082@@ -0,0 +1,47 @@
1083+
1084+# Copyright (C) 2005-2011 Junjiro R. Okajima
1085+#
1086+# This program is free software; you can redistribute it and/or modify
1087+# it under the terms of the GNU General Public License as published by
1088+# the Free Software Foundation; either version 2 of the License, or
1089+# (at your option) any later version.
1090+#
1091+# This program is distributed in the hope that it will be useful,
1092+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1093+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1094+# GNU General Public License for more details.
1095+#
1096+# You should have received a copy of the GNU General Public License
1097+# along with this program; if not, write to the Free Software
1098+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1099+
1100+mmap(2) -- File Memory Mapping
1101+----------------------------------------------------------------------
1102+In aufs, the file-mapped pages are handled by a branch fs directly, no
1103+interaction with aufs. It means aufs_mmap() calls the branch fs's
1104+->mmap().
1105+This approach is simple and good, but there is one problem.
1106+Under /proc, several entries show the mmap-ped files by its path (with
1107+device and inode number), and the printed path will be the path on the
1108+branch fs's instead of virtual aufs's.
1109+This is not a problem in most cases, but some utilities lsof(1) (and its
1110+user) may expect the path on aufs.
1111+
1112+To address this issue, aufs adds a new member called vm_prfile in struct
1113+vm_area_struct (and struct vm_region). The original vm_file points to
1114+the file on the branch fs in order to handle everything correctly as
1115+usual. The new vm_prfile points to a virtual file in aufs, and the
1116+show-functions in procfs refers to vm_prfile if it is set.
1117+Also we need to maintain several other places where touching vm_file
1118+such like
1119+- fork()/clone() copies vma and the reference count of vm_file is
1120+ incremented.
1121+- merging vma maintains the ref count too.
1122+
1123+This is not a good approach. It is just faking the printed path. But it
1124+leaves all behaviour around f_mapping unchanged. This is surely an
1125+advantage.
1126+Actually aufs had adopted another complicated approach which calls
1127+generic_file_mmap() and handles struct vm_operations_struct. In this
1128+approach, aufs met a hard problem and I could not solve it without
1129+switching the approach.
1130diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
1131--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
1132+++ linux/Documentation/filesystems/aufs/design/07export.txt 2011-08-24 13:30:24.727980364 +0200
1133@@ -0,0 +1,59 @@
1134+
1135+# Copyright (C) 2005-2011 Junjiro R. Okajima
1136+#
1137+# This program is free software; you can redistribute it and/or modify
1138+# it under the terms of the GNU General Public License as published by
1139+# the Free Software Foundation; either version 2 of the License, or
1140+# (at your option) any later version.
1141+#
1142+# This program is distributed in the hope that it will be useful,
1143+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1144+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1145+# GNU General Public License for more details.
1146+#
1147+# You should have received a copy of the GNU General Public License
1148+# along with this program; if not, write to the Free Software
1149+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1150+
1151+Export Aufs via NFS
1152+----------------------------------------------------------------------
1153+Here is an approach.
1154+- like xino/xib, add a new file 'xigen' which stores aufs inode
1155+ generation.
1156+- iget_locked(): initialize aufs inode generation for a new inode, and
1157+ store it in xigen file.
1158+- destroy_inode(): increment aufs inode generation and store it in xigen
1159+ file. it is necessary even if it is not unlinked, because any data of
1160+ inode may be changed by UDBA.
1161+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1162+ build file handle by
1163+ + branch id (4 bytes)
1164+ + superblock generation (4 bytes)
1165+ + inode number (4 or 8 bytes)
1166+ + parent dir inode number (4 or 8 bytes)
1167+ + inode generation (4 bytes)
1168+ + return value of exportfs_encode_fh() for the parent on a branch (4
1169+ bytes)
1170+ + file handle for a branch (by exportfs_encode_fh())
1171+- fh_to_dentry():
1172+ + find the index of a branch from its id in handle, and check that it
1173+ still exists in aufs.
1174+ + 1st level: get the inode number from handle and search it in cache.
1175+ + 2nd level: if not found, get the parent inode number from handle and
1176+ search it in cache. and then open the parent dir, find the matching
1177+ inode number by vfs_readdir() and get its name, and call
1178+ lookup_one_len() for the target dentry.
1179+ + 3rd level: if the parent dir is not cached, call
1180+ exportfs_decode_fh() for a branch and get the parent on a branch,
1181+ build a pathname of it, convert it to a pathname in aufs, call
1182+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1183+ the 2nd level.
1184+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1185+ for every branch, but not itself. to get this, (currently) aufs
1186+ searches in current->nsproxy->mnt_ns list. it may not be a good
1187+ idea, but I didn't get other approach.
1188+ + test the generation of the gotten inode.
1189+- every inode operation: they may get EBUSY due to UDBA. in this case,
1190+ convert it into ESTALE for NFSD.
1191+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1192+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
1193diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
1194--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
1195+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2011-08-24 13:30:24.727980364 +0200
1196@@ -0,0 +1,53 @@
1197+
1198+# Copyright (C) 2005-2011 Junjiro R. Okajima
1199+#
1200+# This program is free software; you can redistribute it and/or modify
1201+# it under the terms of the GNU General Public License as published by
1202+# the Free Software Foundation; either version 2 of the License, or
1203+# (at your option) any later version.
1204+#
1205+# This program is distributed in the hope that it will be useful,
1206+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1207+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1208+# GNU General Public License for more details.
1209+#
1210+# You should have received a copy of the GNU General Public License
1211+# along with this program; if not, write to the Free Software
1212+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1213+
1214+Show Whiteout Mode (shwh)
1215+----------------------------------------------------------------------
1216+Generally aufs hides the name of whiteouts. But in some cases, to show
1217+them is very useful for users. For instance, creating a new middle layer
1218+(branch) by merging existing layers.
1219+
1220+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1221+When you have three branches,
1222+- Bottom: 'system', squashfs (underlying base system), read-only
1223+- Middle: 'mods', squashfs, read-only
1224+- Top: 'overlay', ram (tmpfs), read-write
1225+
1226+The top layer is loaded at boot time and saved at shutdown, to preserve
1227+the changes made to the system during the session.
1228+When larger changes have been made, or smaller changes have accumulated,
1229+the size of the saved top layer data grows. At this point, it would be
1230+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1231+and rewrite the 'mods' squashfs, clearing the top layer and thus
1232+restoring save and load speed.
1233+
1234+This merging is simplified by the use of another aufs mount, of just the
1235+two overlay branches using the 'shwh' option.
1236+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1237+ aufs /livesys/merge_union
1238+
1239+A merged view of these two branches is then available at
1240+/livesys/merge_union, and the new feature is that the whiteouts are
1241+visible!
1242+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1243+writing to all branches. Also the default mode for all branches is 'ro'.
1244+It is now possible to save the combined contents of the two overlay
1245+branches to a new squashfs, e.g.:
1246+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1247+
1248+This new squashfs archive can be stored on the boot device and the
1249+initramfs will use it to replace the old one at the next boot.
1250diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
1251--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
1252+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2011-08-24 13:30:24.727980364 +0200
1253@@ -0,0 +1,47 @@
1254+
1255+# Copyright (C) 2010-2011 Junjiro R. Okajima
1256+#
1257+# This program is free software; you can redistribute it and/or modify
1258+# it under the terms of the GNU General Public License as published by
1259+# the Free Software Foundation; either version 2 of the License, or
1260+# (at your option) any later version.
1261+#
1262+# This program is distributed in the hope that it will be useful,
1263+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1264+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1265+# GNU General Public License for more details.
1266+#
1267+# You should have received a copy of the GNU General Public License
1268+# along with this program; if not, write to the Free Software
1269+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1270+
1271+Dynamically customizable FS operations
1272+----------------------------------------------------------------------
1273+Generally FS operations (struct inode_operations, struct
1274+address_space_operations, struct file_operations, etc.) are defined as
1275+"static const", but it never means that FS have only one set of
1276+operation. Some FS have multiple sets of them. For instance, ext2 has
1277+three sets, one for XIP, for NOBH, and for normal.
1278+Since aufs overrides and redirects these operations, sometimes aufs has
1279+to change its behaviour according to the branch FS type. More importantly
1280+VFS acts differently if a function (member in the struct) is set or
1281+not. It means aufs should have several sets of operations and select one
1282+among them according to the branch FS definition.
1283+
1284+In order to solve this problem and not to affect the behaviour of VFS,
1285+aufs defines these operations dynamically. For instance, aufs defines
1286+aio_read function for struct file_operations, but it may not be set to
1287+the file_operations. When the branch FS doesn't have it, aufs doesn't
1288+set it to its file_operations while the function definition itself is
1289+still alive. So the behaviour of io_submit(2) will not change, and it
1290+will return an error when aio_read is not defined.
1291+
1292+The lifetime of these dynamically generated operation object is
1293+maintained by aufs branch object. When the branch is removed from aufs,
1294+the reference counter of the object is decremented. When it reaches
1295+zero, the dynamically generated operation object will be freed.
1296+
1297+This approach is designed to support AIO (io_submit), Direct I/O and
1298+XIP mainly.
1299+Currently this approach is applied to file_operations and
1300+vm_operations_struct for regular files only.
1301diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt linux/Documentation/filesystems/aufs/design/99plan.txt
1302--- /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
1303+++ linux/Documentation/filesystems/aufs/design/99plan.txt 2011-08-24 13:30:24.727980364 +0200
1304@@ -0,0 +1,96 @@
1305+
1306+# Copyright (C) 2005-2011 Junjiro R. Okajima
1307+#
1308+# This program is free software; you can redistribute it and/or modify
1309+# it under the terms of the GNU General Public License as published by
1310+# the Free Software Foundation; either version 2 of the License, or
1311+# (at your option) any later version.
1312+#
1313+# This program is distributed in the hope that it will be useful,
1314+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1315+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1316+# GNU General Public License for more details.
1317+#
1318+# You should have received a copy of the GNU General Public License
1319+# along with this program; if not, write to the Free Software
1320+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1321+
1322+Plan
1323+
1324+Restoring some features which were implemented in aufs1.
1325+They were dropped in aufs2 in order to make source files simpler and
1326+easier to be reviewed.
1327+
1328+
1329+Test Only the Highest One for the Directory Permission (dirperm1 option)
1330+----------------------------------------------------------------------
1331+Let's try case study.
1332+- aufs has two branches, upper readwrite and lower readonly.
1333+ /au = /rw + /ro
1334+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1335+- user invoked "chmod a+rx /au/dirA"
1336+- then "dirA" becomes world readable?
1337+
1338+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1339+or it may be a natively readonly filesystem. If aufs respects the lower
1340+branch, it should not respond readdir request from other users. But user
1341+allowed it by chmod. Should aufs really reject showing the entries
1342+under /ro/dirA?
1343+
1344+To be honest, I don't have a best solution for this case. So I
1345+implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to
1346+users.
1347+When dirperm1 is specified, aufs checks only the highest one for the
1348+directory permission, and shows the entries. Otherwise, as usual, checks
1349+every dir existing on all branches and rejects the request.
1350+
1351+As a side effect, dirperm1 option improves the performance of aufs
1352+because the number of permission check is reduced.
1353+
1354+
1355+Being Another Aufs's Readonly Branch (robr)
1356+----------------------------------------------------------------------
1357+Aufs1 allows aufs to be another aufs's readonly branch.
1358+This feature was developed by a user's request. But it may not be used
1359+currently.
1360+
1361+
1362+Copy-up on Open (coo=)
1363+----------------------------------------------------------------------
1364+By default the internal copy-up is executed when it is really necessary.
1365+It is not done when a file is opened for writing, but when write(2) is
1366+done. Users who have many (over 100) branches want to know and analyse
1367+when and what file is copied-up. To insert a new upper branch which
1368+contains such files only may improve the performance of aufs.
1369+
1370+Aufs1 implemented "coo=none | leaf | all" option.
1371+
1372+
1373+Refresh the Opened File (refrof)
1374+----------------------------------------------------------------------
1375+This option is implemented in aufs1 but incomplete.
1376+
1377+When user reads from a file, he expects to get its latest filedata
1378+generally. If the file is removed and a new same named file is created,
1379+the content he gets is unchanged, ie. the unlinked filedata.
1380+
1381+Let's try case study again.
1382+- aufs has two branches.
1383+ /au = /rw + /ro
1384+- "fileA" exists under /ro, but not under /rw.
1385+- user opened "/au/fileA".
1386+- he or someone else inserts a branch (/new) between /rw and /ro.
1387+ /au = /rw + /new + /ro
1388+- the new branch has "fileA".
1389+- user reads from the opened "fileA"
1390+- which filedata should aufs return, from /ro or /new?
1391+
1392+Some people say it has to be "from /ro" and it is a semantics of Unix.
1393+The others say it should be "from /new" because the file is not removed
1394+and it is equivalent to the case of someone else modifies the file.
1395+
1396+Here again I don't have a best and final answer. I got an idea to
1397+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
1398+Opened File) is specified (by default), aufs returns the filedata from
1399+/new.
1400+Otherwise from /ro.
1401diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
1402--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
1403+++ linux/Documentation/filesystems/aufs/README 2011-10-24 20:51:51.580466925 +0200
1404@@ -0,0 +1,313 @@
53392da6
AM
1405+
1406+Aufs3 -- advanced multi layered unification filesystem version 3.x
1407+http://aufs.sf.net
1408+Junjiro R. Okajima
1409+
1410+
1411+0. Introduction
1412+----------------------------------------
1413+In the early days, aufs was entirely re-designed and re-implemented
1414+Unionfs Version 1.x series. After many original ideas, approaches,
1415+improvements and implementations, it becomes totally different from
1416+Unionfs while keeping the basic features.
1417+Recently, Unionfs Version 2.x series begin taking some of the same
1418+approaches to aufs1's.
1419+Unionfs is being developed by Professor Erez Zadok at Stony Brook
1420+University and his team.
1421+
1422+Aufs3 supports linux-3.0 and later.
1423+If you want older kernel version support, try aufs2-2.6.git or
1424+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
1425+
1426+Note: it becomes clear that "Aufs was rejected. Let's give it up."
1427+According to Christoph Hellwig, linux rejects all union-type filesystems
1428+but UnionMount.
1429+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
1430+
1431+
1432+1. Features
1433+----------------------------------------
1434+- unite several directories into a single virtual filesystem. The member
1435+ directory is called as a branch.
1436+- you can specify the permission flags to the branch, which are 'readonly',
1437+ 'readwrite' and 'whiteout-able.'
1438+- by upper writable branch, internal copyup and whiteout, files/dirs on
1439+ readonly branch are modifiable logically.
1440+- dynamic branch manipulation, add, del.
1441+- etc...
1442+
1443+Also there are many enhancements in aufs1, such as:
1444+- readdir(3) in userspace.
1445+- keep inode number by external inode number table
1446+- keep the timestamps of file/dir in internal copyup operation
1447+- seekable directory, supporting NFS readdir.
1448+- whiteout is hardlinked in order to reduce the consumption of inodes
1449+ on branch
1450+- do not copyup, nor create a whiteout when it is unnecessary
1451+- revert a single systemcall when an error occurs in aufs
1452+- remount interface instead of ioctl
1453+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
1454+- loopback mounted filesystem as a branch
1455+- kernel thread for removing the dir who has a plenty of whiteouts
1456+- support copyup sparse file (a file which has a 'hole' in it)
1457+- default permission flags for branches
1458+- selectable permission flags for ro branch, whether whiteout can
1459+ exist or not
1460+- export via NFS.
1461+- support <sysfs>/fs/aufs and <debugfs>/aufs.
1462+- support multiple writable branches, some policies to select one
1463+ among multiple writable branches.
1464+- a new semantics for link(2) and rename(2) to support multiple
1465+ writable branches.
1466+- no glibc changes are required.
1467+- pseudo hardlink (hardlink over branches)
1468+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
1469+ including NFS or remote filesystem branch.
1470+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
1471+- and more...
1472+
1473+Currently these features are dropped temporarily from aufs3.
1474+See design/99plan.txt for details.
1475+- test only the highest one for the directory permission (dirperm1)
1476+- copyup on open (coo=)
1477+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
1478+ (robr)
1479+- statistics of aufs thread (/sys/fs/aufs/stat)
1480+- delegation mode (dlgt)
1481+ a delegation of the internal branch access to support task I/O
1482+ accounting, which also supports Linux Security Modules (LSM) mainly
1483+ for Suse AppArmor.
1484+- intent.open/create (file open in a single lookup)
1485+
1486+Features or just an idea in the future (see also design/*.txt),
1487+- reorder the branch index without del/re-add.
1488+- permanent xino files for NFSD
1489+- an option for refreshing the opened files after add/del branches
1490+- 'move' policy for copy-up between two writable branches, after
1491+ checking free space.
1492+- light version, without branch manipulation. (unnecessary?)
1493+- copyup in userspace
1494+- inotify in userspace
1495+- readv/writev
1496+- xattr, acl
1497+
1498+
1499+2. Download
1500+----------------------------------------
1e00d052
AM
1501+There were three GIT trees for aufs3, aufs3-linux.git,
1502+aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in
1503+"aufs-util.git."
1504+While the aufs-util is always necessary, you need either of aufs3-linux
1505+or aufs3-standalone.
1506+
1507+The aufs3-linux tree includes the whole linux mainline GIT tree,
1508+git://git.kernel.org/.../torvalds/linux.git.
1509+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
1510+build aufs3 as an external kernel module.
1511+
1512+On the other hand, the aufs3-standalone tree has only aufs source files
53392da6
AM
1513+and necessary patches, and you can select CONFIG_AUFS_FS=m.
1514+
1515+You will find GIT branches whose name is in form of "aufs3.x" where "x"
1516+represents the linux kernel version, "linux-3.x". For instance,
1e00d052
AM
1517+"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use
1518+"aufs3.x-rcN" branch.
1519+
1520+o aufs3-linux tree
1521+$ git clone --reference /your/linux/git/tree \
1522+ git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-linux.git \
1523+ aufs3-linux.git
1524+- if you don't have linux GIT tree, then remove "--reference ..."
1525+$ cd aufs3-linux.git
1526+$ git checkout origin/aufs3.0
53392da6
AM
1527+
1528+o aufs3-standalone tree
1529+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \
1530+ aufs3-standalone.git
1531+$ cd aufs3-standalone.git
1532+$ git checkout origin/aufs3.0
1533+
1534+o aufs-util tree
1535+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \
1536+ aufs-util.git
1537+$ cd aufs-util.git
1538+$ git checkout origin/aufs3.0
1539+
1540+You may not be able to find the GIT branch in aufs-util for your
1541+version. In this case, you should git-checkout the branch for the
1542+nearest lower number.
1543+If you are using linux-3.10 and aufs3.10 (which are not released yet),
1544+but the "aufs3.10" branch doesn't exist in this repository, then
1545+"aufs3.9", "aufs3.8", ... or something is the branch for you.
1546+Also you can view all branches by
1547+ $ git branch -a
1548+
1549+
1550+3. Configuration and Compilation
1551+----------------------------------------
1552+Make sure you have git-checkout'ed the correct branch.
1553+
1e00d052
AM
1554+For aufs3-linux tree,
1555+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
1556+- set other aufs configurations if necessary.
1557+
53392da6
AM
1558+For aufs3-standalone tree,
1559+There are several ways to build.
1560+
1561+1.
1562+- apply ./aufs3-kbuild.patch to your kernel source files.
1563+- apply ./aufs3-base.patch too.
1564+- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and
1565+ others including lsof(1)) show the file path on aufs instead of the
1566+ path on the branch fs.
1567+- apply ./aufs3-standalone.patch too, if you have a plan to set
1568+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch.
1569+- copy ./{Documentation,fs,include/linux/aufs_type.h} files to your
1570+ kernel source tree. Never copy ./include/linux/Kbuild.
1571+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
1572+ =m or =y.
1573+- and build your kernel as usual.
1574+- install the built kernel.
1575+- install the header files too by "make headers_install".
1576+- and reboot your system.
1577+
1578+2.
1579+- module only (CONFIG_AUFS_FS=m).
1580+- apply ./aufs3-base.patch to your kernel source files.
1581+- apply ./aufs3-proc_map.patch too to your kernel source files,
1582+ if you want to make /proc/PID/maps (and others including lsof(1)) show
1583+ the file path on aufs instead of the path on the branch fs.
1584+- apply ./aufs3-standalone.patch too.
1585+- build your kernel, don't forget "make headers_install", and reboot.
1586+- edit ./config.mk and set other aufs configurations if necessary.
1587+ Note: You should read ./fs/aufs/Kconfig carefully which describes
1588+ every aufs configurations.
1589+- build the module by simple "make".
1590+- you can specify ${KDIR} make variable which points to your kernel
1591+ source tree.
1592+- install the files
1593+ + run "make install" to install the aufs module, or copy the built
1594+ ./aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
1595+ + run "make headers_install" to install the aufs header file (you can
1596+ specify DESTDIR), or copy ./usr/include/linux/aufs_type.h to
1597+ /usr/include/linux or wherever you like.
1598+- no need to apply aufs3-kbuild.patch, nor copying source files to your
1599+ kernel source tree.
1600+
1601+Note: The header file aufs_type.h is necessary to build aufs-util
1602+ as well as "make headers_install" in the kernel source tree.
1603+ headers_install is subject to be forgotten, but it is essentially
1604+ necessary, not only for building aufs-util.
1605+ You may not meet problems without headers_install in some older
1606+ version though.
1607+
1608+And then,
1609+- read README in aufs-util, build and install it
1610+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
1611+ then run "make install_ulib" too. And refer to the aufs manual in
1612+ detail.
1613+
1614+
1615+4. Usage
1616+----------------------------------------
1617+At first, make sure aufs-util are installed, and please read the aufs
1618+manual, aufs.5 in aufs-util.git tree.
1619+$ man -l aufs.5
1620+
1621+And then,
1622+$ mkdir /tmp/rw /tmp/aufs
1623+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
1624+
1625+Here is another example. The result is equivalent.
1626+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
1627+ Or
1628+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
1629+# mount -o remount,append:${HOME} /tmp/aufs
1630+
1631+Then, you can see whole tree of your home dir through /tmp/aufs. If
1632+you modify a file under /tmp/aufs, the one on your home directory is
1633+not affected, instead the same named file will be newly created under
1634+/tmp/rw. And all of your modification to a file will be applied to
1635+the one under /tmp/rw. This is called the file based Copy on Write
1636+(COW) method.
1637+Aufs mount options are described in aufs.5.
1638+If you run chroot or something and make your aufs as a root directory,
1639+then you need to customize the shutdown script. See the aufs manual in
1640+detail.
1641+
1642+Additionally, there are some sample usages of aufs which are a
1643+diskless system with network booting, and LiveCD over NFS.
1644+See sample dir in CVS tree on SourceForge.
1645+
1646+
1647+5. Contact
1648+----------------------------------------
1649+When you have any problems or strange behaviour in aufs, please let me
1650+know with:
1651+- /proc/mounts (instead of the output of mount(8))
1652+- /sys/module/aufs/*
1653+- /sys/fs/aufs/* (if you have them)
1654+- /debug/aufs/* (if you have them)
1655+- linux kernel version
1656+ if your kernel is not plain, for example modified by distributor,
1657+ the url where i can download its source is necessary too.
1658+- aufs version which was printed at loading the module or booting the
1659+ system, instead of the date you downloaded.
1660+- configuration (define/undefine CONFIG_AUFS_xxx)
1661+- kernel configuration or /proc/config.gz (if you have it)
1662+- behaviour which you think to be incorrect
1663+- actual operation, reproducible one is better
1664+- mailto: aufs-users at lists.sourceforge.net
1665+
1666+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
1667+and Feature Requests) on SourceForge. Please join and write to
1668+aufs-users ML.
1669+
1670+
1671+6. Acknowledgements
1672+----------------------------------------
1673+Thanks to everyone who have tried and are using aufs, whoever
1674+have reported a bug or any feedback.
1675+
1676+Especially donators:
1677+Tomas Matejicek(slax.org) made a donation (much more than once).
1678+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
1679+ scripts) is making "doubling" donations.
1680+ Unfortunately I cannot list all of the donators, but I really
1681+ appreciate.
1682+ It ends Aug 2010, but the ordinary donation URL is still available.
1683+ <http://sourceforge.net/donate/index.php?group_id=167503>
1684+Dai Itasaka made a donation (2007/8).
1685+Chuck Smith made a donation (2008/4, 10 and 12).
1686+Henk Schoneveld made a donation (2008/9).
1687+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
1688+Francois Dupoux made a donation (2008/11).
1689+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
1690+ aufs2 GIT tree (2009/2).
1691+William Grant made a donation (2009/3).
1692+Patrick Lane made a donation (2009/4).
1693+The Mail Archive (mail-archive.com) made donations (2009/5).
1694+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
1695+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
1696+Pavel Pronskiy made a donation (2011/2).
1697+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
1698+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
1699+Max Lekomcev (DOM-TV project) made a donation (2011/7).
1e00d052 1700+Sam Liddicott made a donation (2011/9).
53392da6
AM
1701+
1702+Thank you very much.
1703+Donations are always, including future donations, very important and
1704+helpful for me to keep on developing aufs.
1705+
1706+
1707+7.
1708+----------------------------------------
1709+If you are an experienced user, no explanation is needed. Aufs is
1710+just a linux filesystem.
1711+
1712+
1713+Enjoy!
1714+
1715+# Local variables: ;
1716+# mode: text;
1717+# End: ;
7f207e10
AM
1718diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
1719--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 1720+++ linux/fs/aufs/aufs.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 1721@@ -0,0 +1,60 @@
7f207e10 1722+/*
027c5e7a 1723+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1724+ *
1725+ * This program, aufs is free software; you can redistribute it and/or modify
1726+ * it under the terms of the GNU General Public License as published by
1727+ * the Free Software Foundation; either version 2 of the License, or
1728+ * (at your option) any later version.
1729+ *
1730+ * This program is distributed in the hope that it will be useful,
1731+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1732+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1733+ * GNU General Public License for more details.
1734+ *
1735+ * You should have received a copy of the GNU General Public License
1736+ * along with this program; if not, write to the Free Software
1737+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1738+ */
1739+
1740+/*
1741+ * all header files
1742+ */
1743+
1744+#ifndef __AUFS_H__
1745+#define __AUFS_H__
1746+
1747+#ifdef __KERNEL__
1748+
1749+#define AuStub(type, name, body, ...) \
1750+ static inline type name(__VA_ARGS__) { body; }
1751+
1752+#define AuStubVoid(name, ...) \
1753+ AuStub(void, name, , __VA_ARGS__)
1754+#define AuStubInt0(name, ...) \
1755+ AuStub(int, name, return 0, __VA_ARGS__)
1756+
1757+#include "debug.h"
1758+
1759+#include "branch.h"
1760+#include "cpup.h"
1761+#include "dcsub.h"
1762+#include "dbgaufs.h"
1763+#include "dentry.h"
1764+#include "dir.h"
1765+#include "dynop.h"
1766+#include "file.h"
1767+#include "fstype.h"
1768+#include "inode.h"
1769+#include "loop.h"
1770+#include "module.h"
7f207e10
AM
1771+#include "opts.h"
1772+#include "rwsem.h"
1773+#include "spl.h"
1774+#include "super.h"
1775+#include "sysaufs.h"
1776+#include "vfsub.h"
1777+#include "whout.h"
1778+#include "wkq.h"
1779+
1780+#endif /* __KERNEL__ */
1781+#endif /* __AUFS_H__ */
1782diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
1783--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
1784+++ linux/fs/aufs/branch.c 2011-08-24 13:30:24.731313534 +0200
1785@@ -0,0 +1,1170 @@
7f207e10 1786+/*
027c5e7a 1787+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1788+ *
1789+ * This program, aufs is free software; you can redistribute it and/or modify
1790+ * it under the terms of the GNU General Public License as published by
1791+ * the Free Software Foundation; either version 2 of the License, or
1792+ * (at your option) any later version.
1793+ *
1794+ * This program is distributed in the hope that it will be useful,
1795+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1796+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1797+ * GNU General Public License for more details.
1798+ *
1799+ * You should have received a copy of the GNU General Public License
1800+ * along with this program; if not, write to the Free Software
1801+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1802+ */
1803+
1804+/*
1805+ * branch management
1806+ */
1807+
027c5e7a 1808+#include <linux/compat.h>
7f207e10
AM
1809+#include <linux/file.h>
1810+#include <linux/statfs.h>
1811+#include "aufs.h"
1812+
1813+/*
1814+ * free a single branch
1facf9fc 1815+ */
1816+static void au_br_do_free(struct au_branch *br)
1817+{
1818+ int i;
1819+ struct au_wbr *wbr;
4a4d8108 1820+ struct au_dykey **key;
1facf9fc 1821+
027c5e7a
AM
1822+ au_hnotify_fin_br(br);
1823+
1facf9fc 1824+ if (br->br_xino.xi_file)
1825+ fput(br->br_xino.xi_file);
1826+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
1827+
1828+ AuDebugOn(atomic_read(&br->br_count));
1829+
1830+ wbr = br->br_wbr;
1831+ if (wbr) {
1832+ for (i = 0; i < AuBrWh_Last; i++)
1833+ dput(wbr->wbr_wh[i]);
1834+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 1835+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 1836+ }
1837+
4a4d8108
AM
1838+ key = br->br_dykey;
1839+ for (i = 0; i < AuBrDynOp; i++, key++)
1840+ if (*key)
1841+ au_dy_put(*key);
1842+ else
1843+ break;
1844+
1facf9fc 1845+ mntput(br->br_mnt);
1facf9fc 1846+ kfree(wbr);
1847+ kfree(br);
1848+}
1849+
1850+/*
1851+ * frees all branches
1852+ */
1853+void au_br_free(struct au_sbinfo *sbinfo)
1854+{
1855+ aufs_bindex_t bmax;
1856+ struct au_branch **br;
1857+
dece6358
AM
1858+ AuRwMustWriteLock(&sbinfo->si_rwsem);
1859+
1facf9fc 1860+ bmax = sbinfo->si_bend + 1;
1861+ br = sbinfo->si_branch;
1862+ while (bmax--)
1863+ au_br_do_free(*br++);
1864+}
1865+
1866+/*
1867+ * find the index of a branch which is specified by @br_id.
1868+ */
1869+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1870+{
1871+ aufs_bindex_t bindex, bend;
1872+
1873+ bend = au_sbend(sb);
1874+ for (bindex = 0; bindex <= bend; bindex++)
1875+ if (au_sbr_id(sb, bindex) == br_id)
1876+ return bindex;
1877+ return -1;
1878+}
1879+
1880+/* ---------------------------------------------------------------------- */
1881+
1882+/*
1883+ * add a branch
1884+ */
1885+
b752ccd1
AM
1886+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
1887+ struct dentry *h_root)
1facf9fc 1888+{
b752ccd1
AM
1889+ if (unlikely(h_adding == h_root
1890+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 1891+ return 1;
b752ccd1
AM
1892+ if (h_adding->d_sb != h_root->d_sb)
1893+ return 0;
1894+ return au_test_subdir(h_adding, h_root)
1895+ || au_test_subdir(h_root, h_adding);
1facf9fc 1896+}
1897+
1898+/*
1899+ * returns a newly allocated branch. @new_nbranch is a number of branches
1900+ * after adding a branch.
1901+ */
1902+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1903+ int perm)
1904+{
1905+ struct au_branch *add_branch;
1906+ struct dentry *root;
4a4d8108 1907+ int err;
1facf9fc 1908+
4a4d8108 1909+ err = -ENOMEM;
1facf9fc 1910+ root = sb->s_root;
1911+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1912+ if (unlikely(!add_branch))
1913+ goto out;
1914+
027c5e7a
AM
1915+ err = au_hnotify_init_br(add_branch, perm);
1916+ if (unlikely(err))
1917+ goto out_br;
1918+
1facf9fc 1919+ add_branch->br_wbr = NULL;
1920+ if (au_br_writable(perm)) {
1921+ /* may be freed separately at changing the branch permission */
1922+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1923+ GFP_NOFS);
1924+ if (unlikely(!add_branch->br_wbr))
027c5e7a 1925+ goto out_hnotify;
1facf9fc 1926+ }
1927+
4a4d8108
AM
1928+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
1929+ if (!err)
1930+ err = au_di_realloc(au_di(root), new_nbranch);
1931+ if (!err)
1932+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch);
1933+ if (!err)
1934+ return add_branch; /* success */
1facf9fc 1935+
1facf9fc 1936+ kfree(add_branch->br_wbr);
4a4d8108 1937+
027c5e7a
AM
1938+out_hnotify:
1939+ au_hnotify_fin_br(add_branch);
4f0767ce 1940+out_br:
1facf9fc 1941+ kfree(add_branch);
4f0767ce 1942+out:
4a4d8108 1943+ return ERR_PTR(err);
1facf9fc 1944+}
1945+
1946+/*
1947+ * test if the branch permission is legal or not.
1948+ */
1949+static int test_br(struct inode *inode, int brperm, char *path)
1950+{
1951+ int err;
1952+
4a4d8108
AM
1953+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
1954+ if (!err)
1955+ goto out;
1facf9fc 1956+
4a4d8108
AM
1957+ err = -EINVAL;
1958+ pr_err("write permission for readonly mount or inode, %s\n", path);
1959+
4f0767ce 1960+out:
1facf9fc 1961+ return err;
1962+}
1963+
1964+/*
1965+ * returns:
1966+ * 0: success, the caller will add it
1967+ * plus: success, it is already unified, the caller should ignore it
1968+ * minus: error
1969+ */
1970+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1971+{
1972+ int err;
1973+ aufs_bindex_t bend, bindex;
1974+ struct dentry *root;
1975+ struct inode *inode, *h_inode;
1976+
1977+ root = sb->s_root;
1978+ bend = au_sbend(sb);
1979+ if (unlikely(bend >= 0
1980+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
1981+ err = 1;
1982+ if (!remount) {
1983+ err = -EINVAL;
4a4d8108 1984+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 1985+ }
1986+ goto out;
1987+ }
1988+
1989+ err = -ENOSPC; /* -E2BIG; */
1990+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
1991+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 1992+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 1993+ goto out;
1994+ }
1995+
1996+ err = -EDOM;
1997+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 1998+ pr_err("bad index %d\n", add->bindex);
1facf9fc 1999+ goto out;
2000+ }
2001+
2002+ inode = add->path.dentry->d_inode;
2003+ err = -ENOENT;
2004+ if (unlikely(!inode->i_nlink)) {
4a4d8108 2005+ pr_err("no existence %s\n", add->pathname);
1facf9fc 2006+ goto out;
2007+ }
2008+
2009+ err = -EINVAL;
2010+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 2011+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 2012+ goto out;
2013+ }
2014+
2015+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2016+ pr_err("unsupported filesystem, %s (%s)\n",
2017+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2018+ goto out;
2019+ }
2020+
2021+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
2022+ if (unlikely(err))
2023+ goto out;
2024+
2025+ if (bend < 0)
2026+ return 0; /* success */
2027+
2028+ err = -EINVAL;
2029+ for (bindex = 0; bindex <= bend; bindex++)
2030+ if (unlikely(test_overlap(sb, add->path.dentry,
2031+ au_h_dptr(root, bindex)))) {
4a4d8108 2032+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 2033+ goto out;
2034+ }
2035+
2036+ err = 0;
2037+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
2038+ h_inode = au_h_dptr(root, 0)->d_inode;
2039+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
2040+ || h_inode->i_uid != inode->i_uid
2041+ || h_inode->i_gid != inode->i_gid)
4a4d8108
AM
2042+ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2043+ add->pathname,
2044+ inode->i_uid, inode->i_gid,
2045+ (inode->i_mode & S_IALLUGO),
2046+ h_inode->i_uid, h_inode->i_gid,
2047+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 2048+ }
2049+
4f0767ce 2050+out:
1facf9fc 2051+ return err;
2052+}
2053+
2054+/*
2055+ * initialize or clean the whiteouts for an adding branch
2056+ */
2057+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
2058+ int new_perm, struct dentry *h_root)
2059+{
2060+ int err, old_perm;
2061+ aufs_bindex_t bindex;
2062+ struct mutex *h_mtx;
2063+ struct au_wbr *wbr;
2064+ struct au_hinode *hdir;
2065+
2066+ wbr = br->br_wbr;
2067+ old_perm = br->br_perm;
2068+ br->br_perm = new_perm;
2069+ hdir = NULL;
2070+ h_mtx = NULL;
2071+ bindex = au_br_index(sb, br->br_id);
2072+ if (0 <= bindex) {
2073+ hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 2074+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 2075+ } else {
2076+ h_mtx = &h_root->d_inode->i_mutex;
2077+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
2078+ }
2079+ if (!wbr)
2080+ err = au_wh_init(h_root, br, sb);
2081+ else {
2082+ wbr_wh_write_lock(wbr);
2083+ err = au_wh_init(h_root, br, sb);
2084+ wbr_wh_write_unlock(wbr);
2085+ }
2086+ if (hdir)
4a4d8108 2087+ au_hn_imtx_unlock(hdir);
1facf9fc 2088+ else
2089+ mutex_unlock(h_mtx);
2090+ br->br_perm = old_perm;
2091+
2092+ if (!err && wbr && !au_br_writable(new_perm)) {
2093+ kfree(wbr);
2094+ br->br_wbr = NULL;
2095+ }
2096+
2097+ return err;
2098+}
2099+
2100+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
2101+ int perm, struct path *path)
2102+{
2103+ int err;
4a4d8108 2104+ struct kstatfs kst;
1facf9fc 2105+ struct au_wbr *wbr;
4a4d8108 2106+ struct dentry *h_dentry;
1facf9fc 2107+
2108+ wbr = br->br_wbr;
dece6358 2109+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 2110+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
2111+ atomic_set(&wbr->wbr_wh_running, 0);
2112+ wbr->wbr_bytes = 0;
2113+
4a4d8108
AM
2114+ /*
2115+ * a limit for rmdir/rename a dir
2116+ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
2117+ */
7f207e10 2118+ err = vfs_statfs(path, &kst);
4a4d8108
AM
2119+ if (unlikely(err))
2120+ goto out;
2121+ err = -EINVAL;
7f207e10 2122+ h_dentry = path->dentry;
4a4d8108
AM
2123+ if (kst.f_namelen >= NAME_MAX)
2124+ err = au_br_init_wh(sb, br, perm, h_dentry);
2125+ else
2126+ pr_err("%.*s(%s), unsupported namelen %ld\n",
2127+ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
2128+ kst.f_namelen);
1facf9fc 2129+
4f0767ce 2130+out:
1facf9fc 2131+ return err;
2132+}
2133+
2134+/* initialize a new branch */
2135+static int au_br_init(struct au_branch *br, struct super_block *sb,
2136+ struct au_opt_add *add)
2137+{
2138+ int err;
2139+
2140+ err = 0;
2141+ memset(&br->br_xino, 0, sizeof(br->br_xino));
2142+ mutex_init(&br->br_xino.xi_nondir_mtx);
2143+ br->br_perm = add->perm;
2144+ br->br_mnt = add->path.mnt; /* set first, mntget() later */
4a4d8108
AM
2145+ spin_lock_init(&br->br_dykey_lock);
2146+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 2147+ atomic_set(&br->br_count, 0);
2148+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
2149+ atomic_set(&br->br_xino_running, 0);
2150+ br->br_id = au_new_br_id(sb);
7f207e10 2151+ AuDebugOn(br->br_id < 0);
1facf9fc 2152+
2153+ if (au_br_writable(add->perm)) {
2154+ err = au_wbr_init(br, sb, add->perm, &add->path);
2155+ if (unlikely(err))
b752ccd1 2156+ goto out_err;
1facf9fc 2157+ }
2158+
2159+ if (au_opt_test(au_mntflags(sb), XINO)) {
2160+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
2161+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2162+ if (unlikely(err)) {
2163+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 2164+ goto out_err;
1facf9fc 2165+ }
2166+ }
2167+
2168+ sysaufs_br_init(br);
2169+ mntget(add->path.mnt);
b752ccd1 2170+ goto out; /* success */
1facf9fc 2171+
4f0767ce 2172+out_err:
b752ccd1 2173+ br->br_mnt = NULL;
4f0767ce 2174+out:
1facf9fc 2175+ return err;
2176+}
2177+
2178+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
2179+ struct au_branch *br, aufs_bindex_t bend,
2180+ aufs_bindex_t amount)
2181+{
2182+ struct au_branch **brp;
2183+
dece6358
AM
2184+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2185+
1facf9fc 2186+ brp = sbinfo->si_branch + bindex;
2187+ memmove(brp + 1, brp, sizeof(*brp) * amount);
2188+ *brp = br;
2189+ sbinfo->si_bend++;
2190+ if (unlikely(bend < 0))
2191+ sbinfo->si_bend = 0;
2192+}
2193+
2194+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
2195+ aufs_bindex_t bend, aufs_bindex_t amount)
2196+{
2197+ struct au_hdentry *hdp;
2198+
1308ab2a 2199+ AuRwMustWriteLock(&dinfo->di_rwsem);
2200+
1facf9fc 2201+ hdp = dinfo->di_hdentry + bindex;
2202+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
2203+ au_h_dentry_init(hdp);
2204+ dinfo->di_bend++;
2205+ if (unlikely(bend < 0))
2206+ dinfo->di_bstart = 0;
2207+}
2208+
2209+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
2210+ aufs_bindex_t bend, aufs_bindex_t amount)
2211+{
2212+ struct au_hinode *hip;
2213+
1308ab2a 2214+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2215+
1facf9fc 2216+ hip = iinfo->ii_hinode + bindex;
2217+ memmove(hip + 1, hip, sizeof(*hip) * amount);
2218+ hip->hi_inode = NULL;
4a4d8108 2219+ au_hn_init(hip);
1facf9fc 2220+ iinfo->ii_bend++;
2221+ if (unlikely(bend < 0))
2222+ iinfo->ii_bstart = 0;
2223+}
2224+
2225+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2226+ struct au_branch *br, aufs_bindex_t bindex)
2227+{
2228+ struct dentry *root;
2229+ struct inode *root_inode;
2230+ aufs_bindex_t bend, amount;
2231+
2232+ root = sb->s_root;
2233+ root_inode = root->d_inode;
1facf9fc 2234+ bend = au_sbend(sb);
2235+ amount = bend + 1 - bindex;
53392da6 2236+ au_sbilist_lock();
1facf9fc 2237+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2238+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2239+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2240+ au_set_h_dptr(root, bindex, dget(h_dentry));
2241+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2242+ /*flags*/0);
53392da6 2243+ au_sbilist_unlock();
1facf9fc 2244+}
2245+
2246+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2247+{
2248+ int err;
1facf9fc 2249+ aufs_bindex_t bend, add_bindex;
2250+ struct dentry *root, *h_dentry;
2251+ struct inode *root_inode;
2252+ struct au_branch *add_branch;
2253+
2254+ root = sb->s_root;
2255+ root_inode = root->d_inode;
2256+ IMustLock(root_inode);
2257+ err = test_add(sb, add, remount);
2258+ if (unlikely(err < 0))
2259+ goto out;
2260+ if (err) {
2261+ err = 0;
2262+ goto out; /* success */
2263+ }
2264+
2265+ bend = au_sbend(sb);
2266+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
2267+ err = PTR_ERR(add_branch);
2268+ if (IS_ERR(add_branch))
2269+ goto out;
2270+
2271+ err = au_br_init(add_branch, sb, add);
2272+ if (unlikely(err)) {
2273+ au_br_do_free(add_branch);
2274+ goto out;
2275+ }
2276+
2277+ add_bindex = add->bindex;
2278+ h_dentry = add->path.dentry;
2279+ if (!remount)
2280+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2281+ else {
2282+ sysaufs_brs_del(sb, add_bindex);
2283+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2284+ sysaufs_brs_add(sb, add_bindex);
2285+ }
2286+
1308ab2a 2287+ if (!add_bindex) {
1facf9fc 2288+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2289+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2290+ } else
1facf9fc 2291+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 2292+
2293+ /*
4a4d8108 2294+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 2295+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 2296+ * once detached from aufs.
2297+ */
2298+ if (au_xino_brid(sb) < 0
2299+ && au_br_writable(add_branch->br_perm)
2300+ && !au_test_fs_bad_xino(h_dentry->d_sb)
2301+ && add_branch->br_xino.xi_file
2302+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2303+ au_xino_brid_set(sb, add_branch->br_id);
2304+
4f0767ce 2305+out:
1facf9fc 2306+ return err;
2307+}
2308+
2309+/* ---------------------------------------------------------------------- */
2310+
2311+/*
2312+ * delete a branch
2313+ */
2314+
2315+/* to show the line number, do not make it inlined function */
4a4d8108 2316+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 2317+ if (do_info) \
4a4d8108 2318+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 2319+} while (0)
2320+
027c5e7a
AM
2321+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
2322+ aufs_bindex_t bend)
2323+{
2324+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
2325+}
2326+
2327+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
2328+ aufs_bindex_t bend)
2329+{
2330+ return au_test_ibusy(dentry->d_inode, bstart, bend);
2331+}
2332+
1facf9fc 2333+/*
2334+ * test if the branch is deletable or not.
2335+ */
2336+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 2337+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2338+{
2339+ int err, i, j, ndentry;
2340+ aufs_bindex_t bstart, bend;
1facf9fc 2341+ struct au_dcsub_pages dpages;
2342+ struct au_dpage *dpage;
2343+ struct dentry *d;
1facf9fc 2344+
2345+ err = au_dpages_init(&dpages, GFP_NOFS);
2346+ if (unlikely(err))
2347+ goto out;
2348+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
2349+ if (unlikely(err))
2350+ goto out_dpages;
2351+
1facf9fc 2352+ for (i = 0; !err && i < dpages.ndpage; i++) {
2353+ dpage = dpages.dpages + i;
2354+ ndentry = dpage->ndentry;
2355+ for (j = 0; !err && j < ndentry; j++) {
2356+ d = dpage->dentries[j];
027c5e7a
AM
2357+ AuDebugOn(!d->d_count);
2358+ if (!au_digen_test(d, sigen)) {
1facf9fc 2359+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
2360+ if (unlikely(au_dbrange_test(d))) {
2361+ di_read_unlock(d, AuLock_IR);
2362+ continue;
2363+ }
2364+ } else {
1facf9fc 2365+ di_write_lock_child(d);
027c5e7a
AM
2366+ if (unlikely(au_dbrange_test(d))) {
2367+ di_write_unlock(d);
2368+ continue;
2369+ }
1facf9fc 2370+ err = au_reval_dpath(d, sigen);
2371+ if (!err)
2372+ di_downgrade_lock(d, AuLock_IR);
2373+ else {
2374+ di_write_unlock(d);
2375+ break;
2376+ }
2377+ }
2378+
027c5e7a 2379+ /* AuDbgDentry(d); */
1facf9fc 2380+ bstart = au_dbstart(d);
2381+ bend = au_dbend(d);
2382+ if (bstart <= bindex
2383+ && bindex <= bend
2384+ && au_h_dptr(d, bindex)
027c5e7a 2385+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 2386+ err = -EBUSY;
2387+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
027c5e7a 2388+ AuDbgDentry(d);
1facf9fc 2389+ }
2390+ di_read_unlock(d, AuLock_IR);
2391+ }
2392+ }
2393+
4f0767ce 2394+out_dpages:
1facf9fc 2395+ au_dpages_free(&dpages);
4f0767ce 2396+out:
1facf9fc 2397+ return err;
2398+}
2399+
2400+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 2401+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2402+{
2403+ int err;
7f207e10
AM
2404+ unsigned long long max, ull;
2405+ struct inode *i, **array;
1facf9fc 2406+ aufs_bindex_t bstart, bend;
1facf9fc 2407+
7f207e10
AM
2408+ array = au_iarray_alloc(sb, &max);
2409+ err = PTR_ERR(array);
2410+ if (IS_ERR(array))
2411+ goto out;
2412+
1facf9fc 2413+ err = 0;
7f207e10
AM
2414+ AuDbg("b%d\n", bindex);
2415+ for (ull = 0; !err && ull < max; ull++) {
2416+ i = array[ull];
2417+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 2418+ continue;
2419+
7f207e10 2420+ /* AuDbgInode(i); */
1facf9fc 2421+ if (au_iigen(i) == sigen)
2422+ ii_read_lock_child(i);
2423+ else {
2424+ ii_write_lock_child(i);
027c5e7a
AM
2425+ err = au_refresh_hinode_self(i);
2426+ au_iigen_dec(i);
1facf9fc 2427+ if (!err)
2428+ ii_downgrade_lock(i);
2429+ else {
2430+ ii_write_unlock(i);
2431+ break;
2432+ }
2433+ }
2434+
2435+ bstart = au_ibstart(i);
2436+ bend = au_ibend(i);
2437+ if (bstart <= bindex
2438+ && bindex <= bend
2439+ && au_h_iptr(i, bindex)
027c5e7a 2440+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 2441+ err = -EBUSY;
2442+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 2443+ AuDbgInode(i);
1facf9fc 2444+ }
2445+ ii_read_unlock(i);
2446+ }
7f207e10 2447+ au_iarray_free(array, max);
1facf9fc 2448+
7f207e10 2449+out:
1facf9fc 2450+ return err;
2451+}
2452+
b752ccd1
AM
2453+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
2454+ const unsigned int verbose)
1facf9fc 2455+{
2456+ int err;
2457+ unsigned int sigen;
2458+
2459+ sigen = au_sigen(root->d_sb);
2460+ DiMustNoWaiters(root);
2461+ IiMustNoWaiters(root->d_inode);
2462+ di_write_unlock(root);
b752ccd1 2463+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 2464+ if (!err)
b752ccd1 2465+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 2466+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2467+
2468+ return err;
2469+}
2470+
2471+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2472+ const aufs_bindex_t bindex,
2473+ const aufs_bindex_t bend)
2474+{
2475+ struct au_branch **brp, **p;
2476+
dece6358
AM
2477+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2478+
1facf9fc 2479+ brp = sbinfo->si_branch + bindex;
2480+ if (bindex < bend)
2481+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2482+ sbinfo->si_branch[0 + bend] = NULL;
2483+ sbinfo->si_bend--;
2484+
53392da6 2485+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2486+ if (p)
2487+ sbinfo->si_branch = p;
4a4d8108 2488+ /* harmless error */
1facf9fc 2489+}
2490+
2491+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2492+ const aufs_bindex_t bend)
2493+{
2494+ struct au_hdentry *hdp, *p;
2495+
1308ab2a 2496+ AuRwMustWriteLock(&dinfo->di_rwsem);
2497+
4a4d8108 2498+ hdp = dinfo->di_hdentry;
1facf9fc 2499+ if (bindex < bend)
4a4d8108
AM
2500+ memmove(hdp + bindex, hdp + bindex + 1,
2501+ sizeof(*hdp) * (bend - bindex));
2502+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 2503+ dinfo->di_bend--;
2504+
53392da6 2505+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2506+ if (p)
2507+ dinfo->di_hdentry = p;
4a4d8108 2508+ /* harmless error */
1facf9fc 2509+}
2510+
2511+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2512+ const aufs_bindex_t bend)
2513+{
2514+ struct au_hinode *hip, *p;
2515+
1308ab2a 2516+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2517+
1facf9fc 2518+ hip = iinfo->ii_hinode + bindex;
2519+ if (bindex < bend)
2520+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2521+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 2522+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 2523+ iinfo->ii_bend--;
2524+
53392da6 2525+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2526+ if (p)
2527+ iinfo->ii_hinode = p;
4a4d8108 2528+ /* harmless error */
1facf9fc 2529+}
2530+
2531+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2532+ struct au_branch *br)
2533+{
2534+ aufs_bindex_t bend;
2535+ struct au_sbinfo *sbinfo;
53392da6
AM
2536+ struct dentry *root, *h_root;
2537+ struct inode *inode, *h_inode;
2538+ struct au_hinode *hinode;
1facf9fc 2539+
dece6358
AM
2540+ SiMustWriteLock(sb);
2541+
1facf9fc 2542+ root = sb->s_root;
2543+ inode = root->d_inode;
1facf9fc 2544+ sbinfo = au_sbi(sb);
2545+ bend = sbinfo->si_bend;
2546+
53392da6
AM
2547+ h_root = au_h_dptr(root, bindex);
2548+ hinode = au_hi(inode, bindex);
2549+ h_inode = au_igrab(hinode->hi_inode);
2550+ au_hiput(hinode);
1facf9fc 2551+
53392da6 2552+ au_sbilist_lock();
1facf9fc 2553+ au_br_do_del_brp(sbinfo, bindex, bend);
2554+ au_br_do_del_hdp(au_di(root), bindex, bend);
2555+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
2556+ au_sbilist_unlock();
2557+
2558+ dput(h_root);
2559+ iput(h_inode);
2560+ au_br_do_free(br);
1facf9fc 2561+}
2562+
2563+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2564+{
2565+ int err, rerr, i;
2566+ unsigned int mnt_flags;
2567+ aufs_bindex_t bindex, bend, br_id;
2568+ unsigned char do_wh, verbose;
2569+ struct au_branch *br;
2570+ struct au_wbr *wbr;
2571+
2572+ err = 0;
2573+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2574+ if (bindex < 0) {
2575+ if (remount)
2576+ goto out; /* success */
2577+ err = -ENOENT;
4a4d8108 2578+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 2579+ goto out;
2580+ }
2581+ AuDbg("bindex b%d\n", bindex);
2582+
2583+ err = -EBUSY;
2584+ mnt_flags = au_mntflags(sb);
2585+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2586+ bend = au_sbend(sb);
2587+ if (unlikely(!bend)) {
2588+ AuVerbose(verbose, "no more branches left\n");
2589+ goto out;
2590+ }
2591+ br = au_sbr(sb, bindex);
2592+ i = atomic_read(&br->br_count);
2593+ if (unlikely(i)) {
2594+ AuVerbose(verbose, "%d file(s) opened\n", i);
e49829fe 2595+ goto out;
1facf9fc 2596+ }
2597+
2598+ wbr = br->br_wbr;
2599+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2600+ if (do_wh) {
1308ab2a 2601+ /* instead of WbrWhMustWriteLock(wbr) */
2602+ SiMustWriteLock(sb);
1facf9fc 2603+ for (i = 0; i < AuBrWh_Last; i++) {
2604+ dput(wbr->wbr_wh[i]);
2605+ wbr->wbr_wh[i] = NULL;
2606+ }
2607+ }
2608+
b752ccd1 2609+ err = test_children_busy(sb->s_root, bindex, verbose);
1facf9fc 2610+ if (unlikely(err)) {
2611+ if (do_wh)
2612+ goto out_wh;
2613+ goto out;
2614+ }
2615+
2616+ err = 0;
2617+ br_id = br->br_id;
2618+ if (!remount)
2619+ au_br_do_del(sb, bindex, br);
2620+ else {
2621+ sysaufs_brs_del(sb, bindex);
2622+ au_br_do_del(sb, bindex, br);
2623+ sysaufs_brs_add(sb, bindex);
2624+ }
2625+
1308ab2a 2626+ if (!bindex) {
1facf9fc 2627+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
1308ab2a 2628+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2629+ } else
1facf9fc 2630+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2631+ if (au_opt_test(mnt_flags, PLINK))
2632+ au_plink_half_refresh(sb, br_id);
2633+
b752ccd1 2634+ if (au_xino_brid(sb) == br_id)
1facf9fc 2635+ au_xino_brid_set(sb, -1);
2636+ goto out; /* success */
2637+
4f0767ce 2638+out_wh:
1facf9fc 2639+ /* revert */
2640+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2641+ if (rerr)
4a4d8108
AM
2642+ pr_warning("failed re-creating base whiteout, %s. (%d)\n",
2643+ del->pathname, rerr);
4f0767ce 2644+out:
1facf9fc 2645+ return err;
2646+}
2647+
2648+/* ---------------------------------------------------------------------- */
2649+
027c5e7a
AM
2650+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
2651+{
2652+ int err;
2653+ aufs_bindex_t bstart, bend;
2654+ struct aufs_ibusy ibusy;
2655+ struct inode *inode, *h_inode;
2656+
2657+ err = -EPERM;
2658+ if (unlikely(!capable(CAP_SYS_ADMIN)))
2659+ goto out;
2660+
2661+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
2662+ if (!err)
2663+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
2664+ if (unlikely(err)) {
2665+ err = -EFAULT;
2666+ AuTraceErr(err);
2667+ goto out;
2668+ }
2669+
2670+ err = -EINVAL;
2671+ si_read_lock(sb, AuLock_FLUSH);
2672+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
2673+ goto out_unlock;
2674+
2675+ err = 0;
2676+ ibusy.h_ino = 0; /* invalid */
2677+ inode = ilookup(sb, ibusy.ino);
2678+ if (!inode
2679+ || inode->i_ino == AUFS_ROOT_INO
2680+ || is_bad_inode(inode))
2681+ goto out_unlock;
2682+
2683+ ii_read_lock_child(inode);
2684+ bstart = au_ibstart(inode);
2685+ bend = au_ibend(inode);
2686+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
2687+ h_inode = au_h_iptr(inode, ibusy.bindex);
2688+ if (h_inode && au_test_ibusy(inode, bstart, bend))
2689+ ibusy.h_ino = h_inode->i_ino;
2690+ }
2691+ ii_read_unlock(inode);
2692+ iput(inode);
2693+
2694+out_unlock:
2695+ si_read_unlock(sb);
2696+ if (!err) {
2697+ err = __put_user(ibusy.h_ino, &arg->h_ino);
2698+ if (unlikely(err)) {
2699+ err = -EFAULT;
2700+ AuTraceErr(err);
2701+ }
2702+ }
2703+out:
2704+ return err;
2705+}
2706+
2707+long au_ibusy_ioctl(struct file *file, unsigned long arg)
2708+{
2709+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg);
2710+}
2711+
2712+#ifdef CONFIG_COMPAT
2713+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
2714+{
2715+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg));
2716+}
2717+#endif
2718+
2719+/* ---------------------------------------------------------------------- */
2720+
1facf9fc 2721+/*
2722+ * change a branch permission
2723+ */
2724+
dece6358
AM
2725+static void au_warn_ima(void)
2726+{
2727+#ifdef CONFIG_IMA
1308ab2a 2728+ /* since it doesn't support mark_files_ro() */
027c5e7a 2729+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
2730+#endif
2731+}
2732+
1facf9fc 2733+static int do_need_sigen_inc(int a, int b)
2734+{
2735+ return au_br_whable(a) && !au_br_whable(b);
2736+}
2737+
2738+static int need_sigen_inc(int old, int new)
2739+{
2740+ return do_need_sigen_inc(old, new)
2741+ || do_need_sigen_inc(new, old);
2742+}
2743+
7f207e10
AM
2744+static unsigned long long au_farray_cb(void *a,
2745+ unsigned long long max __maybe_unused,
2746+ void *arg)
2747+{
2748+ unsigned long long n;
2749+ struct file **p, *f;
2750+ struct super_block *sb = arg;
2751+
2752+ n = 0;
2753+ p = a;
2754+ lg_global_lock(files_lglock);
2755+ do_file_list_for_each_entry(sb, f) {
2756+ if (au_fi(f)
027c5e7a 2757+ && file_count(f)
7f207e10
AM
2758+ && !special_file(f->f_dentry->d_inode->i_mode)) {
2759+ get_file(f);
2760+ *p++ = f;
2761+ n++;
2762+ AuDebugOn(n > max);
2763+ }
2764+ } while_file_list_for_each_entry;
2765+ lg_global_unlock(files_lglock);
2766+
2767+ return n;
2768+}
2769+
2770+static struct file **au_farray_alloc(struct super_block *sb,
2771+ unsigned long long *max)
2772+{
2773+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
2774+ return au_array_alloc(max, au_farray_cb, sb);
2775+}
2776+
2777+static void au_farray_free(struct file **a, unsigned long long max)
2778+{
2779+ unsigned long long ull;
2780+
2781+ for (ull = 0; ull < max; ull++)
2782+ if (a[ull])
2783+ fput(a[ull]);
2784+ au_array_free(a);
2785+}
2786+
1facf9fc 2787+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2788+{
7f207e10 2789+ int err, do_warn;
027c5e7a 2790+ unsigned int mnt_flags;
7f207e10 2791+ unsigned long long ull, max;
e49829fe 2792+ aufs_bindex_t br_id;
027c5e7a 2793+ unsigned char verbose;
7f207e10 2794+ struct file *file, *hf, **array;
e49829fe
JR
2795+ struct inode *inode;
2796+ struct au_hfile *hfile;
1facf9fc 2797+
027c5e7a
AM
2798+ mnt_flags = au_mntflags(sb);
2799+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2800+
7f207e10
AM
2801+ array = au_farray_alloc(sb, &max);
2802+ err = PTR_ERR(array);
2803+ if (IS_ERR(array))
1facf9fc 2804+ goto out;
2805+
7f207e10 2806+ do_warn = 0;
e49829fe 2807+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
2808+ for (ull = 0; ull < max; ull++) {
2809+ file = array[ull];
1facf9fc 2810+
7f207e10 2811+ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */
1facf9fc 2812+ fi_read_lock(file);
2813+ if (unlikely(au_test_mmapped(file))) {
2814+ err = -EBUSY;
027c5e7a
AM
2815+ AuVerbose(verbose, "mmapped %.*s\n",
2816+ AuDLNPair(file->f_dentry));
7f207e10 2817+ AuDbgFile(file);
1facf9fc 2818+ FiMustNoWaiters(file);
2819+ fi_read_unlock(file);
7f207e10 2820+ goto out_array;
1facf9fc 2821+ }
2822+
027c5e7a 2823+ inode = file->f_dentry->d_inode;
e49829fe
JR
2824+ hfile = &au_fi(file)->fi_htop;
2825+ hf = hfile->hf_file;
2826+ if (!S_ISREG(inode->i_mode)
1facf9fc 2827+ || !(file->f_mode & FMODE_WRITE)
e49829fe 2828+ || hfile->hf_br->br_id != br_id
7f207e10
AM
2829+ || !(hf->f_mode & FMODE_WRITE))
2830+ array[ull] = NULL;
2831+ else {
2832+ do_warn = 1;
2833+ get_file(file);
1facf9fc 2834+ }
2835+
1facf9fc 2836+ FiMustNoWaiters(file);
2837+ fi_read_unlock(file);
7f207e10
AM
2838+ fput(file);
2839+ }
1facf9fc 2840+
2841+ err = 0;
7f207e10 2842+ if (do_warn)
dece6358 2843+ au_warn_ima();
7f207e10
AM
2844+
2845+ for (ull = 0; ull < max; ull++) {
2846+ file = array[ull];
2847+ if (!file)
2848+ continue;
2849+
1facf9fc 2850+ /* todo: already flushed? */
2851+ /* cf. fs/super.c:mark_files_ro() */
7f207e10
AM
2852+ /* fi_read_lock(file); */
2853+ hfile = &au_fi(file)->fi_htop;
2854+ hf = hfile->hf_file;
2855+ /* fi_read_unlock(file); */
027c5e7a 2856+ spin_lock(&hf->f_lock);
1facf9fc 2857+ hf->f_mode &= ~FMODE_WRITE;
027c5e7a 2858+ spin_unlock(&hf->f_lock);
1facf9fc 2859+ if (!file_check_writeable(hf)) {
2860+ file_release_write(hf);
2861+ mnt_drop_write(hf->f_vfsmnt);
2862+ }
2863+ }
2864+
7f207e10
AM
2865+out_array:
2866+ au_farray_free(array, max);
4f0767ce 2867+out:
7f207e10 2868+ AuTraceErr(err);
1facf9fc 2869+ return err;
2870+}
2871+
2872+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 2873+ int *do_refresh)
1facf9fc 2874+{
2875+ int err, rerr;
2876+ aufs_bindex_t bindex;
1308ab2a 2877+ struct path path;
1facf9fc 2878+ struct dentry *root;
2879+ struct au_branch *br;
2880+
2881+ root = sb->s_root;
1facf9fc 2882+ bindex = au_find_dbindex(root, mod->h_root);
2883+ if (bindex < 0) {
2884+ if (remount)
2885+ return 0; /* success */
2886+ err = -ENOENT;
4a4d8108 2887+ pr_err("%s no such branch\n", mod->path);
1facf9fc 2888+ goto out;
2889+ }
2890+ AuDbg("bindex b%d\n", bindex);
2891+
2892+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2893+ if (unlikely(err))
2894+ goto out;
2895+
2896+ br = au_sbr(sb, bindex);
2897+ if (br->br_perm == mod->perm)
2898+ return 0; /* success */
2899+
2900+ if (au_br_writable(br->br_perm)) {
2901+ /* remove whiteout base */
2902+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2903+ if (unlikely(err))
2904+ goto out;
2905+
2906+ if (!au_br_writable(mod->perm)) {
2907+ /* rw --> ro, file might be mmapped */
2908+ DiMustNoWaiters(root);
2909+ IiMustNoWaiters(root->d_inode);
2910+ di_write_unlock(root);
2911+ err = au_br_mod_files_ro(sb, bindex);
2912+ /* aufs_write_lock() calls ..._child() */
2913+ di_write_lock_child(root);
2914+
2915+ if (unlikely(err)) {
2916+ rerr = -ENOMEM;
2917+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2918+ GFP_NOFS);
1308ab2a 2919+ if (br->br_wbr) {
2920+ path.mnt = br->br_mnt;
2921+ path.dentry = mod->h_root;
2922+ rerr = au_wbr_init(br, sb, br->br_perm,
2923+ &path);
2924+ }
1facf9fc 2925+ if (unlikely(rerr)) {
2926+ AuIOErr("nested error %d (%d)\n",
2927+ rerr, err);
2928+ br->br_perm = mod->perm;
2929+ }
2930+ }
2931+ }
2932+ } else if (au_br_writable(mod->perm)) {
2933+ /* ro --> rw */
2934+ err = -ENOMEM;
2935+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2936+ if (br->br_wbr) {
1308ab2a 2937+ path.mnt = br->br_mnt;
2938+ path.dentry = mod->h_root;
1facf9fc 2939+ err = au_wbr_init(br, sb, mod->perm, &path);
2940+ if (unlikely(err)) {
2941+ kfree(br->br_wbr);
2942+ br->br_wbr = NULL;
2943+ }
2944+ }
2945+ }
2946+
2947+ if (!err) {
7f207e10 2948+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
1facf9fc 2949+ br->br_perm = mod->perm;
2950+ }
2951+
4f0767ce 2952+out:
7f207e10 2953+ AuTraceErr(err);
1facf9fc 2954+ return err;
2955+}
7f207e10
AM
2956diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
2957--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
2958+++ linux/fs/aufs/branch.h 2011-10-24 20:51:51.580466925 +0200
2959@@ -0,0 +1,232 @@
1facf9fc 2960+/*
027c5e7a 2961+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 2962+ *
2963+ * This program, aufs is free software; you can redistribute it and/or modify
2964+ * it under the terms of the GNU General Public License as published by
2965+ * the Free Software Foundation; either version 2 of the License, or
2966+ * (at your option) any later version.
dece6358
AM
2967+ *
2968+ * This program is distributed in the hope that it will be useful,
2969+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2970+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2971+ * GNU General Public License for more details.
2972+ *
2973+ * You should have received a copy of the GNU General Public License
2974+ * along with this program; if not, write to the Free Software
2975+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2976+ */
2977+
2978+/*
2979+ * branch filesystems and xino for them
2980+ */
2981+
2982+#ifndef __AUFS_BRANCH_H__
2983+#define __AUFS_BRANCH_H__
2984+
2985+#ifdef __KERNEL__
2986+
2987+#include <linux/fs.h>
2988+#include <linux/mount.h>
1facf9fc 2989+#include <linux/aufs_type.h>
4a4d8108 2990+#include "dynop.h"
1facf9fc 2991+#include "rwsem.h"
2992+#include "super.h"
2993+
2994+/* ---------------------------------------------------------------------- */
2995+
2996+/* a xino file */
2997+struct au_xino_file {
2998+ struct file *xi_file;
2999+ struct mutex xi_nondir_mtx;
3000+
3001+ /* todo: make xino files an array to support huge inode number */
3002+
3003+#ifdef CONFIG_DEBUG_FS
3004+ struct dentry *xi_dbgaufs;
3005+#endif
3006+};
3007+
3008+/* members for writable branch only */
3009+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
3010+struct au_wbr {
dece6358 3011+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 3012+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 3013+ atomic_t wbr_wh_running;
1facf9fc 3014+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
3015+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
3016+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
3017+
3018+ /* mfs mode */
3019+ unsigned long long wbr_bytes;
3020+};
3021+
4a4d8108
AM
3022+/* ext2 has 3 types of operations at least, ext3 has 4 */
3023+#define AuBrDynOp (AuDyLast * 4)
3024+
1facf9fc 3025+/* protected by superblock rwsem */
3026+struct au_branch {
3027+ struct au_xino_file br_xino;
3028+
3029+ aufs_bindex_t br_id;
3030+
3031+ int br_perm;
3032+ struct vfsmount *br_mnt;
4a4d8108
AM
3033+ spinlock_t br_dykey_lock;
3034+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 3035+ atomic_t br_count;
3036+
3037+ struct au_wbr *br_wbr;
3038+
3039+ /* xino truncation */
3040+ blkcnt_t br_xino_upper; /* watermark in blocks */
3041+ atomic_t br_xino_running;
3042+
027c5e7a
AM
3043+#ifdef CONFIG_AUFS_HFSNOTIFY
3044+ struct fsnotify_group *br_hfsn_group;
3045+ struct fsnotify_ops br_hfsn_ops;
3046+#endif
3047+
1facf9fc 3048+#ifdef CONFIG_SYSFS
3049+ /* an entry under sysfs per mount-point */
3050+ char br_name[8];
3051+ struct attribute br_attr;
3052+#endif
3053+};
3054+
3055+/* ---------------------------------------------------------------------- */
3056+
1e00d052
AM
3057+/* branch permissions and attributes */
3058+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
3059+#define AuBrPerm_RO (1 << 1) /* readonly */
3060+#define AuBrPerm_RR (1 << 2) /* natively readonly */
3061+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
1facf9fc 3062+
1e00d052 3063+#define AuBrRAttr_WH (1 << 3) /* whiteout-able */
1facf9fc 3064+
1e00d052 3065+#define AuBrWAttr_NoLinkWH (1 << 4) /* un-hardlinkable whiteouts */
1facf9fc 3066+
3067+static inline int au_br_writable(int brperm)
3068+{
1e00d052 3069+ return brperm & AuBrPerm_RW;
1facf9fc 3070+}
3071+
3072+static inline int au_br_whable(int brperm)
3073+{
1e00d052
AM
3074+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
3075+}
3076+
3077+static inline int au_br_wh_linkable(int brperm)
3078+{
3079+ return !(brperm & AuBrWAttr_NoLinkWH);
1facf9fc 3080+}
3081+
3082+static inline int au_br_rdonly(struct au_branch *br)
3083+{
3084+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
3085+ || !au_br_writable(br->br_perm))
3086+ ? -EROFS : 0;
3087+}
3088+
4a4d8108 3089+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 3090+{
4a4d8108 3091+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 3092+ return !(brperm & AuBrPerm_RR);
1facf9fc 3093+#else
3094+ return 0;
3095+#endif
3096+}
3097+
3098+/* ---------------------------------------------------------------------- */
3099+
3100+/* branch.c */
3101+struct au_sbinfo;
3102+void au_br_free(struct au_sbinfo *sinfo);
3103+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
3104+struct au_opt_add;
3105+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
3106+struct au_opt_del;
3107+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
3108+long au_ibusy_ioctl(struct file *file, unsigned long arg);
3109+#ifdef CONFIG_COMPAT
3110+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
3111+#endif
1facf9fc 3112+struct au_opt_mod;
3113+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 3114+ int *do_refresh);
1facf9fc 3115+
3116+/* xino.c */
3117+static const loff_t au_loff_max = LLONG_MAX;
3118+
3119+int au_xib_trunc(struct super_block *sb);
3120+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
3121+ loff_t *pos);
3122+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
3123+ loff_t *pos);
3124+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
3125+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
3126+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 3127+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 3128+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3129+ ino_t ino);
3130+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3131+ ino_t *ino);
3132+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
3133+ struct file *base_file, int do_test);
3134+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
3135+
3136+struct au_opt_xino;
3137+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
3138+void au_xino_clr(struct super_block *sb);
3139+struct file *au_xino_def(struct super_block *sb);
3140+int au_xino_path(struct seq_file *seq, struct file *file);
3141+
3142+/* ---------------------------------------------------------------------- */
3143+
3144+/* Superblock to branch */
3145+static inline
3146+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
3147+{
3148+ return au_sbr(sb, bindex)->br_id;
3149+}
3150+
3151+static inline
3152+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
3153+{
3154+ return au_sbr(sb, bindex)->br_mnt;
3155+}
3156+
3157+static inline
3158+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
3159+{
3160+ return au_sbr_mnt(sb, bindex)->mnt_sb;
3161+}
3162+
3163+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
3164+{
e49829fe 3165+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 3166+}
3167+
3168+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
3169+{
3170+ return au_sbr(sb, bindex)->br_perm;
3171+}
3172+
3173+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
3174+{
3175+ return au_br_whable(au_sbr_perm(sb, bindex));
3176+}
3177+
3178+/* ---------------------------------------------------------------------- */
3179+
3180+/*
3181+ * wbr_wh_read_lock, wbr_wh_write_lock
3182+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
3183+ */
3184+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
3185+
dece6358
AM
3186+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
3187+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
3188+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
3189+
1facf9fc 3190+#endif /* __KERNEL__ */
3191+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
3192diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
3193--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 3194+++ linux/fs/aufs/conf.mk 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 3195@@ -0,0 +1,38 @@
4a4d8108
AM
3196+
3197+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
3198+
3199+define AuConf
3200+ifdef ${1}
3201+AuConfStr += ${1}=${${1}}
3202+endif
3203+endef
3204+
b752ccd1 3205+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 3206+ SBILIST \
7f207e10 3207+ HNOTIFY HFSNOTIFY \
4a4d8108
AM
3208+ EXPORT INO_T_64 \
3209+ RDU \
2cbb1c4b 3210+ PROC_MAP \
4a4d8108
AM
3211+ SP_IATTR \
3212+ SHWH \
3213+ BR_RAMFS \
3214+ BR_FUSE POLL \
3215+ BR_HFSPLUS \
3216+ BDEV_LOOP \
b752ccd1
AM
3217+ DEBUG MAGIC_SYSRQ
3218+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
3219+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
3220+
3221+AuConfName = ${obj}/conf.str
3222+${AuConfName}.tmp: FORCE
3223+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
3224+${AuConfName}: ${AuConfName}.tmp
3225+ @diff -q $< $@ > /dev/null 2>&1 || { \
3226+ echo ' GEN ' $@; \
3227+ cp -p $< $@; \
3228+ }
3229+FORCE:
3230+clean-files += ${AuConfName} ${AuConfName}.tmp
3231+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
3232+
3233+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
3234diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
3235--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
3236+++ linux/fs/aufs/cpup.c 2011-08-24 13:30:24.731313534 +0200
3237@@ -0,0 +1,1080 @@
1facf9fc 3238+/*
027c5e7a 3239+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 3240+ *
3241+ * This program, aufs is free software; you can redistribute it and/or modify
3242+ * it under the terms of the GNU General Public License as published by
3243+ * the Free Software Foundation; either version 2 of the License, or
3244+ * (at your option) any later version.
dece6358
AM
3245+ *
3246+ * This program is distributed in the hope that it will be useful,
3247+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3248+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3249+ * GNU General Public License for more details.
3250+ *
3251+ * You should have received a copy of the GNU General Public License
3252+ * along with this program; if not, write to the Free Software
3253+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3254+ */
3255+
3256+/*
3257+ * copy-up functions, see wbr_policy.c for copy-down
3258+ */
3259+
dece6358 3260+#include <linux/file.h>
1facf9fc 3261+#include <linux/fs_stack.h>
dece6358 3262+#include <linux/mm.h>
1facf9fc 3263+#include <linux/uaccess.h>
3264+#include "aufs.h"
3265+
3266+void au_cpup_attr_flags(struct inode *dst, struct inode *src)
3267+{
3268+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
3269+ | S_NOATIME | S_NOCMTIME;
3270+
3271+ dst->i_flags |= src->i_flags & ~mask;
3272+ if (au_test_fs_notime(dst->i_sb))
3273+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
3274+}
3275+
3276+void au_cpup_attr_timesizes(struct inode *inode)
3277+{
3278+ struct inode *h_inode;
3279+
3280+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3281+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 3282+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 3283+}
3284+
3285+void au_cpup_attr_nlink(struct inode *inode, int force)
3286+{
3287+ struct inode *h_inode;
3288+ struct super_block *sb;
3289+ aufs_bindex_t bindex, bend;
3290+
3291+ sb = inode->i_sb;
3292+ bindex = au_ibstart(inode);
3293+ h_inode = au_h_iptr(inode, bindex);
3294+ if (!force
3295+ && !S_ISDIR(h_inode->i_mode)
3296+ && au_opt_test(au_mntflags(sb), PLINK)
3297+ && au_plink_test(inode))
3298+ return;
3299+
3300+ inode->i_nlink = h_inode->i_nlink;
3301+
3302+ /*
3303+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
3304+ * it may include the whplink directory.
3305+ */
3306+ if (S_ISDIR(h_inode->i_mode)) {
3307+ bend = au_ibend(inode);
3308+ for (bindex++; bindex <= bend; bindex++) {
3309+ h_inode = au_h_iptr(inode, bindex);
3310+ if (h_inode)
3311+ au_add_nlink(inode, h_inode);
3312+ }
3313+ }
3314+}
3315+
3316+void au_cpup_attr_changeable(struct inode *inode)
3317+{
3318+ struct inode *h_inode;
3319+
3320+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3321+ inode->i_mode = h_inode->i_mode;
3322+ inode->i_uid = h_inode->i_uid;
3323+ inode->i_gid = h_inode->i_gid;
3324+ au_cpup_attr_timesizes(inode);
3325+ au_cpup_attr_flags(inode, h_inode);
3326+}
3327+
3328+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
3329+{
3330+ struct au_iinfo *iinfo = au_ii(inode);
3331+
1308ab2a 3332+ IiMustWriteLock(inode);
3333+
1facf9fc 3334+ iinfo->ii_higen = h_inode->i_generation;
3335+ iinfo->ii_hsb1 = h_inode->i_sb;
3336+}
3337+
3338+void au_cpup_attr_all(struct inode *inode, int force)
3339+{
3340+ struct inode *h_inode;
3341+
3342+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3343+ au_cpup_attr_changeable(inode);
3344+ if (inode->i_nlink > 0)
3345+ au_cpup_attr_nlink(inode, force);
3346+ inode->i_rdev = h_inode->i_rdev;
3347+ inode->i_blkbits = h_inode->i_blkbits;
3348+ au_cpup_igen(inode, h_inode);
3349+}
3350+
3351+/* ---------------------------------------------------------------------- */
3352+
3353+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
3354+
3355+/* keep the timestamps of the parent dir when cpup */
3356+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3357+ struct path *h_path)
3358+{
3359+ struct inode *h_inode;
3360+
3361+ dt->dt_dentry = dentry;
3362+ dt->dt_h_path = *h_path;
3363+ h_inode = h_path->dentry->d_inode;
3364+ dt->dt_atime = h_inode->i_atime;
3365+ dt->dt_mtime = h_inode->i_mtime;
3366+ /* smp_mb(); */
3367+}
3368+
3369+void au_dtime_revert(struct au_dtime *dt)
3370+{
3371+ struct iattr attr;
3372+ int err;
3373+
3374+ attr.ia_atime = dt->dt_atime;
3375+ attr.ia_mtime = dt->dt_mtime;
3376+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
3377+ | ATTR_ATIME | ATTR_ATIME_SET;
3378+
3379+ err = vfsub_notify_change(&dt->dt_h_path, &attr);
3380+ if (unlikely(err))
4a4d8108 3381+ pr_warning("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 3382+}
3383+
3384+/* ---------------------------------------------------------------------- */
3385+
3386+static noinline_for_stack
3387+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
3388+{
3389+ int err, sbits;
3390+ struct iattr ia;
3391+ struct path h_path;
1308ab2a 3392+ struct inode *h_isrc, *h_idst;
1facf9fc 3393+
3394+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 3395+ h_idst = h_path.dentry->d_inode;
1facf9fc 3396+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
3397+ h_isrc = h_src->d_inode;
1308ab2a 3398+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 3399+ | ATTR_ATIME | ATTR_MTIME
3400+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
1facf9fc 3401+ ia.ia_uid = h_isrc->i_uid;
3402+ ia.ia_gid = h_isrc->i_gid;
3403+ ia.ia_atime = h_isrc->i_atime;
3404+ ia.ia_mtime = h_isrc->i_mtime;
1308ab2a 3405+ if (h_idst->i_mode != h_isrc->i_mode
3406+ && !S_ISLNK(h_idst->i_mode)) {
3407+ ia.ia_valid |= ATTR_MODE;
3408+ ia.ia_mode = h_isrc->i_mode;
3409+ }
3410+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
3411+ au_cpup_attr_flags(h_idst, h_isrc);
1facf9fc 3412+ err = vfsub_notify_change(&h_path, &ia);
3413+
3414+ /* is this nfs only? */
3415+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
3416+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
3417+ ia.ia_mode = h_isrc->i_mode;
3418+ err = vfsub_notify_change(&h_path, &ia);
3419+ }
3420+
3421+ return err;
3422+}
3423+
3424+/* ---------------------------------------------------------------------- */
3425+
3426+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
3427+ char *buf, unsigned long blksize)
3428+{
3429+ int err;
3430+ size_t sz, rbytes, wbytes;
3431+ unsigned char all_zero;
3432+ char *p, *zp;
3433+ struct mutex *h_mtx;
3434+ /* reduce stack usage */
3435+ struct iattr *ia;
3436+
3437+ zp = page_address(ZERO_PAGE(0));
3438+ if (unlikely(!zp))
3439+ return -ENOMEM; /* possible? */
3440+
3441+ err = 0;
3442+ all_zero = 0;
3443+ while (len) {
3444+ AuDbg("len %lld\n", len);
3445+ sz = blksize;
3446+ if (len < blksize)
3447+ sz = len;
3448+
3449+ rbytes = 0;
3450+ /* todo: signal_pending? */
3451+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
3452+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3453+ err = rbytes;
3454+ }
3455+ if (unlikely(err < 0))
3456+ break;
3457+
3458+ all_zero = 0;
3459+ if (len >= rbytes && rbytes == blksize)
3460+ all_zero = !memcmp(buf, zp, rbytes);
3461+ if (!all_zero) {
3462+ wbytes = rbytes;
3463+ p = buf;
3464+ while (wbytes) {
3465+ size_t b;
3466+
3467+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3468+ err = b;
3469+ /* todo: signal_pending? */
3470+ if (unlikely(err == -EAGAIN || err == -EINTR))
3471+ continue;
3472+ if (unlikely(err < 0))
3473+ break;
3474+ wbytes -= b;
3475+ p += b;
3476+ }
3477+ } else {
3478+ loff_t res;
3479+
3480+ AuLabel(hole);
3481+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3482+ err = res;
3483+ if (unlikely(res < 0))
3484+ break;
3485+ }
3486+ len -= rbytes;
3487+ err = 0;
3488+ }
3489+
3490+ /* the last block may be a hole */
3491+ if (!err && all_zero) {
3492+ AuLabel(last hole);
3493+
3494+ err = 1;
3495+ if (au_test_nfs(dst->f_dentry->d_sb)) {
3496+ /* nfs requires this step to make last hole */
3497+ /* is this only nfs? */
3498+ do {
3499+ /* todo: signal_pending? */
3500+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3501+ } while (err == -EAGAIN || err == -EINTR);
3502+ if (err == 1)
3503+ dst->f_pos--;
3504+ }
3505+
3506+ if (err == 1) {
3507+ ia = (void *)buf;
3508+ ia->ia_size = dst->f_pos;
3509+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3510+ ia->ia_file = dst;
3511+ h_mtx = &dst->f_dentry->d_inode->i_mutex;
3512+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3513+ err = vfsub_notify_change(&dst->f_path, ia);
3514+ mutex_unlock(h_mtx);
3515+ }
3516+ }
3517+
3518+ return err;
3519+}
3520+
3521+int au_copy_file(struct file *dst, struct file *src, loff_t len)
3522+{
3523+ int err;
3524+ unsigned long blksize;
3525+ unsigned char do_kfree;
3526+ char *buf;
3527+
3528+ err = -ENOMEM;
3529+ blksize = dst->f_dentry->d_sb->s_blocksize;
3530+ if (!blksize || PAGE_SIZE < blksize)
3531+ blksize = PAGE_SIZE;
3532+ AuDbg("blksize %lu\n", blksize);
3533+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
3534+ if (do_kfree)
3535+ buf = kmalloc(blksize, GFP_NOFS);
3536+ else
3537+ buf = (void *)__get_free_page(GFP_NOFS);
3538+ if (unlikely(!buf))
3539+ goto out;
3540+
3541+ if (len > (1 << 22))
3542+ AuDbg("copying a large file %lld\n", (long long)len);
3543+
3544+ src->f_pos = 0;
3545+ dst->f_pos = 0;
3546+ err = au_do_copy_file(dst, src, len, buf, blksize);
3547+ if (do_kfree)
3548+ kfree(buf);
3549+ else
3550+ free_page((unsigned long)buf);
3551+
4f0767ce 3552+out:
1facf9fc 3553+ return err;
3554+}
3555+
3556+/*
3557+ * to support a sparse file which is opened with O_APPEND,
3558+ * we need to close the file.
3559+ */
3560+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
4a4d8108 3561+ aufs_bindex_t bsrc, loff_t len)
1facf9fc 3562+{
3563+ int err, i;
3564+ enum { SRC, DST };
3565+ struct {
3566+ aufs_bindex_t bindex;
3567+ unsigned int flags;
3568+ struct dentry *dentry;
3569+ struct file *file;
3570+ void *label, *label_file;
3571+ } *f, file[] = {
3572+ {
3573+ .bindex = bsrc,
3574+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3575+ .file = NULL,
3576+ .label = &&out,
3577+ .label_file = &&out_src
3578+ },
3579+ {
3580+ .bindex = bdst,
3581+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3582+ .file = NULL,
3583+ .label = &&out_src,
3584+ .label_file = &&out_dst
3585+ }
3586+ };
3587+ struct super_block *sb;
3588+
3589+ /* bsrc branch can be ro/rw. */
3590+ sb = dentry->d_sb;
3591+ f = file;
3592+ for (i = 0; i < 2; i++, f++) {
3593+ f->dentry = au_h_dptr(dentry, f->bindex);
3594+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3595+ err = PTR_ERR(f->file);
3596+ if (IS_ERR(f->file))
3597+ goto *f->label;
3598+ err = -EINVAL;
3599+ if (unlikely(!f->file->f_op))
3600+ goto *f->label_file;
3601+ }
3602+
3603+ /* try stopping to update while we copyup */
3604+ IMustLock(file[SRC].dentry->d_inode);
3605+ err = au_copy_file(file[DST].file, file[SRC].file, len);
3606+
4f0767ce 3607+out_dst:
1facf9fc 3608+ fput(file[DST].file);
3609+ au_sbr_put(sb, file[DST].bindex);
4f0767ce 3610+out_src:
1facf9fc 3611+ fput(file[SRC].file);
3612+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 3613+out:
1facf9fc 3614+ return err;
3615+}
3616+
3617+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3618+ aufs_bindex_t bsrc, loff_t len,
3619+ struct inode *h_dir, struct path *h_path)
3620+{
3621+ int err, rerr;
3622+ loff_t l;
3623+
3624+ err = 0;
3625+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3626+ if (len == -1 || l < len)
3627+ len = l;
3628+ if (len)
3629+ err = au_cp_regular(dentry, bdst, bsrc, len);
3630+ if (!err)
3631+ goto out; /* success */
3632+
3633+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3634+ if (rerr) {
3635+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3636+ AuDLNPair(h_path->dentry), err, rerr);
3637+ err = -EIO;
3638+ }
3639+
4f0767ce 3640+out:
1facf9fc 3641+ return err;
3642+}
3643+
3644+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3645+ struct inode *h_dir)
3646+{
3647+ int err, symlen;
3648+ mm_segment_t old_fs;
b752ccd1
AM
3649+ union {
3650+ char *k;
3651+ char __user *u;
3652+ } sym;
1facf9fc 3653+
3654+ err = -ENOSYS;
3655+ if (unlikely(!h_src->d_inode->i_op->readlink))
3656+ goto out;
3657+
3658+ err = -ENOMEM;
b752ccd1
AM
3659+ sym.k = __getname_gfp(GFP_NOFS);
3660+ if (unlikely(!sym.k))
1facf9fc 3661+ goto out;
3662+
3663+ old_fs = get_fs();
3664+ set_fs(KERNEL_DS);
b752ccd1 3665+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 3666+ err = symlen;
3667+ set_fs(old_fs);
3668+
3669+ if (symlen > 0) {
b752ccd1
AM
3670+ sym.k[symlen] = 0;
3671+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 3672+ }
b752ccd1 3673+ __putname(sym.k);
1facf9fc 3674+
4f0767ce 3675+out:
1facf9fc 3676+ return err;
3677+}
3678+
3679+/* return with the lower dst inode is locked */
3680+static noinline_for_stack
3681+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3682+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3683+ struct dentry *dst_parent)
3684+{
3685+ int err;
3686+ umode_t mode;
3687+ unsigned int mnt_flags;
3688+ unsigned char isdir;
3689+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3690+ struct au_dtime dt;
3691+ struct path h_path;
3692+ struct dentry *h_src, *h_dst, *h_parent;
3693+ struct inode *h_inode, *h_dir;
3694+ struct super_block *sb;
3695+
3696+ /* bsrc branch can be ro/rw. */
3697+ h_src = au_h_dptr(dentry, bsrc);
3698+ h_inode = h_src->d_inode;
3699+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3700+
3701+ /* try stopping to be referenced while we are creating */
3702+ h_dst = au_h_dptr(dentry, bdst);
3703+ h_parent = h_dst->d_parent; /* dir inode is locked */
3704+ h_dir = h_parent->d_inode;
3705+ IMustLock(h_dir);
3706+ AuDebugOn(h_parent != h_dst->d_parent);
3707+
3708+ sb = dentry->d_sb;
3709+ h_path.mnt = au_sbr_mnt(sb, bdst);
3710+ if (do_dt) {
3711+ h_path.dentry = h_parent;
3712+ au_dtime_store(&dt, dst_parent, &h_path);
3713+ }
3714+ h_path.dentry = h_dst;
3715+
3716+ isdir = 0;
3717+ mode = h_inode->i_mode;
3718+ switch (mode & S_IFMT) {
3719+ case S_IFREG:
3720+ /* try stopping to update while we are referencing */
3721+ IMustLock(h_inode);
3722+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3723+ if (!err)
3724+ err = au_do_cpup_regular
3725+ (dentry, bdst, bsrc, len,
3726+ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3727+ break;
3728+ case S_IFDIR:
3729+ isdir = 1;
3730+ err = vfsub_mkdir(h_dir, &h_path, mode);
3731+ if (!err) {
3732+ /*
3733+ * strange behaviour from the user's view,
3734+ * particularly in the setattr case
3735+ */
3736+ if (au_ibstart(dst_parent->d_inode) == bdst)
3737+ au_cpup_attr_nlink(dst_parent->d_inode,
3738+ /*force*/1);
3739+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3740+ }
3741+ break;
3742+ case S_IFLNK:
3743+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3744+ break;
3745+ case S_IFCHR:
3746+ case S_IFBLK:
3747+ AuDebugOn(!capable(CAP_MKNOD));
3748+ /*FALLTHROUGH*/
3749+ case S_IFIFO:
3750+ case S_IFSOCK:
3751+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3752+ break;
3753+ default:
3754+ AuIOErr("Unknown inode type 0%o\n", mode);
3755+ err = -EIO;
3756+ }
3757+
3758+ mnt_flags = au_mntflags(sb);
3759+ if (!au_opt_test(mnt_flags, UDBA_NONE)
3760+ && !isdir
3761+ && au_opt_test(mnt_flags, XINO)
3762+ && h_inode->i_nlink == 1
3763+ /* todo: unnecessary? */
3764+ /* && dentry->d_inode->i_nlink == 1 */
3765+ && bdst < bsrc
3766+ && !au_ftest_cpup(flags, KEEPLINO))
1308ab2a 3767+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 3768+ /* ignore this error */
3769+
3770+ if (do_dt)
3771+ au_dtime_revert(&dt);
3772+ return err;
3773+}
3774+
3775+/*
3776+ * copyup the @dentry from @bsrc to @bdst.
3777+ * the caller must set both of the lower dentries.
3778+ * @len is for truncating; when it is -1, copyup the entire file.
3779+ * in link/rename cases, @dst_parent may be different from the real one.
3780+ */
3781+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3782+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3783+ struct dentry *dst_parent)
3784+{
3785+ int err, rerr;
3786+ aufs_bindex_t old_ibstart;
3787+ unsigned char isdir, plink;
3788+ struct au_dtime dt;
3789+ struct path h_path;
3790+ struct dentry *h_src, *h_dst, *h_parent;
3791+ struct inode *dst_inode, *h_dir, *inode;
3792+ struct super_block *sb;
3793+
3794+ AuDebugOn(bsrc <= bdst);
3795+
3796+ sb = dentry->d_sb;
3797+ h_path.mnt = au_sbr_mnt(sb, bdst);
3798+ h_dst = au_h_dptr(dentry, bdst);
3799+ h_parent = h_dst->d_parent; /* dir inode is locked */
3800+ h_dir = h_parent->d_inode;
3801+ IMustLock(h_dir);
3802+
3803+ h_src = au_h_dptr(dentry, bsrc);
3804+ inode = dentry->d_inode;
3805+
3806+ if (!dst_parent)
3807+ dst_parent = dget_parent(dentry);
3808+ else
3809+ dget(dst_parent);
3810+
3811+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
3812+ dst_inode = au_h_iptr(inode, bdst);
3813+ if (dst_inode) {
3814+ if (unlikely(!plink)) {
3815+ err = -EIO;
027c5e7a
AM
3816+ AuIOErr("hi%lu(i%lu) exists on b%d "
3817+ "but plink is disabled\n",
3818+ dst_inode->i_ino, inode->i_ino, bdst);
1facf9fc 3819+ goto out;
3820+ }
3821+
3822+ if (dst_inode->i_nlink) {
3823+ const int do_dt = au_ftest_cpup(flags, DTIME);
3824+
3825+ h_src = au_plink_lkup(inode, bdst);
3826+ err = PTR_ERR(h_src);
3827+ if (IS_ERR(h_src))
3828+ goto out;
3829+ if (unlikely(!h_src->d_inode)) {
3830+ err = -EIO;
3831+ AuIOErr("i%lu exists on a upper branch "
027c5e7a
AM
3832+ "but not pseudo-linked\n",
3833+ inode->i_ino);
1facf9fc 3834+ dput(h_src);
3835+ goto out;
3836+ }
3837+
3838+ if (do_dt) {
3839+ h_path.dentry = h_parent;
3840+ au_dtime_store(&dt, dst_parent, &h_path);
3841+ }
3842+ h_path.dentry = h_dst;
3843+ err = vfsub_link(h_src, h_dir, &h_path);
3844+ if (do_dt)
3845+ au_dtime_revert(&dt);
3846+ dput(h_src);
3847+ goto out;
3848+ } else
3849+ /* todo: cpup_wh_file? */
3850+ /* udba work */
4a4d8108 3851+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 3852+ }
3853+
3854+ old_ibstart = au_ibstart(inode);
3855+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3856+ if (unlikely(err))
3857+ goto out;
3858+ dst_inode = h_dst->d_inode;
3859+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3860+
3861+ err = cpup_iattr(dentry, bdst, h_src);
3862+ isdir = S_ISDIR(dst_inode->i_mode);
3863+ if (!err) {
4a4d8108
AM
3864+ if (bdst < old_ibstart) {
3865+ if (S_ISREG(inode->i_mode)) {
3866+ err = au_dy_iaop(inode, bdst, dst_inode);
3867+ if (unlikely(err))
3868+ goto out_rev;
3869+ }
1facf9fc 3870+ au_set_ibstart(inode, bdst);
4a4d8108 3871+ }
1facf9fc 3872+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3873+ au_hi_flags(inode, isdir));
3874+ mutex_unlock(&dst_inode->i_mutex);
3875+ if (!isdir
3876+ && h_src->d_inode->i_nlink > 1
3877+ && plink)
3878+ au_plink_append(inode, bdst, h_dst);
3879+ goto out; /* success */
3880+ }
3881+
3882+ /* revert */
4a4d8108 3883+out_rev:
1facf9fc 3884+ h_path.dentry = h_parent;
3885+ mutex_unlock(&dst_inode->i_mutex);
3886+ au_dtime_store(&dt, dst_parent, &h_path);
3887+ h_path.dentry = h_dst;
3888+ if (!isdir)
3889+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3890+ else
3891+ rerr = vfsub_rmdir(h_dir, &h_path);
3892+ au_dtime_revert(&dt);
3893+ if (rerr) {
3894+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3895+ err = -EIO;
3896+ }
3897+
4f0767ce 3898+out:
1facf9fc 3899+ dput(dst_parent);
3900+ return err;
3901+}
3902+
3903+struct au_cpup_single_args {
3904+ int *errp;
3905+ struct dentry *dentry;
3906+ aufs_bindex_t bdst, bsrc;
3907+ loff_t len;
3908+ unsigned int flags;
3909+ struct dentry *dst_parent;
3910+};
3911+
3912+static void au_call_cpup_single(void *args)
3913+{
3914+ struct au_cpup_single_args *a = args;
3915+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3916+ a->flags, a->dst_parent);
3917+}
3918+
53392da6
AM
3919+/*
3920+ * prevent SIGXFSZ in copy-up.
3921+ * testing CAP_MKNOD is for generic fs,
3922+ * but CAP_FSETID is for xfs only, currently.
3923+ */
3924+static int au_cpup_sio_test(struct super_block *sb, umode_t mode)
3925+{
3926+ int do_sio;
3927+
3928+ do_sio = 0;
3929+ if (!au_wkq_test()
3930+ && (!au_sbi(sb)->si_plink_maint_pid
3931+ || au_plink_maint(sb, AuLock_NOPLM))) {
3932+ switch (mode & S_IFMT) {
3933+ case S_IFREG:
3934+ /* no condition about RLIMIT_FSIZE and the file size */
3935+ do_sio = 1;
3936+ break;
3937+ case S_IFCHR:
3938+ case S_IFBLK:
3939+ do_sio = !capable(CAP_MKNOD);
3940+ break;
3941+ }
3942+ if (!do_sio)
3943+ do_sio = ((mode & (S_ISUID | S_ISGID))
3944+ && !capable(CAP_FSETID));
3945+ }
3946+
3947+ return do_sio;
3948+}
3949+
1facf9fc 3950+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3951+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3952+ struct dentry *dst_parent)
3953+{
3954+ int err, wkq_err;
1facf9fc 3955+ struct dentry *h_dentry;
3956+
3957+ h_dentry = au_h_dptr(dentry, bsrc);
53392da6 3958+ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode))
1facf9fc 3959+ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3960+ dst_parent);
3961+ else {
3962+ struct au_cpup_single_args args = {
3963+ .errp = &err,
3964+ .dentry = dentry,
3965+ .bdst = bdst,
3966+ .bsrc = bsrc,
3967+ .len = len,
3968+ .flags = flags,
3969+ .dst_parent = dst_parent
3970+ };
3971+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3972+ if (unlikely(wkq_err))
3973+ err = wkq_err;
3974+ }
3975+
3976+ return err;
3977+}
3978+
3979+/*
3980+ * copyup the @dentry from the first active lower branch to @bdst,
3981+ * using au_cpup_single().
3982+ */
3983+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3984+ unsigned int flags)
3985+{
3986+ int err;
3987+ aufs_bindex_t bsrc, bend;
3988+
3989+ bend = au_dbend(dentry);
3990+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
3991+ if (au_h_dptr(dentry, bsrc))
3992+ break;
3993+
3994+ err = au_lkup_neg(dentry, bdst);
3995+ if (!err) {
3996+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3997+ if (!err)
3998+ return 0; /* success */
3999+
4000+ /* revert */
4001+ au_set_h_dptr(dentry, bdst, NULL);
4002+ au_set_dbstart(dentry, bsrc);
4003+ }
4004+
4005+ return err;
4006+}
4007+
4008+struct au_cpup_simple_args {
4009+ int *errp;
4010+ struct dentry *dentry;
4011+ aufs_bindex_t bdst;
4012+ loff_t len;
4013+ unsigned int flags;
4014+};
4015+
4016+static void au_call_cpup_simple(void *args)
4017+{
4018+ struct au_cpup_simple_args *a = args;
4019+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
4020+}
4021+
4022+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4023+ unsigned int flags)
4024+{
4025+ int err, wkq_err;
1facf9fc 4026+ struct dentry *parent;
4027+ struct inode *h_dir;
4028+
4029+ parent = dget_parent(dentry);
4030+ h_dir = au_h_iptr(parent->d_inode, bdst);
53392da6
AM
4031+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
4032+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4033+ err = au_cpup_simple(dentry, bdst, len, flags);
4034+ else {
4035+ struct au_cpup_simple_args args = {
4036+ .errp = &err,
4037+ .dentry = dentry,
4038+ .bdst = bdst,
4039+ .len = len,
4040+ .flags = flags
4041+ };
4042+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
4043+ if (unlikely(wkq_err))
4044+ err = wkq_err;
4045+ }
4046+
4047+ dput(parent);
4048+ return err;
4049+}
4050+
4051+/* ---------------------------------------------------------------------- */
4052+
4053+/*
4054+ * copyup the deleted file for writing.
4055+ */
4056+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
4057+ struct dentry *wh_dentry, struct file *file,
4058+ loff_t len)
4059+{
4060+ int err;
4061+ aufs_bindex_t bstart;
4062+ struct au_dinfo *dinfo;
4063+ struct dentry *h_d_dst, *h_d_start;
4a4d8108 4064+ struct au_hdentry *hdp;
1facf9fc 4065+
4066+ dinfo = au_di(dentry);
1308ab2a 4067+ AuRwMustWriteLock(&dinfo->di_rwsem);
4068+
1facf9fc 4069+ bstart = dinfo->di_bstart;
4a4d8108
AM
4070+ hdp = dinfo->di_hdentry;
4071+ h_d_dst = hdp[0 + bdst].hd_dentry;
1facf9fc 4072+ dinfo->di_bstart = bdst;
4a4d8108 4073+ hdp[0 + bdst].hd_dentry = wh_dentry;
027c5e7a
AM
4074+ if (file) {
4075+ h_d_start = hdp[0 + bstart].hd_dentry;
4a4d8108 4076+ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry;
027c5e7a 4077+ }
1facf9fc 4078+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
4079+ /*dst_parent*/NULL);
027c5e7a
AM
4080+ if (file) {
4081+ if (!err)
4082+ err = au_reopen_nondir(file);
4a4d8108 4083+ hdp[0 + bstart].hd_dentry = h_d_start;
1facf9fc 4084+ }
4a4d8108 4085+ hdp[0 + bdst].hd_dentry = h_d_dst;
1facf9fc 4086+ dinfo->di_bstart = bstart;
4087+
4088+ return err;
4089+}
4090+
4091+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4092+ struct file *file)
4093+{
4094+ int err;
4095+ struct au_dtime dt;
4096+ struct dentry *parent, *h_parent, *wh_dentry;
4097+ struct au_branch *br;
4098+ struct path h_path;
4099+
4100+ br = au_sbr(dentry->d_sb, bdst);
4101+ parent = dget_parent(dentry);
4102+ h_parent = au_h_dptr(parent, bdst);
4103+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
4104+ err = PTR_ERR(wh_dentry);
4105+ if (IS_ERR(wh_dentry))
4106+ goto out;
4107+
4108+ h_path.dentry = h_parent;
4109+ h_path.mnt = br->br_mnt;
4110+ au_dtime_store(&dt, parent, &h_path);
4111+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
4112+ if (unlikely(err))
4113+ goto out_wh;
4114+
4115+ dget(wh_dentry);
4116+ h_path.dentry = wh_dentry;
4a4d8108
AM
4117+ if (!S_ISDIR(wh_dentry->d_inode->i_mode))
4118+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
4119+ else
4120+ err = vfsub_rmdir(h_parent->d_inode, &h_path);
1facf9fc 4121+ if (unlikely(err)) {
4122+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
4123+ AuDLNPair(wh_dentry), err);
4124+ err = -EIO;
4125+ }
4126+ au_dtime_revert(&dt);
4127+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
4128+
4f0767ce 4129+out_wh:
1facf9fc 4130+ dput(wh_dentry);
4f0767ce 4131+out:
1facf9fc 4132+ dput(parent);
4133+ return err;
4134+}
4135+
4136+struct au_cpup_wh_args {
4137+ int *errp;
4138+ struct dentry *dentry;
4139+ aufs_bindex_t bdst;
4140+ loff_t len;
4141+ struct file *file;
4142+};
4143+
4144+static void au_call_cpup_wh(void *args)
4145+{
4146+ struct au_cpup_wh_args *a = args;
4147+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
4148+}
4149+
4150+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4151+ struct file *file)
4152+{
4153+ int err, wkq_err;
4154+ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
4155+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
4156+ struct au_wbr *wbr;
4157+
4158+ parent = dget_parent(dentry);
4159+ dir = parent->d_inode;
4160+ h_orph = NULL;
4161+ h_parent = NULL;
4162+ h_dir = au_igrab(au_h_iptr(dir, bdst));
4163+ h_tmpdir = h_dir;
4164+ if (!h_dir->i_nlink) {
4165+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
4166+ h_orph = wbr->wbr_orph;
4167+
4168+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 4169+ au_set_h_dptr(parent, bdst, dget(h_orph));
4170+ h_tmpdir = h_orph->d_inode;
1facf9fc 4171+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
4172+
4173+ /* this temporary unlock is safe */
4174+ if (file)
4a4d8108 4175+ h_dentry = au_hf_top(file)->f_dentry;
1facf9fc 4176+ else
4177+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
4178+ h_inode = h_dentry->d_inode;
4179+ IMustLock(h_inode);
4180+ mutex_unlock(&h_inode->i_mutex);
dece6358 4181+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
1facf9fc 4182+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108 4183+ /* todo: au_h_open_pre()? */
1facf9fc 4184+ }
4185+
53392da6
AM
4186+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
4187+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4188+ err = au_cpup_wh(dentry, bdst, len, file);
4189+ else {
4190+ struct au_cpup_wh_args args = {
4191+ .errp = &err,
4192+ .dentry = dentry,
4193+ .bdst = bdst,
4194+ .len = len,
4195+ .file = file
4196+ };
4197+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
4198+ if (unlikely(wkq_err))
4199+ err = wkq_err;
4200+ }
4201+
4202+ if (h_orph) {
4203+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 4204+ /* todo: au_h_open_post()? */
1facf9fc 4205+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 4206+ au_set_h_dptr(parent, bdst, h_parent);
4207+ }
4208+ iput(h_dir);
4209+ dput(parent);
4210+
4211+ return err;
4212+}
4213+
4214+/* ---------------------------------------------------------------------- */
4215+
4216+/*
4217+ * generic routine for both copy-up and copy-down.
4218+ */
4219+/* cf. revalidate function in file.c */
4220+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4221+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4222+ struct dentry *h_parent, void *arg),
4223+ void *arg)
4224+{
4225+ int err;
4226+ struct au_pin pin;
4227+ struct dentry *d, *parent, *h_parent, *real_parent;
4228+
4229+ err = 0;
4230+ parent = dget_parent(dentry);
4231+ if (IS_ROOT(parent))
4232+ goto out;
4233+
4234+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
4235+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
4236+
4237+ /* do not use au_dpage */
4238+ real_parent = parent;
4239+ while (1) {
4240+ dput(parent);
4241+ parent = dget_parent(dentry);
4242+ h_parent = au_h_dptr(parent, bdst);
4243+ if (h_parent)
4244+ goto out; /* success */
4245+
4246+ /* find top dir which is necessary to cpup */
4247+ do {
4248+ d = parent;
4249+ dput(parent);
4250+ parent = dget_parent(d);
4251+ di_read_lock_parent3(parent, !AuLock_IR);
4252+ h_parent = au_h_dptr(parent, bdst);
4253+ di_read_unlock(parent, !AuLock_IR);
4254+ } while (!h_parent);
4255+
4256+ if (d != real_parent)
4257+ di_write_lock_child3(d);
4258+
4259+ /* somebody else might create while we were sleeping */
4260+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
4261+ if (au_h_dptr(d, bdst))
4262+ au_update_dbstart(d);
4263+
4264+ au_pin_set_dentry(&pin, d);
4265+ err = au_do_pin(&pin);
4266+ if (!err) {
4267+ err = cp(d, bdst, h_parent, arg);
4268+ au_unpin(&pin);
4269+ }
4270+ }
4271+
4272+ if (d != real_parent)
4273+ di_write_unlock(d);
4274+ if (unlikely(err))
4275+ break;
4276+ }
4277+
4f0767ce 4278+out:
1facf9fc 4279+ dput(parent);
4280+ return err;
4281+}
4282+
4283+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
4284+ struct dentry *h_parent __maybe_unused ,
4285+ void *arg __maybe_unused)
4286+{
4287+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
4288+}
4289+
4290+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4291+{
4292+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
4293+}
4294+
4295+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4296+{
4297+ int err;
4298+ struct dentry *parent;
4299+ struct inode *dir;
4300+
4301+ parent = dget_parent(dentry);
4302+ dir = parent->d_inode;
4303+ err = 0;
4304+ if (au_h_iptr(dir, bdst))
4305+ goto out;
4306+
4307+ di_read_unlock(parent, AuLock_IR);
4308+ di_write_lock_parent(parent);
4309+ /* someone else might change our inode while we were sleeping */
4310+ if (!au_h_iptr(dir, bdst))
4311+ err = au_cpup_dirs(dentry, bdst);
4312+ di_downgrade_lock(parent, AuLock_IR);
4313+
4f0767ce 4314+out:
1facf9fc 4315+ dput(parent);
4316+ return err;
4317+}
7f207e10
AM
4318diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
4319--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4320+++ linux/fs/aufs/cpup.h 2011-08-24 13:30:24.731313534 +0200
7f207e10 4321@@ -0,0 +1,83 @@
1facf9fc 4322+/*
027c5e7a 4323+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4324+ *
4325+ * This program, aufs is free software; you can redistribute it and/or modify
4326+ * it under the terms of the GNU General Public License as published by
4327+ * the Free Software Foundation; either version 2 of the License, or
4328+ * (at your option) any later version.
dece6358
AM
4329+ *
4330+ * This program is distributed in the hope that it will be useful,
4331+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4332+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4333+ * GNU General Public License for more details.
4334+ *
4335+ * You should have received a copy of the GNU General Public License
4336+ * along with this program; if not, write to the Free Software
4337+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4338+ */
4339+
4340+/*
4341+ * copy-up/down functions
4342+ */
4343+
4344+#ifndef __AUFS_CPUP_H__
4345+#define __AUFS_CPUP_H__
4346+
4347+#ifdef __KERNEL__
4348+
dece6358
AM
4349+#include <linux/path.h>
4350+#include <linux/time.h>
1facf9fc 4351+#include <linux/aufs_type.h>
4352+
dece6358
AM
4353+struct inode;
4354+struct file;
4355+
1facf9fc 4356+void au_cpup_attr_flags(struct inode *dst, struct inode *src);
4357+void au_cpup_attr_timesizes(struct inode *inode);
4358+void au_cpup_attr_nlink(struct inode *inode, int force);
4359+void au_cpup_attr_changeable(struct inode *inode);
4360+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
4361+void au_cpup_attr_all(struct inode *inode, int force);
4362+
4363+/* ---------------------------------------------------------------------- */
4364+
4365+/* cpup flags */
4366+#define AuCpup_DTIME 1 /* do dtime_store/revert */
4367+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
4368+ for link(2) */
4369+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
4370+#define au_fset_cpup(flags, name) \
4371+ do { (flags) |= AuCpup_##name; } while (0)
4372+#define au_fclr_cpup(flags, name) \
4373+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 4374+
4375+int au_copy_file(struct file *dst, struct file *src, loff_t len);
4376+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
4377+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
4378+ struct dentry *dst_parent);
4379+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4380+ unsigned int flags);
4381+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4382+ struct file *file);
4383+
4384+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4385+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4386+ struct dentry *h_parent, void *arg),
4387+ void *arg);
4388+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4389+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4390+
4391+/* ---------------------------------------------------------------------- */
4392+
4393+/* keep timestamps when copyup */
4394+struct au_dtime {
4395+ struct dentry *dt_dentry;
4396+ struct path dt_h_path;
4397+ struct timespec dt_atime, dt_mtime;
4398+};
4399+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4400+ struct path *h_path);
4401+void au_dtime_revert(struct au_dtime *dt);
4402+
4403+#endif /* __KERNEL__ */
4404+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
4405diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
4406--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4407+++ linux/fs/aufs/dbgaufs.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4408@@ -0,0 +1,334 @@
1facf9fc 4409+/*
027c5e7a 4410+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4411+ *
4412+ * This program, aufs is free software; you can redistribute it and/or modify
4413+ * it under the terms of the GNU General Public License as published by
4414+ * the Free Software Foundation; either version 2 of the License, or
4415+ * (at your option) any later version.
dece6358
AM
4416+ *
4417+ * This program is distributed in the hope that it will be useful,
4418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4420+ * GNU General Public License for more details.
4421+ *
4422+ * You should have received a copy of the GNU General Public License
4423+ * along with this program; if not, write to the Free Software
4424+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4425+ */
4426+
4427+/*
4428+ * debugfs interface
4429+ */
4430+
4431+#include <linux/debugfs.h>
4432+#include "aufs.h"
4433+
4434+#ifndef CONFIG_SYSFS
4435+#error DEBUG_FS depends upon SYSFS
4436+#endif
4437+
4438+static struct dentry *dbgaufs;
4439+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
4440+
4441+ /* 20 is the max number of decimal digits of a 64-bit ulong */
4442+struct dbgaufs_arg {
4443+ int n;
4444+ char a[20 * 4];
4445+};
4446+
4447+/*
4448+ * common function for all XINO files
4449+ */
4450+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
4451+ struct file *file)
4452+{
4453+ kfree(file->private_data);
4454+ return 0;
4455+}
4456+
4457+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
4458+{
4459+ int err;
4460+ struct kstat st;
4461+ struct dbgaufs_arg *p;
4462+
4463+ err = -ENOMEM;
4464+ p = kmalloc(sizeof(*p), GFP_NOFS);
4465+ if (unlikely(!p))
4466+ goto out;
4467+
4468+ err = 0;
4469+ p->n = 0;
4470+ file->private_data = p;
4471+ if (!xf)
4472+ goto out;
4473+
4474+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4475+ if (!err) {
4476+ if (do_fcnt)
4477+ p->n = snprintf
4478+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4479+ (long)file_count(xf), st.blocks, st.blksize,
4480+ (long long)st.size);
4481+ else
4482+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4483+ st.blocks, st.blksize,
4484+ (long long)st.size);
4485+ AuDebugOn(p->n >= sizeof(p->a));
4486+ } else {
4487+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4488+ err = 0;
4489+ }
4490+
4f0767ce 4491+out:
1facf9fc 4492+ return err;
4493+
4494+}
4495+
4496+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4497+ size_t count, loff_t *ppos)
4498+{
4499+ struct dbgaufs_arg *p;
4500+
4501+ p = file->private_data;
4502+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4503+}
4504+
4505+/* ---------------------------------------------------------------------- */
4506+
4507+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4508+{
4509+ int err;
4510+ struct au_sbinfo *sbinfo;
4511+ struct super_block *sb;
4512+
4513+ sbinfo = inode->i_private;
4514+ sb = sbinfo->si_sb;
4515+ si_noflush_read_lock(sb);
4516+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4517+ si_read_unlock(sb);
4518+ return err;
4519+}
4520+
4521+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 4522+ .owner = THIS_MODULE,
1facf9fc 4523+ .open = dbgaufs_xib_open,
4524+ .release = dbgaufs_xi_release,
4525+ .read = dbgaufs_xi_read
4526+};
4527+
4528+/* ---------------------------------------------------------------------- */
4529+
4530+#define DbgaufsXi_PREFIX "xi"
4531+
4532+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4533+{
4534+ int err;
4535+ long l;
4536+ struct au_sbinfo *sbinfo;
4537+ struct super_block *sb;
4538+ struct file *xf;
4539+ struct qstr *name;
4540+
4541+ err = -ENOENT;
4542+ xf = NULL;
4543+ name = &file->f_dentry->d_name;
4544+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4545+ || memcmp(name->name, DbgaufsXi_PREFIX,
4546+ sizeof(DbgaufsXi_PREFIX) - 1)))
4547+ goto out;
4548+ err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4549+ if (unlikely(err))
4550+ goto out;
4551+
4552+ sbinfo = inode->i_private;
4553+ sb = sbinfo->si_sb;
4554+ si_noflush_read_lock(sb);
4555+ if (l <= au_sbend(sb)) {
4556+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4557+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4558+ } else
4559+ err = -ENOENT;
4560+ si_read_unlock(sb);
4561+
4f0767ce 4562+out:
1facf9fc 4563+ return err;
4564+}
4565+
4566+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 4567+ .owner = THIS_MODULE,
1facf9fc 4568+ .open = dbgaufs_xino_open,
4569+ .release = dbgaufs_xi_release,
4570+ .read = dbgaufs_xi_read
4571+};
4572+
4573+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4574+{
4575+ aufs_bindex_t bend;
4576+ struct au_branch *br;
4577+ struct au_xino_file *xi;
4578+
4579+ if (!au_sbi(sb)->si_dbgaufs)
4580+ return;
4581+
4582+ bend = au_sbend(sb);
4583+ for (; bindex <= bend; bindex++) {
4584+ br = au_sbr(sb, bindex);
4585+ xi = &br->br_xino;
4586+ if (xi->xi_dbgaufs) {
4587+ debugfs_remove(xi->xi_dbgaufs);
4588+ xi->xi_dbgaufs = NULL;
4589+ }
4590+ }
4591+}
4592+
4593+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4594+{
4595+ struct au_sbinfo *sbinfo;
4596+ struct dentry *parent;
4597+ struct au_branch *br;
4598+ struct au_xino_file *xi;
4599+ aufs_bindex_t bend;
4600+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
4601+
4602+ sbinfo = au_sbi(sb);
4603+ parent = sbinfo->si_dbgaufs;
4604+ if (!parent)
4605+ return;
4606+
4607+ bend = au_sbend(sb);
4608+ for (; bindex <= bend; bindex++) {
4609+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4610+ br = au_sbr(sb, bindex);
4611+ xi = &br->br_xino;
4612+ AuDebugOn(xi->xi_dbgaufs);
4613+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4614+ sbinfo, &dbgaufs_xino_fop);
4615+ /* ignore an error */
4616+ if (unlikely(!xi->xi_dbgaufs))
4617+ AuWarn1("failed %s under debugfs\n", name);
4618+ }
4619+}
4620+
4621+/* ---------------------------------------------------------------------- */
4622+
4623+#ifdef CONFIG_AUFS_EXPORT
4624+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4625+{
4626+ int err;
4627+ struct au_sbinfo *sbinfo;
4628+ struct super_block *sb;
4629+
4630+ sbinfo = inode->i_private;
4631+ sb = sbinfo->si_sb;
4632+ si_noflush_read_lock(sb);
4633+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4634+ si_read_unlock(sb);
4635+ return err;
4636+}
4637+
4638+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 4639+ .owner = THIS_MODULE,
1facf9fc 4640+ .open = dbgaufs_xigen_open,
4641+ .release = dbgaufs_xi_release,
4642+ .read = dbgaufs_xi_read
4643+};
4644+
4645+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4646+{
4647+ int err;
4648+
dece6358
AM
4649+ /*
4650+ * This function is a dynamic '__init' function actually,
4651+ * so the tiny check for si_rwsem is unnecessary.
4652+ */
4653+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4654+
1facf9fc 4655+ err = -EIO;
4656+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
4657+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4658+ &dbgaufs_xigen_fop);
4659+ if (sbinfo->si_dbgaufs_xigen)
4660+ err = 0;
4661+
4662+ return err;
4663+}
4664+#else
4665+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4666+{
4667+ return 0;
4668+}
4669+#endif /* CONFIG_AUFS_EXPORT */
4670+
4671+/* ---------------------------------------------------------------------- */
4672+
4673+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4674+{
dece6358
AM
4675+ /*
4676+ * This function is a dynamic '__init' function actually,
4677+ * so the tiny check for si_rwsem is unnecessary.
4678+ */
4679+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4680+
1facf9fc 4681+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
4682+ sbinfo->si_dbgaufs = NULL;
4683+ kobject_put(&sbinfo->si_kobj);
4684+}
4685+
4686+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4687+{
4688+ int err;
4689+ char name[SysaufsSiNameLen];
4690+
dece6358
AM
4691+ /*
4692+ * This function is a dynamic '__init' function actually,
4693+ * so the tiny check for si_rwsem is unnecessary.
4694+ */
4695+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4696+
1facf9fc 4697+ err = -ENOENT;
4698+ if (!dbgaufs) {
4699+ AuErr1("/debug/aufs is uninitialized\n");
4700+ goto out;
4701+ }
4702+
4703+ err = -EIO;
4704+ sysaufs_name(sbinfo, name);
4705+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4706+ if (unlikely(!sbinfo->si_dbgaufs))
4707+ goto out;
4708+ kobject_get(&sbinfo->si_kobj);
4709+
4710+ sbinfo->si_dbgaufs_xib = debugfs_create_file
4711+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4712+ &dbgaufs_xib_fop);
4713+ if (unlikely(!sbinfo->si_dbgaufs_xib))
4714+ goto out_dir;
4715+
4716+ err = dbgaufs_xigen_init(sbinfo);
4717+ if (!err)
4718+ goto out; /* success */
4719+
4f0767ce 4720+out_dir:
1facf9fc 4721+ dbgaufs_si_fin(sbinfo);
4f0767ce 4722+out:
1facf9fc 4723+ return err;
4724+}
4725+
4726+/* ---------------------------------------------------------------------- */
4727+
4728+void dbgaufs_fin(void)
4729+{
4730+ debugfs_remove(dbgaufs);
4731+}
4732+
4733+int __init dbgaufs_init(void)
4734+{
4735+ int err;
4736+
4737+ err = -EIO;
4738+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4739+ if (dbgaufs)
4740+ err = 0;
4741+ return err;
4742+}
7f207e10
AM
4743diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
4744--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4745+++ linux/fs/aufs/dbgaufs.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4746@@ -0,0 +1,52 @@
1facf9fc 4747+/*
027c5e7a 4748+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4749+ *
4750+ * This program, aufs is free software; you can redistribute it and/or modify
4751+ * it under the terms of the GNU General Public License as published by
4752+ * the Free Software Foundation; either version 2 of the License, or
4753+ * (at your option) any later version.
dece6358
AM
4754+ *
4755+ * This program is distributed in the hope that it will be useful,
4756+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4757+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4758+ * GNU General Public License for more details.
4759+ *
4760+ * You should have received a copy of the GNU General Public License
4761+ * along with this program; if not, write to the Free Software
4762+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4763+ */
4764+
4765+/*
4766+ * debugfs interface
4767+ */
4768+
4769+#ifndef __DBGAUFS_H__
4770+#define __DBGAUFS_H__
4771+
4772+#ifdef __KERNEL__
4773+
dece6358 4774+#include <linux/init.h>
1facf9fc 4775+#include <linux/aufs_type.h>
4776+
dece6358 4777+struct super_block;
1facf9fc 4778+struct au_sbinfo;
dece6358 4779+
1facf9fc 4780+#ifdef CONFIG_DEBUG_FS
4781+/* dbgaufs.c */
4782+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4783+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4784+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4785+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4786+void dbgaufs_fin(void);
4787+int __init dbgaufs_init(void);
1facf9fc 4788+#else
4a4d8108
AM
4789+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
4790+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
4791+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
4792+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
4793+AuStubVoid(dbgaufs_fin, void)
4794+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 4795+#endif /* CONFIG_DEBUG_FS */
4796+
4797+#endif /* __KERNEL__ */
4798+#endif /* __DBGAUFS_H__ */
7f207e10
AM
4799diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
4800--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4801+++ linux/fs/aufs/dcsub.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 4802@@ -0,0 +1,243 @@
1facf9fc 4803+/*
027c5e7a 4804+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4805+ *
4806+ * This program, aufs is free software; you can redistribute it and/or modify
4807+ * it under the terms of the GNU General Public License as published by
4808+ * the Free Software Foundation; either version 2 of the License, or
4809+ * (at your option) any later version.
dece6358
AM
4810+ *
4811+ * This program is distributed in the hope that it will be useful,
4812+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4813+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4814+ * GNU General Public License for more details.
4815+ *
4816+ * You should have received a copy of the GNU General Public License
4817+ * along with this program; if not, write to the Free Software
4818+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4819+ */
4820+
4821+/*
4822+ * sub-routines for dentry cache
4823+ */
4824+
4825+#include "aufs.h"
4826+
4827+static void au_dpage_free(struct au_dpage *dpage)
4828+{
4829+ int i;
4830+ struct dentry **p;
4831+
4832+ p = dpage->dentries;
4833+ for (i = 0; i < dpage->ndentry; i++)
4834+ dput(*p++);
4835+ free_page((unsigned long)dpage->dentries);
4836+}
4837+
4838+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4839+{
4840+ int err;
4841+ void *p;
4842+
4843+ err = -ENOMEM;
4844+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4845+ if (unlikely(!dpages->dpages))
4846+ goto out;
4847+
4848+ p = (void *)__get_free_page(gfp);
4849+ if (unlikely(!p))
4850+ goto out_dpages;
4851+
4852+ dpages->dpages[0].ndentry = 0;
4853+ dpages->dpages[0].dentries = p;
4854+ dpages->ndpage = 1;
4855+ return 0; /* success */
4856+
4f0767ce 4857+out_dpages:
1facf9fc 4858+ kfree(dpages->dpages);
4f0767ce 4859+out:
1facf9fc 4860+ return err;
4861+}
4862+
4863+void au_dpages_free(struct au_dcsub_pages *dpages)
4864+{
4865+ int i;
4866+ struct au_dpage *p;
4867+
4868+ p = dpages->dpages;
4869+ for (i = 0; i < dpages->ndpage; i++)
4870+ au_dpage_free(p++);
4871+ kfree(dpages->dpages);
4872+}
4873+
4874+static int au_dpages_append(struct au_dcsub_pages *dpages,
4875+ struct dentry *dentry, gfp_t gfp)
4876+{
4877+ int err, sz;
4878+ struct au_dpage *dpage;
4879+ void *p;
4880+
4881+ dpage = dpages->dpages + dpages->ndpage - 1;
4882+ sz = PAGE_SIZE / sizeof(dentry);
4883+ if (unlikely(dpage->ndentry >= sz)) {
4884+ AuLabel(new dpage);
4885+ err = -ENOMEM;
4886+ sz = dpages->ndpage * sizeof(*dpages->dpages);
4887+ p = au_kzrealloc(dpages->dpages, sz,
4888+ sz + sizeof(*dpages->dpages), gfp);
4889+ if (unlikely(!p))
4890+ goto out;
4891+
4892+ dpages->dpages = p;
4893+ dpage = dpages->dpages + dpages->ndpage;
4894+ p = (void *)__get_free_page(gfp);
4895+ if (unlikely(!p))
4896+ goto out;
4897+
4898+ dpage->ndentry = 0;
4899+ dpage->dentries = p;
4900+ dpages->ndpage++;
4901+ }
4902+
027c5e7a
AM
4903+ AuDebugOn(!dentry->d_count);
4904+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 4905+ return 0; /* success */
4906+
4f0767ce 4907+out:
1facf9fc 4908+ return err;
4909+}
4910+
4911+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4912+ au_dpages_test test, void *arg)
4913+{
4914+ int err;
027c5e7a 4915+ struct dentry *this_parent;
1facf9fc 4916+ struct list_head *next;
4917+ struct super_block *sb = root->d_sb;
4918+
4919+ err = 0;
027c5e7a
AM
4920+ write_seqlock(&rename_lock);
4921+ this_parent = root;
4922+ spin_lock(&this_parent->d_lock);
4f0767ce 4923+repeat:
1facf9fc 4924+ next = this_parent->d_subdirs.next;
4f0767ce 4925+resume:
1facf9fc 4926+ if (this_parent->d_sb == sb
4927+ && !IS_ROOT(this_parent)
027c5e7a
AM
4928+ && au_di(this_parent)
4929+ && this_parent->d_count
1facf9fc 4930+ && (!test || test(this_parent, arg))) {
4931+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4932+ if (unlikely(err))
4933+ goto out;
4934+ }
4935+
4936+ while (next != &this_parent->d_subdirs) {
4937+ struct list_head *tmp = next;
4938+ struct dentry *dentry = list_entry(tmp, struct dentry,
4939+ d_u.d_child);
027c5e7a 4940+
1facf9fc 4941+ next = tmp->next;
027c5e7a
AM
4942+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
4943+ if (dentry->d_count) {
4944+ if (!list_empty(&dentry->d_subdirs)) {
4945+ spin_unlock(&this_parent->d_lock);
4946+ spin_release(&dentry->d_lock.dep_map, 1,
4947+ _RET_IP_);
4948+ this_parent = dentry;
4949+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1,
4950+ _RET_IP_);
4951+ goto repeat;
4952+ }
4953+ if (dentry->d_sb == sb
4954+ && au_di(dentry)
4955+ && (!test || test(dentry, arg)))
4956+ err = au_dpages_append(dpages, dentry,
4957+ GFP_ATOMIC);
1facf9fc 4958+ }
027c5e7a
AM
4959+ spin_unlock(&dentry->d_lock);
4960+ if (unlikely(err))
4961+ goto out;
1facf9fc 4962+ }
4963+
4964+ if (this_parent != root) {
027c5e7a
AM
4965+ struct dentry *tmp;
4966+ struct dentry *child;
4967+
4968+ tmp = this_parent->d_parent;
4969+ rcu_read_lock();
4970+ spin_unlock(&this_parent->d_lock);
4971+ child = this_parent;
4972+ this_parent = tmp;
4973+ spin_lock(&this_parent->d_lock);
4974+ rcu_read_unlock();
4975+ next = child->d_u.d_child.next;
1facf9fc 4976+ goto resume;
4977+ }
027c5e7a 4978+
4f0767ce 4979+out:
027c5e7a
AM
4980+ spin_unlock(&this_parent->d_lock);
4981+ write_sequnlock(&rename_lock);
1facf9fc 4982+ return err;
4983+}
4984+
4985+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4986+ int do_include, au_dpages_test test, void *arg)
4987+{
4988+ int err;
4989+
4990+ err = 0;
027c5e7a
AM
4991+ write_seqlock(&rename_lock);
4992+ spin_lock(&dentry->d_lock);
4993+ if (do_include
4994+ && dentry->d_count
4995+ && (!test || test(dentry, arg)))
1facf9fc 4996+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
4997+ spin_unlock(&dentry->d_lock);
4998+ if (unlikely(err))
4999+ goto out;
5000+
5001+ /*
5002+ * vfsmount_lock is unnecessary since this is a traverse in a single
5003+ * mount
5004+ */
1facf9fc 5005+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
5006+ dentry = dentry->d_parent; /* rename_lock is locked */
5007+ spin_lock(&dentry->d_lock);
5008+ if (dentry->d_count
5009+ && (!test || test(dentry, arg)))
1facf9fc 5010+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
5011+ spin_unlock(&dentry->d_lock);
5012+ if (unlikely(err))
5013+ break;
1facf9fc 5014+ }
5015+
4f0767ce 5016+out:
027c5e7a 5017+ write_sequnlock(&rename_lock);
1facf9fc 5018+ return err;
5019+}
5020+
027c5e7a
AM
5021+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
5022+{
5023+ return au_di(dentry) && dentry->d_sb == arg;
5024+}
5025+
5026+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5027+ struct dentry *dentry, int do_include)
5028+{
5029+ return au_dcsub_pages_rev(dpages, dentry, do_include,
5030+ au_dcsub_dpages_aufs, dentry->d_sb);
5031+}
5032+
4a4d8108 5033+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 5034+{
4a4d8108
AM
5035+ struct path path[2] = {
5036+ {
5037+ .dentry = d1
5038+ },
5039+ {
5040+ .dentry = d2
5041+ }
5042+ };
1facf9fc 5043+
4a4d8108 5044+ return path_is_under(path + 0, path + 1);
1facf9fc 5045+}
7f207e10
AM
5046diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
5047--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5048+++ linux/fs/aufs/dcsub.h 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5049@@ -0,0 +1,95 @@
1facf9fc 5050+/*
027c5e7a 5051+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5052+ *
5053+ * This program, aufs is free software; you can redistribute it and/or modify
5054+ * it under the terms of the GNU General Public License as published by
5055+ * the Free Software Foundation; either version 2 of the License, or
5056+ * (at your option) any later version.
dece6358
AM
5057+ *
5058+ * This program is distributed in the hope that it will be useful,
5059+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5060+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5061+ * GNU General Public License for more details.
5062+ *
5063+ * You should have received a copy of the GNU General Public License
5064+ * along with this program; if not, write to the Free Software
5065+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5066+ */
5067+
5068+/*
5069+ * sub-routines for dentry cache
5070+ */
5071+
5072+#ifndef __AUFS_DCSUB_H__
5073+#define __AUFS_DCSUB_H__
5074+
5075+#ifdef __KERNEL__
5076+
7f207e10 5077+#include <linux/dcache.h>
027c5e7a 5078+#include <linux/fs.h>
dece6358
AM
5079+#include <linux/types.h>
5080+
5081+struct dentry;
1facf9fc 5082+
5083+struct au_dpage {
5084+ int ndentry;
5085+ struct dentry **dentries;
5086+};
5087+
5088+struct au_dcsub_pages {
5089+ int ndpage;
5090+ struct au_dpage *dpages;
5091+};
5092+
5093+/* ---------------------------------------------------------------------- */
5094+
7f207e10 5095+/* dcsub.c */
1facf9fc 5096+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
5097+void au_dpages_free(struct au_dcsub_pages *dpages);
5098+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
5099+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
5100+ au_dpages_test test, void *arg);
5101+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
5102+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
5103+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5104+ struct dentry *dentry, int do_include);
4a4d8108 5105+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 5106+
7f207e10
AM
5107+/* ---------------------------------------------------------------------- */
5108+
027c5e7a
AM
5109+static inline int au_d_hashed_positive(struct dentry *d)
5110+{
5111+ int err;
5112+ struct inode *inode = d->d_inode;
5113+ err = 0;
5114+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink))
5115+ err = -ENOENT;
5116+ return err;
5117+}
5118+
5119+static inline int au_d_alive(struct dentry *d)
5120+{
5121+ int err;
5122+ struct inode *inode;
5123+ err = 0;
5124+ if (!IS_ROOT(d))
5125+ err = au_d_hashed_positive(d);
5126+ else {
5127+ inode = d->d_inode;
5128+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink))
5129+ err = -ENOENT;
5130+ }
5131+ return err;
5132+}
5133+
5134+static inline int au_alive_dir(struct dentry *d)
7f207e10 5135+{
027c5e7a
AM
5136+ int err;
5137+ err = au_d_alive(d);
5138+ if (unlikely(err || IS_DEADDIR(d->d_inode)))
5139+ err = -ENOENT;
5140+ return err;
7f207e10
AM
5141+}
5142+
1facf9fc 5143+#endif /* __KERNEL__ */
5144+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
5145diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
5146--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
5147+++ linux/fs/aufs/debug.c 2011-10-24 20:51:51.580466925 +0200
5148@@ -0,0 +1,490 @@
1facf9fc 5149+/*
027c5e7a 5150+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5151+ *
5152+ * This program, aufs is free software; you can redistribute it and/or modify
5153+ * it under the terms of the GNU General Public License as published by
5154+ * the Free Software Foundation; either version 2 of the License, or
5155+ * (at your option) any later version.
dece6358
AM
5156+ *
5157+ * This program is distributed in the hope that it will be useful,
5158+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5159+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5160+ * GNU General Public License for more details.
5161+ *
5162+ * You should have received a copy of the GNU General Public License
5163+ * along with this program; if not, write to the Free Software
5164+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5165+ */
5166+
5167+/*
5168+ * debug print functions
5169+ */
5170+
dece6358 5171+#include <linux/module.h>
7f207e10 5172+#include <linux/vt_kern.h>
1facf9fc 5173+#include "aufs.h"
5174+
5175+int aufs_debug;
5176+MODULE_PARM_DESC(debug, "debug print");
5177+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
5178+
5179+char *au_plevel = KERN_DEBUG;
e49829fe
JR
5180+#define dpri(fmt, ...) do { \
5181+ if ((au_plevel \
5182+ && strcmp(au_plevel, KERN_DEBUG)) \
5183+ || au_debug_test()) \
5184+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 5185+} while (0)
5186+
5187+/* ---------------------------------------------------------------------- */
5188+
5189+void au_dpri_whlist(struct au_nhash *whlist)
5190+{
5191+ unsigned long ul, n;
5192+ struct hlist_head *head;
5193+ struct au_vdir_wh *tpos;
5194+ struct hlist_node *pos;
5195+
5196+ n = whlist->nh_num;
5197+ head = whlist->nh_head;
5198+ for (ul = 0; ul < n; ul++) {
5199+ hlist_for_each_entry(tpos, pos, head, wh_hash)
5200+ dpri("b%d, %.*s, %d\n",
5201+ tpos->wh_bindex,
5202+ tpos->wh_str.len, tpos->wh_str.name,
5203+ tpos->wh_str.len);
5204+ head++;
5205+ }
5206+}
5207+
5208+void au_dpri_vdir(struct au_vdir *vdir)
5209+{
5210+ unsigned long ul;
5211+ union au_vdir_deblk_p p;
5212+ unsigned char *o;
5213+
5214+ if (!vdir || IS_ERR(vdir)) {
5215+ dpri("err %ld\n", PTR_ERR(vdir));
5216+ return;
5217+ }
5218+
5219+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
5220+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
5221+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
5222+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
5223+ p.deblk = vdir->vd_deblk[ul];
5224+ o = p.deblk;
5225+ dpri("[%lu]: %p\n", ul, o);
5226+ }
5227+}
5228+
53392da6 5229+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 5230+ struct dentry *wh)
5231+{
5232+ char *n = NULL;
5233+ int l = 0;
5234+
5235+ if (!inode || IS_ERR(inode)) {
5236+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
5237+ return -1;
5238+ }
5239+
5240+ /* the type of i_blocks depends upon CONFIG_LSF */
5241+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
5242+ && sizeof(inode->i_blocks) != sizeof(u64));
5243+ if (wh) {
5244+ n = (void *)wh->d_name.name;
5245+ l = wh->d_name.len;
5246+ }
5247+
53392da6
AM
5248+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
5249+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
5250+ bindex, inode,
1facf9fc 5251+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
5252+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
5253+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 5254+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 5255+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
5256+ inode->i_state, inode->i_flags, inode->i_version,
5257+ inode->i_generation,
1facf9fc 5258+ l ? ", wh " : "", l, n);
5259+ return 0;
5260+}
5261+
5262+void au_dpri_inode(struct inode *inode)
5263+{
5264+ struct au_iinfo *iinfo;
5265+ aufs_bindex_t bindex;
53392da6 5266+ int err, hn;
1facf9fc 5267+
53392da6 5268+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 5269+ if (err || !au_test_aufs(inode->i_sb))
5270+ return;
5271+
5272+ iinfo = au_ii(inode);
5273+ if (!iinfo)
5274+ return;
5275+ dpri("i-1: bstart %d, bend %d, gen %d\n",
5276+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
5277+ if (iinfo->ii_bstart < 0)
5278+ return;
53392da6
AM
5279+ hn = 0;
5280+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
5281+ hn = !!au_hn(iinfo->ii_hinode + bindex);
5282+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 5283+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 5284+ }
1facf9fc 5285+}
5286+
2cbb1c4b
JR
5287+void au_dpri_dalias(struct inode *inode)
5288+{
5289+ struct dentry *d;
5290+
5291+ spin_lock(&inode->i_lock);
5292+ list_for_each_entry(d, &inode->i_dentry, d_alias)
5293+ au_dpri_dentry(d);
5294+ spin_unlock(&inode->i_lock);
5295+}
5296+
1facf9fc 5297+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
5298+{
5299+ struct dentry *wh = NULL;
53392da6 5300+ int hn;
1facf9fc 5301+
5302+ if (!dentry || IS_ERR(dentry)) {
5303+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
5304+ return -1;
5305+ }
5306+ /* do not call dget_parent() here */
027c5e7a 5307+ /* note: access d_xxx without d_lock */
1facf9fc 5308+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
5309+ bindex,
5310+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
5311+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
027c5e7a 5312+ dentry->d_count, dentry->d_flags);
53392da6 5313+ hn = -1;
1facf9fc 5314+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
5315+ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
53392da6
AM
5316+ if (iinfo) {
5317+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 5318+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 5319+ }
1facf9fc 5320+ }
53392da6 5321+ do_pri_inode(bindex, dentry->d_inode, hn, wh);
1facf9fc 5322+ return 0;
5323+}
5324+
5325+void au_dpri_dentry(struct dentry *dentry)
5326+{
5327+ struct au_dinfo *dinfo;
5328+ aufs_bindex_t bindex;
5329+ int err;
4a4d8108 5330+ struct au_hdentry *hdp;
1facf9fc 5331+
5332+ err = do_pri_dentry(-1, dentry);
5333+ if (err || !au_test_aufs(dentry->d_sb))
5334+ return;
5335+
5336+ dinfo = au_di(dentry);
5337+ if (!dinfo)
5338+ return;
5339+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
5340+ dinfo->di_bstart, dinfo->di_bend,
5341+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
5342+ if (dinfo->di_bstart < 0)
5343+ return;
4a4d8108 5344+ hdp = dinfo->di_hdentry;
1facf9fc 5345+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 5346+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 5347+}
5348+
5349+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
5350+{
5351+ char a[32];
5352+
5353+ if (!file || IS_ERR(file)) {
5354+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
5355+ return -1;
5356+ }
5357+ a[0] = 0;
5358+ if (bindex < 0
5359+ && file->f_dentry
5360+ && au_test_aufs(file->f_dentry->d_sb)
5361+ && au_fi(file))
e49829fe 5362+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 5363+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 5364+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 5365+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 5366+ file->f_version, file->f_pos, a);
1facf9fc 5367+ if (file->f_dentry)
5368+ do_pri_dentry(bindex, file->f_dentry);
5369+ return 0;
5370+}
5371+
5372+void au_dpri_file(struct file *file)
5373+{
5374+ struct au_finfo *finfo;
4a4d8108
AM
5375+ struct au_fidir *fidir;
5376+ struct au_hfile *hfile;
1facf9fc 5377+ aufs_bindex_t bindex;
5378+ int err;
5379+
5380+ err = do_pri_file(-1, file);
5381+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
5382+ return;
5383+
5384+ finfo = au_fi(file);
5385+ if (!finfo)
5386+ return;
4a4d8108 5387+ if (finfo->fi_btop < 0)
1facf9fc 5388+ return;
4a4d8108
AM
5389+ fidir = finfo->fi_hdir;
5390+ if (!fidir)
5391+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
5392+ else
e49829fe
JR
5393+ for (bindex = finfo->fi_btop;
5394+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
5395+ bindex++) {
5396+ hfile = fidir->fd_hfile + bindex;
5397+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
5398+ }
1facf9fc 5399+}
5400+
5401+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
5402+{
5403+ struct vfsmount *mnt;
5404+ struct super_block *sb;
5405+
5406+ if (!br || IS_ERR(br))
5407+ goto out;
5408+ mnt = br->br_mnt;
5409+ if (!mnt || IS_ERR(mnt))
5410+ goto out;
5411+ sb = mnt->mnt_sb;
5412+ if (!sb || IS_ERR(sb))
5413+ goto out;
5414+
1e00d052 5415+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 5416+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 5417+ "xino %d\n",
1e00d052
AM
5418+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
5419+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 5420+ sb->s_flags, sb->s_count,
1facf9fc 5421+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
5422+ return 0;
5423+
4f0767ce 5424+out:
1facf9fc 5425+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
5426+ return -1;
5427+}
5428+
5429+void au_dpri_sb(struct super_block *sb)
5430+{
5431+ struct au_sbinfo *sbinfo;
5432+ aufs_bindex_t bindex;
5433+ int err;
5434+ /* to reuduce stack size */
5435+ struct {
5436+ struct vfsmount mnt;
5437+ struct au_branch fake;
5438+ } *a;
5439+
5440+ /* this function can be called from magic sysrq */
5441+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
5442+ if (unlikely(!a)) {
5443+ dpri("no memory\n");
5444+ return;
5445+ }
5446+
5447+ a->mnt.mnt_sb = sb;
5448+ a->fake.br_perm = 0;
5449+ a->fake.br_mnt = &a->mnt;
5450+ a->fake.br_xino.xi_file = NULL;
5451+ atomic_set(&a->fake.br_count, 0);
5452+ smp_mb(); /* atomic_set */
5453+ err = do_pri_br(-1, &a->fake);
5454+ kfree(a);
5455+ dpri("dev 0x%x\n", sb->s_dev);
5456+ if (err || !au_test_aufs(sb))
5457+ return;
5458+
5459+ sbinfo = au_sbi(sb);
5460+ if (!sbinfo)
5461+ return;
5462+ dpri("nw %d, gen %u, kobj %d\n",
5463+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
5464+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5465+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
5466+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
5467+}
5468+
5469+/* ---------------------------------------------------------------------- */
5470+
/*
 * call schedule_timeout_uninterruptible() repeatedly, feeding its return
 * value back in, until it returns zero. nothing is done when @jiffy is zero.
 */
void au_dbg_sleep_jiffy(int jiffy)
{
	int remain;

	remain = jiffy;
	while (remain != 0)
		remain = schedule_timeout_uninterruptible(remain);
}
5476+
5477+void au_dbg_iattr(struct iattr *ia)
5478+{
5479+#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \
5480+ dpri(#name "\n")
5481+ AuBit(MODE);
5482+ AuBit(UID);
5483+ AuBit(GID);
5484+ AuBit(SIZE);
5485+ AuBit(ATIME);
5486+ AuBit(MTIME);
5487+ AuBit(CTIME);
5488+ AuBit(ATIME_SET);
5489+ AuBit(MTIME_SET);
5490+ AuBit(FORCE);
5491+ AuBit(ATTR_FLAG);
5492+ AuBit(KILL_SUID);
5493+ AuBit(KILL_SGID);
5494+ AuBit(FILE);
5495+ AuBit(KILL_PRIV);
5496+ AuBit(OPEN);
5497+ AuBit(TIMES_SET);
5498+#undef AuBit
5499+ dpri("ia_file %p\n", ia->ia_file);
5500+}
5501+
5502+/* ---------------------------------------------------------------------- */
5503+
027c5e7a
AM
5504+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
5505+{
5506+ struct inode *h_inode, *inode = dentry->d_inode;
5507+ struct dentry *h_dentry;
5508+ aufs_bindex_t bindex, bend, bi;
5509+
5510+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
5511+ return;
5512+
5513+ bend = au_dbend(dentry);
5514+ bi = au_ibend(inode);
5515+ if (bi < bend)
5516+ bend = bi;
5517+ bindex = au_dbstart(dentry);
5518+ bi = au_ibstart(inode);
5519+ if (bi > bindex)
5520+ bindex = bi;
5521+
5522+ for (; bindex <= bend; bindex++) {
5523+ h_dentry = au_h_dptr(dentry, bindex);
5524+ if (!h_dentry)
5525+ continue;
5526+ h_inode = au_h_iptr(inode, bindex);
5527+ if (unlikely(h_inode != h_dentry->d_inode)) {
5528+ int old = au_debug_test();
5529+ if (!old)
5530+ au_debug(1);
5531+ AuDbg("b%d, %s:%d\n", bindex, func, line);
5532+ AuDbgDentry(dentry);
5533+ AuDbgInode(inode);
5534+ if (!old)
5535+ au_debug(0);
5536+ BUG();
5537+ }
5538+ }
5539+}
5540+
1facf9fc 5541+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
5542+{
5543+ struct dentry *parent;
5544+
5545+ parent = dget_parent(dentry);
027c5e7a
AM
5546+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
5547+ AuDebugOn(IS_ROOT(dentry));
5548+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5549+ dput(parent);
5550+}
5551+
5552+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
5553+{
5554+ struct dentry *parent;
027c5e7a 5555+ struct inode *inode;
1facf9fc 5556+
5557+ parent = dget_parent(dentry);
027c5e7a
AM
5558+ inode = dentry->d_inode;
5559+ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode));
5560+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5561+ dput(parent);
5562+}
5563+
5564+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5565+{
5566+ int err, i, j;
5567+ struct au_dcsub_pages dpages;
5568+ struct au_dpage *dpage;
5569+ struct dentry **dentries;
5570+
5571+ err = au_dpages_init(&dpages, GFP_NOFS);
5572+ AuDebugOn(err);
027c5e7a 5573+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 5574+ AuDebugOn(err);
5575+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
5576+ dpage = dpages.dpages + i;
5577+ dentries = dpage->dentries;
5578+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 5579+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 5580+ }
5581+ au_dpages_free(&dpages);
5582+}
5583+
/*
 * call au_dbg_blocked() when au_wkq_test() is true.
 */
void au_dbg_verify_kthread(void)
{
	if (!au_wkq_test())
		return;

	au_dbg_blocked();
	/*
	 * It may be recursive, but udba=notify between two aufs mounts,
	 * where a single ro branch is shared, is not a problem.
	 */
	/* WARN_ON(1); */
}
5595+
5596+/* ---------------------------------------------------------------------- */
5597+
5598+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
5599+{
5600+#ifdef AuForceNoPlink
5601+ au_opt_clr(sbinfo->si_mntflags, PLINK);
5602+#endif
5603+#ifdef AuForceNoXino
5604+ au_opt_clr(sbinfo->si_mntflags, XINO);
5605+#endif
5606+#ifdef AuForceNoRefrof
5607+ au_opt_clr(sbinfo->si_mntflags, REFROF);
5608+#endif
4a4d8108
AM
5609+#ifdef AuForceHnotify
5610+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY);
1facf9fc 5611+#endif
1308ab2a 5612+#ifdef AuForceRd0
5613+ sbinfo->si_rdblk = 0;
5614+ sbinfo->si_rdhash = 0;
5615+#endif
1facf9fc 5616+}
5617+
5618+int __init au_debug_init(void)
5619+{
5620+ aufs_bindex_t bindex;
5621+ struct au_vdir_destr destr;
5622+
5623+ bindex = -1;
5624+ AuDebugOn(bindex >= 0);
5625+
5626+ destr.len = -1;
5627+ AuDebugOn(destr.len < NAME_MAX);
5628+
5629+#ifdef CONFIG_4KSTACKS
4a4d8108 5630+ pr_warning("CONFIG_4KSTACKS is defined.\n");
1facf9fc 5631+#endif
5632+
5633+#ifdef AuForceNoBrs
5634+ sysaufs_brs = 0;
5635+#endif
5636+
5637+ return 0;
5638+}
7f207e10
AM
5639diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
5640--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5641+++ linux/fs/aufs/debug.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 5642@@ -0,0 +1,252 @@
1facf9fc 5643+/*
027c5e7a 5644+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5645+ *
5646+ * This program, aufs is free software; you can redistribute it and/or modify
5647+ * it under the terms of the GNU General Public License as published by
5648+ * the Free Software Foundation; either version 2 of the License, or
5649+ * (at your option) any later version.
dece6358
AM
5650+ *
5651+ * This program is distributed in the hope that it will be useful,
5652+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5653+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5654+ * GNU General Public License for more details.
5655+ *
5656+ * You should have received a copy of the GNU General Public License
5657+ * along with this program; if not, write to the Free Software
5658+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5659+ */
5660+
5661+/*
5662+ * debug print functions
5663+ */
5664+
5665+#ifndef __AUFS_DEBUG_H__
5666+#define __AUFS_DEBUG_H__
5667+
5668+#ifdef __KERNEL__
5669+
1308ab2a 5670+#include <asm/system.h>
dece6358 5671+#include <linux/bug.h>
7f207e10 5672+/* #include <linux/err.h> */
1308ab2a 5673+#include <linux/init.h>
4a4d8108
AM
5674+#include <linux/module.h>
5675+#include <linux/kallsyms.h>
7f207e10 5676+/* #include <linux/kernel.h> */
1facf9fc 5677+#include <linux/delay.h>
7f207e10 5678+/* #include <linux/kd.h> */
1facf9fc 5679+#include <linux/sysrq.h>
5680+#include <linux/aufs_type.h>
5681+
4a4d8108
AM
5682+#include <asm/system.h>
5683+
1facf9fc 5684+#ifdef CONFIG_AUFS_DEBUG
5685+#define AuDebugOn(a) BUG_ON(a)
5686+
5687+/* module parameter */
5688+extern int aufs_debug;
5689+static inline void au_debug(int n)
5690+{
5691+ aufs_debug = n;
5692+ smp_mb();
5693+}
5694+
5695+static inline int au_debug_test(void)
5696+{
5697+ return aufs_debug;
5698+}
5699+#else
5700+#define AuDebugOn(a) do {} while (0)
4a4d8108
AM
5701+AuStubVoid(au_debug, int n)
5702+AuStubInt0(au_debug_test, void)
1facf9fc 5703+#endif /* CONFIG_AUFS_DEBUG */
5704+
5705+/* ---------------------------------------------------------------------- */
5706+
/* debug print */

/* pr_debug() with a "DEBUG: " prefix, only while au_debug_test() is true */
#define AuDbg(fmt, ...) do { \
	if (au_debug_test()) \
		pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
} while (0)

/* print the token @l itself through AuDbg() */
#define AuLabel(l)		AuDbg(#l "\n")

/* pr_err() with an "I/O Error, " prefix */
#define AuIOErr(fmt, ...)	pr_err("I/O Error, " fmt, ##__VA_ARGS__)

/*
 * AuWarn1, AuErr1 and AuIOErr1 print when their own static counter is zero
 * and increment it on every call.
 * NOTE(review): the counter is an unsigned char, so it wraps to zero after
 * 256 calls and the message is printed again -- confirm this is intended.
 */
#define AuWarn1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		pr_warning(fmt, ##__VA_ARGS__); \
} while (0)

#define AuErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		pr_err(fmt, ##__VA_ARGS__); \
} while (0)

#define AuIOErr1(fmt, ...) do { \
	static unsigned char _c; \
	if (!_c++) \
		AuIOErr(fmt, ##__VA_ARGS__); \
} while (0)

/* report an unsupported operation and dump the stack */
#define AuUnsupportMsg	"This operation is not supported." \
			" Please report this application to aufs-users ML."
#define AuUnsupport(fmt, ...) do { \
	pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
	dump_stack(); \
} while (0)

/* debug-print a negative error number */
#define AuTraceErr(e) do { \
	if (unlikely((e) < 0)) \
		AuDbg("err %d\n", (int)(e)); \
} while (0)

/* debug-print the error carried by an error pointer */
#define AuTraceErrPtr(p) do { \
	if (IS_ERR(p)) \
		AuDbg("err %ld\n", PTR_ERR(p)); \
} while (0)

/*
 * dirty macros for debug print, use with "%.*s" and caution.
 * they expand to two arguments, the length and then the name.
 */
#define AuLNPair(qstr)		(qstr)->len, (qstr)->name
#define AuDLNPair(d)		AuLNPair(&(d)->d_name)
5753+
5754+/* ---------------------------------------------------------------------- */
5755+
5756+struct au_sbinfo;
5757+struct au_finfo;
dece6358 5758+struct dentry;
1facf9fc 5759+#ifdef CONFIG_AUFS_DEBUG
5760+extern char *au_plevel;
5761+struct au_nhash;
5762+void au_dpri_whlist(struct au_nhash *whlist);
5763+struct au_vdir;
5764+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 5765+struct inode;
1facf9fc 5766+void au_dpri_inode(struct inode *inode);
2cbb1c4b 5767+void au_dpri_dalias(struct inode *inode);
1facf9fc 5768+void au_dpri_dentry(struct dentry *dentry);
dece6358 5769+struct file;
1facf9fc 5770+void au_dpri_file(struct file *filp);
dece6358 5771+struct super_block;
1facf9fc 5772+void au_dpri_sb(struct super_block *sb);
5773+
5774+void au_dbg_sleep_jiffy(int jiffy);
dece6358 5775+struct iattr;
1facf9fc 5776+void au_dbg_iattr(struct iattr *ia);
5777+
027c5e7a
AM
5778+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
5779+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 5780+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5781+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5782+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 5783+void au_dbg_verify_kthread(void);
5784+
5785+int __init au_debug_init(void);
5786+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5787+#define AuDbgWhlist(w) do { \
5788+ AuDbg(#w "\n"); \
5789+ au_dpri_whlist(w); \
5790+} while (0)
5791+
5792+#define AuDbgVdir(v) do { \
5793+ AuDbg(#v "\n"); \
5794+ au_dpri_vdir(v); \
5795+} while (0)
5796+
5797+#define AuDbgInode(i) do { \
5798+ AuDbg(#i "\n"); \
5799+ au_dpri_inode(i); \
5800+} while (0)
5801+
2cbb1c4b
JR
5802+#define AuDbgDAlias(i) do { \
5803+ AuDbg(#i "\n"); \
5804+ au_dpri_dalias(i); \
5805+} while (0)
5806+
1facf9fc 5807+#define AuDbgDentry(d) do { \
5808+ AuDbg(#d "\n"); \
5809+ au_dpri_dentry(d); \
5810+} while (0)
5811+
5812+#define AuDbgFile(f) do { \
5813+ AuDbg(#f "\n"); \
5814+ au_dpri_file(f); \
5815+} while (0)
5816+
5817+#define AuDbgSb(sb) do { \
5818+ AuDbg(#sb "\n"); \
5819+ au_dpri_sb(sb); \
5820+} while (0)
5821+
5822+#define AuDbgSleep(sec) do { \
5823+ AuDbg("sleep %d sec\n", sec); \
5824+ ssleep(sec); \
5825+} while (0)
5826+
5827+#define AuDbgSleepJiffy(jiffy) do { \
5828+ AuDbg("sleep %d jiffies\n", jiffy); \
5829+ au_dbg_sleep_jiffy(jiffy); \
5830+} while (0)
5831+
5832+#define AuDbgIAttr(ia) do { \
5833+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5834+ au_dbg_iattr(ia); \
5835+} while (0)
4a4d8108
AM
5836+
5837+#define AuDbgSym(addr) do { \
5838+ char sym[KSYM_SYMBOL_LEN]; \
5839+ sprint_symbol(sym, (unsigned long)addr); \
5840+ AuDbg("%s\n", sym); \
5841+} while (0)
5842+
5843+#define AuInfoSym(addr) do { \
5844+ char sym[KSYM_SYMBOL_LEN]; \
5845+ sprint_symbol(sym, (unsigned long)addr); \
5846+ AuInfo("%s\n", sym); \
5847+} while (0)
1facf9fc 5848+#else
027c5e7a 5849+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
5850+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
5851+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
5852+ unsigned int sigen)
5853+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
5854+AuStubVoid(au_dbg_verify_kthread, void)
5855+AuStubInt0(__init au_debug_init, void)
5856+AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)
1facf9fc 5857+
1facf9fc 5858+#define AuDbgWhlist(w) do {} while (0)
5859+#define AuDbgVdir(v) do {} while (0)
5860+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 5861+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 5862+#define AuDbgDentry(d) do {} while (0)
5863+#define AuDbgFile(f) do {} while (0)
5864+#define AuDbgSb(sb) do {} while (0)
5865+#define AuDbgSleep(sec) do {} while (0)
5866+#define AuDbgSleepJiffy(jiffy) do {} while (0)
5867+#define AuDbgIAttr(ia) do {} while (0)
4a4d8108
AM
5868+#define AuDbgSym(addr) do {} while (0)
5869+#define AuInfoSym(addr) do {} while (0)
1facf9fc 5870+#endif /* CONFIG_AUFS_DEBUG */
5871+
5872+/* ---------------------------------------------------------------------- */
5873+
5874+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5875+int __init au_sysrq_init(void);
5876+void au_sysrq_fin(void);
5877+
5878+#ifdef CONFIG_HW_CONSOLE
5879+#define au_dbg_blocked() do { \
5880+ WARN_ON(1); \
0c5527e5 5881+ handle_sysrq('w'); \
1facf9fc 5882+} while (0)
5883+#else
4a4d8108 5884+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5885+#endif
5886+
5887+#else
4a4d8108
AM
5888+AuStubInt0(__init au_sysrq_init, void)
5889+AuStubVoid(au_sysrq_fin, void)
5890+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5891+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5892+
5893+#endif /* __KERNEL__ */
5894+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
5895diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
5896--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
53392da6 5897+++ linux/fs/aufs/dentry.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5898@@ -0,0 +1,1140 @@
1facf9fc 5899+/*
027c5e7a 5900+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5901+ *
5902+ * This program, aufs is free software; you can redistribute it and/or modify
5903+ * it under the terms of the GNU General Public License as published by
5904+ * the Free Software Foundation; either version 2 of the License, or
5905+ * (at your option) any later version.
dece6358
AM
5906+ *
5907+ * This program is distributed in the hope that it will be useful,
5908+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5909+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5910+ * GNU General Public License for more details.
5911+ *
5912+ * You should have received a copy of the GNU General Public License
5913+ * along with this program; if not, write to the Free Software
5914+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5915+ */
5916+
5917+/*
5918+ * lookup and dentry operations
5919+ */
5920+
dece6358 5921+#include <linux/namei.h>
1facf9fc 5922+#include "aufs.h"
5923+
5924+static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5925+{
5926+ if (nd) {
5927+ *h_nd = *nd;
5928+
5929+ /*
5930+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5931+ * due to whiteout and branch permission.
5932+ */
5933+ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
b752ccd1 5934+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
1facf9fc 5935+ /* unnecessary? */
5936+ h_nd->intent.open.file = NULL;
5937+ } else
5938+ memset(h_nd, 0, sizeof(*h_nd));
5939+}
5940+
/* arguments of au_lkup_one(), bundled for au_call_lkup_one() */
struct au_lkup_one_args {
	struct dentry **errp;		/* where the result is stored */
	struct qstr *name;
	struct dentry *h_parent;
	struct au_branch *br;
	struct nameidata *nd;
};
5948+
5949+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5950+ struct au_branch *br, struct nameidata *nd)
5951+{
5952+ struct dentry *h_dentry;
5953+ int err;
5954+ struct nameidata h_nd;
5955+
5956+ if (au_test_fs_null_nd(h_parent->d_sb))
5957+ return vfsub_lookup_one_len(name->name, h_parent, name->len);
5958+
5959+ au_h_nd(&h_nd, nd);
5960+ h_nd.path.dentry = h_parent;
5961+ h_nd.path.mnt = br->br_mnt;
5962+
2cbb1c4b 5963+ err = vfsub_name_hash(name->name, &h_nd.last, name->len);
1facf9fc 5964+ h_dentry = ERR_PTR(err);
5965+ if (!err) {
5966+ path_get(&h_nd.path);
5967+ h_dentry = vfsub_lookup_hash(&h_nd);
5968+ path_put(&h_nd.path);
5969+ }
5970+
4a4d8108 5971+ AuTraceErrPtr(h_dentry);
1facf9fc 5972+ return h_dentry;
5973+}
5974+
5975+static void au_call_lkup_one(void *args)
5976+{
5977+ struct au_lkup_one_args *a = args;
5978+ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5979+}
5980+
5981+#define AuLkup_ALLOW_NEG 1
5982+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
5983+#define au_fset_lkup(flags, name) \
5984+ do { (flags) |= AuLkup_##name; } while (0)
5985+#define au_fclr_lkup(flags, name) \
5986+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 5987+
5988+struct au_do_lookup_args {
5989+ unsigned int flags;
5990+ mode_t type;
5991+ struct nameidata *nd;
5992+};
5993+
5994+/*
5995+ * returns positive/negative dentry, NULL or an error.
5996+ * NULL means whiteout-ed or not-found.
5997+ */
5998+static struct dentry*
5999+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
6000+ aufs_bindex_t bindex, struct qstr *wh_name,
6001+ struct au_do_lookup_args *args)
6002+{
6003+ struct dentry *h_dentry;
6004+ struct inode *h_inode, *inode;
1facf9fc 6005+ struct au_branch *br;
6006+ int wh_found, opq;
6007+ unsigned char wh_able;
6008+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
6009+
1facf9fc 6010+ wh_found = 0;
6011+ br = au_sbr(dentry->d_sb, bindex);
6012+ wh_able = !!au_br_whable(br->br_perm);
6013+ if (wh_able)
6014+ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
6015+ h_dentry = ERR_PTR(wh_found);
6016+ if (!wh_found)
6017+ goto real_lookup;
6018+ if (unlikely(wh_found < 0))
6019+ goto out;
6020+
6021+ /* We found a whiteout */
6022+ /* au_set_dbend(dentry, bindex); */
6023+ au_set_dbwh(dentry, bindex);
6024+ if (!allow_neg)
6025+ return NULL; /* success */
6026+
4f0767ce 6027+real_lookup:
4a4d8108 6028+ h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd);
1facf9fc 6029+ if (IS_ERR(h_dentry))
6030+ goto out;
6031+
6032+ h_inode = h_dentry->d_inode;
6033+ if (!h_inode) {
6034+ if (!allow_neg)
6035+ goto out_neg;
6036+ } else if (wh_found
6037+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
6038+ goto out_neg;
6039+
6040+ if (au_dbend(dentry) <= bindex)
6041+ au_set_dbend(dentry, bindex);
6042+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
6043+ au_set_dbstart(dentry, bindex);
6044+ au_set_h_dptr(dentry, bindex, h_dentry);
6045+
6046+ inode = dentry->d_inode;
6047+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
6048+ || (inode && !S_ISDIR(inode->i_mode)))
6049+ goto out; /* success */
6050+
6051+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
6052+ opq = au_diropq_test(h_dentry, br);
6053+ mutex_unlock(&h_inode->i_mutex);
6054+ if (opq > 0)
6055+ au_set_dbdiropq(dentry, bindex);
6056+ else if (unlikely(opq < 0)) {
6057+ au_set_h_dptr(dentry, bindex, NULL);
6058+ h_dentry = ERR_PTR(opq);
6059+ }
6060+ goto out;
6061+
4f0767ce 6062+out_neg:
1facf9fc 6063+ dput(h_dentry);
6064+ h_dentry = NULL;
4f0767ce 6065+out:
1facf9fc 6066+ return h_dentry;
6067+}
6068+
dece6358
AM
6069+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
6070+{
6071+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
6072+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
6073+ return -EPERM;
6074+ return 0;
6075+}
6076+
1facf9fc 6077+/*
6078+ * returns the number of lower positive dentries,
6079+ * otherwise an error.
6080+ * can be called at unlinking with @type is zero.
6081+ */
6082+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6083+ struct nameidata *nd)
6084+{
6085+ int npositive, err;
6086+ aufs_bindex_t bindex, btail, bdiropq;
6087+ unsigned char isdir;
6088+ struct qstr whname;
6089+ struct au_do_lookup_args args = {
6090+ .flags = 0,
6091+ .type = type,
6092+ .nd = nd
6093+ };
6094+ const struct qstr *name = &dentry->d_name;
6095+ struct dentry *parent;
6096+ struct inode *inode;
6097+
dece6358
AM
6098+ err = au_test_shwh(dentry->d_sb, name);
6099+ if (unlikely(err))
1facf9fc 6100+ goto out;
6101+
6102+ err = au_wh_name_alloc(&whname, name);
6103+ if (unlikely(err))
6104+ goto out;
6105+
6106+ inode = dentry->d_inode;
6107+ isdir = !!(inode && S_ISDIR(inode->i_mode));
6108+ if (!type)
6109+ au_fset_lkup(args.flags, ALLOW_NEG);
6110+
6111+ npositive = 0;
4a4d8108 6112+ parent = dget_parent(dentry);
1facf9fc 6113+ btail = au_dbtaildir(parent);
6114+ for (bindex = bstart; bindex <= btail; bindex++) {
6115+ struct dentry *h_parent, *h_dentry;
6116+ struct inode *h_inode, *h_dir;
6117+
6118+ h_dentry = au_h_dptr(dentry, bindex);
6119+ if (h_dentry) {
6120+ if (h_dentry->d_inode)
6121+ npositive++;
6122+ if (type != S_IFDIR)
6123+ break;
6124+ continue;
6125+ }
6126+ h_parent = au_h_dptr(parent, bindex);
6127+ if (!h_parent)
6128+ continue;
6129+ h_dir = h_parent->d_inode;
6130+ if (!h_dir || !S_ISDIR(h_dir->i_mode))
6131+ continue;
6132+
6133+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
6134+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
6135+ &args);
6136+ mutex_unlock(&h_dir->i_mutex);
6137+ err = PTR_ERR(h_dentry);
6138+ if (IS_ERR(h_dentry))
4a4d8108 6139+ goto out_parent;
1facf9fc 6140+ au_fclr_lkup(args.flags, ALLOW_NEG);
6141+
6142+ if (au_dbwh(dentry) >= 0)
6143+ break;
6144+ if (!h_dentry)
6145+ continue;
6146+ h_inode = h_dentry->d_inode;
6147+ if (!h_inode)
6148+ continue;
6149+ npositive++;
6150+ if (!args.type)
6151+ args.type = h_inode->i_mode & S_IFMT;
6152+ if (args.type != S_IFDIR)
6153+ break;
6154+ else if (isdir) {
6155+ /* the type of lower may be different */
6156+ bdiropq = au_dbdiropq(dentry);
6157+ if (bdiropq >= 0 && bdiropq <= bindex)
6158+ break;
6159+ }
6160+ }
6161+
6162+ if (npositive) {
6163+ AuLabel(positive);
6164+ au_update_dbstart(dentry);
6165+ }
6166+ err = npositive;
6167+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
027c5e7a 6168+ && au_dbstart(dentry) < 0)) {
1facf9fc 6169+ err = -EIO;
027c5e7a
AM
6170+ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n",
6171+ AuDLNPair(dentry), err);
6172+ }
1facf9fc 6173+
4f0767ce 6174+out_parent:
4a4d8108 6175+ dput(parent);
1facf9fc 6176+ kfree(whname.name);
4f0767ce 6177+out:
1facf9fc 6178+ return err;
6179+}
6180+
6181+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6182+ struct au_branch *br)
6183+{
6184+ struct dentry *dentry;
6185+ int wkq_err;
6186+
6187+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
6188+ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
6189+ else {
6190+ struct au_lkup_one_args args = {
6191+ .errp = &dentry,
6192+ .name = name,
6193+ .h_parent = parent,
6194+ .br = br,
6195+ .nd = NULL
6196+ };
6197+
6198+ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
6199+ if (unlikely(wkq_err))
6200+ dentry = ERR_PTR(wkq_err);
6201+ }
6202+
6203+ return dentry;
6204+}
6205+
6206+/*
6207+ * lookup @dentry on @bindex which should be negative.
6208+ */
6209+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
6210+{
6211+ int err;
6212+ struct dentry *parent, *h_parent, *h_dentry;
1facf9fc 6213+
1facf9fc 6214+ parent = dget_parent(dentry);
6215+ h_parent = au_h_dptr(parent, bindex);
4a4d8108 6216+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent,
1facf9fc 6217+ au_sbr(dentry->d_sb, bindex));
6218+ err = PTR_ERR(h_dentry);
6219+ if (IS_ERR(h_dentry))
6220+ goto out;
6221+ if (unlikely(h_dentry->d_inode)) {
6222+ err = -EIO;
027c5e7a
AM
6223+ AuIOErr("%.*s should be negative on b%d.\n",
6224+ AuDLNPair(h_dentry), bindex);
1facf9fc 6225+ dput(h_dentry);
6226+ goto out;
6227+ }
6228+
4a4d8108 6229+ err = 0;
1facf9fc 6230+ if (bindex < au_dbstart(dentry))
6231+ au_set_dbstart(dentry, bindex);
6232+ if (au_dbend(dentry) < bindex)
6233+ au_set_dbend(dentry, bindex);
6234+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 6235+
4f0767ce 6236+out:
1facf9fc 6237+ dput(parent);
6238+ return err;
6239+}
6240+
6241+/* ---------------------------------------------------------------------- */
6242+
6243+/* subset of struct inode */
6244+struct au_iattr {
6245+ unsigned long i_ino;
6246+ /* unsigned int i_nlink; */
6247+ uid_t i_uid;
6248+ gid_t i_gid;
6249+ u64 i_version;
6250+/*
6251+ loff_t i_size;
6252+ blkcnt_t i_blocks;
6253+*/
6254+ umode_t i_mode;
6255+};
6256+
6257+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
6258+{
6259+ ia->i_ino = h_inode->i_ino;
6260+ /* ia->i_nlink = h_inode->i_nlink; */
6261+ ia->i_uid = h_inode->i_uid;
6262+ ia->i_gid = h_inode->i_gid;
6263+ ia->i_version = h_inode->i_version;
6264+/*
6265+ ia->i_size = h_inode->i_size;
6266+ ia->i_blocks = h_inode->i_blocks;
6267+*/
6268+ ia->i_mode = (h_inode->i_mode & S_IFMT);
6269+}
6270+
6271+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
6272+{
6273+ return ia->i_ino != h_inode->i_ino
6274+ /* || ia->i_nlink != h_inode->i_nlink */
6275+ || ia->i_uid != h_inode->i_uid
6276+ || ia->i_gid != h_inode->i_gid
6277+ || ia->i_version != h_inode->i_version
6278+/*
6279+ || ia->i_size != h_inode->i_size
6280+ || ia->i_blocks != h_inode->i_blocks
6281+*/
6282+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
6283+}
6284+
6285+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
6286+ struct au_branch *br)
6287+{
6288+ int err;
6289+ struct au_iattr ia;
6290+ struct inode *h_inode;
6291+ struct dentry *h_d;
6292+ struct super_block *h_sb;
6293+
6294+ err = 0;
6295+ memset(&ia, -1, sizeof(ia));
6296+ h_sb = h_dentry->d_sb;
6297+ h_inode = h_dentry->d_inode;
6298+ if (h_inode)
6299+ au_iattr_save(&ia, h_inode);
6300+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
6301+ /* nfs d_revalidate may return 0 for negative dentry */
6302+ /* fuse d_revalidate always return 0 for negative dentry */
6303+ goto out;
6304+
6305+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
6306+ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
6307+ err = PTR_ERR(h_d);
6308+ if (IS_ERR(h_d))
6309+ goto out;
6310+
6311+ err = 0;
6312+ if (unlikely(h_d != h_dentry
6313+ || h_d->d_inode != h_inode
6314+ || (h_inode && au_iattr_test(&ia, h_inode))))
6315+ err = au_busy_or_stale();
6316+ dput(h_d);
6317+
4f0767ce 6318+out:
1facf9fc 6319+ AuTraceErr(err);
6320+ return err;
6321+}
6322+
6323+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6324+ struct dentry *h_parent, struct au_branch *br)
6325+{
6326+ int err;
6327+
6328+ err = 0;
027c5e7a
AM
6329+ if (udba == AuOpt_UDBA_REVAL
6330+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 6331+ IMustLock(h_dir);
6332+ err = (h_dentry->d_parent->d_inode != h_dir);
027c5e7a 6333+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 6334+ err = au_h_verify_dentry(h_dentry, h_parent, br);
6335+
6336+ return err;
6337+}
6338+
6339+/* ---------------------------------------------------------------------- */
6340+
/*
 * Re-arrange the lower dentry array of @dentry so that every entry sits at
 * the index which au_br_index() currently reports for its branch id.
 * Entries whose branch id is no longer found (negative index) are released.
 * di_bwh and di_bdiropq follow their entries and are reset to -1 unless the
 * new index is within the superblock range and au_sbr_whable() accepts it.
 * Finally di_bstart/di_bend are recomputed by scanning 0..au_dbend(@parent).
 *
 * The dinfo of @dentry must be write-locked.
 * Returns 0 when at least one lower dentry remains, otherwise -EIO.
 */
static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
{
	int err;
	aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
	struct au_hdentry tmp, *p, *q;
	struct au_dinfo *dinfo;
	struct super_block *sb;

	DiMustWriteLock(dentry);

	sb = dentry->d_sb;
	dinfo = au_di(dentry);
	bend = dinfo->di_bend;
	bwh = dinfo->di_bwh;
	bdiropq = dinfo->di_bdiropq;
	p = dinfo->di_hdentry + dinfo->di_bstart;
	for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
		if (!p->hd_dentry)
			continue;

		new_bindex = au_br_index(sb, p->hd_id);
		if (new_bindex == bindex)
			continue;

		/* remember where the whiteout/opaque markers are moving to */
		if (dinfo->di_bwh == bindex)
			bwh = new_bindex;
		if (dinfo->di_bdiropq == bindex)
			bdiropq = new_bindex;
		if (new_bindex < 0) {
			/* no index for this branch id any more, drop the entry */
			au_hdput(p);
			p->hd_dentry = NULL;
			continue;
		}

		/* swap two lower dentries, and loop again */
		q = dinfo->di_hdentry + new_bindex;
		tmp = *q;
		*q = *p;
		*p = tmp;
		if (tmp.hd_dentry) {
			/* the entry swapped in is unprocessed, revisit this slot */
			bindex--;
			p--;
		}
	}

	dinfo->di_bwh = -1;
	if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
		dinfo->di_bwh = bwh;

	dinfo->di_bdiropq = -1;
	if (bdiropq >= 0
	    && bdiropq <= au_sbend(sb)
	    && au_sbr_whable(sb, bdiropq))
		dinfo->di_bdiropq = bdiropq;

	/* recompute the range; stays -EIO when no lower dentry is left */
	err = -EIO;
	dinfo->di_bstart = -1;
	dinfo->di_bend = -1;
	bend = au_dbend(parent);
	p = dinfo->di_hdentry;
	for (bindex = 0; bindex <= bend; bindex++, p++)
		if (p->hd_dentry) {
			dinfo->di_bstart = bindex;
			break;
		}

	if (dinfo->di_bstart >= 0) {
		p = dinfo->di_hdentry + bend;
		for (bindex = bend; bindex >= 0; bindex--, p--)
			if (p->hd_dentry) {
				dinfo->di_bend = bindex;
				err = 0;
				break;
			}
	}

	return err;
}
6419+
027c5e7a 6420+static void au_do_hide(struct dentry *dentry)
1facf9fc 6421+{
027c5e7a 6422+ struct inode *inode;
1facf9fc 6423+
027c5e7a
AM
6424+ inode = dentry->d_inode;
6425+ if (inode) {
6426+ if (!S_ISDIR(inode->i_mode)) {
6427+ if (inode->i_nlink && !d_unhashed(dentry))
6428+ drop_nlink(inode);
6429+ } else {
6430+ clear_nlink(inode);
6431+ /* stop next lookup */
6432+ inode->i_flags |= S_DEAD;
6433+ }
6434+ smp_mb(); /* necessary? */
6435+ }
6436+ d_drop(dentry);
6437+}
1308ab2a 6438+
027c5e7a
AM
6439+static int au_hide_children(struct dentry *parent)
6440+{
6441+ int err, i, j, ndentry;
6442+ struct au_dcsub_pages dpages;
6443+ struct au_dpage *dpage;
6444+ struct dentry *dentry;
1facf9fc 6445+
027c5e7a 6446+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 6447+ if (unlikely(err))
6448+ goto out;
027c5e7a
AM
6449+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
6450+ if (unlikely(err))
6451+ goto out_dpages;
1facf9fc 6452+
027c5e7a
AM
6453+ /* in reverse order */
6454+ for (i = dpages.ndpage - 1; i >= 0; i--) {
6455+ dpage = dpages.dpages + i;
6456+ ndentry = dpage->ndentry;
6457+ for (j = ndentry - 1; j >= 0; j--) {
6458+ dentry = dpage->dentries[j];
6459+ if (dentry != parent)
6460+ au_do_hide(dentry);
6461+ }
6462+ }
1facf9fc 6463+
027c5e7a
AM
6464+out_dpages:
6465+ au_dpages_free(&dpages);
4f0767ce 6466+out:
027c5e7a 6467+ return err;
1facf9fc 6468+}
6469+
027c5e7a 6470+static void au_hide(struct dentry *dentry)
1facf9fc 6471+{
027c5e7a
AM
6472+ int err;
6473+ struct inode *inode;
1facf9fc 6474+
027c5e7a
AM
6475+ AuDbgDentry(dentry);
6476+ inode = dentry->d_inode;
6477+ if (inode && S_ISDIR(inode->i_mode)) {
6478+ /* shrink_dcache_parent(dentry); */
6479+ err = au_hide_children(dentry);
6480+ if (unlikely(err))
6481+ AuIOErr("%.*s, failed hiding children, ignored %d\n",
6482+ AuDLNPair(dentry), err);
6483+ }
6484+ au_do_hide(dentry);
6485+}
1facf9fc 6486+
027c5e7a
AM
6487+/*
6488+ * By adding a dirty branch, a cached dentry may be affected in various ways.
6489+ *
6490+ * a dirty branch is added
6491+ * - on the top of layers
6492+ * - in the middle of layers
6493+ * - to the bottom of layers
6494+ *
6495+ * on the added branch there exists
6496+ * - a whiteout
6497+ * - a diropq
6498+ * - a same named entry
6499+ * + exist
6500+ * * negative --> positive
6501+ * * positive --> positive
6502+ * - type is unchanged
6503+ * - type is changed
6504+ * + doesn't exist
6505+ * * negative --> negative
6506+ * * positive --> negative (rejected by au_br_del() for non-dir case)
6507+ * - none
6508+ */
6509+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
6510+ struct au_dinfo *tmp)
6511+{
6512+ int err;
6513+ aufs_bindex_t bindex, bend;
6514+ struct {
6515+ struct dentry *dentry;
6516+ struct inode *inode;
6517+ mode_t mode;
6518+ } orig_h, tmp_h;
6519+ struct au_hdentry *hd;
6520+ struct inode *inode, *h_inode;
6521+ struct dentry *h_dentry;
6522+
6523+ err = 0;
6524+ AuDebugOn(dinfo->di_bstart < 0);
6525+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
6526+ orig_h.inode = orig_h.dentry->d_inode;
6527+ orig_h.mode = 0;
6528+ if (orig_h.inode)
6529+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
6530+ memset(&tmp_h, 0, sizeof(tmp_h));
6531+ if (tmp->di_bstart >= 0) {
6532+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
6533+ tmp_h.inode = tmp_h.dentry->d_inode;
6534+ if (tmp_h.inode)
6535+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
6536+ }
6537+
6538+ inode = dentry->d_inode;
6539+ if (!orig_h.inode) {
6540+ AuDbg("nagative originally\n");
6541+ if (inode) {
6542+ au_hide(dentry);
6543+ goto out;
6544+ }
6545+ AuDebugOn(inode);
6546+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6547+ AuDebugOn(dinfo->di_bdiropq != -1);
6548+
6549+ if (!tmp_h.inode) {
6550+ AuDbg("negative --> negative\n");
6551+ /* should have only one negative lower */
6552+ if (tmp->di_bstart >= 0
6553+ && tmp->di_bstart < dinfo->di_bstart) {
6554+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
6555+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6556+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
6557+ au_di_cp(dinfo, tmp);
6558+ hd = tmp->di_hdentry + tmp->di_bstart;
6559+ au_set_h_dptr(dentry, tmp->di_bstart,
6560+ dget(hd->hd_dentry));
6561+ }
6562+ au_dbg_verify_dinode(dentry);
6563+ } else {
6564+ AuDbg("negative --> positive\n");
6565+ /*
6566+ * similar to the behaviour of creating with bypassing
6567+ * aufs.
6568+ * unhash it in order to force an error in the
6569+ * succeeding create operation.
6570+ * we should not set S_DEAD here.
6571+ */
6572+ d_drop(dentry);
6573+ /* au_di_swap(tmp, dinfo); */
6574+ au_dbg_verify_dinode(dentry);
6575+ }
6576+ } else {
6577+ AuDbg("positive originally\n");
6578+ /* inode may be NULL */
6579+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
6580+ if (!tmp_h.inode) {
6581+ AuDbg("positive --> negative\n");
6582+ /* or bypassing aufs */
6583+ au_hide(dentry);
6584+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
6585+ dinfo->di_bwh = tmp->di_bwh;
6586+ if (inode)
6587+ err = au_refresh_hinode_self(inode);
6588+ au_dbg_verify_dinode(dentry);
6589+ } else if (orig_h.mode == tmp_h.mode) {
6590+ AuDbg("positive --> positive, same type\n");
6591+ if (!S_ISDIR(orig_h.mode)
6592+ && dinfo->di_bstart > tmp->di_bstart) {
6593+ /*
6594+ * similar to the behaviour of removing and
6595+ * creating.
6596+ */
6597+ au_hide(dentry);
6598+ if (inode)
6599+ err = au_refresh_hinode_self(inode);
6600+ au_dbg_verify_dinode(dentry);
6601+ } else {
6602+ /* fill empty slots */
6603+ if (dinfo->di_bstart > tmp->di_bstart)
6604+ dinfo->di_bstart = tmp->di_bstart;
6605+ if (dinfo->di_bend < tmp->di_bend)
6606+ dinfo->di_bend = tmp->di_bend;
6607+ dinfo->di_bwh = tmp->di_bwh;
6608+ dinfo->di_bdiropq = tmp->di_bdiropq;
6609+ hd = tmp->di_hdentry;
6610+ bend = dinfo->di_bend;
6611+ for (bindex = tmp->di_bstart; bindex <= bend;
6612+ bindex++) {
6613+ if (au_h_dptr(dentry, bindex))
6614+ continue;
6615+ h_dentry = hd[bindex].hd_dentry;
6616+ if (!h_dentry)
6617+ continue;
6618+ h_inode = h_dentry->d_inode;
6619+ AuDebugOn(!h_inode);
6620+ AuDebugOn(orig_h.mode
6621+ != (h_inode->i_mode
6622+ & S_IFMT));
6623+ au_set_h_dptr(dentry, bindex,
6624+ dget(h_dentry));
6625+ }
6626+ err = au_refresh_hinode(inode, dentry);
6627+ au_dbg_verify_dinode(dentry);
6628+ }
6629+ } else {
6630+ AuDbg("positive --> positive, different type\n");
6631+ /* similar to the behaviour of removing and creating */
6632+ au_hide(dentry);
6633+ if (inode)
6634+ err = au_refresh_hinode_self(inode);
6635+ au_dbg_verify_dinode(dentry);
6636+ }
6637+ }
6638+
6639+out:
6640+ return err;
6641+}
6642+
/*
 * Refresh the lower dentries of @dentry after the branch layout changed.
 *
 * The dinfo of @dentry must be write-locked, @dentry must not be the root
 * and @parent must be positive. When au_digen_test(@parent, sigen) is
 * non-zero, that value is returned without touching @dentry.
 *
 * Steps: grow the lower dentry array to the current number of branches,
 * re-index it (au_do_refresh_hdentry()), then, for a hashed dentry with a
 * valid range, look the name up again into a temporary dinfo and reconcile
 * both by au_refresh_by_dinfo().
 *
 * Returns 0 on success (the generation of @dentry is then updated), or a
 * negative errno.
 */
int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
{
	int err, ebrange;
	unsigned int sigen;
	struct au_dinfo *dinfo, *tmp;
	struct super_block *sb;
	struct inode *inode;

	DiMustWriteLock(dentry);
	AuDebugOn(IS_ROOT(dentry));
	AuDebugOn(!parent->d_inode);

	sb = dentry->d_sb;
	inode = dentry->d_inode;
	sigen = au_sigen(sb);
	err = au_digen_test(parent, sigen);
	if (unlikely(err))
		goto out;

	dinfo = au_di(dentry);
	err = au_di_realloc(dinfo, au_sbend(sb) + 1);
	if (unlikely(err))
		goto out;
	ebrange = au_dbrange_test(dentry);
	if (!ebrange)
		ebrange = au_do_refresh_hdentry(dentry, parent);

	/* unhashed or without a usable range: only the inode is refreshed */
	if (d_unhashed(dentry) || ebrange) {
		AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
		if (inode)
			err = au_refresh_hinode_self(inode);
		au_dbg_verify_dinode(dentry);
		if (!err)
			goto out_dgen; /* success */
		goto out;
	}

	/* temporary dinfo */
	AuDbgDentry(dentry);
	err = -ENOMEM;
	tmp = au_di_alloc(sb, AuLsc_DI_TMP);
	if (unlikely(!tmp))
		goto out;
	/* swap tmp in, look the name up, then swap the original dinfo back */
	au_di_swap(tmp, dinfo);
	/* returns the number of positive dentries */
	/*
	 * if current working dir is removed, it returns an error.
	 * but the dentry is legal.
	 */
	err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL);
	AuDbgDentry(dentry);
	au_di_swap(tmp, dinfo);
	if (err == -ENOENT)
		err = 0;
	if (err >= 0) {
		/* compare/refresh by dinfo */
		AuDbgDentry(dentry);
		err = au_refresh_by_dinfo(dentry, dinfo, tmp);
		au_dbg_verify_dinode(dentry);
		AuTraceErr(err);
	}
	/* au_di_alloc() returned tmp write-locked */
	au_rw_write_unlock(&tmp->di_rwsem);
	au_di_free(tmp);
	if (unlikely(err))
		goto out;

out_dgen:
	au_update_digen(dentry);
out:
	if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
		AuIOErr("failed refreshing %.*s, %d\n",
			AuDLNPair(dentry), err);
		AuDbgDentry(dentry);
	}
	AuTraceErr(err);
	return err;
}
6720+
/*
 * Call ->d_revalidate() of the lower dentry @h_dentry on branch @bindex.
 * Nothing is done when the lower dentry has no DCACHE_OP_REVALIDATE flag.
 * For filesystems matched by au_test_fs_null_nd() a NULL nameidata is
 * passed; otherwise a nameidata is built whose path points to the lower
 * parent dentry and the mount of the branch.
 *
 * Returns 0 when the lower dentry is valid, -EINVAL when it is reported
 * invalid, or the negative value returned by the lower ->d_revalidate().
 */
static noinline_for_stack
int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
		    struct dentry *dentry, aufs_bindex_t bindex)
{
	int err, valid;
	int (*reval)(struct dentry *, struct nameidata *);

	err = 0;
	if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
		goto out;
	reval = h_dentry->d_op->d_revalidate;

	AuDbg("b%d\n", bindex);
	if (au_test_fs_null_nd(h_dentry->d_sb))
		/* it may return tri-state */
		valid = reval(h_dentry, NULL);
	else {
		struct nameidata h_nd;
		int locked;
		struct dentry *parent;

		au_h_nd(&h_nd, nd);
		/*
		 * NOTE(review): nd is dereferenced here, before the "nd &&"
		 * test on the next line. If a caller can reach this branch
		 * with nd == NULL this is a NULL dereference; otherwise the
		 * test below is redundant. Confirm against the callers.
		 */
		parent = nd->path.dentry;
		locked = (nd && nd->path.dentry != dentry);
		if (locked)
			di_read_lock_parent(parent, AuLock_IR);
		BUG_ON(bindex > au_dbend(parent));
		h_nd.path.dentry = au_h_dptr(parent, bindex);
		BUG_ON(!h_nd.path.dentry);
		h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
		/* hold the lower path while the lower fs uses it */
		path_get(&h_nd.path);
		valid = reval(h_dentry, &h_nd);
		path_put(&h_nd.path);
		if (locked)
			di_read_unlock(parent, AuLock_IR);
	}

	if (unlikely(valid < 0))
		err = valid;
	else if (!valid)
		err = -EINVAL;

out:
	AuTraceErr(err);
	return err;
}
6767+
/* todo: remove this */
/*
 * Revalidate the lower dentries of @dentry: only the topmost one, or for a
 * directory all of them up to au_dbtaildir().
 * Each lower dentry is passed to au_do_h_d_reval(). When @do_udba is set,
 * hashed state and name are additionally compared (except for the root),
 * and presence, file type, "nlink > 0" and the cached lower inode are
 * compared with @inode.
 *
 * Returns 0 when everything matches, -EINVAL on a mismatch, or the error
 * from au_do_h_d_reval().
 */
static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
			  struct nameidata *nd, int do_udba)
{
	int err;
	umode_t mode, h_mode;
	aufs_bindex_t bindex, btail, bstart, ibs, ibe;
	unsigned char plus, unhashed, is_root, h_plus;
	struct inode *h_inode, *h_cached_inode;
	struct dentry *h_dentry;
	struct qstr *name, *h_name;

	err = 0;
	plus = 0;
	mode = 0;
	ibs = -1;
	ibe = -1;
	unhashed = !!d_unhashed(dentry);
	is_root = !!IS_ROOT(dentry);
	name = &dentry->d_name;

	/*
	 * Theoretically, REVAL test should be unnecessary in case of
	 * {FS,I}NOTIFY.
	 * But {fs,i}notify doesn't fire some necessary events,
	 *	IN_ATTRIB for atime/nlink/pageio
	 *	IN_DELETE for NFS dentry
	 * Let's do REVAL test too.
	 */
	if (do_udba && inode) {
		mode = (inode->i_mode & S_IFMT);
		plus = (inode->i_nlink > 0);
		ibs = au_ibstart(inode);
		ibe = au_ibend(inode);
	}

	bstart = au_dbstart(dentry);
	btail = bstart;
	if (inode && S_ISDIR(inode->i_mode))
		btail = au_dbtaildir(dentry);
	for (bindex = bstart; bindex <= btail; bindex++) {
		h_dentry = au_h_dptr(dentry, bindex);
		if (!h_dentry)
			continue;

		AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
		/* d_lock is held while the lower name/hash state is compared */
		spin_lock(&h_dentry->d_lock);
		h_name = &h_dentry->d_name;
		if (unlikely(do_udba
			     && !is_root
			     && (unhashed != !!d_unhashed(h_dentry)
				 || name->len != h_name->len
				 || memcmp(name->name, h_name->name, name->len))
			    )) {
			AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
				  unhashed, d_unhashed(h_dentry),
				  AuDLNPair(dentry), AuDLNPair(h_dentry));
			spin_unlock(&h_dentry->d_lock);
			goto err;
		}
		spin_unlock(&h_dentry->d_lock);

		err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
		if (unlikely(err))
			/* do not goto err, to keep the errno */
			break;

		/* todo: plink too? */
		if (!do_udba)
			continue;

		/* UDBA tests */
		h_inode = h_dentry->d_inode;
		if (unlikely(!!inode != !!h_inode))
			goto err;

		/* default to the aufs side values so a negative lower matches */
		h_plus = plus;
		h_mode = mode;
		h_cached_inode = h_inode;
		if (h_inode) {
			h_mode = (h_inode->i_mode & S_IFMT);
			h_plus = (h_inode->i_nlink > 0);
		}
		if (inode && ibs <= bindex && bindex <= ibe)
			h_cached_inode = au_h_iptr(inode, bindex);

		if (unlikely(plus != h_plus
			     || mode != h_mode
			     || h_cached_inode != h_inode))
			goto err;
		continue;

		/* mismatch: reached only by goto, the label lives in the loop */
	err:
		err = -EINVAL;
		break;
	}

	return err;
}
6867+
027c5e7a 6868+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 6869+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6870+{
6871+ int err;
6872+ struct dentry *parent;
1facf9fc 6873+
027c5e7a 6874+ if (!au_digen_test(dentry, sigen))
1facf9fc 6875+ return 0;
6876+
6877+ parent = dget_parent(dentry);
6878+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6879+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 6880+ au_dbg_verify_gen(parent, sigen);
027c5e7a 6881+ err = au_refresh_dentry(dentry, parent);
1facf9fc 6882+ di_read_unlock(parent, AuLock_IR);
6883+ dput(parent);
027c5e7a 6884+ AuTraceErr(err);
1facf9fc 6885+ return err;
6886+}
6887+
6888+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6889+{
6890+ int err;
6891+ struct dentry *d, *parent;
6892+ struct inode *inode;
6893+
027c5e7a 6894+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 6895+ return simple_reval_dpath(dentry, sigen);
6896+
6897+ /* slow loop, keep it simple and stupid */
6898+ /* cf: au_cpup_dirs() */
6899+ err = 0;
6900+ parent = NULL;
027c5e7a 6901+ while (au_digen_test(dentry, sigen)) {
1facf9fc 6902+ d = dentry;
6903+ while (1) {
6904+ dput(parent);
6905+ parent = dget_parent(d);
027c5e7a 6906+ if (!au_digen_test(parent, sigen))
1facf9fc 6907+ break;
6908+ d = parent;
6909+ }
6910+
6911+ inode = d->d_inode;
6912+ if (d != dentry)
027c5e7a 6913+ di_write_lock_child2(d);
1facf9fc 6914+
6915+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
6916+ if (au_digen_test(d, sigen)) {
6917+ /*
6918+ * todo: consolidate with simple_reval_dpath(),
6919+ * do_refresh() and au_reval_for_attr().
6920+ */
1facf9fc 6921+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6922+ err = au_refresh_dentry(d, parent);
1facf9fc 6923+ di_read_unlock(parent, AuLock_IR);
6924+ }
6925+
6926+ if (d != dentry)
6927+ di_write_unlock(d);
6928+ dput(parent);
6929+ if (unlikely(err))
6930+ break;
6931+ }
6932+
6933+ return err;
6934+}
6935+
/*
 * if valid returns 1, otherwise 0.
 *
 * Other return values visible here: -ECHILD when called in rcu-walk mode
 * (LOOKUP_RCU), and the error from aufs_read_lock() when locking fails.
 * Whenever 0 is returned the dentry has been unhashed by d_drop().
 */
static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	int valid, err;
	unsigned int sigen;
	unsigned char do_udba;
	struct super_block *sb;
	struct inode *inode;

	/* todo: support rcu-walk? */
	if (nd && (nd->flags & LOOKUP_RCU))
		return -ECHILD;

	valid = 0;
	if (unlikely(!au_di(dentry)))
		goto out;

	inode = dentry->d_inode;
	if (inode && is_bad_inode(inode))
		goto out;

	valid = 1;
	sb = dentry->d_sb;
	/*
	 * todo: very ugly
	 * i_mutex of parent dir may be held,
	 * but we should not return 'invalid' due to busy.
	 */
	err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
	if (unlikely(err)) {
		valid = err;
		AuTraceErr(err);
		goto out;
	}
	if (unlikely(au_dbrange_test(dentry))) {
		err = -EINVAL;
		AuTraceErr(err);
		goto out_dgrade;
	}

	/* refresh the dentry first when au_digen_test() says it is stale */
	sigen = au_sigen(sb);
	if (au_digen_test(dentry, sigen)) {
		AuDebugOn(IS_ROOT(dentry));
		err = au_reval_dpath(dentry, sigen);
		if (unlikely(err)) {
			AuTraceErr(err);
			goto out_dgrade;
		}
	}
	/* taken with AuLock_DW above; the checks below run after downgrading */
	di_downgrade_lock(dentry, AuLock_IR);

	err = -EINVAL;
	if (inode && (IS_DEADDIR(inode) || !inode->i_nlink))
		goto out_inval;

	do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
	if (do_udba && inode) {
		aufs_bindex_t bstart = au_ibstart(inode);
		struct inode *h_inode;

		if (bstart >= 0) {
			h_inode = au_h_iptr(inode, bstart);
			if (h_inode && au_test_higen(inode, h_inode))
				goto out_inval;
		}
	}

	err = h_d_revalidate(dentry, inode, nd, do_udba);
	if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
		err = -EIO;
		AuDbg("both of real entry and whiteout found, %.*s, err %d\n",
		      AuDLNPair(dentry), err);
	}
	goto out_inval;

	/* error paths which still hold the lock in its original mode */
out_dgrade:
	di_downgrade_lock(dentry, AuLock_IR);
out_inval:
	aufs_read_unlock(dentry, AuLock_IR);
	AuTraceErr(err);
	valid = !err;
out:
	if (!valid) {
		AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid);
		d_drop(dentry);
	}
	return valid;
}
7026+
/*
 * ->d_release(): tear down the aufs private data of @dentry, if any.
 */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
7034+
/* dentry operations of aufs; only revalidate and release are provided */
const struct dentry_operations aufs_dop = {
	.d_revalidate	= aufs_d_revalidate,
	.d_release	= aufs_d_release
};
7f207e10
AM
7039diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
7040--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
53392da6 7041+++ linux/fs/aufs/dentry.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7042@@ -0,0 +1,238 @@
1facf9fc 7043+/*
027c5e7a 7044+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7045+ *
7046+ * This program, aufs is free software; you can redistribute it and/or modify
7047+ * it under the terms of the GNU General Public License as published by
7048+ * the Free Software Foundation; either version 2 of the License, or
7049+ * (at your option) any later version.
dece6358
AM
7050+ *
7051+ * This program is distributed in the hope that it will be useful,
7052+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7053+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7054+ * GNU General Public License for more details.
7055+ *
7056+ * You should have received a copy of the GNU General Public License
7057+ * along with this program; if not, write to the Free Software
7058+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7059+ */
7060+
7061+/*
7062+ * lookup and dentry operations
7063+ */
7064+
7065+#ifndef __AUFS_DENTRY_H__
7066+#define __AUFS_DENTRY_H__
7067+
7068+#ifdef __KERNEL__
7069+
dece6358 7070+#include <linux/dcache.h>
1facf9fc 7071+#include <linux/aufs_type.h>
7072+#include "rwsem.h"
7073+
/* one slot of the per-dentry lower dentry array */
struct au_hdentry {
	struct dentry		*hd_dentry;	/* lower dentry, NULL if unused */
	aufs_bindex_t		hd_id;		/* branch id, cf. au_br_index() */
};
7078+
/* aufs private data of a dentry, reached via au_di() */
struct au_dinfo {
	atomic_t		di_generation;

	struct au_rwsem		di_rwsem;
	/* branch indices; -1 means "not set" (see au_di_alloc()) */
	aufs_bindex_t		di_bstart, di_bend, di_bwh, di_bdiropq;
	struct au_hdentry	*di_hdentry;	/* array indexed by branch index */
} ____cacheline_aligned_in_smp;
1facf9fc 7086+
7087+/* ---------------------------------------------------------------------- */
7088+
7089+/* dentry.c */
4a4d8108 7090+extern const struct dentry_operations aufs_dop;
1facf9fc 7091+struct au_branch;
7092+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
7093+ struct au_branch *br, struct nameidata *nd);
7094+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
7095+ struct au_branch *br);
7096+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7097+ struct dentry *h_parent, struct au_branch *br);
7098+
7099+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
7100+ struct nameidata *nd);
7101+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
027c5e7a 7102+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 7103+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
7104+
7105+/* dinfo.c */
4a4d8108 7106+void au_di_init_once(void *_di);
027c5e7a
AM
7107+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
7108+void au_di_free(struct au_dinfo *dinfo);
7109+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
7110+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
7111+int au_di_init(struct dentry *dentry);
7112+void au_di_fin(struct dentry *dentry);
1facf9fc 7113+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
7114+
7115+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
7116+void di_read_unlock(struct dentry *d, int flags);
7117+void di_downgrade_lock(struct dentry *d, int flags);
7118+void di_write_lock(struct dentry *d, unsigned int lsc);
7119+void di_write_unlock(struct dentry *d);
7120+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
7121+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
7122+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
7123+
7124+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 7125+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 7126+aufs_bindex_t au_dbtail(struct dentry *dentry);
7127+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
7128+
7129+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7130+ struct dentry *h_dentry);
027c5e7a
AM
7131+int au_digen_test(struct dentry *dentry, unsigned int sigen);
7132+int au_dbrange_test(struct dentry *dentry);
1facf9fc 7133+void au_update_digen(struct dentry *dentry);
7134+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
7135+void au_update_dbstart(struct dentry *dentry);
7136+void au_update_dbend(struct dentry *dentry);
7137+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
7138+
7139+/* ---------------------------------------------------------------------- */
7140+
/* the aufs private data of @dentry, stored in ->d_fsdata; may be NULL */
static inline struct au_dinfo *au_di(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
7145+
7146+/* ---------------------------------------------------------------------- */
7147+
/*
 * lock subclass for dinfo
 * passed as the 'lsc' argument of di_read_lock()/di_write_lock() and
 * au_di_alloc().
 */
enum {
	AuLsc_DI_CHILD,		/* child first */
	AuLsc_DI_CHILD2,	/* rename(2), link(2), and cpup at hnotify */
	AuLsc_DI_CHILD3,	/* copyup dirs */
	AuLsc_DI_PARENT,
	AuLsc_DI_PARENT2,
	AuLsc_DI_PARENT3,
	AuLsc_DI_TMP		/* temp for replacing dinfo */
};
7158+
7159+/*
7160+ * di_read_lock_child, di_write_lock_child,
7161+ * di_read_lock_child2, di_write_lock_child2,
7162+ * di_read_lock_child3, di_write_lock_child3,
7163+ * di_read_lock_parent, di_write_lock_parent,
7164+ * di_read_lock_parent2, di_write_lock_parent2,
7165+ * di_read_lock_parent3, di_write_lock_parent3,
7166+ */
7167+#define AuReadLockFunc(name, lsc) \
7168+static inline void di_read_lock_##name(struct dentry *d, int flags) \
7169+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
7170+
7171+#define AuWriteLockFunc(name, lsc) \
7172+static inline void di_write_lock_##name(struct dentry *d) \
7173+{ di_write_lock(d, AuLsc_DI_##lsc); }
7174+
7175+#define AuRWLockFuncs(name, lsc) \
7176+ AuReadLockFunc(name, lsc) \
7177+ AuWriteLockFunc(name, lsc)
7178+
7179+AuRWLockFuncs(child, CHILD);
7180+AuRWLockFuncs(child2, CHILD2);
7181+AuRWLockFuncs(child3, CHILD3);
7182+AuRWLockFuncs(parent, PARENT);
7183+AuRWLockFuncs(parent2, PARENT2);
7184+AuRWLockFuncs(parent3, PARENT3);
7185+
7186+#undef AuReadLockFunc
7187+#undef AuWriteLockFunc
7188+#undef AuRWLockFuncs
7189+
7190+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
7191+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
7192+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 7193+
7194+/* ---------------------------------------------------------------------- */
7195+
7196+/* todo: memory barrier? */
7197+static inline unsigned int au_digen(struct dentry *d)
7198+{
7199+ return atomic_read(&au_di(d)->di_generation);
7200+}
7201+
/* mark the slot @hdentry as holding no lower dentry; hd_id is untouched */
static inline void au_h_dentry_init(struct au_hdentry *hdentry)
{
	hdentry->hd_dentry = NULL;
}
7206+
7207+static inline void au_hdput(struct au_hdentry *hd)
7208+{
4a4d8108
AM
7209+ if (hd)
7210+ dput(hd->hd_dentry);
1facf9fc 7211+}
7212+
7213+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
7214+{
1308ab2a 7215+ DiMustAnyLock(dentry);
1facf9fc 7216+ return au_di(dentry)->di_bstart;
7217+}
7218+
7219+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
7220+{
1308ab2a 7221+ DiMustAnyLock(dentry);
1facf9fc 7222+ return au_di(dentry)->di_bend;
7223+}
7224+
7225+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
7226+{
1308ab2a 7227+ DiMustAnyLock(dentry);
1facf9fc 7228+ return au_di(dentry)->di_bwh;
7229+}
7230+
7231+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
7232+{
1308ab2a 7233+ DiMustAnyLock(dentry);
1facf9fc 7234+ return au_di(dentry)->di_bdiropq;
7235+}
7236+
7237+/* todo: hard/soft set? */
7238+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
7239+{
1308ab2a 7240+ DiMustWriteLock(dentry);
1facf9fc 7241+ au_di(dentry)->di_bstart = bindex;
7242+}
7243+
7244+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
7245+{
1308ab2a 7246+ DiMustWriteLock(dentry);
1facf9fc 7247+ au_di(dentry)->di_bend = bindex;
7248+}
7249+
7250+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
7251+{
1308ab2a 7252+ DiMustWriteLock(dentry);
1facf9fc 7253+ /* dbwh can be outside of bstart - bend range */
7254+ au_di(dentry)->di_bwh = bindex;
7255+}
7256+
7257+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
7258+{
1308ab2a 7259+ DiMustWriteLock(dentry);
1facf9fc 7260+ au_di(dentry)->di_bdiropq = bindex;
7261+}
7262+
7263+/* ---------------------------------------------------------------------- */
7264+
#ifdef CONFIG_AUFS_HNOTIFY
/* decrement the generation number of the dinfo of @d */
static inline void au_digen_dec(struct dentry *d)
{
	atomic_dec(&au_di(d)->di_generation);
}

/* forget the dinfo pointer of @dentry; nothing is freed here */
static inline void au_hn_di_reinit(struct dentry *dentry)
{
	dentry->d_fsdata = NULL;
}
#else
/* without hnotify, au_hn_di_reinit() is a stub */
AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 7278+
7279+#endif /* __KERNEL__ */
7280+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
7281diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
7282--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 7283+++ linux/fs/aufs/dinfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7284@@ -0,0 +1,543 @@
1facf9fc 7285+/*
027c5e7a 7286+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7287+ *
7288+ * This program, aufs is free software; you can redistribute it and/or modify
7289+ * it under the terms of the GNU General Public License as published by
7290+ * the Free Software Foundation; either version 2 of the License, or
7291+ * (at your option) any later version.
dece6358
AM
7292+ *
7293+ * This program is distributed in the hope that it will be useful,
7294+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7295+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7296+ * GNU General Public License for more details.
7297+ *
7298+ * You should have received a copy of the GNU General Public License
7299+ * along with this program; if not, write to the Free Software
7300+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7301+ */
7302+
7303+/*
7304+ * dentry private data
7305+ */
7306+
7307+#include "aufs.h"
7308+
e49829fe 7309+void au_di_init_once(void *_dinfo)
4a4d8108 7310+{
e49829fe
JR
7311+ struct au_dinfo *dinfo = _dinfo;
7312+ static struct lock_class_key aufs_di;
4a4d8108 7313+
e49829fe
JR
7314+ au_rw_init(&dinfo->di_rwsem);
7315+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
7316+}
7317+
027c5e7a 7318+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 7319+{
7320+ struct au_dinfo *dinfo;
027c5e7a 7321+ int nbr, i;
1facf9fc 7322+
7323+ dinfo = au_cache_alloc_dinfo();
7324+ if (unlikely(!dinfo))
7325+ goto out;
7326+
1facf9fc 7327+ nbr = au_sbend(sb) + 1;
7328+ if (nbr <= 0)
7329+ nbr = 1;
7330+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
7331+ if (dinfo->di_hdentry) {
7332+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
7333+ dinfo->di_bstart = -1;
7334+ dinfo->di_bend = -1;
7335+ dinfo->di_bwh = -1;
7336+ dinfo->di_bdiropq = -1;
7337+ for (i = 0; i < nbr; i++)
7338+ dinfo->di_hdentry[i].hd_id = -1;
7339+ goto out;
7340+ }
1facf9fc 7341+
1facf9fc 7342+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
7343+ dinfo = NULL;
7344+
4f0767ce 7345+out:
027c5e7a 7346+ return dinfo;
1facf9fc 7347+}
7348+
027c5e7a 7349+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 7350+{
4a4d8108
AM
7351+ struct au_hdentry *p;
7352+ aufs_bindex_t bend, bindex;
7353+
7354+ /* dentry may not be revalidated */
027c5e7a 7355+ bindex = dinfo->di_bstart;
4a4d8108 7356+ if (bindex >= 0) {
027c5e7a
AM
7357+ bend = dinfo->di_bend;
7358+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
7359+ while (bindex++ <= bend)
7360+ au_hdput(p++);
7361+ }
027c5e7a
AM
7362+ kfree(dinfo->di_hdentry);
7363+ au_cache_free_dinfo(dinfo);
7364+}
7365+
7366+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
7367+{
7368+ struct au_hdentry *p;
7369+ aufs_bindex_t bi;
7370+
7371+ AuRwMustWriteLock(&a->di_rwsem);
7372+ AuRwMustWriteLock(&b->di_rwsem);
7373+
7374+#define DiSwap(v, name) \
7375+ do { \
7376+ v = a->di_##name; \
7377+ a->di_##name = b->di_##name; \
7378+ b->di_##name = v; \
7379+ } while (0)
7380+
7381+ DiSwap(p, hdentry);
7382+ DiSwap(bi, bstart);
7383+ DiSwap(bi, bend);
7384+ DiSwap(bi, bwh);
7385+ DiSwap(bi, bdiropq);
7386+ /* smp_mb(); */
7387+
7388+#undef DiSwap
7389+}
7390+
7391+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
7392+{
7393+ AuRwMustWriteLock(&dst->di_rwsem);
7394+ AuRwMustWriteLock(&src->di_rwsem);
7395+
7396+ dst->di_bstart = src->di_bstart;
7397+ dst->di_bend = src->di_bend;
7398+ dst->di_bwh = src->di_bwh;
7399+ dst->di_bdiropq = src->di_bdiropq;
7400+ /* smp_mb(); */
7401+}
7402+
7403+int au_di_init(struct dentry *dentry)
7404+{
7405+ int err;
7406+ struct super_block *sb;
7407+ struct au_dinfo *dinfo;
7408+
7409+ err = 0;
7410+ sb = dentry->d_sb;
7411+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
7412+ if (dinfo) {
7413+ atomic_set(&dinfo->di_generation, au_sigen(sb));
7414+ /* smp_mb(); */ /* atomic_set */
7415+ dentry->d_fsdata = dinfo;
7416+ } else
7417+ err = -ENOMEM;
7418+
7419+ return err;
7420+}
7421+
7422+void au_di_fin(struct dentry *dentry)
7423+{
7424+ struct au_dinfo *dinfo;
7425+
7426+ dinfo = au_di(dentry);
7427+ AuRwDestroy(&dinfo->di_rwsem);
7428+ au_di_free(dinfo);
4a4d8108
AM
7429+}
7430+
1facf9fc 7431+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
7432+{
7433+ int err, sz;
7434+ struct au_hdentry *hdp;
7435+
1308ab2a 7436+ AuRwMustWriteLock(&dinfo->di_rwsem);
7437+
1facf9fc 7438+ err = -ENOMEM;
7439+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
7440+ if (!sz)
7441+ sz = sizeof(*hdp);
7442+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
7443+ if (hdp) {
7444+ dinfo->di_hdentry = hdp;
7445+ err = 0;
7446+ }
7447+
7448+ return err;
7449+}
7450+
7451+/* ---------------------------------------------------------------------- */
7452+
7453+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
7454+{
7455+ switch (lsc) {
7456+ case AuLsc_DI_CHILD:
7457+ ii_write_lock_child(inode);
7458+ break;
7459+ case AuLsc_DI_CHILD2:
7460+ ii_write_lock_child2(inode);
7461+ break;
7462+ case AuLsc_DI_CHILD3:
7463+ ii_write_lock_child3(inode);
7464+ break;
7465+ case AuLsc_DI_PARENT:
7466+ ii_write_lock_parent(inode);
7467+ break;
7468+ case AuLsc_DI_PARENT2:
7469+ ii_write_lock_parent2(inode);
7470+ break;
7471+ case AuLsc_DI_PARENT3:
7472+ ii_write_lock_parent3(inode);
7473+ break;
7474+ default:
7475+ BUG();
7476+ }
7477+}
7478+
7479+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
7480+{
7481+ switch (lsc) {
7482+ case AuLsc_DI_CHILD:
7483+ ii_read_lock_child(inode);
7484+ break;
7485+ case AuLsc_DI_CHILD2:
7486+ ii_read_lock_child2(inode);
7487+ break;
7488+ case AuLsc_DI_CHILD3:
7489+ ii_read_lock_child3(inode);
7490+ break;
7491+ case AuLsc_DI_PARENT:
7492+ ii_read_lock_parent(inode);
7493+ break;
7494+ case AuLsc_DI_PARENT2:
7495+ ii_read_lock_parent2(inode);
7496+ break;
7497+ case AuLsc_DI_PARENT3:
7498+ ii_read_lock_parent3(inode);
7499+ break;
7500+ default:
7501+ BUG();
7502+ }
7503+}
7504+
7505+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
7506+{
dece6358 7507+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7508+ if (d->d_inode) {
7509+ if (au_ftest_lock(flags, IW))
7510+ do_ii_write_lock(d->d_inode, lsc);
7511+ else if (au_ftest_lock(flags, IR))
7512+ do_ii_read_lock(d->d_inode, lsc);
7513+ }
7514+}
7515+
7516+void di_read_unlock(struct dentry *d, int flags)
7517+{
7518+ if (d->d_inode) {
027c5e7a
AM
7519+ if (au_ftest_lock(flags, IW)) {
7520+ au_dbg_verify_dinode(d);
1facf9fc 7521+ ii_write_unlock(d->d_inode);
027c5e7a
AM
7522+ } else if (au_ftest_lock(flags, IR)) {
7523+ au_dbg_verify_dinode(d);
1facf9fc 7524+ ii_read_unlock(d->d_inode);
027c5e7a 7525+ }
1facf9fc 7526+ }
dece6358 7527+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 7528+}
7529+
7530+void di_downgrade_lock(struct dentry *d, int flags)
7531+{
1facf9fc 7532+ if (d->d_inode && au_ftest_lock(flags, IR))
7533+ ii_downgrade_lock(d->d_inode);
dece6358 7534+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 7535+}
7536+
7537+void di_write_lock(struct dentry *d, unsigned int lsc)
7538+{
dece6358 7539+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7540+ if (d->d_inode)
7541+ do_ii_write_lock(d->d_inode, lsc);
7542+}
7543+
7544+void di_write_unlock(struct dentry *d)
7545+{
027c5e7a 7546+ au_dbg_verify_dinode(d);
1facf9fc 7547+ if (d->d_inode)
7548+ ii_write_unlock(d->d_inode);
dece6358 7549+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 7550+}
7551+
7552+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
7553+{
7554+ AuDebugOn(d1 == d2
7555+ || d1->d_inode == d2->d_inode
7556+ || d1->d_sb != d2->d_sb);
7557+
7558+ if (isdir && au_test_subdir(d1, d2)) {
7559+ di_write_lock_child(d1);
7560+ di_write_lock_child2(d2);
7561+ } else {
7562+ /* there should be no races */
7563+ di_write_lock_child(d2);
7564+ di_write_lock_child2(d1);
7565+ }
7566+}
7567+
7568+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
7569+{
7570+ AuDebugOn(d1 == d2
7571+ || d1->d_inode == d2->d_inode
7572+ || d1->d_sb != d2->d_sb);
7573+
7574+ if (isdir && au_test_subdir(d1, d2)) {
7575+ di_write_lock_parent(d1);
7576+ di_write_lock_parent2(d2);
7577+ } else {
7578+ /* there should be no races */
7579+ di_write_lock_parent(d2);
7580+ di_write_lock_parent2(d1);
7581+ }
7582+}
7583+
7584+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
7585+{
7586+ di_write_unlock(d1);
7587+ if (d1->d_inode == d2->d_inode)
dece6358 7588+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 7589+ else
7590+ di_write_unlock(d2);
7591+}
7592+
7593+/* ---------------------------------------------------------------------- */
7594+
7595+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
7596+{
7597+ struct dentry *d;
7598+
1308ab2a 7599+ DiMustAnyLock(dentry);
7600+
1facf9fc 7601+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7602+ return NULL;
7603+ AuDebugOn(bindex < 0);
7604+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
027c5e7a 7605+ AuDebugOn(d && d->d_count <= 0);
1facf9fc 7606+ return d;
7607+}
7608+
2cbb1c4b
JR
7609+/*
7610+ * extended version of au_h_dptr().
7611+ * returns a hashed and positive h_dentry in bindex, NULL, or error.
7612+ */
7613+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
7614+{
7615+ struct dentry *h_dentry;
7616+ struct inode *inode, *h_inode;
7617+
7618+ inode = dentry->d_inode;
7619+ AuDebugOn(!inode);
7620+
7621+ h_dentry = NULL;
7622+ if (au_dbstart(dentry) <= bindex
7623+ && bindex <= au_dbend(dentry))
7624+ h_dentry = au_h_dptr(dentry, bindex);
7625+ if (h_dentry && !au_d_hashed_positive(h_dentry)) {
7626+ dget(h_dentry);
7627+ goto out; /* success */
7628+ }
7629+
7630+ AuDebugOn(bindex < au_ibstart(inode));
7631+ AuDebugOn(au_ibend(inode) < bindex);
7632+ h_inode = au_h_iptr(inode, bindex);
7633+ h_dentry = d_find_alias(h_inode);
7634+ if (h_dentry) {
7635+ if (!IS_ERR(h_dentry)) {
7636+ if (!au_d_hashed_positive(h_dentry))
7637+ goto out; /* success */
7638+ dput(h_dentry);
7639+ } else
7640+ goto out;
7641+ }
7642+
7643+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
7644+ h_dentry = au_plink_lkup(inode, bindex);
7645+ AuDebugOn(!h_dentry);
7646+ if (!IS_ERR(h_dentry)) {
7647+ if (!au_d_hashed_positive(h_dentry))
7648+ goto out; /* success */
7649+ dput(h_dentry);
7650+ h_dentry = NULL;
7651+ }
7652+ }
7653+
7654+out:
7655+ AuDbgDentry(h_dentry);
7656+ return h_dentry;
7657+}
7658+
1facf9fc 7659+aufs_bindex_t au_dbtail(struct dentry *dentry)
7660+{
7661+ aufs_bindex_t bend, bwh;
7662+
7663+ bend = au_dbend(dentry);
7664+ if (0 <= bend) {
7665+ bwh = au_dbwh(dentry);
7666+ if (!bwh)
7667+ return bwh;
7668+ if (0 < bwh && bwh < bend)
7669+ return bwh - 1;
7670+ }
7671+ return bend;
7672+}
7673+
7674+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
7675+{
7676+ aufs_bindex_t bend, bopq;
7677+
7678+ bend = au_dbtail(dentry);
7679+ if (0 <= bend) {
7680+ bopq = au_dbdiropq(dentry);
7681+ if (0 <= bopq && bopq < bend)
7682+ bend = bopq;
7683+ }
7684+ return bend;
7685+}
7686+
7687+/* ---------------------------------------------------------------------- */
7688+
7689+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7690+ struct dentry *h_dentry)
7691+{
7692+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 7693+ struct au_branch *br;
1facf9fc 7694+
1308ab2a 7695+ DiMustWriteLock(dentry);
7696+
4a4d8108 7697+ au_hdput(hd);
1facf9fc 7698+ hd->hd_dentry = h_dentry;
027c5e7a
AM
7699+ if (h_dentry) {
7700+ br = au_sbr(dentry->d_sb, bindex);
7701+ hd->hd_id = br->br_id;
7702+ }
7703+}
7704+
7705+int au_dbrange_test(struct dentry *dentry)
7706+{
7707+ int err;
7708+ aufs_bindex_t bstart, bend;
7709+
7710+ err = 0;
7711+ bstart = au_dbstart(dentry);
7712+ bend = au_dbend(dentry);
7713+ if (bstart >= 0)
7714+ AuDebugOn(bend < 0 && bstart > bend);
7715+ else {
7716+ err = -EIO;
7717+ AuDebugOn(bend >= 0);
7718+ }
7719+
7720+ return err;
7721+}
7722+
7723+int au_digen_test(struct dentry *dentry, unsigned int sigen)
7724+{
7725+ int err;
7726+
7727+ err = 0;
7728+ if (unlikely(au_digen(dentry) != sigen
7729+ || au_iigen_test(dentry->d_inode, sigen)))
7730+ err = -EIO;
7731+
7732+ return err;
1facf9fc 7733+}
7734+
7735+void au_update_digen(struct dentry *dentry)
7736+{
7737+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
7738+ /* smp_mb(); */ /* atomic_set */
7739+}
7740+
7741+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
7742+{
7743+ struct au_dinfo *dinfo;
7744+ struct dentry *h_d;
4a4d8108 7745+ struct au_hdentry *hdp;
1facf9fc 7746+
1308ab2a 7747+ DiMustWriteLock(dentry);
7748+
1facf9fc 7749+ dinfo = au_di(dentry);
7750+ if (!dinfo || dinfo->di_bstart < 0)
7751+ return;
7752+
4a4d8108 7753+ hdp = dinfo->di_hdentry;
1facf9fc 7754+ if (do_put_zero) {
7755+ aufs_bindex_t bindex, bend;
7756+
7757+ bend = dinfo->di_bend;
7758+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 7759+ h_d = hdp[0 + bindex].hd_dentry;
1facf9fc 7760+ if (h_d && !h_d->d_inode)
7761+ au_set_h_dptr(dentry, bindex, NULL);
7762+ }
7763+ }
7764+
7765+ dinfo->di_bstart = -1;
7766+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 7767+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 7768+ break;
7769+ if (dinfo->di_bstart > dinfo->di_bend) {
7770+ dinfo->di_bstart = -1;
7771+ dinfo->di_bend = -1;
7772+ return;
7773+ }
7774+
7775+ dinfo->di_bend++;
7776+ while (0 <= --dinfo->di_bend)
4a4d8108 7777+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 7778+ break;
7779+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
7780+}
7781+
7782+void au_update_dbstart(struct dentry *dentry)
7783+{
7784+ aufs_bindex_t bindex, bend;
7785+ struct dentry *h_dentry;
7786+
7787+ bend = au_dbend(dentry);
7788+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
7789+ h_dentry = au_h_dptr(dentry, bindex);
7790+ if (!h_dentry)
7791+ continue;
7792+ if (h_dentry->d_inode) {
7793+ au_set_dbstart(dentry, bindex);
7794+ return;
7795+ }
7796+ au_set_h_dptr(dentry, bindex, NULL);
7797+ }
7798+}
7799+
7800+void au_update_dbend(struct dentry *dentry)
7801+{
7802+ aufs_bindex_t bindex, bstart;
7803+ struct dentry *h_dentry;
7804+
7805+ bstart = au_dbstart(dentry);
7f207e10 7806+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 7807+ h_dentry = au_h_dptr(dentry, bindex);
7808+ if (!h_dentry)
7809+ continue;
7810+ if (h_dentry->d_inode) {
7811+ au_set_dbend(dentry, bindex);
7812+ return;
7813+ }
7814+ au_set_h_dptr(dentry, bindex, NULL);
7815+ }
7816+}
7817+
7818+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
7819+{
7820+ aufs_bindex_t bindex, bend;
7821+
7822+ bend = au_dbend(dentry);
7823+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
7824+ if (au_h_dptr(dentry, bindex) == h_dentry)
7825+ return bindex;
7826+ return -1;
7827+}
7f207e10
AM
7828diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
7829--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
7830+++ linux/fs/aufs/dir.c 2011-10-24 20:52:23.677857076 +0200
7831@@ -0,0 +1,627 @@
1facf9fc 7832+/*
027c5e7a 7833+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7834+ *
7835+ * This program, aufs is free software; you can redistribute it and/or modify
7836+ * it under the terms of the GNU General Public License as published by
7837+ * the Free Software Foundation; either version 2 of the License, or
7838+ * (at your option) any later version.
dece6358
AM
7839+ *
7840+ * This program is distributed in the hope that it will be useful,
7841+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7842+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7843+ * GNU General Public License for more details.
7844+ *
7845+ * You should have received a copy of the GNU General Public License
7846+ * along with this program; if not, write to the Free Software
7847+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7848+ */
7849+
7850+/*
7851+ * directory operations
7852+ */
7853+
dece6358 7854+#include <linux/file.h>
1facf9fc 7855+#include <linux/fs_stack.h>
7856+#include "aufs.h"
7857+
7858+void au_add_nlink(struct inode *dir, struct inode *h_dir)
7859+{
7860+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7861+
7862+ dir->i_nlink += h_dir->i_nlink - 2;
7863+ if (h_dir->i_nlink < 2)
7864+ dir->i_nlink += 2;
7865+}
7866+
7867+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
7868+{
7869+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7870+
7871+ dir->i_nlink -= h_dir->i_nlink - 2;
7872+ if (h_dir->i_nlink < 2)
7873+ dir->i_nlink -= 2;
7874+}
7875+
1308ab2a 7876+loff_t au_dir_size(struct file *file, struct dentry *dentry)
7877+{
7878+ loff_t sz;
7879+ aufs_bindex_t bindex, bend;
7880+ struct file *h_file;
7881+ struct dentry *h_dentry;
7882+
7883+ sz = 0;
7884+ if (file) {
7885+ AuDebugOn(!file->f_dentry);
7886+ AuDebugOn(!file->f_dentry->d_inode);
7887+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
7888+
4a4d8108 7889+ bend = au_fbend_dir(file);
1308ab2a 7890+ for (bindex = au_fbstart(file);
7891+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7892+ bindex++) {
4a4d8108 7893+ h_file = au_hf_dir(file, bindex);
1308ab2a 7894+ if (h_file
7895+ && h_file->f_dentry
7896+ && h_file->f_dentry->d_inode)
7897+ sz += i_size_read(h_file->f_dentry->d_inode);
7898+ }
7899+ } else {
7900+ AuDebugOn(!dentry);
7901+ AuDebugOn(!dentry->d_inode);
7902+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
7903+
7904+ bend = au_dbtaildir(dentry);
7905+ for (bindex = au_dbstart(dentry);
7906+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7907+ bindex++) {
7908+ h_dentry = au_h_dptr(dentry, bindex);
7909+ if (h_dentry && h_dentry->d_inode)
7910+ sz += i_size_read(h_dentry->d_inode);
7911+ }
7912+ }
7913+ if (sz < KMALLOC_MAX_SIZE)
7914+ sz = roundup_pow_of_two(sz);
7915+ if (sz > KMALLOC_MAX_SIZE)
7916+ sz = KMALLOC_MAX_SIZE;
7917+ else if (sz < NAME_MAX) {
7918+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
7919+ sz = AUFS_RDBLK_DEF;
7920+ }
7921+ return sz;
7922+}
7923+
1facf9fc 7924+/* ---------------------------------------------------------------------- */
7925+
7926+static int reopen_dir(struct file *file)
7927+{
7928+ int err;
7929+ unsigned int flags;
7930+ aufs_bindex_t bindex, btail, bstart;
7931+ struct dentry *dentry, *h_dentry;
7932+ struct file *h_file;
7933+
7934+ /* open all lower dirs */
7935+ dentry = file->f_dentry;
7936+ bstart = au_dbstart(dentry);
7937+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
7938+ au_set_h_fptr(file, bindex, NULL);
7939+ au_set_fbstart(file, bstart);
7940+
7941+ btail = au_dbtaildir(dentry);
4a4d8108 7942+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 7943+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 7944+ au_set_fbend_dir(file, btail);
1facf9fc 7945+
4a4d8108 7946+ flags = vfsub_file_flags(file);
1facf9fc 7947+ for (bindex = bstart; bindex <= btail; bindex++) {
7948+ h_dentry = au_h_dptr(dentry, bindex);
7949+ if (!h_dentry)
7950+ continue;
4a4d8108 7951+ h_file = au_hf_dir(file, bindex);
1facf9fc 7952+ if (h_file)
7953+ continue;
7954+
7955+ h_file = au_h_open(dentry, bindex, flags, file);
7956+ err = PTR_ERR(h_file);
7957+ if (IS_ERR(h_file))
7958+ goto out; /* close all? */
7959+ au_set_h_fptr(file, bindex, h_file);
7960+ }
7961+ au_update_figen(file);
7962+ /* todo: necessary? */
7963+ /* file->f_ra = h_file->f_ra; */
7964+ err = 0;
7965+
4f0767ce 7966+out:
1facf9fc 7967+ return err;
7968+}
7969+
7970+static int do_open_dir(struct file *file, int flags)
7971+{
7972+ int err;
7973+ aufs_bindex_t bindex, btail;
7974+ struct dentry *dentry, *h_dentry;
7975+ struct file *h_file;
7976+
1308ab2a 7977+ FiMustWriteLock(file);
7978+
1facf9fc 7979+ dentry = file->f_dentry;
027c5e7a
AM
7980+ err = au_alive_dir(dentry);
7981+ if (unlikely(err))
7982+ goto out;
7983+
1facf9fc 7984+ file->f_version = dentry->d_inode->i_version;
7985+ bindex = au_dbstart(dentry);
7986+ au_set_fbstart(file, bindex);
7987+ btail = au_dbtaildir(dentry);
4a4d8108 7988+ au_set_fbend_dir(file, btail);
1facf9fc 7989+ for (; !err && bindex <= btail; bindex++) {
7990+ h_dentry = au_h_dptr(dentry, bindex);
7991+ if (!h_dentry)
7992+ continue;
7993+
7994+ h_file = au_h_open(dentry, bindex, flags, file);
7995+ if (IS_ERR(h_file)) {
7996+ err = PTR_ERR(h_file);
7997+ break;
7998+ }
7999+ au_set_h_fptr(file, bindex, h_file);
8000+ }
8001+ au_update_figen(file);
8002+ /* todo: necessary? */
8003+ /* file->f_ra = h_file->f_ra; */
8004+ if (!err)
8005+ return 0; /* success */
8006+
8007+ /* close all */
8008+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
8009+ au_set_h_fptr(file, bindex, NULL);
8010+ au_set_fbstart(file, -1);
4a4d8108
AM
8011+ au_set_fbend_dir(file, -1);
8012+
027c5e7a 8013+out:
1facf9fc 8014+ return err;
8015+}
8016+
8017+static int aufs_open_dir(struct inode *inode __maybe_unused,
8018+ struct file *file)
8019+{
4a4d8108
AM
8020+ int err;
8021+ struct super_block *sb;
8022+ struct au_fidir *fidir;
8023+
8024+ err = -ENOMEM;
8025+ sb = file->f_dentry->d_sb;
8026+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 8027+ fidir = au_fidir_alloc(sb);
4a4d8108
AM
8028+ if (fidir) {
8029+ err = au_do_open(file, do_open_dir, fidir);
8030+ if (unlikely(err))
8031+ kfree(fidir);
8032+ }
8033+ si_read_unlock(sb);
8034+ return err;
1facf9fc 8035+}
8036+
8037+static int aufs_release_dir(struct inode *inode __maybe_unused,
8038+ struct file *file)
8039+{
8040+ struct au_vdir *vdir_cache;
4a4d8108
AM
8041+ struct au_finfo *finfo;
8042+ struct au_fidir *fidir;
8043+ aufs_bindex_t bindex, bend;
1facf9fc 8044+
4a4d8108
AM
8045+ finfo = au_fi(file);
8046+ fidir = finfo->fi_hdir;
8047+ if (fidir) {
0c5527e5
AM
8048+ /* remove me from sb->s_files */
8049+ file_sb_list_del(file);
8050+
4a4d8108
AM
8051+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
8052+ if (vdir_cache)
8053+ au_vdir_free(vdir_cache);
8054+
8055+ bindex = finfo->fi_btop;
8056+ if (bindex >= 0) {
8057+ /*
8058+ * calls fput() instead of filp_close(),
8059+ * since no dnotify or lock for the lower file.
8060+ */
8061+ bend = fidir->fd_bbot;
8062+ for (; bindex <= bend; bindex++)
8063+ au_set_h_fptr(file, bindex, NULL);
8064+ }
8065+ kfree(fidir);
8066+ finfo->fi_hdir = NULL;
1facf9fc 8067+ }
1facf9fc 8068+ au_finfo_fin(file);
1facf9fc 8069+ return 0;
8070+}
8071+
8072+/* ---------------------------------------------------------------------- */
8073+
4a4d8108
AM
8074+static int au_do_flush_dir(struct file *file, fl_owner_t id)
8075+{
8076+ int err;
8077+ aufs_bindex_t bindex, bend;
8078+ struct file *h_file;
8079+
8080+ err = 0;
8081+ bend = au_fbend_dir(file);
8082+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8083+ h_file = au_hf_dir(file, bindex);
8084+ if (h_file)
8085+ err = vfsub_flush(h_file, id);
8086+ }
8087+ return err;
8088+}
8089+
8090+static int aufs_flush_dir(struct file *file, fl_owner_t id)
8091+{
8092+ return au_do_flush(file, id, au_do_flush_dir);
8093+}
8094+
8095+/* ---------------------------------------------------------------------- */
8096+
1facf9fc 8097+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
8098+{
8099+ int err;
8100+ aufs_bindex_t bend, bindex;
8101+ struct inode *inode;
8102+ struct super_block *sb;
8103+
8104+ err = 0;
8105+ sb = dentry->d_sb;
8106+ inode = dentry->d_inode;
8107+ IMustLock(inode);
8108+ bend = au_dbend(dentry);
8109+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
8110+ struct path h_path;
1facf9fc 8111+
8112+ if (au_test_ro(sb, bindex, inode))
8113+ continue;
8114+ h_path.dentry = au_h_dptr(dentry, bindex);
8115+ if (!h_path.dentry)
8116+ continue;
1facf9fc 8117+
1facf9fc 8118+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 8119+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 8120+ }
8121+
8122+ return err;
8123+}
8124+
8125+static int au_do_fsync_dir(struct file *file, int datasync)
8126+{
8127+ int err;
8128+ aufs_bindex_t bend, bindex;
8129+ struct file *h_file;
8130+ struct super_block *sb;
8131+ struct inode *inode;
1facf9fc 8132+
8133+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8134+ if (unlikely(err))
8135+ goto out;
8136+
8137+ sb = file->f_dentry->d_sb;
8138+ inode = file->f_dentry->d_inode;
4a4d8108 8139+ bend = au_fbend_dir(file);
1facf9fc 8140+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 8141+ h_file = au_hf_dir(file, bindex);
1facf9fc 8142+ if (!h_file || au_test_ro(sb, bindex, inode))
8143+ continue;
8144+
53392da6 8145+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 8146+ }
8147+
4f0767ce 8148+out:
1facf9fc 8149+ return err;
8150+}
8151+
8152+/*
8153+ * @file may be NULL
8154+ */
1e00d052
AM
8155+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
8156+ int datasync)
1facf9fc 8157+{
8158+ int err;
b752ccd1 8159+ struct dentry *dentry;
1facf9fc 8160+ struct super_block *sb;
1e00d052 8161+ struct mutex *mtx;
1facf9fc 8162+
8163+ err = 0;
1e00d052
AM
8164+ dentry = file->f_dentry;
8165+ mtx = &dentry->d_inode->i_mutex;
8166+ mutex_lock(mtx);
1facf9fc 8167+ sb = dentry->d_sb;
8168+ si_noflush_read_lock(sb);
8169+ if (file)
8170+ err = au_do_fsync_dir(file, datasync);
8171+ else {
8172+ di_write_lock_child(dentry);
8173+ err = au_do_fsync_dir_no_file(dentry, datasync);
8174+ }
8175+ au_cpup_attr_timesizes(dentry->d_inode);
8176+ di_write_unlock(dentry);
8177+ if (file)
8178+ fi_write_unlock(file);
8179+
8180+ si_read_unlock(sb);
1e00d052 8181+ mutex_unlock(mtx);
1facf9fc 8182+ return err;
8183+}
8184+
8185+/* ---------------------------------------------------------------------- */
8186+
8187+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
8188+{
8189+ int err;
8190+ struct dentry *dentry;
8191+ struct inode *inode;
8192+ struct super_block *sb;
8193+
8194+ dentry = file->f_dentry;
8195+ inode = dentry->d_inode;
8196+ IMustLock(inode);
8197+
8198+ sb = dentry->d_sb;
8199+ si_read_lock(sb, AuLock_FLUSH);
8200+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8201+ if (unlikely(err))
8202+ goto out;
027c5e7a
AM
8203+ err = au_alive_dir(dentry);
8204+ if (!err)
8205+ err = au_vdir_init(file);
1facf9fc 8206+ di_downgrade_lock(dentry, AuLock_IR);
8207+ if (unlikely(err))
8208+ goto out_unlock;
8209+
b752ccd1 8210+ if (!au_test_nfsd()) {
1facf9fc 8211+ err = au_vdir_fill_de(file, dirent, filldir);
8212+ fsstack_copy_attr_atime(inode,
8213+ au_h_iptr(inode, au_ibstart(inode)));
8214+ } else {
8215+ /*
8216+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
8217+ * encode_fh() and others.
8218+ */
8219+ struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
8220+
8221+ di_read_unlock(dentry, AuLock_IR);
8222+ si_read_unlock(sb);
1facf9fc 8223+ err = au_vdir_fill_de(file, dirent, filldir);
1facf9fc 8224+ fsstack_copy_attr_atime(inode, h_inode);
8225+ fi_write_unlock(file);
8226+
8227+ AuTraceErr(err);
8228+ return err;
8229+ }
8230+
4f0767ce 8231+out_unlock:
1facf9fc 8232+ di_read_unlock(dentry, AuLock_IR);
8233+ fi_write_unlock(file);
4f0767ce 8234+out:
1facf9fc 8235+ si_read_unlock(sb);
8236+ return err;
8237+}
8238+
8239+/* ---------------------------------------------------------------------- */
8240+
8241+#define AuTestEmpty_WHONLY 1
dece6358
AM
8242+#define AuTestEmpty_CALLED (1 << 1)
8243+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 8244+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
8245+#define au_fset_testempty(flags, name) \
8246+ do { (flags) |= AuTestEmpty_##name; } while (0)
8247+#define au_fclr_testempty(flags, name) \
8248+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 8249+
dece6358
AM
8250+#ifndef CONFIG_AUFS_SHWH
8251+#undef AuTestEmpty_SHWH
8252+#define AuTestEmpty_SHWH 0
8253+#endif
8254+
1facf9fc 8255+struct test_empty_arg {
1308ab2a 8256+ struct au_nhash *whlist;
1facf9fc 8257+ unsigned int flags;
8258+ int err;
8259+ aufs_bindex_t bindex;
8260+};
8261+
8262+static int test_empty_cb(void *__arg, const char *__name, int namelen,
dece6358
AM
8263+ loff_t offset __maybe_unused, u64 ino,
8264+ unsigned int d_type)
1facf9fc 8265+{
8266+ struct test_empty_arg *arg = __arg;
8267+ char *name = (void *)__name;
8268+
8269+ arg->err = 0;
8270+ au_fset_testempty(arg->flags, CALLED);
8271+ /* smp_mb(); */
8272+ if (name[0] == '.'
8273+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
8274+ goto out; /* success */
8275+
8276+ if (namelen <= AUFS_WH_PFX_LEN
8277+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
8278+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 8279+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8280+ arg->err = -ENOTEMPTY;
8281+ goto out;
8282+ }
8283+
8284+ name += AUFS_WH_PFX_LEN;
8285+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 8286+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8287+ arg->err = au_nhash_append_wh
1308ab2a 8288+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 8289+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 8290+
4f0767ce 8291+out:
1facf9fc 8292+ /* smp_mb(); */
8293+ AuTraceErr(arg->err);
8294+ return arg->err;
8295+}
8296+
8297+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8298+{
8299+ int err;
8300+ struct file *h_file;
8301+
8302+ h_file = au_h_open(dentry, arg->bindex,
8303+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
8304+ /*file*/NULL);
8305+ err = PTR_ERR(h_file);
8306+ if (IS_ERR(h_file))
8307+ goto out;
8308+
8309+ err = 0;
8310+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
8311+ && !h_file->f_dentry->d_inode->i_nlink)
8312+ goto out_put;
8313+
8314+ do {
8315+ arg->err = 0;
8316+ au_fclr_testempty(arg->flags, CALLED);
8317+ /* smp_mb(); */
8318+ err = vfsub_readdir(h_file, test_empty_cb, arg);
8319+ if (err >= 0)
8320+ err = arg->err;
8321+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
8322+
4f0767ce 8323+out_put:
1facf9fc 8324+ fput(h_file);
8325+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 8326+out:
1facf9fc 8327+ return err;
8328+}
8329+
8330+struct do_test_empty_args {
8331+ int *errp;
8332+ struct dentry *dentry;
8333+ struct test_empty_arg *arg;
8334+};
8335+
8336+static void call_do_test_empty(void *args)
8337+{
8338+ struct do_test_empty_args *a = args;
8339+ *a->errp = do_test_empty(a->dentry, a->arg);
8340+}
8341+
8342+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8343+{
8344+ int err, wkq_err;
8345+ struct dentry *h_dentry;
8346+ struct inode *h_inode;
8347+
8348+ h_dentry = au_h_dptr(dentry, arg->bindex);
8349+ h_inode = h_dentry->d_inode;
53392da6 8350+ /* todo: i_mode changes anytime? */
1facf9fc 8351+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
8352+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
8353+ mutex_unlock(&h_inode->i_mutex);
8354+ if (!err)
8355+ err = do_test_empty(dentry, arg);
8356+ else {
8357+ struct do_test_empty_args args = {
8358+ .errp = &err,
8359+ .dentry = dentry,
8360+ .arg = arg
8361+ };
8362+ unsigned int flags = arg->flags;
8363+
8364+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
8365+ if (unlikely(wkq_err))
8366+ err = wkq_err;
8367+ arg->flags = flags;
8368+ }
8369+
8370+ return err;
8371+}
8372+
8373+int au_test_empty_lower(struct dentry *dentry)
8374+{
8375+ int err;
1308ab2a 8376+ unsigned int rdhash;
1facf9fc 8377+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 8378+ struct au_nhash whlist;
1facf9fc 8379+ struct test_empty_arg arg;
1facf9fc 8380+
dece6358
AM
8381+ SiMustAnyLock(dentry->d_sb);
8382+
1308ab2a 8383+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
8384+ if (!rdhash)
8385+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
8386+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 8387+ if (unlikely(err))
1facf9fc 8388+ goto out;
8389+
1facf9fc 8390+ arg.flags = 0;
1308ab2a 8391+ arg.whlist = &whlist;
8392+ bstart = au_dbstart(dentry);
dece6358
AM
8393+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8394+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8395+ arg.bindex = bstart;
8396+ err = do_test_empty(dentry, &arg);
8397+ if (unlikely(err))
8398+ goto out_whlist;
8399+
8400+ au_fset_testempty(arg.flags, WHONLY);
8401+ btail = au_dbtaildir(dentry);
8402+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
8403+ struct dentry *h_dentry;
8404+
8405+ h_dentry = au_h_dptr(dentry, bindex);
8406+ if (h_dentry && h_dentry->d_inode) {
8407+ arg.bindex = bindex;
8408+ err = do_test_empty(dentry, &arg);
8409+ }
8410+ }
8411+
4f0767ce 8412+out_whlist:
1308ab2a 8413+ au_nhash_wh_free(&whlist);
4f0767ce 8414+out:
1facf9fc 8415+ return err;
8416+}
8417+
8418+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
8419+{
8420+ int err;
8421+ struct test_empty_arg arg;
8422+ aufs_bindex_t bindex, btail;
8423+
8424+ err = 0;
1308ab2a 8425+ arg.whlist = whlist;
1facf9fc 8426+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
8427+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8428+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8429+ btail = au_dbtaildir(dentry);
8430+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
8431+ struct dentry *h_dentry;
8432+
8433+ h_dentry = au_h_dptr(dentry, bindex);
8434+ if (h_dentry && h_dentry->d_inode) {
8435+ arg.bindex = bindex;
8436+ err = sio_test_empty(dentry, &arg);
8437+ }
8438+ }
8439+
8440+ return err;
8441+}
8442+
8443+/* ---------------------------------------------------------------------- */
8444+
8445+const struct file_operations aufs_dir_fop = {
4a4d8108 8446+ .owner = THIS_MODULE,
027c5e7a 8447+ .llseek = default_llseek,
1facf9fc 8448+ .read = generic_read_dir,
8449+ .readdir = aufs_readdir,
8450+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
8451+#ifdef CONFIG_COMPAT
8452+ .compat_ioctl = aufs_compat_ioctl_dir,
8453+#endif
1facf9fc 8454+ .open = aufs_open_dir,
8455+ .release = aufs_release_dir,
4a4d8108 8456+ .flush = aufs_flush_dir,
1facf9fc 8457+ .fsync = aufs_fsync_dir
8458+};
7f207e10
AM
8459diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
8460--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8461+++ linux/fs/aufs/dir.h 2011-08-24 13:30:24.731313534 +0200
b752ccd1 8462@@ -0,0 +1,138 @@
1facf9fc 8463+/*
027c5e7a 8464+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 8465+ *
8466+ * This program, aufs is free software; you can redistribute it and/or modify
8467+ * it under the terms of the GNU General Public License as published by
8468+ * the Free Software Foundation; either version 2 of the License, or
8469+ * (at your option) any later version.
dece6358
AM
8470+ *
8471+ * This program is distributed in the hope that it will be useful,
8472+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8473+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8474+ * GNU General Public License for more details.
8475+ *
8476+ * You should have received a copy of the GNU General Public License
8477+ * along with this program; if not, write to the Free Software
8478+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8479+ */
8480+
8481+/*
8482+ * directory operations
8483+ */
8484+
8485+#ifndef __AUFS_DIR_H__
8486+#define __AUFS_DIR_H__
8487+
8488+#ifdef __KERNEL__
8489+
8490+#include <linux/fs.h>
8491+#include <linux/aufs_type.h>
8492+
8493+/* ---------------------------------------------------------------------- */
8494+
8495+/* need to be faster and smaller */
8496+
8497+struct au_nhash {
dece6358
AM
8498+ unsigned int nh_num;
8499+ struct hlist_head *nh_head;
1facf9fc 8500+};
8501+
8502+struct au_vdir_destr {
8503+ unsigned char len;
8504+ unsigned char name[0];
8505+} __packed;
8506+
8507+struct au_vdir_dehstr {
8508+ struct hlist_node hash;
8509+ struct au_vdir_destr *str;
4a4d8108 8510+} ____cacheline_aligned_in_smp;
1facf9fc 8511+
8512+struct au_vdir_de {
8513+ ino_t de_ino;
8514+ unsigned char de_type;
8515+ /* caution: packed */
8516+ struct au_vdir_destr de_str;
8517+} __packed;
8518+
8519+struct au_vdir_wh {
8520+ struct hlist_node wh_hash;
dece6358
AM
8521+#ifdef CONFIG_AUFS_SHWH
8522+ ino_t wh_ino;
1facf9fc 8523+ aufs_bindex_t wh_bindex;
dece6358
AM
8524+ unsigned char wh_type;
8525+#else
8526+ aufs_bindex_t wh_bindex;
8527+#endif
8528+ /* caution: packed */
1facf9fc 8529+ struct au_vdir_destr wh_str;
8530+} __packed;
8531+
8532+union au_vdir_deblk_p {
8533+ unsigned char *deblk;
8534+ struct au_vdir_de *de;
8535+};
8536+
8537+struct au_vdir {
8538+ unsigned char **vd_deblk;
8539+ unsigned long vd_nblk;
1facf9fc 8540+ struct {
8541+ unsigned long ul;
8542+ union au_vdir_deblk_p p;
8543+ } vd_last;
8544+
8545+ unsigned long vd_version;
dece6358 8546+ unsigned int vd_deblk_sz;
1facf9fc 8547+ unsigned long vd_jiffy;
4a4d8108 8548+} ____cacheline_aligned_in_smp;
1facf9fc 8549+
8550+/* ---------------------------------------------------------------------- */
8551+
8552+/* dir.c */
8553+extern const struct file_operations aufs_dir_fop;
8554+void au_add_nlink(struct inode *dir, struct inode *h_dir);
8555+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 8556+loff_t au_dir_size(struct file *file, struct dentry *dentry);
1facf9fc 8557+int au_test_empty_lower(struct dentry *dentry);
8558+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
8559+
8560+/* vdir.c */
1308ab2a 8561+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
8562+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
8563+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 8564+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
8565+ int limit);
dece6358
AM
8566+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
8567+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
8568+ unsigned int d_type, aufs_bindex_t bindex,
8569+ unsigned char shwh);
1facf9fc 8570+void au_vdir_free(struct au_vdir *vdir);
8571+int au_vdir_init(struct file *file);
8572+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
8573+
8574+/* ioctl.c */
8575+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
8576+
1308ab2a 8577+#ifdef CONFIG_AUFS_RDU
8578+/* rdu.c */
8579+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
8580+#ifdef CONFIG_COMPAT
8581+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8582+ unsigned long arg);
8583+#endif
1308ab2a 8584+#else
8585+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
8586+ unsigned long arg)
8587+{
8588+ return -EINVAL;
8589+}
b752ccd1
AM
8590+#ifdef CONFIG_COMPAT
8591+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8592+ unsigned long arg)
8593+{
8594+ return -EINVAL;
8595+}
8596+#endif
1308ab2a 8597+#endif
8598+
1facf9fc 8599+#endif /* __KERNEL__ */
8600+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
8601diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
8602--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 8603+++ linux/fs/aufs/dynop.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8604@@ -0,0 +1,377 @@
1facf9fc 8605+/*
027c5e7a 8606+ * Copyright (C) 2010-2011 Junjiro R. Okajima
1facf9fc 8607+ *
8608+ * This program, aufs is free software; you can redistribute it and/or modify
8609+ * it under the terms of the GNU General Public License as published by
8610+ * the Free Software Foundation; either version 2 of the License, or
8611+ * (at your option) any later version.
dece6358
AM
8612+ *
8613+ * This program is distributed in the hope that it will be useful,
8614+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8615+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8616+ * GNU General Public License for more details.
8617+ *
8618+ * You should have received a copy of the GNU General Public License
8619+ * along with this program; if not, write to the Free Software
8620+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8621+ */
8622+
8623+/*
4a4d8108 8624+ * dynamically customizable operations for regular files
1facf9fc 8625+ */
8626+
1facf9fc 8627+#include "aufs.h"
8628+
4a4d8108 8629+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 8630+
4a4d8108
AM
8631+/*
8632+ * How large will these lists be?
8633+ * Usually just a few elements, 20-30 at most for each, I guess.
8634+ */
8635+static struct au_splhead dynop[AuDyLast];
8636+
8637+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 8638+{
4a4d8108
AM
8639+ struct au_dykey *key, *tmp;
8640+ struct list_head *head;
1facf9fc 8641+
4a4d8108
AM
8642+ key = NULL;
8643+ head = &spl->head;
8644+ rcu_read_lock();
8645+ list_for_each_entry_rcu(tmp, head, dk_list)
8646+ if (tmp->dk_op.dy_hop == h_op) {
8647+ key = tmp;
8648+ kref_get(&key->dk_kref);
8649+ break;
8650+ }
8651+ rcu_read_unlock();
8652+
8653+ return key;
1facf9fc 8654+}
8655+
4a4d8108 8656+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 8657+{
4a4d8108
AM
8658+ struct au_dykey **k, *found;
8659+ const void *h_op = key->dk_op.dy_hop;
8660+ int i;
1facf9fc 8661+
4a4d8108
AM
8662+ found = NULL;
8663+ k = br->br_dykey;
8664+ for (i = 0; i < AuBrDynOp; i++)
8665+ if (k[i]) {
8666+ if (k[i]->dk_op.dy_hop == h_op) {
8667+ found = k[i];
8668+ break;
8669+ }
8670+ } else
8671+ break;
8672+ if (!found) {
8673+ spin_lock(&br->br_dykey_lock);
8674+ for (; i < AuBrDynOp; i++)
8675+ if (k[i]) {
8676+ if (k[i]->dk_op.dy_hop == h_op) {
8677+ found = k[i];
8678+ break;
8679+ }
8680+ } else {
8681+ k[i] = key;
8682+ break;
8683+ }
8684+ spin_unlock(&br->br_dykey_lock);
8685+ BUG_ON(i == AuBrDynOp); /* expand the array */
8686+ }
8687+
8688+ return found;
1facf9fc 8689+}
8690+
4a4d8108
AM
8691+/* kref_get() if @key is already added */
8692+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
8693+{
8694+ struct au_dykey *tmp, *found;
8695+ struct list_head *head;
8696+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 8697+
4a4d8108
AM
8698+ found = NULL;
8699+ head = &spl->head;
8700+ spin_lock(&spl->spin);
8701+ list_for_each_entry(tmp, head, dk_list)
8702+ if (tmp->dk_op.dy_hop == h_op) {
8703+ kref_get(&tmp->dk_kref);
8704+ found = tmp;
8705+ break;
8706+ }
8707+ if (!found)
8708+ list_add_rcu(&key->dk_list, head);
8709+ spin_unlock(&spl->spin);
1facf9fc 8710+
4a4d8108
AM
8711+ if (!found)
8712+ DyPrSym(key);
8713+ return found;
8714+}
8715+
8716+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 8717+{
4a4d8108
AM
8718+ struct au_dykey *key;
8719+
8720+ key = container_of(rcu, struct au_dykey, dk_rcu);
8721+ DyPrSym(key);
8722+ kfree(key);
1facf9fc 8723+}
8724+
4a4d8108
AM
8725+static void dy_free(struct kref *kref)
8726+{
8727+ struct au_dykey *key;
8728+ struct au_splhead *spl;
1facf9fc 8729+
4a4d8108
AM
8730+ key = container_of(kref, struct au_dykey, dk_kref);
8731+ spl = dynop + key->dk_op.dy_type;
8732+ au_spl_del_rcu(&key->dk_list, spl);
8733+ call_rcu(&key->dk_rcu, dy_free_rcu);
8734+}
8735+
8736+void au_dy_put(struct au_dykey *key)
1facf9fc 8737+{
4a4d8108
AM
8738+ kref_put(&key->dk_kref, dy_free);
8739+}
1facf9fc 8740+
4a4d8108
AM
8741+/* ---------------------------------------------------------------------- */
8742+
8743+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
8744+
8745+#ifdef CONFIG_AUFS_DEBUG
8746+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 8747+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
8748+#else
8749+#define DyDbgDeclare(cnt) do {} while (0)
8750+#define DyDbgInc(cnt) do {} while (0)
8751+#endif
8752+
8753+#define DySet(func, dst, src, h_op, h_sb) do { \
8754+ DyDbgInc(cnt); \
8755+ if (h_op->func) { \
8756+ if (src.func) \
8757+ dst.func = src.func; \
8758+ else \
8759+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
8760+ } \
8761+} while (0)
8762+
8763+#define DySetForce(func, dst, src) do { \
8764+ AuDebugOn(!src.func); \
8765+ DyDbgInc(cnt); \
8766+ dst.func = src.func; \
8767+} while (0)
8768+
8769+#define DySetAop(func) \
8770+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
8771+#define DySetAopForce(func) \
8772+ DySetForce(func, dyaop->da_op, aufs_aop)
8773+
8774+static void dy_aop(struct au_dykey *key, const void *h_op,
8775+ struct super_block *h_sb __maybe_unused)
8776+{
8777+ struct au_dyaop *dyaop = (void *)key;
8778+ const struct address_space_operations *h_aop = h_op;
8779+ DyDbgDeclare(cnt);
8780+
8781+ AuDbg("%s\n", au_sbtype(h_sb));
8782+
8783+ DySetAop(writepage);
8784+ DySetAopForce(readpage); /* force */
4a4d8108
AM
8785+ DySetAop(writepages);
8786+ DySetAop(set_page_dirty);
8787+ DySetAop(readpages);
8788+ DySetAop(write_begin);
8789+ DySetAop(write_end);
8790+ DySetAop(bmap);
8791+ DySetAop(invalidatepage);
8792+ DySetAop(releasepage);
027c5e7a 8793+ DySetAop(freepage);
4a4d8108
AM
8794+ /* these two will be changed according to an aufs mount option */
8795+ DySetAop(direct_IO);
8796+ DySetAop(get_xip_mem);
8797+ DySetAop(migratepage);
8798+ DySetAop(launder_page);
8799+ DySetAop(is_partially_uptodate);
8800+ DySetAop(error_remove_page);
8801+
8802+ DyDbgSize(cnt, *h_aop);
8803+ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
8804+}
8805+
4a4d8108
AM
8806+/* ---------------------------------------------------------------------- */
8807+
8808+static void dy_bug(struct kref *kref)
8809+{
8810+ BUG();
8811+}
8812+
8813+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
8814+{
8815+ struct au_dykey *key, *old;
8816+ struct au_splhead *spl;
b752ccd1 8817+ struct op {
4a4d8108 8818+ unsigned int sz;
b752ccd1
AM
8819+ void (*set)(struct au_dykey *key, const void *h_op,
8820+ struct super_block *h_sb __maybe_unused);
8821+ };
8822+ static const struct op a[] = {
4a4d8108
AM
8823+ [AuDy_AOP] = {
8824+ .sz = sizeof(struct au_dyaop),
b752ccd1 8825+ .set = dy_aop
4a4d8108 8826+ }
b752ccd1
AM
8827+ };
8828+ const struct op *p;
4a4d8108
AM
8829+
8830+ spl = dynop + op->dy_type;
8831+ key = dy_gfind_get(spl, op->dy_hop);
8832+ if (key)
8833+ goto out_add; /* success */
8834+
8835+ p = a + op->dy_type;
8836+ key = kzalloc(p->sz, GFP_NOFS);
8837+ if (unlikely(!key)) {
8838+ key = ERR_PTR(-ENOMEM);
8839+ goto out;
8840+ }
8841+
8842+ key->dk_op.dy_hop = op->dy_hop;
8843+ kref_init(&key->dk_kref);
b752ccd1 8844+ p->set(key, op->dy_hop, br->br_mnt->mnt_sb);
4a4d8108
AM
8845+ old = dy_gadd(spl, key);
8846+ if (old) {
8847+ kfree(key);
8848+ key = old;
8849+ }
8850+
8851+out_add:
8852+ old = dy_bradd(br, key);
8853+ if (old)
8854+ /* its ref-count should never be zero here */
8855+ kref_put(&key->dk_kref, dy_bug);
8856+out:
8857+ return key;
8858+}
8859+
8860+/* ---------------------------------------------------------------------- */
8861+/*
8862+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
8863+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
8864+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
8865+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
8866+ * See the aufs manual for details.
8867+ *
8868+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
8869+ * performance of fadvise() and madvise() may be affected.
8870+ */
8871+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
8872+{
8873+ if (!do_dx) {
8874+ dyaop->da_op.direct_IO = NULL;
8875+ dyaop->da_op.get_xip_mem = NULL;
8876+ } else {
8877+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
8878+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
8879+ if (!dyaop->da_get_xip_mem)
8880+ dyaop->da_op.get_xip_mem = NULL;
8881+ }
8882+}
8883+
8884+static struct au_dyaop *dy_aget(struct au_branch *br,
8885+ const struct address_space_operations *h_aop,
8886+ int do_dx)
8887+{
8888+ struct au_dyaop *dyaop;
8889+ struct au_dynop op;
8890+
8891+ op.dy_type = AuDy_AOP;
8892+ op.dy_haop = h_aop;
8893+ dyaop = (void *)dy_get(&op, br);
8894+ if (IS_ERR(dyaop))
8895+ goto out;
8896+ dy_adx(dyaop, do_dx);
8897+
8898+out:
8899+ return dyaop;
8900+}
8901+
8902+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
8903+ struct inode *h_inode)
8904+{
8905+ int err, do_dx;
8906+ struct super_block *sb;
8907+ struct au_branch *br;
8908+ struct au_dyaop *dyaop;
8909+
8910+ AuDebugOn(!S_ISREG(h_inode->i_mode));
8911+ IiMustWriteLock(inode);
8912+
8913+ sb = inode->i_sb;
8914+ br = au_sbr(sb, bindex);
8915+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
8916+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
8917+ err = PTR_ERR(dyaop);
8918+ if (IS_ERR(dyaop))
8919+ /* unnecessary to call dy_fput() */
8920+ goto out;
8921+
8922+ err = 0;
8923+ inode->i_mapping->a_ops = &dyaop->da_op;
8924+
8925+out:
8926+ return err;
8927+}
8928+
b752ccd1
AM
8929+/*
8930+ * Is it safe to replace a_ops while the inode/file is in operation?
8931+ * Yes, I hope so.
8932+ */
8933+int au_dy_irefresh(struct inode *inode)
8934+{
8935+ int err;
8936+ aufs_bindex_t bstart;
8937+ struct inode *h_inode;
8938+
8939+ err = 0;
8940+ if (S_ISREG(inode->i_mode)) {
8941+ bstart = au_ibstart(inode);
8942+ h_inode = au_h_iptr(inode, bstart);
8943+ err = au_dy_iaop(inode, bstart, h_inode);
8944+ }
8945+ return err;
8946+}
8947+
4a4d8108
AM
8948+void au_dy_arefresh(int do_dx)
8949+{
8950+ struct au_splhead *spl;
8951+ struct list_head *head;
8952+ struct au_dykey *key;
8953+
8954+ spl = dynop + AuDy_AOP;
8955+ head = &spl->head;
8956+ spin_lock(&spl->spin);
8957+ list_for_each_entry(key, head, dk_list)
8958+ dy_adx((void *)key, do_dx);
8959+ spin_unlock(&spl->spin);
8960+}
8961+
4a4d8108
AM
8962+/* ---------------------------------------------------------------------- */
8963+
8964+void __init au_dy_init(void)
8965+{
8966+ int i;
8967+
8968+ /* make sure that 'struct au_dykey *' can be any type */
8969+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
8970+
8971+ for (i = 0; i < AuDyLast; i++)
8972+ au_spl_init(dynop + i);
8973+}
8974+
8975+void au_dy_fin(void)
8976+{
8977+ int i;
8978+
8979+ for (i = 0; i < AuDyLast; i++)
8980+ WARN_ON(!list_empty(&dynop[i].head));
8981+}
7f207e10
AM
8982diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
8983--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8984+++ linux/fs/aufs/dynop.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8985@@ -0,0 +1,80 @@
4a4d8108 8986+/*
027c5e7a 8987+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
8988+ *
8989+ * This program, aufs is free software; you can redistribute it and/or modify
8990+ * it under the terms of the GNU General Public License as published by
8991+ * the Free Software Foundation; either version 2 of the License, or
8992+ * (at your option) any later version.
8993+ *
8994+ * This program is distributed in the hope that it will be useful,
8995+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8996+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8997+ * GNU General Public License for more details.
8998+ *
8999+ * You should have received a copy of the GNU General Public License
9000+ * along with this program; if not, write to the Free Software
9001+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9002+ */
9003+
9004+/*
9005+ * dynamically customizable operations (for regular files only)
9006+ */
9007+
9008+#ifndef __AUFS_DYNOP_H__
9009+#define __AUFS_DYNOP_H__
9010+
9011+#ifdef __KERNEL__
9012+
9013+#include <linux/fs.h>
9014+#include <linux/mm.h>
9015+#include <linux/rcupdate.h>
9016+#include <linux/aufs_type.h>
9017+#include "inode.h"
9018+
2cbb1c4b 9019+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
9020+
9021+struct au_dynop {
9022+ int dy_type;
9023+ union {
9024+ const void *dy_hop;
9025+ const struct address_space_operations *dy_haop;
4a4d8108
AM
9026+ };
9027+};
9028+
9029+struct au_dykey {
9030+ union {
9031+ struct list_head dk_list;
9032+ struct rcu_head dk_rcu;
9033+ };
9034+ struct au_dynop dk_op;
9035+
9036+ /*
9037+ * while this key is in the branch local array, a kref is held. when the
9038+ * branch is removed, the kref is put.
9039+ */
9040+ struct kref dk_kref;
9041+};
9042+
9043+/* stop unioning since their sizes are very different from each other */
9044+struct au_dyaop {
9045+ struct au_dykey da_key;
9046+ struct address_space_operations da_op; /* not const */
9047+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
9048+ void **, unsigned long *);
9049+};
9050+
4a4d8108
AM
9051+/* ---------------------------------------------------------------------- */
9052+
9053+/* dynop.c */
9054+struct au_branch;
9055+void au_dy_put(struct au_dykey *key);
9056+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
9057+ struct inode *h_inode);
b752ccd1 9058+int au_dy_irefresh(struct inode *inode);
4a4d8108 9059+void au_dy_arefresh(int do_dio);
4a4d8108
AM
9060+
9061+void __init au_dy_init(void);
9062+void au_dy_fin(void);
9063+
4a4d8108
AM
9064+#endif /* __KERNEL__ */
9065+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
9066diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
9067--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9068+++ linux/fs/aufs/export.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9069@@ -0,0 +1,805 @@
4a4d8108 9070+/*
027c5e7a 9071+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
9072+ *
9073+ * This program, aufs is free software; you can redistribute it and/or modify
9074+ * it under the terms of the GNU General Public License as published by
9075+ * the Free Software Foundation; either version 2 of the License, or
9076+ * (at your option) any later version.
9077+ *
9078+ * This program is distributed in the hope that it will be useful,
9079+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9080+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9081+ * GNU General Public License for more details.
9082+ *
9083+ * You should have received a copy of the GNU General Public License
9084+ * along with this program; if not, write to the Free Software
9085+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9086+ */
9087+
9088+/*
9089+ * export via nfs
9090+ */
9091+
9092+#include <linux/exportfs.h>
9093+#include <linux/file.h>
9094+#include <linux/mnt_namespace.h>
9095+#include <linux/namei.h>
9096+#include <linux/nsproxy.h>
9097+#include <linux/random.h>
9098+#include <linux/writeback.h>
9099+#include "aufs.h"
9100+
9101+union conv {
9102+#ifdef CONFIG_AUFS_INO_T_64
9103+ __u32 a[2];
9104+#else
9105+ __u32 a[1];
9106+#endif
9107+ ino_t ino;
9108+};
9109+
9110+static ino_t decode_ino(__u32 *a)
9111+{
9112+ union conv u;
9113+
9114+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
9115+ u.a[0] = a[0];
9116+#ifdef CONFIG_AUFS_INO_T_64
9117+ u.a[1] = a[1];
9118+#endif
9119+ return u.ino;
9120+}
9121+
9122+static void encode_ino(__u32 *a, ino_t ino)
9123+{
9124+ union conv u;
9125+
9126+ u.ino = ino;
9127+ a[0] = u.a[0];
9128+#ifdef CONFIG_AUFS_INO_T_64
9129+ a[1] = u.a[1];
9130+#endif
9131+}
9132+
9133+/* NFS file handle */
9134+enum {
9135+ Fh_br_id,
9136+ Fh_sigen,
9137+#ifdef CONFIG_AUFS_INO_T_64
9138+ /* support 64bit inode number */
9139+ Fh_ino1,
9140+ Fh_ino2,
9141+ Fh_dir_ino1,
9142+ Fh_dir_ino2,
9143+#else
9144+ Fh_ino1,
9145+ Fh_dir_ino1,
9146+#endif
9147+ Fh_igen,
9148+ Fh_h_type,
9149+ Fh_tail,
9150+
9151+ Fh_ino = Fh_ino1,
9152+ Fh_dir_ino = Fh_dir_ino1
9153+};
9154+
9155+static int au_test_anon(struct dentry *dentry)
9156+{
027c5e7a 9157+ /* note: read d_flags without d_lock */
4a4d8108
AM
9158+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
9159+}
9160+
9161+/* ---------------------------------------------------------------------- */
9162+/* inode generation external table */
9163+
b752ccd1 9164+void au_xigen_inc(struct inode *inode)
4a4d8108 9165+{
4a4d8108
AM
9166+ loff_t pos;
9167+ ssize_t sz;
9168+ __u32 igen;
9169+ struct super_block *sb;
9170+ struct au_sbinfo *sbinfo;
9171+
4a4d8108 9172+ sb = inode->i_sb;
b752ccd1 9173+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 9174+
b752ccd1 9175+ sbinfo = au_sbi(sb);
1facf9fc 9176+ pos = inode->i_ino;
9177+ pos *= sizeof(igen);
9178+ igen = inode->i_generation + 1;
1facf9fc 9179+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
9180+ sizeof(igen), &pos);
9181+ if (sz == sizeof(igen))
b752ccd1 9182+ return; /* success */
1facf9fc 9183+
b752ccd1 9184+ if (unlikely(sz >= 0))
1facf9fc 9185+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 9186+}
9187+
9188+int au_xigen_new(struct inode *inode)
9189+{
9190+ int err;
9191+ loff_t pos;
9192+ ssize_t sz;
9193+ struct super_block *sb;
9194+ struct au_sbinfo *sbinfo;
9195+ struct file *file;
9196+
9197+ err = 0;
9198+ /* todo: dirty, at mount time */
9199+ if (inode->i_ino == AUFS_ROOT_INO)
9200+ goto out;
9201+ sb = inode->i_sb;
dece6358 9202+ SiMustAnyLock(sb);
1facf9fc 9203+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9204+ goto out;
9205+
9206+ err = -EFBIG;
9207+ pos = inode->i_ino;
9208+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
9209+ AuIOErr1("too large i%lld\n", pos);
9210+ goto out;
9211+ }
9212+ pos *= sizeof(inode->i_generation);
9213+
9214+ err = 0;
9215+ sbinfo = au_sbi(sb);
9216+ file = sbinfo->si_xigen;
9217+ BUG_ON(!file);
9218+
9219+ if (i_size_read(file->f_dentry->d_inode)
9220+ < pos + sizeof(inode->i_generation)) {
9221+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
9222+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
9223+ sizeof(inode->i_generation), &pos);
9224+ } else
9225+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
9226+ sizeof(inode->i_generation), &pos);
9227+ if (sz == sizeof(inode->i_generation))
9228+ goto out; /* success */
9229+
9230+ err = sz;
9231+ if (unlikely(sz >= 0)) {
9232+ err = -EIO;
9233+ AuIOErr("xigen error (%zd)\n", sz);
9234+ }
9235+
4f0767ce 9236+out:
1facf9fc 9237+ return err;
9238+}
9239+
9240+int au_xigen_set(struct super_block *sb, struct file *base)
9241+{
9242+ int err;
9243+ struct au_sbinfo *sbinfo;
9244+ struct file *file;
9245+
dece6358
AM
9246+ SiMustWriteLock(sb);
9247+
1facf9fc 9248+ sbinfo = au_sbi(sb);
9249+ file = au_xino_create2(base, sbinfo->si_xigen);
9250+ err = PTR_ERR(file);
9251+ if (IS_ERR(file))
9252+ goto out;
9253+ err = 0;
9254+ if (sbinfo->si_xigen)
9255+ fput(sbinfo->si_xigen);
9256+ sbinfo->si_xigen = file;
9257+
4f0767ce 9258+out:
1facf9fc 9259+ return err;
9260+}
9261+
9262+void au_xigen_clr(struct super_block *sb)
9263+{
9264+ struct au_sbinfo *sbinfo;
9265+
dece6358
AM
9266+ SiMustWriteLock(sb);
9267+
1facf9fc 9268+ sbinfo = au_sbi(sb);
9269+ if (sbinfo->si_xigen) {
9270+ fput(sbinfo->si_xigen);
9271+ sbinfo->si_xigen = NULL;
9272+ }
9273+}
9274+
9275+/* ---------------------------------------------------------------------- */
9276+
9277+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
9278+ ino_t dir_ino)
9279+{
9280+ struct dentry *dentry, *d;
9281+ struct inode *inode;
9282+ unsigned int sigen;
9283+
9284+ dentry = NULL;
9285+ inode = ilookup(sb, ino);
9286+ if (!inode)
9287+ goto out;
9288+
9289+ dentry = ERR_PTR(-ESTALE);
9290+ sigen = au_sigen(sb);
9291+ if (unlikely(is_bad_inode(inode)
9292+ || IS_DEADDIR(inode)
9293+ || sigen != au_iigen(inode)))
9294+ goto out_iput;
9295+
9296+ dentry = NULL;
9297+ if (!dir_ino || S_ISDIR(inode->i_mode))
9298+ dentry = d_find_alias(inode);
9299+ else {
027c5e7a
AM
9300+ spin_lock(&inode->i_lock);
9301+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
9302+ spin_lock(&d->d_lock);
1facf9fc 9303+ if (!au_test_anon(d)
9304+ && d->d_parent->d_inode->i_ino == dir_ino) {
027c5e7a
AM
9305+ dentry = dget_dlock(d);
9306+ spin_unlock(&d->d_lock);
1facf9fc 9307+ break;
9308+ }
027c5e7a
AM
9309+ spin_unlock(&d->d_lock);
9310+ }
9311+ spin_unlock(&inode->i_lock);
1facf9fc 9312+ }
027c5e7a 9313+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 9314+ /* need to refresh */
1facf9fc 9315+ dput(dentry);
2cbb1c4b 9316+ dentry = NULL;
1facf9fc 9317+ }
9318+
4f0767ce 9319+out_iput:
1facf9fc 9320+ iput(inode);
4f0767ce 9321+out:
2cbb1c4b 9322+ AuTraceErrPtr(dentry);
1facf9fc 9323+ return dentry;
9324+}
9325+
9326+/* ---------------------------------------------------------------------- */
9327+
9328+/* todo: dirty? */
9329+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
9330+
9331+struct au_compare_mnt_args {
9332+ /* input */
9333+ struct super_block *sb;
9334+
9335+ /* output */
9336+ struct vfsmount *mnt;
9337+};
9338+
9339+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
9340+{
9341+ struct au_compare_mnt_args *a = arg;
9342+
9343+ if (mnt->mnt_sb != a->sb)
9344+ return 0;
9345+ a->mnt = mntget(mnt);
9346+ return 1;
9347+}
9348+
1facf9fc 9349+static struct vfsmount *au_mnt_get(struct super_block *sb)
9350+{
4a4d8108
AM
9351+ int err;
9352+ struct au_compare_mnt_args args = {
9353+ .sb = sb
9354+ };
1facf9fc 9355+ struct mnt_namespace *ns;
1facf9fc 9356+
0c5527e5 9357+ br_read_lock(vfsmount_lock);
1facf9fc 9358+ /* no get/put ?? */
9359+ AuDebugOn(!current->nsproxy);
9360+ ns = current->nsproxy->mnt_ns;
9361+ AuDebugOn(!ns);
4a4d8108 9362+ err = iterate_mounts(au_compare_mnt, &args, ns->root);
0c5527e5 9363+ br_read_unlock(vfsmount_lock);
4a4d8108
AM
9364+ AuDebugOn(!err);
9365+ AuDebugOn(!args.mnt);
9366+ return args.mnt;
1facf9fc 9367+}
9368+
9369+struct au_nfsd_si_lock {
4a4d8108 9370+ unsigned int sigen;
027c5e7a 9371+ aufs_bindex_t bindex, br_id;
1facf9fc 9372+ unsigned char force_lock;
9373+};
9374+
027c5e7a
AM
9375+static int si_nfsd_read_lock(struct super_block *sb,
9376+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9377+{
027c5e7a 9378+ int err;
1facf9fc 9379+ aufs_bindex_t bindex;
9380+
9381+ si_read_lock(sb, AuLock_FLUSH);
9382+
9383+ /* branch id may be wrapped around */
027c5e7a 9384+ err = 0;
1facf9fc 9385+ bindex = au_br_index(sb, nsi_lock->br_id);
9386+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
9387+ goto out; /* success */
9388+
027c5e7a
AM
9389+ err = -ESTALE;
9390+ bindex = -1;
1facf9fc 9391+ if (!nsi_lock->force_lock)
9392+ si_read_unlock(sb);
1facf9fc 9393+
4f0767ce 9394+out:
027c5e7a
AM
9395+ nsi_lock->bindex = bindex;
9396+ return err;
1facf9fc 9397+}
9398+
9399+struct find_name_by_ino {
9400+ int called, found;
9401+ ino_t ino;
9402+ char *name;
9403+ int namelen;
9404+};
9405+
9406+static int
9407+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
9408+ u64 ino, unsigned int d_type)
9409+{
9410+ struct find_name_by_ino *a = arg;
9411+
9412+ a->called++;
9413+ if (a->ino != ino)
9414+ return 0;
9415+
9416+ memcpy(a->name, name, namelen);
9417+ a->namelen = namelen;
9418+ a->found = 1;
9419+ return 1;
9420+}
9421+
9422+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
9423+ struct au_nfsd_si_lock *nsi_lock)
9424+{
9425+ struct dentry *dentry, *parent;
9426+ struct file *file;
9427+ struct inode *dir;
9428+ struct find_name_by_ino arg;
9429+ int err;
9430+
9431+ parent = path->dentry;
9432+ if (nsi_lock)
9433+ si_read_unlock(parent->d_sb);
4a4d8108 9434+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 9435+ dentry = (void *)file;
9436+ if (IS_ERR(file))
9437+ goto out;
9438+
9439+ dentry = ERR_PTR(-ENOMEM);
4a4d8108 9440+ arg.name = __getname_gfp(GFP_NOFS);
1facf9fc 9441+ if (unlikely(!arg.name))
9442+ goto out_file;
9443+ arg.ino = ino;
9444+ arg.found = 0;
9445+ do {
9446+ arg.called = 0;
9447+ /* smp_mb(); */
9448+ err = vfsub_readdir(file, find_name_by_ino, &arg);
9449+ } while (!err && !arg.found && arg.called);
9450+ dentry = ERR_PTR(err);
9451+ if (unlikely(err))
9452+ goto out_name;
9453+ dentry = ERR_PTR(-ENOENT);
9454+ if (!arg.found)
9455+ goto out_name;
9456+
9457+ /* do not call au_lkup_one() */
9458+ dir = parent->d_inode;
9459+ mutex_lock(&dir->i_mutex);
9460+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
9461+ mutex_unlock(&dir->i_mutex);
9462+ AuTraceErrPtr(dentry);
9463+ if (IS_ERR(dentry))
9464+ goto out_name;
9465+ AuDebugOn(au_test_anon(dentry));
9466+ if (unlikely(!dentry->d_inode)) {
9467+ dput(dentry);
9468+ dentry = ERR_PTR(-ENOENT);
9469+ }
9470+
4f0767ce 9471+out_name:
1facf9fc 9472+ __putname(arg.name);
4f0767ce 9473+out_file:
1facf9fc 9474+ fput(file);
4f0767ce 9475+out:
1facf9fc 9476+ if (unlikely(nsi_lock
9477+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
9478+ if (!IS_ERR(dentry)) {
9479+ dput(dentry);
9480+ dentry = ERR_PTR(-ESTALE);
9481+ }
9482+ AuTraceErrPtr(dentry);
9483+ return dentry;
9484+}
9485+
9486+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
9487+ ino_t dir_ino,
9488+ struct au_nfsd_si_lock *nsi_lock)
9489+{
9490+ struct dentry *dentry;
9491+ struct path path;
9492+
9493+ if (dir_ino != AUFS_ROOT_INO) {
9494+ path.dentry = decode_by_ino(sb, dir_ino, 0);
9495+ dentry = path.dentry;
9496+ if (!path.dentry || IS_ERR(path.dentry))
9497+ goto out;
9498+ AuDebugOn(au_test_anon(path.dentry));
9499+ } else
9500+ path.dentry = dget(sb->s_root);
9501+
9502+ path.mnt = au_mnt_get(sb);
9503+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
9504+ path_put(&path);
9505+
4f0767ce 9506+out:
1facf9fc 9507+ AuTraceErrPtr(dentry);
9508+ return dentry;
9509+}
9510+
9511+/* ---------------------------------------------------------------------- */
9512+
9513+static int h_acceptable(void *expv, struct dentry *dentry)
9514+{
9515+ return 1;
9516+}
9517+
9518+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
9519+ char *buf, int len, struct super_block *sb)
9520+{
9521+ char *p;
9522+ int n;
9523+ struct path path;
9524+
9525+ p = d_path(h_rootpath, buf, len);
9526+ if (IS_ERR(p))
9527+ goto out;
9528+ n = strlen(p);
9529+
9530+ path.mnt = h_rootpath->mnt;
9531+ path.dentry = h_parent;
9532+ p = d_path(&path, buf, len);
9533+ if (IS_ERR(p))
9534+ goto out;
9535+ if (n != 1)
9536+ p += n;
9537+
9538+ path.mnt = au_mnt_get(sb);
9539+ path.dentry = sb->s_root;
9540+ p = d_path(&path, buf, len - strlen(p));
9541+ mntput(path.mnt);
9542+ if (IS_ERR(p))
9543+ goto out;
9544+ if (n != 1)
9545+ p[strlen(p)] = '/';
9546+
4f0767ce 9547+out:
1facf9fc 9548+ AuTraceErrPtr(p);
9549+ return p;
9550+}
9551+
9552+static
027c5e7a
AM
9553+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
9554+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9555+{
9556+ struct dentry *dentry, *h_parent, *root;
9557+ struct super_block *h_sb;
9558+ char *pathname, *p;
9559+ struct vfsmount *h_mnt;
9560+ struct au_branch *br;
9561+ int err;
9562+ struct path path;
9563+
027c5e7a 9564+ br = au_sbr(sb, nsi_lock->bindex);
1facf9fc 9565+ h_mnt = br->br_mnt;
9566+ h_sb = h_mnt->mnt_sb;
9567+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
9568+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
9569+ fh_len - Fh_tail, fh[Fh_h_type],
9570+ h_acceptable, /*context*/NULL);
9571+ dentry = h_parent;
9572+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
9573+ AuWarn1("%s decode_fh failed, %ld\n",
9574+ au_sbtype(h_sb), PTR_ERR(h_parent));
9575+ goto out;
9576+ }
9577+ dentry = NULL;
9578+ if (unlikely(au_test_anon(h_parent))) {
9579+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
9580+ au_sbtype(h_sb));
9581+ goto out_h_parent;
9582+ }
9583+
9584+ dentry = ERR_PTR(-ENOMEM);
9585+ pathname = (void *)__get_free_page(GFP_NOFS);
9586+ if (unlikely(!pathname))
9587+ goto out_h_parent;
9588+
9589+ root = sb->s_root;
9590+ path.mnt = h_mnt;
9591+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 9592+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 9593+ di_read_unlock(root, !AuLock_IR);
9594+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
9595+ dentry = (void *)p;
9596+ if (IS_ERR(p))
9597+ goto out_pathname;
9598+
9599+ si_read_unlock(sb);
9600+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
9601+ dentry = ERR_PTR(err);
9602+ if (unlikely(err))
9603+ goto out_relock;
9604+
9605+ dentry = ERR_PTR(-ENOENT);
9606+ AuDebugOn(au_test_anon(path.dentry));
9607+ if (unlikely(!path.dentry->d_inode))
9608+ goto out_path;
9609+
9610+ if (ino != path.dentry->d_inode->i_ino)
9611+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
9612+ else
9613+ dentry = dget(path.dentry);
9614+
4f0767ce 9615+out_path:
1facf9fc 9616+ path_put(&path);
4f0767ce 9617+out_relock:
1facf9fc 9618+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
9619+ if (!IS_ERR(dentry)) {
9620+ dput(dentry);
9621+ dentry = ERR_PTR(-ESTALE);
9622+ }
4f0767ce 9623+out_pathname:
1facf9fc 9624+ free_page((unsigned long)pathname);
4f0767ce 9625+out_h_parent:
1facf9fc 9626+ dput(h_parent);
4f0767ce 9627+out:
1facf9fc 9628+ AuTraceErrPtr(dentry);
9629+ return dentry;
9630+}
9631+
9632+/* ---------------------------------------------------------------------- */
9633+
9634+static struct dentry *
9635+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
9636+ int fh_type)
9637+{
9638+ struct dentry *dentry;
9639+ __u32 *fh = fid->raw;
027c5e7a 9640+ struct au_branch *br;
1facf9fc 9641+ ino_t ino, dir_ino;
1facf9fc 9642+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 9643+ .force_lock = 0
9644+ };
9645+
1facf9fc 9646+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
9647+ /* it should never happen, but the file handle is unreliable */
9648+ if (unlikely(fh_len < Fh_tail))
9649+ goto out;
9650+ nsi_lock.sigen = fh[Fh_sigen];
9651+ nsi_lock.br_id = fh[Fh_br_id];
9652+
1facf9fc 9653+ /* branch id may be wrapped around */
027c5e7a
AM
9654+ br = NULL;
9655+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 9656+ goto out;
9657+ nsi_lock.force_lock = 1;
9658+
9659+ /* is this inode still cached? */
9660+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
9661+ /* it should never happen; sb is read-locked here, so leave via out_unlock */
9662+ if (unlikely(ino == AUFS_ROOT_INO))
9663+ goto out_unlock;
9664+
1facf9fc 9665+ dir_ino = decode_ino(fh + Fh_dir_ino);
9666+ dentry = decode_by_ino(sb, ino, dir_ino);
9667+ if (IS_ERR(dentry))
9668+ goto out_unlock;
9669+ if (dentry)
9670+ goto accept;
9671+
9672+ /* is the parent dir cached? */
027c5e7a
AM
9673+ br = au_sbr(sb, nsi_lock.bindex);
9674+ atomic_inc(&br->br_count);
1facf9fc 9675+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
9676+ if (IS_ERR(dentry))
9677+ goto out_unlock;
9678+ if (dentry)
9679+ goto accept;
9680+
9681+ /* lookup path */
027c5e7a 9682+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 9683+ if (IS_ERR(dentry))
9684+ goto out_unlock;
9685+ if (unlikely(!dentry))
9686+ /* todo?: make it ESTALE */
9687+ goto out_unlock;
9688+
4f0767ce 9689+accept:
027c5e7a
AM
9690+ if (!au_digen_test(dentry, au_sigen(sb))
9691+ && dentry->d_inode->i_generation == fh[Fh_igen])
1facf9fc 9692+ goto out_unlock; /* success */
9693+
9694+ dput(dentry);
9695+ dentry = ERR_PTR(-ESTALE);
4f0767ce 9696+out_unlock:
027c5e7a
AM
9697+ if (br)
9698+ atomic_dec(&br->br_count);
1facf9fc 9699+ si_read_unlock(sb);
4f0767ce 9700+out:
1facf9fc 9701+ AuTraceErrPtr(dentry);
9702+ return dentry;
9703+}
9704+
9705+#if 0 /* reserved for future use */
9706+/* support subtreecheck option */
9707+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
9708+ int fh_len, int fh_type)
9709+{
9710+ struct dentry *parent;
9711+ __u32 *fh = fid->raw;
9712+ ino_t dir_ino;
9713+
9714+ dir_ino = decode_ino(fh + Fh_dir_ino);
9715+ parent = decode_by_ino(sb, dir_ino, 0);
9716+ if (IS_ERR(parent))
9717+ goto out;
9718+ if (!parent)
9719+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
9720+ dir_ino, fh, fh_len);
9721+
4f0767ce 9722+out:
1facf9fc 9723+ AuTraceErrPtr(parent);
9724+ return parent;
9725+}
9726+#endif
9727+
9728+/* ---------------------------------------------------------------------- */
9729+
9730+static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
9731+ int connectable)
9732+{
9733+ int err;
9734+ aufs_bindex_t bindex, bend;
9735+ struct super_block *sb, *h_sb;
9736+ struct inode *inode;
9737+ struct dentry *parent, *h_parent;
9738+ struct au_branch *br;
9739+
9740+ AuDebugOn(au_test_anon(dentry));
9741+
9742+ parent = NULL;
9743+ err = -ENOSPC;
9744+ if (unlikely(*max_len <= Fh_tail)) {
9745+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
9746+ goto out;
9747+ }
9748+
9749+ err = FILEID_ROOT;
9750+ if (IS_ROOT(dentry)) {
9751+ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
9752+ goto out;
9753+ }
9754+
1facf9fc 9755+ h_parent = NULL;
027c5e7a
AM
9756+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN);
9757+ if (unlikely(err))
9758+ goto out;
9759+
1facf9fc 9760+ inode = dentry->d_inode;
9761+ AuDebugOn(!inode);
027c5e7a 9762+ sb = dentry->d_sb;
1facf9fc 9763+#ifdef CONFIG_AUFS_DEBUG
9764+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9765+ AuWarn1("NFS-exporting requires xino\n");
9766+#endif
027c5e7a
AM
9767+ err = -EIO;
9768+ parent = dget_parent(dentry);
9769+ di_read_lock_parent(parent, !AuLock_IR);
1facf9fc 9770+ bend = au_dbtaildir(parent);
9771+ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
9772+ h_parent = au_h_dptr(parent, bindex);
9773+ if (h_parent) {
9774+ dget(h_parent);
9775+ break;
9776+ }
9777+ }
9778+ if (unlikely(!h_parent))
9779+ goto out_unlock;
9780+
9781+ err = -EPERM;
9782+ br = au_sbr(sb, bindex);
9783+ h_sb = br->br_mnt->mnt_sb;
9784+ if (unlikely(!h_sb->s_export_op)) {
9785+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
9786+ goto out_dput;
9787+ }
9788+
9789+ fh[Fh_br_id] = br->br_id;
9790+ fh[Fh_sigen] = au_sigen(sb);
9791+ encode_ino(fh + Fh_ino, inode->i_ino);
9792+ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
9793+ fh[Fh_igen] = inode->i_generation;
9794+
9795+ *max_len -= Fh_tail;
9796+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
9797+ max_len,
9798+ /*connectable or subtreecheck*/0);
9799+ err = fh[Fh_h_type];
9800+ *max_len += Fh_tail;
9801+ /* todo: macros? */
9802+ if (err != 255)
9803+ err = 99;
9804+ else
9805+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
9806+
4f0767ce 9807+out_dput:
1facf9fc 9808+ dput(h_parent);
4f0767ce 9809+out_unlock:
1facf9fc 9810+ di_read_unlock(parent, !AuLock_IR);
9811+ dput(parent);
9812+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 9813+out:
1facf9fc 9814+ if (unlikely(err < 0))
9815+ err = 255;
9816+ return err;
9817+}
9818+
9819+/* ---------------------------------------------------------------------- */
9820+
4a4d8108
AM
9821+static int aufs_commit_metadata(struct inode *inode)
9822+{
9823+ int err;
9824+ aufs_bindex_t bindex;
9825+ struct super_block *sb;
9826+ struct inode *h_inode;
9827+ int (*f)(struct inode *inode);
9828+
9829+ sb = inode->i_sb;
e49829fe 9830+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
9831+ ii_write_lock_child(inode);
9832+ bindex = au_ibstart(inode);
9833+ AuDebugOn(bindex < 0);
9834+ h_inode = au_h_iptr(inode, bindex);
9835+
9836+ f = h_inode->i_sb->s_export_op->commit_metadata;
9837+ if (f)
9838+ err = f(h_inode);
9839+ else {
9840+ struct writeback_control wbc = {
9841+ .sync_mode = WB_SYNC_ALL,
9842+ .nr_to_write = 0 /* metadata only */
9843+ };
9844+
9845+ err = sync_inode(h_inode, &wbc);
9846+ }
9847+
9848+ au_cpup_attr_timesizes(inode);
9849+ ii_write_unlock(inode);
9850+ si_read_unlock(sb);
9851+ return err;
9852+}
9853+
9854+/* ---------------------------------------------------------------------- */
9855+
1facf9fc 9856+static struct export_operations aufs_export_op = {
4a4d8108 9857+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 9858+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
9859+ .encode_fh = aufs_encode_fh,
9860+ .commit_metadata = aufs_commit_metadata
1facf9fc 9861+};
9862+
9863+void au_export_init(struct super_block *sb)
9864+{
9865+ struct au_sbinfo *sbinfo;
9866+ __u32 u;
9867+
9868+ sb->s_export_op = &aufs_export_op;
9869+ sbinfo = au_sbi(sb);
9870+ sbinfo->si_xigen = NULL;
9871+ get_random_bytes(&u, sizeof(u));
9872+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
9873+ atomic_set(&sbinfo->si_xigen_next, u);
9874+}
7f207e10
AM
9875diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
9876--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9877+++ linux/fs/aufs/file.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9878@@ -0,0 +1,676 @@
1facf9fc 9879+/*
027c5e7a 9880+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 9881+ *
9882+ * This program, aufs is free software; you can redistribute it and/or modify
9883+ * it under the terms of the GNU General Public License as published by
9884+ * the Free Software Foundation; either version 2 of the License, or
9885+ * (at your option) any later version.
dece6358
AM
9886+ *
9887+ * This program is distributed in the hope that it will be useful,
9888+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9889+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9890+ * GNU General Public License for more details.
9891+ *
9892+ * You should have received a copy of the GNU General Public License
9893+ * along with this program; if not, write to the Free Software
9894+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 9895+ */
9896+
9897+/*
4a4d8108 9898+ * handling file/dir, and address_space operations
1facf9fc 9899+ */
9900+
dece6358 9901+#include <linux/file.h>
4a4d8108
AM
9902+#include <linux/fsnotify.h>
9903+#include <linux/namei.h>
9904+#include <linux/pagemap.h>
1facf9fc 9905+#include "aufs.h"
9906+
4a4d8108
AM
9907+/* drop flags for writing */
9908+unsigned int au_file_roflags(unsigned int flags)
9909+{
9910+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
9911+ flags |= O_RDONLY | O_NOATIME;
9912+ return flags;
9913+}
9914+
9915+/* common functions to regular file and dir */
9916+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9917+ struct file *file)
1facf9fc 9918+{
1308ab2a 9919+ struct file *h_file;
4a4d8108
AM
9920+ struct dentry *h_dentry;
9921+ struct inode *h_inode;
9922+ struct super_block *sb;
9923+ struct au_branch *br;
9924+ struct path h_path;
9925+ int err, exec_flag;
1facf9fc 9926+
4a4d8108
AM
9927+ /* a race condition can happen between open and unlink/rmdir */
9928+ h_file = ERR_PTR(-ENOENT);
9929+ h_dentry = au_h_dptr(dentry, bindex);
b752ccd1 9930+ if (au_test_nfsd() && !h_dentry)
4a4d8108
AM
9931+ goto out;
9932+ h_inode = h_dentry->d_inode;
b752ccd1 9933+ if (au_test_nfsd() && !h_inode)
4a4d8108 9934+ goto out;
027c5e7a
AM
9935+ spin_lock(&h_dentry->d_lock);
9936+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
9937+ || !h_inode
9938+ /* || !dentry->d_inode->i_nlink */
9939+ ;
9940+ spin_unlock(&h_dentry->d_lock);
9941+ if (unlikely(err))
4a4d8108 9942+ goto out;
1facf9fc 9943+
4a4d8108
AM
9944+ sb = dentry->d_sb;
9945+ br = au_sbr(sb, bindex);
9946+ h_file = ERR_PTR(-EACCES);
2cbb1c4b 9947+ exec_flag = flags & __FMODE_EXEC;
4a4d8108 9948+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
027c5e7a 9949+ goto out;
1facf9fc 9950+
4a4d8108
AM
9951+ /* drop flags for writing */
9952+ if (au_test_ro(sb, bindex, dentry->d_inode))
9953+ flags = au_file_roflags(flags);
9954+ flags &= ~O_CREAT;
9955+ atomic_inc(&br->br_count);
9956+ h_path.dentry = h_dentry;
9957+ h_path.mnt = br->br_mnt;
9958+ if (!au_special_file(h_inode->i_mode))
9959+ h_file = vfsub_dentry_open(&h_path, flags);
9960+ else {
9961+ /* this block depends upon the configuration */
9962+ di_read_unlock(dentry, AuLock_IR);
9963+ fi_write_unlock(file);
9964+ si_read_unlock(sb);
9965+ h_file = vfsub_dentry_open(&h_path, flags);
9966+ si_noflush_read_lock(sb);
9967+ fi_write_lock(file);
9968+ di_read_lock_child(dentry, AuLock_IR);
dece6358 9969+ }
4a4d8108
AM
9970+ if (IS_ERR(h_file))
9971+ goto out_br;
dece6358 9972+
4a4d8108
AM
9973+ if (exec_flag) {
9974+ err = deny_write_access(h_file);
9975+ if (unlikely(err)) {
9976+ fput(h_file);
9977+ h_file = ERR_PTR(err);
9978+ goto out_br;
9979+ }
9980+ }
953406b4 9981+ fsnotify_open(h_file);
4a4d8108 9982+ goto out; /* success */
1facf9fc 9983+
4f0767ce 9984+out_br:
4a4d8108 9985+ atomic_dec(&br->br_count);
4f0767ce 9986+out:
4a4d8108
AM
9987+ return h_file;
9988+}
1308ab2a 9989+
4a4d8108
AM
9990+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
9991+ struct au_fidir *fidir)
1facf9fc 9992+{
dece6358 9993+ int err;
1facf9fc 9994+ struct dentry *dentry;
1308ab2a 9995+
4a4d8108
AM
9996+ err = au_finfo_init(file, fidir);
9997+ if (unlikely(err))
9998+ goto out;
1facf9fc 9999+
10000+ dentry = file->f_dentry;
4a4d8108
AM
10001+ di_read_lock_child(dentry, AuLock_IR);
10002+ err = open(file, vfsub_file_flags(file));
10003+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 10004+
4a4d8108
AM
10005+ fi_write_unlock(file);
10006+ if (unlikely(err)) {
10007+ au_fi(file)->fi_hdir = NULL;
10008+ au_finfo_fin(file);
1308ab2a 10009+ }
4a4d8108 10010+
4f0767ce 10011+out:
1308ab2a 10012+ return err;
10013+}
dece6358 10014+
4a4d8108 10015+int au_reopen_nondir(struct file *file)
1308ab2a 10016+{
4a4d8108
AM
10017+ int err;
10018+ aufs_bindex_t bstart;
10019+ struct dentry *dentry;
10020+ struct file *h_file, *h_file_tmp;
1308ab2a 10021+
4a4d8108
AM
10022+ dentry = file->f_dentry;
10023+ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
10024+ bstart = au_dbstart(dentry);
10025+ h_file_tmp = NULL;
10026+ if (au_fbstart(file) == bstart) {
10027+ h_file = au_hf_top(file);
10028+ if (file->f_mode == h_file->f_mode)
10029+ return 0; /* success */
10030+ h_file_tmp = h_file;
10031+ get_file(h_file_tmp);
10032+ au_set_h_fptr(file, bstart, NULL);
10033+ }
10034+ AuDebugOn(au_fi(file)->fi_hdir);
10035+ AuDebugOn(au_fbstart(file) < bstart);
1308ab2a 10036+
4a4d8108
AM
10037+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
10038+ file);
10039+ err = PTR_ERR(h_file);
10040+ if (IS_ERR(h_file))
10041+ goto out; /* todo: close all? */
10042+
10043+ err = 0;
10044+ au_set_fbstart(file, bstart);
10045+ au_set_h_fptr(file, bstart, h_file);
10046+ au_update_figen(file);
10047+ /* todo: necessary? */
10048+ /* file->f_ra = h_file->f_ra; */
10049+
4f0767ce 10050+out:
4a4d8108
AM
10051+ if (h_file_tmp)
10052+ fput(h_file_tmp);
10053+ return err;
1facf9fc 10054+}
10055+
1308ab2a 10056+/* ---------------------------------------------------------------------- */
10057+
4a4d8108
AM
10058+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
10059+ struct dentry *hi_wh)
1facf9fc 10060+{
4a4d8108
AM
10061+ int err;
10062+ aufs_bindex_t bstart;
10063+ struct au_dinfo *dinfo;
10064+ struct dentry *h_dentry;
10065+ struct au_hdentry *hdp;
1facf9fc 10066+
4a4d8108
AM
10067+ dinfo = au_di(file->f_dentry);
10068+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 10069+
4a4d8108
AM
10070+ bstart = dinfo->di_bstart;
10071+ dinfo->di_bstart = btgt;
10072+ hdp = dinfo->di_hdentry;
10073+ h_dentry = hdp[0 + btgt].hd_dentry;
10074+ hdp[0 + btgt].hd_dentry = hi_wh;
10075+ err = au_reopen_nondir(file);
10076+ hdp[0 + btgt].hd_dentry = h_dentry;
10077+ dinfo->di_bstart = bstart;
1facf9fc 10078+
1facf9fc 10079+ return err;
10080+}
10081+
4a4d8108
AM
10082+static int au_ready_to_write_wh(struct file *file, loff_t len,
10083+ aufs_bindex_t bcpup)
1facf9fc 10084+{
4a4d8108 10085+ int err;
027c5e7a
AM
10086+ struct inode *inode, *h_inode;
10087+ struct dentry *dentry, *h_dentry, *hi_wh;
1facf9fc 10088+
dece6358 10089+ dentry = file->f_dentry;
4a4d8108 10090+ au_update_dbstart(dentry);
dece6358 10091+ inode = dentry->d_inode;
027c5e7a
AM
10092+ h_inode = NULL;
10093+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
10094+ h_dentry = au_h_dptr(dentry, bcpup);
10095+ if (h_dentry)
10096+ h_inode = h_dentry->d_inode;
10097+ }
4a4d8108 10098+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 10099+ if (!hi_wh && !h_inode)
4a4d8108
AM
10100+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
10101+ else
10102+ /* already copied-up after unlink */
10103+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 10104+
4a4d8108
AM
10105+ if (!err
10106+ && inode->i_nlink > 1
10107+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
10108+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
1308ab2a 10109+
dece6358 10110+ return err;
1facf9fc 10111+}
10112+
4a4d8108
AM
10113+/*
10114+ * prepare the @file for writing.
10115+ */
10116+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 10117+{
4a4d8108 10118+ int err;
027c5e7a 10119+ aufs_bindex_t bstart, bcpup, dbstart;
4a4d8108
AM
10120+ struct dentry *dentry, *parent, *h_dentry;
10121+ struct inode *h_inode, *inode;
1facf9fc 10122+ struct super_block *sb;
4a4d8108 10123+ struct file *h_file;
1facf9fc 10124+
10125+ dentry = file->f_dentry;
1facf9fc 10126+ sb = dentry->d_sb;
4a4d8108
AM
10127+ inode = dentry->d_inode;
10128+ AuDebugOn(au_special_file(inode->i_mode));
10129+ bstart = au_fbstart(file);
10130+ err = au_test_ro(sb, bstart, inode);
10131+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
10132+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
1facf9fc 10133+ goto out;
4a4d8108 10134+ }
1facf9fc 10135+
027c5e7a 10136+ /* need to cpup or reopen */
4a4d8108
AM
10137+ parent = dget_parent(dentry);
10138+ di_write_lock_parent(parent);
10139+ err = AuWbrCopyup(au_sbi(sb), dentry);
10140+ bcpup = err;
10141+ if (unlikely(err < 0))
10142+ goto out_dgrade;
10143+ err = 0;
10144+
027c5e7a 10145+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
4a4d8108 10146+ err = au_cpup_dirs(dentry, bcpup);
1facf9fc 10147+ if (unlikely(err))
4a4d8108
AM
10148+ goto out_dgrade;
10149+ }
10150+
10151+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
10152+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10153+ if (unlikely(err))
10154+ goto out_dgrade;
10155+
10156+ h_dentry = au_hf_top(file)->f_dentry;
10157+ h_inode = h_dentry->d_inode;
027c5e7a
AM
10158+ dbstart = au_dbstart(dentry);
10159+ if (dbstart <= bcpup) {
10160+ h_dentry = au_h_dptr(dentry, bcpup);
10161+ AuDebugOn(!h_dentry);
10162+ h_inode = h_dentry->d_inode;
10163+ AuDebugOn(!h_inode);
10164+ bstart = bcpup;
10165+ }
10166+
10167+ if (dbstart <= bcpup /* just reopen */
10168+ || !d_unhashed(dentry) /* copyup and reopen */
10169+ ) {
10170+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10171+ h_file = au_h_open_pre(dentry, bstart);
10172+ if (IS_ERR(h_file)) {
10173+ err = PTR_ERR(h_file);
10174+ h_file = NULL;
10175+ } else {
10176+ di_downgrade_lock(parent, AuLock_IR);
10177+ if (dbstart > bcpup)
10178+ err = au_sio_cpup_simple(dentry, bcpup, len,
10179+ AuCpup_DTIME);
10180+ if (!err)
10181+ err = au_reopen_nondir(file);
10182+ }
10183+ mutex_unlock(&h_inode->i_mutex);
10184+ au_h_open_post(dentry, bstart, h_file);
10185+ } else { /* copyup as wh and reopen */
10186+ /*
10187+ * since writable hfsplus branch is not supported,
10188+ * h_open_pre/post() are unnecessary.
10189+ */
10190+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108
AM
10191+ err = au_ready_to_write_wh(file, len, bcpup);
10192+ di_downgrade_lock(parent, AuLock_IR);
027c5e7a 10193+ mutex_unlock(&h_inode->i_mutex);
4a4d8108 10194+ }
4a4d8108
AM
10195+
10196+ if (!err) {
10197+ au_pin_set_parent_lflag(pin, /*lflag*/0);
10198+ goto out_dput; /* success */
10199+ }
10200+ au_unpin(pin);
10201+ goto out_unlock;
1facf9fc 10202+
4f0767ce 10203+out_dgrade:
4a4d8108 10204+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 10205+out_unlock:
4a4d8108 10206+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10207+out_dput:
4a4d8108 10208+ dput(parent);
4f0767ce 10209+out:
1facf9fc 10210+ return err;
10211+}
10212+
4a4d8108
AM
10213+/* ---------------------------------------------------------------------- */
10214+
10215+int au_do_flush(struct file *file, fl_owner_t id,
10216+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 10217+{
4a4d8108 10218+ int err;
1308ab2a 10219+ struct dentry *dentry;
1facf9fc 10220+ struct super_block *sb;
4a4d8108 10221+ struct inode *inode;
1facf9fc 10222+
1facf9fc 10223+ dentry = file->f_dentry;
10224+ sb = dentry->d_sb;
dece6358 10225+ inode = dentry->d_inode;
4a4d8108
AM
10226+ si_noflush_read_lock(sb);
10227+ fi_read_lock(file);
b752ccd1 10228+ ii_read_lock_child(inode);
1facf9fc 10229+
4a4d8108
AM
10230+ err = flush(file, id);
10231+ au_cpup_attr_timesizes(inode);
1facf9fc 10232+
b752ccd1 10233+ ii_read_unlock(inode);
4a4d8108 10234+ fi_read_unlock(file);
1308ab2a 10235+ si_read_unlock(sb);
dece6358 10236+ return err;
1facf9fc 10237+}
10238+
4a4d8108
AM
10239+/* ---------------------------------------------------------------------- */
10240+
10241+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 10242+{
4a4d8108
AM
10243+ int err;
10244+ aufs_bindex_t bstart;
10245+ struct au_pin pin;
10246+ struct au_finfo *finfo;
10247+ struct dentry *dentry, *parent, *hi_wh;
10248+ struct inode *inode;
1facf9fc 10249+ struct super_block *sb;
10250+
4a4d8108
AM
10251+ FiMustWriteLock(file);
10252+
10253+ err = 0;
10254+ finfo = au_fi(file);
1308ab2a 10255+ dentry = file->f_dentry;
10256+ sb = dentry->d_sb;
4a4d8108
AM
10257+ inode = dentry->d_inode;
10258+ bstart = au_ibstart(inode);
027c5e7a 10259+ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
1308ab2a 10260+ goto out;
dece6358 10261+
4a4d8108
AM
10262+ parent = dget_parent(dentry);
10263+ if (au_test_ro(sb, bstart, inode)) {
10264+ di_read_lock_parent(parent, !AuLock_IR);
10265+ err = AuWbrCopyup(au_sbi(sb), dentry);
10266+ bstart = err;
10267+ di_read_unlock(parent, !AuLock_IR);
10268+ if (unlikely(err < 0))
10269+ goto out_parent;
10270+ err = 0;
1facf9fc 10271+ }
1facf9fc 10272+
4a4d8108
AM
10273+ di_read_lock_parent(parent, AuLock_IR);
10274+ hi_wh = au_hi_wh(inode, bstart);
7f207e10
AM
10275+ if (!S_ISDIR(inode->i_mode)
10276+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108
AM
10277+ && au_plink_test(inode)
10278+ && !d_unhashed(dentry)) {
10279+ err = au_test_and_cpup_dirs(dentry, bstart);
10280+ if (unlikely(err))
10281+ goto out_unlock;
10282+
10283+ /* always superio. */
10284+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
10285+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10286+ if (!err)
10287+ err = au_sio_cpup_simple(dentry, bstart, -1,
10288+ AuCpup_DTIME);
10289+ au_unpin(&pin);
10290+ } else if (hi_wh) {
10291+ /* already copied-up after unlink */
10292+ err = au_reopen_wh(file, bstart, hi_wh);
10293+ *need_reopen = 0;
10294+ }
1facf9fc 10295+
4f0767ce 10296+out_unlock:
4a4d8108 10297+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10298+out_parent:
4a4d8108 10299+ dput(parent);
4f0767ce 10300+out:
1308ab2a 10301+ return err;
dece6358 10302+}
1facf9fc 10303+
4a4d8108 10304+static void au_do_refresh_dir(struct file *file)
dece6358 10305+{
4a4d8108
AM
10306+ aufs_bindex_t bindex, bend, new_bindex, brid;
10307+ struct au_hfile *p, tmp, *q;
10308+ struct au_finfo *finfo;
1308ab2a 10309+ struct super_block *sb;
4a4d8108 10310+ struct au_fidir *fidir;
1facf9fc 10311+
4a4d8108 10312+ FiMustWriteLock(file);
1facf9fc 10313+
4a4d8108
AM
10314+ sb = file->f_dentry->d_sb;
10315+ finfo = au_fi(file);
10316+ fidir = finfo->fi_hdir;
10317+ AuDebugOn(!fidir);
10318+ p = fidir->fd_hfile + finfo->fi_btop;
10319+ brid = p->hf_br->br_id;
10320+ bend = fidir->fd_bbot;
10321+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
10322+ if (!p->hf_file)
10323+ continue;
1308ab2a 10324+
4a4d8108
AM
10325+ new_bindex = au_br_index(sb, p->hf_br->br_id);
10326+ if (new_bindex == bindex)
10327+ continue;
10328+ if (new_bindex < 0) {
10329+ au_set_h_fptr(file, bindex, NULL);
10330+ continue;
10331+ }
1308ab2a 10332+
4a4d8108
AM
10333+ /* swap two lower inode, and loop again */
10334+ q = fidir->fd_hfile + new_bindex;
10335+ tmp = *q;
10336+ *q = *p;
10337+ *p = tmp;
10338+ if (tmp.hf_file) {
10339+ bindex--;
10340+ p--;
10341+ }
10342+ }
1308ab2a 10343+
4a4d8108 10344+ p = fidir->fd_hfile;
027c5e7a 10345+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
4a4d8108
AM
10346+ bend = au_sbend(sb);
10347+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
10348+ finfo->fi_btop++, p++)
10349+ if (p->hf_file) {
10350+ if (p->hf_file->f_dentry
10351+ && p->hf_file->f_dentry->d_inode)
10352+ break;
10353+ else
10354+ au_hfput(p, file);
10355+ }
10356+ } else {
10357+ bend = au_br_index(sb, brid);
10358+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
10359+ finfo->fi_btop++, p++)
10360+ if (p->hf_file)
10361+ au_hfput(p, file);
10362+ bend = au_sbend(sb);
10363+ }
1308ab2a 10364+
4a4d8108
AM
10365+ p = fidir->fd_hfile + bend;
10366+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
10367+ fidir->fd_bbot--, p--)
10368+ if (p->hf_file) {
10369+ if (p->hf_file->f_dentry
10370+ && p->hf_file->f_dentry->d_inode)
10371+ break;
10372+ else
10373+ au_hfput(p, file);
10374+ }
10375+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 10376+}
10377+
4a4d8108
AM
10378+/*
10379+ * after branch manipulation, refresh the file.
10380+ */
10381+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 10382+{
4a4d8108
AM
10383+ int err, need_reopen;
10384+ aufs_bindex_t bend, bindex;
10385+ struct dentry *dentry;
1308ab2a 10386+ struct au_finfo *finfo;
4a4d8108 10387+ struct au_hfile *hfile;
1facf9fc 10388+
4a4d8108 10389+ dentry = file->f_dentry;
1308ab2a 10390+ finfo = au_fi(file);
4a4d8108
AM
10391+ if (!finfo->fi_hdir) {
10392+ hfile = &finfo->fi_htop;
10393+ AuDebugOn(!hfile->hf_file);
10394+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
10395+ AuDebugOn(bindex < 0);
10396+ if (bindex != finfo->fi_btop)
10397+ au_set_fbstart(file, bindex);
10398+ } else {
10399+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
10400+ if (unlikely(err))
10401+ goto out;
10402+ au_do_refresh_dir(file);
10403+ }
1facf9fc 10404+
4a4d8108
AM
10405+ err = 0;
10406+ need_reopen = 1;
10407+ if (!au_test_mmapped(file))
10408+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 10409+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
10410+ err = reopen(file);
10411+ if (!err) {
10412+ au_update_figen(file);
10413+ goto out; /* success */
10414+ }
10415+
10416+ /* error, close all lower files */
10417+ if (finfo->fi_hdir) {
10418+ bend = au_fbend_dir(file);
10419+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
10420+ au_set_h_fptr(file, bindex, NULL);
10421+ }
1facf9fc 10422+
4f0767ce 10423+out:
1facf9fc 10424+ return err;
10425+}
10426+
4a4d8108
AM
10427+/* common function to regular file and dir */
10428+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10429+ int wlock)
dece6358 10430+{
1308ab2a 10431+ int err;
4a4d8108
AM
10432+ unsigned int sigen, figen;
10433+ aufs_bindex_t bstart;
10434+ unsigned char pseudo_link;
10435+ struct dentry *dentry;
10436+ struct inode *inode;
1facf9fc 10437+
4a4d8108
AM
10438+ err = 0;
10439+ dentry = file->f_dentry;
10440+ inode = dentry->d_inode;
10441+ AuDebugOn(au_special_file(inode->i_mode));
10442+ sigen = au_sigen(dentry->d_sb);
10443+ fi_write_lock(file);
10444+ figen = au_figen(file);
10445+ di_write_lock_child(dentry);
10446+ bstart = au_dbstart(dentry);
10447+ pseudo_link = (bstart != au_ibstart(inode));
10448+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
10449+ if (!wlock) {
10450+ di_downgrade_lock(dentry, AuLock_IR);
10451+ fi_downgrade_lock(file);
10452+ }
10453+ goto out; /* success */
10454+ }
dece6358 10455+
4a4d8108 10456+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 10457+ if (au_digen_test(dentry, sigen)) {
4a4d8108 10458+ err = au_reval_dpath(dentry, sigen);
027c5e7a 10459+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 10460+ }
dece6358 10461+
027c5e7a
AM
10462+ if (!err)
10463+ err = refresh_file(file, reopen);
4a4d8108
AM
10464+ if (!err) {
10465+ if (!wlock) {
10466+ di_downgrade_lock(dentry, AuLock_IR);
10467+ fi_downgrade_lock(file);
10468+ }
10469+ } else {
10470+ di_write_unlock(dentry);
10471+ fi_write_unlock(file);
10472+ }
1facf9fc 10473+
4f0767ce 10474+out:
1308ab2a 10475+ return err;
10476+}
1facf9fc 10477+
4a4d8108
AM
10478+/* ---------------------------------------------------------------------- */
10479+
10480+/* cf. aufs_nopage() */
10481+/* for madvise(2) */
10482+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 10483+{
4a4d8108
AM
10484+ unlock_page(page);
10485+ return 0;
10486+}
1facf9fc 10487+
4a4d8108
AM
10488+/* it will never be called, but necessary to support O_DIRECT */
10489+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
10490+ const struct iovec *iov, loff_t offset,
10491+ unsigned long nr_segs)
10492+{ BUG(); return 0; }
1facf9fc 10493+
4a4d8108
AM
10494+/*
10495+ * it will never be called, but madvise and fadvise behave differently
10496+ * when get_xip_mem is defined
10497+ */
10498+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
10499+ int create, void **kmem, unsigned long *pfn)
10500+{ BUG(); return 0; }
1facf9fc 10501+
4a4d8108
AM
10502+/* they will never be called. */
10503+#ifdef CONFIG_AUFS_DEBUG
10504+static int aufs_write_begin(struct file *file, struct address_space *mapping,
10505+ loff_t pos, unsigned len, unsigned flags,
10506+ struct page **pagep, void **fsdata)
10507+{ AuUnsupport(); return 0; }
10508+static int aufs_write_end(struct file *file, struct address_space *mapping,
10509+ loff_t pos, unsigned len, unsigned copied,
10510+ struct page *page, void *fsdata)
10511+{ AuUnsupport(); return 0; }
10512+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
10513+{ AuUnsupport(); return 0; }
1308ab2a 10514+
4a4d8108
AM
10515+static int aufs_set_page_dirty(struct page *page)
10516+{ AuUnsupport(); return 0; }
10517+static void aufs_invalidatepage(struct page *page, unsigned long offset)
10518+{ AuUnsupport(); }
10519+static int aufs_releasepage(struct page *page, gfp_t gfp)
10520+{ AuUnsupport(); return 0; }
10521+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
10522+ struct page *page)
10523+{ AuUnsupport(); return 0; }
10524+static int aufs_launder_page(struct page *page)
10525+{ AuUnsupport(); return 0; }
10526+static int aufs_is_partially_uptodate(struct page *page,
10527+ read_descriptor_t *desc,
10528+ unsigned long from)
10529+{ AuUnsupport(); return 0; }
10530+static int aufs_error_remove_page(struct address_space *mapping,
10531+ struct page *page)
10532+{ AuUnsupport(); return 0; }
10533+#endif /* CONFIG_AUFS_DEBUG */
10534+
10535+const struct address_space_operations aufs_aop = {
10536+ .readpage = aufs_readpage,
10537+ .direct_IO = aufs_direct_IO,
10538+ .get_xip_mem = aufs_get_xip_mem,
10539+#ifdef CONFIG_AUFS_DEBUG
10540+ .writepage = aufs_writepage,
4a4d8108
AM
10541+ /* no writepages, because of writepage */
10542+ .set_page_dirty = aufs_set_page_dirty,
10543+ /* no readpages, because of readpage */
10544+ .write_begin = aufs_write_begin,
10545+ .write_end = aufs_write_end,
10546+ /* no bmap, no block device */
10547+ .invalidatepage = aufs_invalidatepage,
10548+ .releasepage = aufs_releasepage,
10549+ .migratepage = aufs_migratepage,
10550+ .launder_page = aufs_launder_page,
10551+ .is_partially_uptodate = aufs_is_partially_uptodate,
10552+ .error_remove_page = aufs_error_remove_page
10553+#endif /* CONFIG_AUFS_DEBUG */
dece6358 10554+};
7f207e10
AM
10555diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
10556--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
10557+++ linux/fs/aufs/file.h 2011-08-24 13:30:24.731313534 +0200
10558@@ -0,0 +1,299 @@
4a4d8108 10559+/*
027c5e7a 10560+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10561+ *
10562+ * This program, aufs is free software; you can redistribute it and/or modify
10563+ * it under the terms of the GNU General Public License as published by
10564+ * the Free Software Foundation; either version 2 of the License, or
10565+ * (at your option) any later version.
10566+ *
10567+ * This program is distributed in the hope that it will be useful,
10568+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10569+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10570+ * GNU General Public License for more details.
10571+ *
10572+ * You should have received a copy of the GNU General Public License
10573+ * along with this program; if not, write to the Free Software
10574+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10575+ */
1facf9fc 10576+
4a4d8108
AM
10577+/*
10578+ * file operations
10579+ */
1facf9fc 10580+
4a4d8108
AM
10581+#ifndef __AUFS_FILE_H__
10582+#define __AUFS_FILE_H__
1facf9fc 10583+
4a4d8108 10584+#ifdef __KERNEL__
1facf9fc 10585+
2cbb1c4b 10586+#include <linux/file.h>
4a4d8108
AM
10587+#include <linux/fs.h>
10588+#include <linux/poll.h>
10589+#include <linux/aufs_type.h>
10590+#include "rwsem.h"
1facf9fc 10591+
4a4d8108
AM
10592+struct au_branch;
10593+struct au_hfile {
10594+ struct file *hf_file;
10595+ struct au_branch *hf_br;
10596+};
1facf9fc 10597+
4a4d8108
AM
10598+struct au_vdir;
10599+struct au_fidir {
10600+ aufs_bindex_t fd_bbot;
10601+ aufs_bindex_t fd_nent;
10602+ struct au_vdir *fd_vdir_cache;
10603+ struct au_hfile fd_hfile[];
10604+};
1facf9fc 10605+
4a4d8108 10606+static inline int au_fidir_sz(int nent)
dece6358 10607+{
4f0767ce
JR
10608+ AuDebugOn(nent < 0);
10609+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 10610+}
1facf9fc 10611+
4a4d8108
AM
10612+struct au_finfo {
10613+ atomic_t fi_generation;
dece6358 10614+
4a4d8108
AM
10615+ struct au_rwsem fi_rwsem;
10616+ aufs_bindex_t fi_btop;
10617+
10618+ /* do not union them */
10619+ struct { /* for non-dir */
10620+ struct au_hfile fi_htop;
2cbb1c4b 10621+ atomic_t fi_mmapped;
4a4d8108
AM
10622+ };
10623+ struct au_fidir *fi_hdir; /* for dir only */
10624+} ____cacheline_aligned_in_smp;
1facf9fc 10625+
4a4d8108 10626+/* ---------------------------------------------------------------------- */
1facf9fc 10627+
4a4d8108
AM
10628+/* file.c */
10629+extern const struct address_space_operations aufs_aop;
10630+unsigned int au_file_roflags(unsigned int flags);
10631+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
10632+ struct file *file);
10633+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
10634+ struct au_fidir *fidir);
10635+int au_reopen_nondir(struct file *file);
10636+struct au_pin;
10637+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
10638+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10639+ int wlock);
10640+int au_do_flush(struct file *file, fl_owner_t id,
10641+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 10642+
4a4d8108
AM
10643+/* poll.c */
10644+#ifdef CONFIG_AUFS_POLL
10645+unsigned int aufs_poll(struct file *file, poll_table *wait);
10646+#endif
1facf9fc 10647+
4a4d8108
AM
10648+#ifdef CONFIG_AUFS_BR_HFSPLUS
10649+/* hfsplus.c */
10650+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
10651+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
10652+ struct file *h_file);
10653+#else
10654+static inline
10655+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
dece6358 10656+{
4a4d8108
AM
10657+ return NULL;
10658+}
1facf9fc 10659+
4a4d8108
AM
10660+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
10661+ struct file *h_file);
10662+#endif
1facf9fc 10663+
4a4d8108
AM
10664+/* f_op.c */
10665+extern const struct file_operations aufs_file_fop;
4a4d8108
AM
10666+int au_do_open_nondir(struct file *file, int flags);
10667+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
10668+
10669+#ifdef CONFIG_AUFS_SP_IATTR
10670+/* f_op_sp.c */
10671+int au_special_file(umode_t mode);
10672+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
10673+#else
10674+AuStubInt0(au_special_file, umode_t mode)
10675+static inline void au_init_special_fop(struct inode *inode, umode_t mode,
10676+ dev_t rdev)
10677+{
10678+ init_special_inode(inode, mode, rdev);
10679+}
10680+#endif
1facf9fc 10681+
4a4d8108
AM
10682+/* finfo.c */
10683+void au_hfput(struct au_hfile *hf, struct file *file);
10684+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
10685+ struct file *h_file);
1facf9fc 10686+
4a4d8108 10687+void au_update_figen(struct file *file);
4a4d8108
AM
10688+struct au_fidir *au_fidir_alloc(struct super_block *sb);
10689+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 10690+
4a4d8108
AM
10691+void au_fi_init_once(void *_fi);
10692+void au_finfo_fin(struct file *file);
10693+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 10694+
4a4d8108
AM
10695+/* ioctl.c */
10696+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10697+#ifdef CONFIG_COMPAT
10698+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
10699+ unsigned long arg);
10700+#endif
1facf9fc 10701+
4a4d8108 10702+/* ---------------------------------------------------------------------- */
1facf9fc 10703+
4a4d8108
AM
10704+static inline struct au_finfo *au_fi(struct file *file)
10705+{
10706+ return file->private_data;
10707+}
1facf9fc 10708+
4a4d8108 10709+/* ---------------------------------------------------------------------- */
1facf9fc 10710+
4a4d8108
AM
10711+/*
10712+ * fi_read_lock, fi_write_lock,
10713+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
10714+ */
10715+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 10716+
4a4d8108
AM
10717+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
10718+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
10719+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 10720+
1308ab2a 10721+/* ---------------------------------------------------------------------- */
10722+
4a4d8108
AM
10723+/* todo: hard/soft set? */
10724+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 10725+{
4a4d8108
AM
10726+ FiMustAnyLock(file);
10727+ return au_fi(file)->fi_btop;
10728+}
dece6358 10729+
4a4d8108
AM
10730+static inline aufs_bindex_t au_fbend_dir(struct file *file)
10731+{
10732+ FiMustAnyLock(file);
10733+ AuDebugOn(!au_fi(file)->fi_hdir);
10734+ return au_fi(file)->fi_hdir->fd_bbot;
10735+}
1facf9fc 10736+
4a4d8108
AM
10737+static inline struct au_vdir *au_fvdir_cache(struct file *file)
10738+{
10739+ FiMustAnyLock(file);
10740+ AuDebugOn(!au_fi(file)->fi_hdir);
10741+ return au_fi(file)->fi_hdir->fd_vdir_cache;
10742+}
1facf9fc 10743+
4a4d8108
AM
10744+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
10745+{
10746+ FiMustWriteLock(file);
10747+ au_fi(file)->fi_btop = bindex;
10748+}
1facf9fc 10749+
4a4d8108
AM
10750+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
10751+{
10752+ FiMustWriteLock(file);
10753+ AuDebugOn(!au_fi(file)->fi_hdir);
10754+ au_fi(file)->fi_hdir->fd_bbot = bindex;
10755+}
1308ab2a 10756+
4a4d8108
AM
10757+static inline void au_set_fvdir_cache(struct file *file,
10758+ struct au_vdir *vdir_cache)
10759+{
10760+ FiMustWriteLock(file);
10761+ AuDebugOn(!au_fi(file)->fi_hdir);
10762+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
10763+}
dece6358 10764+
4a4d8108
AM
10765+static inline struct file *au_hf_top(struct file *file)
10766+{
10767+ FiMustAnyLock(file);
10768+ AuDebugOn(au_fi(file)->fi_hdir);
10769+ return au_fi(file)->fi_htop.hf_file;
10770+}
1facf9fc 10771+
4a4d8108
AM
10772+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
10773+{
10774+ FiMustAnyLock(file);
10775+ AuDebugOn(!au_fi(file)->fi_hdir);
10776+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
10777+}
10778+
4a4d8108
AM
10779+/* todo: memory barrier? */
10780+static inline unsigned int au_figen(struct file *f)
dece6358 10781+{
4a4d8108
AM
10782+ return atomic_read(&au_fi(f)->fi_generation);
10783+}
dece6358 10784+
2cbb1c4b
JR
10785+static inline void au_set_mmapped(struct file *f)
10786+{
10787+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
10788+ return;
10789+ pr_warning("fi_mmapped wrapped around\n");
10790+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
10791+ ;
10792+}
10793+
10794+static inline void au_unset_mmapped(struct file *f)
10795+{
10796+ atomic_dec(&au_fi(f)->fi_mmapped);
10797+}
10798+
4a4d8108
AM
10799+static inline int au_test_mmapped(struct file *f)
10800+{
2cbb1c4b
JR
10801+ return atomic_read(&au_fi(f)->fi_mmapped);
10802+}
10803+
10804+/* customize vma->vm_file */
10805+
10806+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
10807+ struct file *file)
10808+{
53392da6
AM
10809+ struct file *f;
10810+
10811+ f = vma->vm_file;
2cbb1c4b
JR
10812+ get_file(file);
10813+ vma->vm_file = file;
53392da6 10814+ fput(f);
2cbb1c4b
JR
10815+}
10816+
10817+#ifdef CONFIG_MMU
10818+#define AuDbgVmRegion(file, vma) do {} while (0)
10819+
10820+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10821+ struct file *file)
10822+{
10823+ au_do_vm_file_reset(vma, file);
10824+}
10825+#else
10826+#define AuDbgVmRegion(file, vma) \
10827+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
10828+
10829+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10830+ struct file *file)
10831+{
53392da6
AM
10832+ struct file *f;
10833+
2cbb1c4b 10834+ au_do_vm_file_reset(vma, file);
53392da6 10835+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
10836+ get_file(file);
10837+ vma->vm_region->vm_file = file;
53392da6 10838+ fput(f);
2cbb1c4b
JR
10839+}
10840+#endif /* CONFIG_MMU */
10841+
10842+/* handle vma->vm_prfile */
10843+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
10844+ struct file *file)
10845+{
10846+#ifdef CONFIG_AUFS_PROC_MAP
10847+ get_file(file);
10848+ vma->vm_prfile = file;
10849+#ifndef CONFIG_MMU
10850+ get_file(file);
10851+ vma->vm_region->vm_prfile = file;
10852+#endif
10853+#endif
4a4d8108 10854+}
1308ab2a 10855+
4a4d8108
AM
10856+#endif /* __KERNEL__ */
10857+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
10858diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
10859--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 10860+++ linux/fs/aufs/finfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 10861@@ -0,0 +1,153 @@
4a4d8108 10862+/*
027c5e7a 10863+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10864+ *
10865+ * This program, aufs is free software; you can redistribute it and/or modify
10866+ * it under the terms of the GNU General Public License as published by
10867+ * the Free Software Foundation; either version 2 of the License, or
10868+ * (at your option) any later version.
10869+ *
10870+ * This program is distributed in the hope that it will be useful,
10871+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10872+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10873+ * GNU General Public License for more details.
10874+ *
10875+ * You should have received a copy of the GNU General Public License
10876+ * along with this program; if not, write to the Free Software
10877+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10878+ */
1308ab2a 10879+
4a4d8108
AM
10880+/*
10881+ * file private data
10882+ */
1facf9fc 10883+
4a4d8108
AM
10884+#include <linux/file.h>
10885+#include "aufs.h"
1facf9fc 10886+
4a4d8108
AM
10887+void au_hfput(struct au_hfile *hf, struct file *file)
10888+{
10889+ /* todo: direct access f_flags */
2cbb1c4b 10890+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
10891+ allow_write_access(hf->hf_file);
10892+ fput(hf->hf_file);
10893+ hf->hf_file = NULL;
e49829fe 10894+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
10895+ hf->hf_br = NULL;
10896+}
1facf9fc 10897+
4a4d8108
AM
10898+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
10899+{
10900+ struct au_finfo *finfo = au_fi(file);
10901+ struct au_hfile *hf;
10902+ struct au_fidir *fidir;
10903+
10904+ fidir = finfo->fi_hdir;
10905+ if (!fidir) {
10906+ AuDebugOn(finfo->fi_btop != bindex);
10907+ hf = &finfo->fi_htop;
10908+ } else
10909+ hf = fidir->fd_hfile + bindex;
10910+
10911+ if (hf && hf->hf_file)
10912+ au_hfput(hf, file);
10913+ if (val) {
10914+ FiMustWriteLock(file);
10915+ hf->hf_file = val;
10916+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
1308ab2a 10917+ }
4a4d8108 10918+}
1facf9fc 10919+
4a4d8108
AM
10920+void au_update_figen(struct file *file)
10921+{
10922+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
10923+ /* smp_mb(); */ /* atomic_set */
1facf9fc 10924+}
10925+
4a4d8108
AM
10926+/* ---------------------------------------------------------------------- */
10927+
4a4d8108
AM
10928+struct au_fidir *au_fidir_alloc(struct super_block *sb)
10929+{
10930+ struct au_fidir *fidir;
10931+ int nbr;
10932+
10933+ nbr = au_sbend(sb) + 1;
10934+ if (nbr < 2)
10935+ nbr = 2; /* initial allocate for 2 branches */
10936+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
10937+ if (fidir) {
10938+ fidir->fd_bbot = -1;
10939+ fidir->fd_nent = nbr;
10940+ fidir->fd_vdir_cache = NULL;
10941+ }
10942+
10943+ return fidir;
10944+}
10945+
10946+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
10947+{
10948+ int err;
10949+ struct au_fidir *fidir, *p;
10950+
10951+ AuRwMustWriteLock(&finfo->fi_rwsem);
10952+ fidir = finfo->fi_hdir;
10953+ AuDebugOn(!fidir);
10954+
10955+ err = -ENOMEM;
10956+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
10957+ GFP_NOFS);
10958+ if (p) {
10959+ p->fd_nent = nbr;
10960+ finfo->fi_hdir = p;
10961+ err = 0;
10962+ }
1facf9fc 10963+
dece6358 10964+ return err;
1facf9fc 10965+}
1308ab2a 10966+
10967+/* ---------------------------------------------------------------------- */
10968+
4a4d8108 10969+void au_finfo_fin(struct file *file)
1308ab2a 10970+{
4a4d8108
AM
10971+ struct au_finfo *finfo;
10972+
7f207e10
AM
10973+ au_nfiles_dec(file->f_dentry->d_sb);
10974+
4a4d8108
AM
10975+ finfo = au_fi(file);
10976+ AuDebugOn(finfo->fi_hdir);
10977+ AuRwDestroy(&finfo->fi_rwsem);
10978+ au_cache_free_finfo(finfo);
1308ab2a 10979+}
1308ab2a 10980+
e49829fe 10981+void au_fi_init_once(void *_finfo)
4a4d8108 10982+{
e49829fe 10983+ struct au_finfo *finfo = _finfo;
2cbb1c4b 10984+ static struct lock_class_key aufs_fi;
1308ab2a 10985+
e49829fe
JR
10986+ au_rw_init(&finfo->fi_rwsem);
10987+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 10988+}
1308ab2a 10989+
4a4d8108
AM
10990+int au_finfo_init(struct file *file, struct au_fidir *fidir)
10991+{
10992+ int err;
10993+ struct au_finfo *finfo;
10994+ struct dentry *dentry;
10995+
10996+ err = -ENOMEM;
10997+ dentry = file->f_dentry;
10998+ finfo = au_cache_alloc_finfo();
10999+ if (unlikely(!finfo))
11000+ goto out;
11001+
11002+ err = 0;
7f207e10 11003+ au_nfiles_inc(dentry->d_sb);
4a4d8108
AM
11004+ au_rw_write_lock(&finfo->fi_rwsem);
11005+ finfo->fi_btop = -1;
11006+ finfo->fi_hdir = fidir;
11007+ atomic_set(&finfo->fi_generation, au_digen(dentry));
11008+ /* smp_mb(); */ /* atomic_set */
11009+
11010+ file->private_data = finfo;
11011+
11012+out:
11013+ return err;
11014+}
7f207e10
AM
11015diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
11016--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
11017+++ linux/fs/aufs/f_op.c 2011-10-24 20:52:23.677857076 +0200
11018@@ -0,0 +1,711 @@
dece6358 11019+/*
027c5e7a 11020+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
11021+ *
11022+ * This program, aufs is free software; you can redistribute it and/or modify
11023+ * it under the terms of the GNU General Public License as published by
11024+ * the Free Software Foundation; either version 2 of the License, or
11025+ * (at your option) any later version.
11026+ *
11027+ * This program is distributed in the hope that it will be useful,
11028+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11029+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11030+ * GNU General Public License for more details.
11031+ *
11032+ * You should have received a copy of the GNU General Public License
11033+ * along with this program; if not, write to the Free Software
11034+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11035+ */
1facf9fc 11036+
11037+/*
4a4d8108 11038+ * file and vm operations
1facf9fc 11039+ */
dece6358
AM
11040+
11041+#include <linux/file.h>
4a4d8108
AM
11042+#include <linux/fs_stack.h>
11043+#include <linux/mman.h>
11044+#include <linux/mm.h>
11045+#include <linux/security.h>
dece6358
AM
11046+#include "aufs.h"
11047+
4a4d8108 11048+int au_do_open_nondir(struct file *file, int flags)
1facf9fc 11049+{
4a4d8108
AM
11050+ int err;
11051+ aufs_bindex_t bindex;
11052+ struct file *h_file;
11053+ struct dentry *dentry;
11054+ struct au_finfo *finfo;
11055+
11056+ FiMustWriteLock(file);
11057+
4a4d8108 11058+ dentry = file->f_dentry;
027c5e7a
AM
11059+ err = au_d_alive(dentry);
11060+ if (unlikely(err))
11061+ goto out;
11062+
4a4d8108
AM
11063+ finfo = au_fi(file);
11064+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 11065+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108
AM
11066+ bindex = au_dbstart(dentry);
11067+ h_file = au_h_open(dentry, bindex, flags, file);
11068+ if (IS_ERR(h_file))
11069+ err = PTR_ERR(h_file);
11070+ else {
11071+ au_set_fbstart(file, bindex);
11072+ au_set_h_fptr(file, bindex, h_file);
11073+ au_update_figen(file);
11074+ /* todo: necessary? */
11075+ /* file->f_ra = h_file->f_ra; */
11076+ }
027c5e7a
AM
11077+
11078+out:
4a4d8108 11079+ return err;
1facf9fc 11080+}
11081+
4a4d8108
AM
11082+static int aufs_open_nondir(struct inode *inode __maybe_unused,
11083+ struct file *file)
1facf9fc 11084+{
4a4d8108 11085+ int err;
1308ab2a 11086+ struct super_block *sb;
1facf9fc 11087+
2cbb1c4b 11088+ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
4a4d8108
AM
11089+ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
11090+ file->f_mode);
1facf9fc 11091+
4a4d8108
AM
11092+ sb = file->f_dentry->d_sb;
11093+ si_read_lock(sb, AuLock_FLUSH);
11094+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
11095+ si_read_unlock(sb);
11096+ return err;
11097+}
1facf9fc 11098+
4a4d8108
AM
11099+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
11100+{
11101+ struct au_finfo *finfo;
11102+ aufs_bindex_t bindex;
1facf9fc 11103+
4a4d8108
AM
11104+ finfo = au_fi(file);
11105+ bindex = finfo->fi_btop;
0c5527e5
AM
11106+ if (bindex >= 0) {
11107+ /* remove me from sb->s_files */
11108+ file_sb_list_del(file);
4a4d8108 11109+ au_set_h_fptr(file, bindex, NULL);
0c5527e5 11110+ }
7f207e10 11111+
4a4d8108
AM
11112+ au_finfo_fin(file);
11113+ return 0;
1facf9fc 11114+}
11115+
4a4d8108
AM
11116+/* ---------------------------------------------------------------------- */
11117+
11118+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 11119+{
1308ab2a 11120+ int err;
4a4d8108
AM
11121+ struct file *h_file;
11122+
11123+ err = 0;
11124+ h_file = au_hf_top(file);
11125+ if (h_file)
11126+ err = vfsub_flush(h_file, id);
11127+ return err;
11128+}
11129+
11130+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
11131+{
11132+ return au_do_flush(file, id, au_do_flush_nondir);
11133+}
11134+
11135+/* ---------------------------------------------------------------------- */
11136+
11137+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
11138+ loff_t *ppos)
11139+{
11140+ ssize_t err;
dece6358 11141+ struct dentry *dentry;
4a4d8108 11142+ struct file *h_file;
dece6358 11143+ struct super_block *sb;
1facf9fc 11144+
dece6358
AM
11145+ dentry = file->f_dentry;
11146+ sb = dentry->d_sb;
e49829fe 11147+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11148+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
dece6358
AM
11149+ if (unlikely(err))
11150+ goto out;
1facf9fc 11151+
4a4d8108
AM
11152+ h_file = au_hf_top(file);
11153+ err = vfsub_read_u(h_file, buf, count, ppos);
11154+ /* todo: necessary? */
11155+ /* file->f_ra = h_file->f_ra; */
11156+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1308ab2a 11157+
4a4d8108
AM
11158+ di_read_unlock(dentry, AuLock_IR);
11159+ fi_read_unlock(file);
4f0767ce 11160+out:
dece6358
AM
11161+ si_read_unlock(sb);
11162+ return err;
11163+}
1facf9fc 11164+
e49829fe
JR
11165+/*
11166+ * todo: very ugly
11167+ * it locks both of i_mutex and si_rwsem for read in safe.
11168+ * if the plink maintenance mode continues forever (that is the problem),
11169+ * may loop forever.
11170+ */
11171+static void au_mtx_and_read_lock(struct inode *inode)
11172+{
11173+ int err;
11174+ struct super_block *sb = inode->i_sb;
11175+
11176+ while (1) {
11177+ mutex_lock(&inode->i_mutex);
11178+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11179+ if (!err)
11180+ break;
11181+ mutex_unlock(&inode->i_mutex);
11182+ si_read_lock(sb, AuLock_NOPLMW);
11183+ si_read_unlock(sb);
11184+ }
11185+}
11186+
4a4d8108
AM
11187+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
11188+ size_t count, loff_t *ppos)
dece6358 11189+{
4a4d8108
AM
11190+ ssize_t err;
11191+ struct au_pin pin;
dece6358 11192+ struct dentry *dentry;
4a4d8108 11193+ struct inode *inode;
4a4d8108
AM
11194+ struct file *h_file;
11195+ char __user *buf = (char __user *)ubuf;
1facf9fc 11196+
dece6358 11197+ dentry = file->f_dentry;
4a4d8108 11198+ inode = dentry->d_inode;
e49829fe 11199+ au_mtx_and_read_lock(inode);
1facf9fc 11200+
4a4d8108
AM
11201+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11202+ if (unlikely(err))
11203+ goto out;
1facf9fc 11204+
4a4d8108
AM
11205+ err = au_ready_to_write(file, -1, &pin);
11206+ di_downgrade_lock(dentry, AuLock_IR);
11207+ if (unlikely(err))
11208+ goto out_unlock;
1facf9fc 11209+
4a4d8108
AM
11210+ h_file = au_hf_top(file);
11211+ au_unpin(&pin);
11212+ err = vfsub_write_u(h_file, buf, count, ppos);
11213+ au_cpup_attr_timesizes(inode);
11214+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11215+
4f0767ce 11216+out_unlock:
4a4d8108
AM
11217+ di_read_unlock(dentry, AuLock_IR);
11218+ fi_write_unlock(file);
4f0767ce 11219+out:
e49829fe 11220+ si_read_unlock(inode->i_sb);
4a4d8108 11221+ mutex_unlock(&inode->i_mutex);
dece6358
AM
11222+ return err;
11223+}
1facf9fc 11224+
4a4d8108
AM
11225+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
11226+ const struct iovec *iov, unsigned long nv, loff_t pos)
dece6358 11227+{
4a4d8108
AM
11228+ ssize_t err;
11229+ struct file *file;
11230+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
11231+ loff_t);
1facf9fc 11232+
4a4d8108
AM
11233+ err = security_file_permission(h_file, rw);
11234+ if (unlikely(err))
11235+ goto out;
1facf9fc 11236+
4a4d8108
AM
11237+ err = -ENOSYS;
11238+ func = NULL;
11239+ if (rw == MAY_READ)
11240+ func = h_file->f_op->aio_read;
11241+ else if (rw == MAY_WRITE)
11242+ func = h_file->f_op->aio_write;
11243+ if (func) {
11244+ file = kio->ki_filp;
11245+ kio->ki_filp = h_file;
2cbb1c4b 11246+ lockdep_off();
4a4d8108 11247+ err = func(kio, iov, nv, pos);
2cbb1c4b 11248+ lockdep_on();
4a4d8108
AM
11249+ kio->ki_filp = file;
11250+ } else
11251+ /* currently there is no such fs */
11252+ WARN_ON_ONCE(1);
1facf9fc 11253+
4f0767ce 11254+out:
dece6358
AM
11255+ return err;
11256+}
1facf9fc 11257+
4a4d8108
AM
11258+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
11259+ unsigned long nv, loff_t pos)
1facf9fc 11260+{
4a4d8108
AM
11261+ ssize_t err;
11262+ struct file *file, *h_file;
11263+ struct dentry *dentry;
dece6358 11264+ struct super_block *sb;
1facf9fc 11265+
4a4d8108 11266+ file = kio->ki_filp;
dece6358 11267+ dentry = file->f_dentry;
1308ab2a 11268+ sb = dentry->d_sb;
e49829fe 11269+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11270+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11271+ if (unlikely(err))
11272+ goto out;
11273+
11274+ h_file = au_hf_top(file);
11275+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
11276+ /* todo: necessary? */
11277+ /* file->f_ra = h_file->f_ra; */
11278+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
11279+ di_read_unlock(dentry, AuLock_IR);
11280+ fi_read_unlock(file);
1facf9fc 11281+
4f0767ce 11282+out:
4a4d8108 11283+ si_read_unlock(sb);
1308ab2a 11284+ return err;
11285+}
1facf9fc 11286+
4a4d8108
AM
11287+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
11288+ unsigned long nv, loff_t pos)
1308ab2a 11289+{
4a4d8108
AM
11290+ ssize_t err;
11291+ struct au_pin pin;
11292+ struct dentry *dentry;
11293+ struct inode *inode;
4a4d8108 11294+ struct file *file, *h_file;
1308ab2a 11295+
4a4d8108 11296+ file = kio->ki_filp;
1308ab2a 11297+ dentry = file->f_dentry;
1308ab2a 11298+ inode = dentry->d_inode;
e49829fe
JR
11299+ au_mtx_and_read_lock(inode);
11300+
4a4d8108
AM
11301+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11302+ if (unlikely(err))
1308ab2a 11303+ goto out;
1facf9fc 11304+
4a4d8108
AM
11305+ err = au_ready_to_write(file, -1, &pin);
11306+ di_downgrade_lock(dentry, AuLock_IR);
dece6358 11307+ if (unlikely(err))
4a4d8108 11308+ goto out_unlock;
1facf9fc 11309+
4a4d8108
AM
11310+ au_unpin(&pin);
11311+ h_file = au_hf_top(file);
11312+ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
11313+ au_cpup_attr_timesizes(inode);
11314+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11315+
4f0767ce 11316+out_unlock:
4a4d8108
AM
11317+ di_read_unlock(dentry, AuLock_IR);
11318+ fi_write_unlock(file);
4f0767ce 11319+out:
e49829fe 11320+ si_read_unlock(inode->i_sb);
4a4d8108 11321+ mutex_unlock(&inode->i_mutex);
dece6358 11322+ return err;
1facf9fc 11323+}
11324+
4a4d8108
AM
11325+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
11326+ struct pipe_inode_info *pipe, size_t len,
11327+ unsigned int flags)
1facf9fc 11328+{
4a4d8108
AM
11329+ ssize_t err;
11330+ struct file *h_file;
11331+ struct dentry *dentry;
dece6358 11332+ struct super_block *sb;
1facf9fc 11333+
dece6358 11334+ dentry = file->f_dentry;
dece6358 11335+ sb = dentry->d_sb;
e49829fe 11336+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11337+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11338+ if (unlikely(err))
dece6358 11339+ goto out;
1facf9fc 11340+
4a4d8108
AM
11341+ err = -EINVAL;
11342+ h_file = au_hf_top(file);
11343+ if (au_test_loopback_kthread()) {
87a755f4
AM
11344+ au_warn_loopback(h_file->f_dentry->d_sb);
11345+ if (file->f_mapping != h_file->f_mapping) {
11346+ file->f_mapping = h_file->f_mapping;
11347+ smp_mb(); /* unnecessary? */
11348+ }
1308ab2a 11349+ }
4a4d8108
AM
11350+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
11351+ /* todo: necessary? */
11352+ /* file->f_ra = h_file->f_ra; */
11353+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1facf9fc 11354+
4a4d8108
AM
11355+ di_read_unlock(dentry, AuLock_IR);
11356+ fi_read_unlock(file);
1facf9fc 11357+
4f0767ce 11358+out:
4a4d8108 11359+ si_read_unlock(sb);
dece6358 11360+ return err;
1facf9fc 11361+}
11362+
4a4d8108
AM
11363+static ssize_t
11364+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
11365+ size_t len, unsigned int flags)
1facf9fc 11366+{
4a4d8108
AM
11367+ ssize_t err;
11368+ struct au_pin pin;
11369+ struct dentry *dentry;
11370+ struct inode *inode;
4a4d8108 11371+ struct file *h_file;
1facf9fc 11372+
4a4d8108
AM
11373+ dentry = file->f_dentry;
11374+ inode = dentry->d_inode;
e49829fe 11375+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11376+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11377+ if (unlikely(err))
11378+ goto out;
1facf9fc 11379+
4a4d8108
AM
11380+ err = au_ready_to_write(file, -1, &pin);
11381+ di_downgrade_lock(dentry, AuLock_IR);
11382+ if (unlikely(err))
11383+ goto out_unlock;
1facf9fc 11384+
4a4d8108
AM
11385+ h_file = au_hf_top(file);
11386+ au_unpin(&pin);
11387+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
11388+ au_cpup_attr_timesizes(inode);
11389+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11390+
4f0767ce 11391+out_unlock:
4a4d8108
AM
11392+ di_read_unlock(dentry, AuLock_IR);
11393+ fi_write_unlock(file);
4f0767ce 11394+out:
e49829fe 11395+ si_read_unlock(inode->i_sb);
4a4d8108
AM
11396+ mutex_unlock(&inode->i_mutex);
11397+ return err;
11398+}
1facf9fc 11399+
4a4d8108
AM
11400+/* ---------------------------------------------------------------------- */
11401+
4a4d8108
AM
11402+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
11403+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
1308ab2a 11404+
4a4d8108 11405+static unsigned long au_arch_prot_conv(unsigned long flags)
dece6358 11406+{
4a4d8108
AM
11407+ /* currently ppc64 only */
11408+#ifdef CONFIG_PPC64
11409+ /* cf. linux/arch/powerpc/include/asm/mman.h */
11410+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
11411+ return AuConv_VM_PROT(flags, SAO);
11412+#else
11413+ AuDebugOn(arch_calc_vm_prot_bits(-1));
11414+ return 0;
11415+#endif
dece6358
AM
11416+}
11417+
4a4d8108 11418+static unsigned long au_prot_conv(unsigned long flags)
dece6358 11419+{
4a4d8108
AM
11420+ return AuConv_VM_PROT(flags, READ)
11421+ | AuConv_VM_PROT(flags, WRITE)
11422+ | AuConv_VM_PROT(flags, EXEC)
11423+ | au_arch_prot_conv(flags);
dece6358
AM
11424+}
11425+
4a4d8108
AM
11426+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
11427+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
dece6358 11428+
4a4d8108 11429+static unsigned long au_flag_conv(unsigned long flags)
dece6358 11430+{
4a4d8108
AM
11431+ return AuConv_VM_MAP(flags, GROWSDOWN)
11432+ | AuConv_VM_MAP(flags, DENYWRITE)
11433+ | AuConv_VM_MAP(flags, EXECUTABLE)
11434+ | AuConv_VM_MAP(flags, LOCKED);
dece6358 11435+}
1308ab2a 11436+/*
4a4d8108
AM
11437+ * This is another ugly approach to keep the lock order, particularly
11438+ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
11439+ * find it in git-log, if you want.
1308ab2a 11440+ *
4a4d8108
AM
11441+ * native readdir: i_mutex, copy_to_user, mmap_sem
11442+ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
1308ab2a 11443+ *
4a4d8108
AM
11444+ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
11445+ * acquire aufs rwsem. It introduces a circular locking dependency.
11446+ * To address this problem, aufs_mmap() delegates the part which requires aufs
11447+ * rwsem to its internal workqueue.
1e00d052
AM
11448+ * But it is just a fake. A deadlock MAY happen between write() and mmap() for
11449+ * the same file in a multi-threaded application.
1308ab2a 11450+ */
11451+
4a4d8108
AM
11452+struct au_mmap_pre_args {
11453+ /* input */
11454+ struct file *file;
11455+ struct vm_area_struct *vma;
1308ab2a 11456+
4a4d8108
AM
11457+ /* output */
11458+ int *errp;
11459+ struct file *h_file;
11460+ struct au_branch *br;
4a4d8108 11461+};
dece6358 11462+
4a4d8108 11463+static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
2cbb1c4b 11464+ struct file **h_file, struct au_branch **br)
dece6358 11465+{
4a4d8108
AM
11466+ int err;
11467+ aufs_bindex_t bstart;
11468+ const unsigned char wlock
11469+ = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
11470+ struct dentry *dentry;
11471+ struct super_block *sb;
1308ab2a 11472+
4a4d8108
AM
11473+ dentry = file->f_dentry;
11474+ sb = dentry->d_sb;
e49829fe 11475+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108
AM
11476+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11477+ if (unlikely(err))
11478+ goto out;
11479+
4a4d8108
AM
11480+ if (wlock) {
11481+ struct au_pin pin;
11482+
11483+ err = au_ready_to_write(file, -1, &pin);
11484+ di_write_unlock(dentry);
11485+ if (unlikely(err))
11486+ goto out_unlock;
11487+ au_unpin(&pin);
11488+ } else
11489+ di_write_unlock(dentry);
11490+ bstart = au_fbstart(file);
11491+ *br = au_sbr(sb, bstart);
11492+ *h_file = au_hf_top(file);
11493+ get_file(*h_file);
2cbb1c4b 11494+ au_set_mmapped(file);
4a4d8108
AM
11495+
11496+out_unlock:
11497+ fi_write_unlock(file);
11498+out:
11499+ si_read_unlock(sb);
11500+ return err;
dece6358
AM
11501+}
11502+
4a4d8108 11503+static void au_call_mmap_pre(void *args)
dece6358 11504+{
4a4d8108 11505+ struct au_mmap_pre_args *a = args;
2cbb1c4b 11506+ *a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->br);
dece6358
AM
11507+}
11508+
4a4d8108 11509+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 11510+{
4a4d8108 11511+ int err, wkq_err;
2cbb1c4b 11512+ unsigned long prot;
4a4d8108
AM
11513+ struct au_mmap_pre_args args = {
11514+ .file = file,
11515+ .vma = vma,
11516+ .errp = &err
11517+ };
11518+
2cbb1c4b 11519+ AuDbgVmRegion(file, vma);
b752ccd1 11520+ wkq_err = au_wkq_wait_pre(au_call_mmap_pre, &args);
4a4d8108
AM
11521+ if (unlikely(wkq_err))
11522+ err = wkq_err;
11523+ if (unlikely(err))
11524+ goto out;
1308ab2a 11525+
2cbb1c4b
JR
11526+ au_vm_file_reset(vma, args.h_file);
11527+ prot = au_prot_conv(vma->vm_flags);
11528+ err = security_file_mmap(args.h_file, /*reqprot*/prot, prot,
11529+ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
4a4d8108 11530+ if (unlikely(err))
2cbb1c4b 11531+ goto out_reset;
4a4d8108 11532+
2cbb1c4b
JR
11533+ err = args.h_file->f_op->mmap(args.h_file, vma);
11534+ if (unlikely(err))
11535+ goto out_reset;
4a4d8108 11536+
2cbb1c4b 11537+ au_vm_prfile_set(vma, file);
4a4d8108
AM
11538+ vfsub_file_accessed(args.h_file);
11539+ /* update without lock, I don't think it is a problem */
2cbb1c4b
JR
11540+ fsstack_copy_attr_atime(file->f_dentry->d_inode,
11541+ args.h_file->f_dentry->d_inode);
11542+ goto out_fput; /* success */
4a4d8108 11543+
2cbb1c4b
JR
11544+out_reset:
11545+ au_unset_mmapped(file);
11546+ au_vm_file_reset(vma, file);
11547+out_fput:
4a4d8108 11548+ fput(args.h_file);
4f0767ce 11549+out:
4a4d8108
AM
11550+ return err;
11551+}
11552+
11553+/* ---------------------------------------------------------------------- */
11554+
1e00d052
AM
11555+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
11556+ int datasync)
4a4d8108
AM
11557+{
11558+ int err;
11559+ struct au_pin pin;
b752ccd1 11560+ struct dentry *dentry;
4a4d8108
AM
11561+ struct inode *inode;
11562+ struct file *h_file;
11563+ struct super_block *sb;
11564+
b752ccd1 11565+ dentry = file->f_dentry;
4a4d8108 11566+ inode = dentry->d_inode;
4a4d8108 11567+ sb = dentry->d_sb;
1e00d052 11568+ mutex_lock(&inode->i_mutex);
e49829fe
JR
11569+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11570+ if (unlikely(err))
11571+ goto out;
4a4d8108
AM
11572+
11573+ err = 0; /* -EBADF; */ /* posix? */
11574+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
e49829fe 11575+ goto out_si;
4a4d8108
AM
11576+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11577+ if (unlikely(err))
e49829fe 11578+ goto out_si;
4a4d8108
AM
11579+
11580+ err = au_ready_to_write(file, -1, &pin);
11581+ di_downgrade_lock(dentry, AuLock_IR);
11582+ if (unlikely(err))
11583+ goto out_unlock;
11584+ au_unpin(&pin);
11585+
11586+ err = -EINVAL;
11587+ h_file = au_hf_top(file);
53392da6
AM
11588+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
11589+ au_cpup_attr_timesizes(inode);
4a4d8108 11590+
4f0767ce 11591+out_unlock:
4a4d8108 11592+ di_read_unlock(dentry, AuLock_IR);
1308ab2a 11593+ fi_write_unlock(file);
e49829fe 11594+out_si:
953406b4 11595+ si_read_unlock(sb);
e49829fe 11596+out:
1e00d052 11597+ mutex_unlock(&inode->i_mutex);
4a4d8108 11598+ return err;
dece6358
AM
11599+}
11600+
4a4d8108
AM
11601+/* no one supports this operation, currently */
11602+#if 0
11603+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 11604+{
4a4d8108
AM
11605+ int err;
11606+ struct au_pin pin;
1308ab2a 11607+ struct dentry *dentry;
4a4d8108
AM
11608+ struct inode *inode;
11609+ struct file *file, *h_file;
1308ab2a 11610+
4a4d8108 11611+ file = kio->ki_filp;
1308ab2a 11612+ dentry = file->f_dentry;
4a4d8108 11613+ inode = dentry->d_inode;
e49829fe 11614+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11615+
11616+ err = 0; /* -EBADF; */ /* posix? */
11617+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
11618+ goto out;
11619+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11620+ if (unlikely(err))
1308ab2a 11621+ goto out;
11622+
4a4d8108
AM
11623+ err = au_ready_to_write(file, -1, &pin);
11624+ di_downgrade_lock(dentry, AuLock_IR);
11625+ if (unlikely(err))
11626+ goto out_unlock;
11627+ au_unpin(&pin);
1308ab2a 11628+
4a4d8108
AM
11629+ err = -ENOSYS;
11630+ h_file = au_hf_top(file);
11631+ if (h_file->f_op && h_file->f_op->aio_fsync) {
11632+ struct dentry *h_d;
11633+ struct mutex *h_mtx;
1308ab2a 11634+
4a4d8108
AM
11635+ h_d = h_file->f_dentry;
11636+ h_mtx = &h_d->d_inode->i_mutex;
11637+ if (!is_sync_kiocb(kio)) {
11638+ get_file(h_file);
11639+ fput(file);
11640+ }
11641+ kio->ki_filp = h_file;
11642+ err = h_file->f_op->aio_fsync(kio, datasync);
11643+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
11644+ if (!err)
11645+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
11646+ /*ignore*/
11647+ au_cpup_attr_timesizes(inode);
11648+ mutex_unlock(h_mtx);
11649+ }
1308ab2a 11650+
4f0767ce 11651+out_unlock:
4a4d8108
AM
11652+ di_read_unlock(dentry, AuLock_IR);
11653+ fi_write_unlock(file);
4f0767ce 11654+out:
e49829fe 11655+ si_read_unlock(inode->i_sb); /* same unlock as in aufs_splice_write() */
4a4d8108
AM
11656+ mutex_unlock(&inode->i_mutex);
11657+ return err;
dece6358 11658+}
4a4d8108 11659+#endif
dece6358 11660+
4a4d8108 11661+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 11662+{
4a4d8108
AM
11663+ int err;
11664+ struct file *h_file;
11665+ struct dentry *dentry;
11666+ struct super_block *sb;
1308ab2a 11667+
4a4d8108
AM
11668+ dentry = file->f_dentry;
11669+ sb = dentry->d_sb;
e49829fe 11670+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11671+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11672+ if (unlikely(err))
11673+ goto out;
11674+
11675+ h_file = au_hf_top(file);
11676+ if (h_file->f_op && h_file->f_op->fasync)
11677+ err = h_file->f_op->fasync(fd, h_file, flag);
11678+
11679+ di_read_unlock(dentry, AuLock_IR);
11680+ fi_read_unlock(file);
1308ab2a 11681+
4f0767ce 11682+out:
4a4d8108 11683+ si_read_unlock(sb);
1308ab2a 11684+ return err;
dece6358 11685+}
4a4d8108
AM
11686+
11687+/* ---------------------------------------------------------------------- */
11688+
11689+/* no one supports this operation, currently */
11690+#if 0
11691+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
11692+ size_t len, loff_t *pos , int more)
11693+{
11694+}
11695+#endif
11696+
11697+/* ---------------------------------------------------------------------- */
11698+
11699+const struct file_operations aufs_file_fop = {
11700+ .owner = THIS_MODULE,
2cbb1c4b 11701+
027c5e7a 11702+ .llseek = default_llseek,
4a4d8108
AM
11703+
11704+ .read = aufs_read,
11705+ .write = aufs_write,
11706+ .aio_read = aufs_aio_read,
11707+ .aio_write = aufs_aio_write,
11708+#ifdef CONFIG_AUFS_POLL
11709+ .poll = aufs_poll,
11710+#endif
11711+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1
AM
11712+#ifdef CONFIG_COMPAT
11713+ .compat_ioctl = aufs_ioctl_nondir, /* same */
11714+#endif
4a4d8108
AM
11715+ .mmap = aufs_mmap,
11716+ .open = aufs_open_nondir,
11717+ .flush = aufs_flush_nondir,
11718+ .release = aufs_release_nondir,
11719+ .fsync = aufs_fsync_nondir,
11720+ /* .aio_fsync = aufs_aio_fsync_nondir, */
11721+ .fasync = aufs_fasync,
11722+ /* .sendpage = aufs_sendpage, */
11723+ .splice_write = aufs_splice_write,
11724+ .splice_read = aufs_splice_read,
11725+#if 0
11726+ .aio_splice_write = aufs_aio_splice_write,
11727+ .aio_splice_read = aufs_aio_splice_read
11728+#endif
11729+};
7f207e10
AM
11730diff -urN /usr/share/empty/fs/aufs/f_op_sp.c linux/fs/aufs/f_op_sp.c
11731--- /usr/share/empty/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
53392da6 11732+++ linux/fs/aufs/f_op_sp.c 2011-08-24 13:30:24.731313534 +0200
e49829fe 11733@@ -0,0 +1,299 @@
1308ab2a 11734+/*
027c5e7a 11735+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 11736+ *
11737+ * This program, aufs is free software; you can redistribute it and/or modify
11738+ * it under the terms of the GNU General Public License as published by
11739+ * the Free Software Foundation; either version 2 of the License, or
11740+ * (at your option) any later version.
11741+ *
11742+ * This program is distributed in the hope that it will be useful,
11743+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11744+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11745+ * GNU General Public License for more details.
11746+ *
11747+ * You should have received a copy of the GNU General Public License
11748+ * along with this program; if not, write to the Free Software
11749+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11750+ */
dece6358 11751+
1308ab2a 11752+/*
4a4d8108
AM
11753+ * file operations for special files.
11754+ * while they exist in aufs virtually,
11755+ * their file I/O is handled out of aufs.
1308ab2a 11756+ */
11757+
4a4d8108
AM
11758+#include <linux/fs_stack.h>
11759+#include "aufs.h"
1308ab2a 11760+
4a4d8108
AM
11761+static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
11762+ unsigned long nv, loff_t pos)
dece6358 11763+{
4a4d8108
AM
11764+ ssize_t err;
11765+ aufs_bindex_t bstart;
11766+ unsigned char wbr;
11767+ struct file *file, *h_file;
11768+ struct super_block *sb;
1308ab2a 11769+
4a4d8108
AM
11770+ file = kio->ki_filp;
11771+ sb = file->f_dentry->d_sb;
11772+ si_read_lock(sb, AuLock_FLUSH);
11773+ fi_read_lock(file);
11774+ bstart = au_fbstart(file);
11775+ h_file = au_hf_top(file);
11776+ fi_read_unlock(file);
11777+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11778+ si_read_unlock(sb);
11779+
11780+ /* do not change the file in kio */
11781+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
11782+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
11783+ if (err > 0 && wbr)
11784+ file_accessed(h_file);
11785+
11786+ return err;
11787+}
11788+
11789+static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
11790+ unsigned long nv, loff_t pos)
11791+{
11792+ ssize_t err;
11793+ aufs_bindex_t bstart;
11794+ unsigned char wbr;
11795+ struct super_block *sb;
11796+ struct file *file, *h_file;
11797+
11798+ file = kio->ki_filp;
11799+ sb = file->f_dentry->d_sb;
11800+ si_read_lock(sb, AuLock_FLUSH);
11801+ fi_read_lock(file);
11802+ bstart = au_fbstart(file);
11803+ h_file = au_hf_top(file);
11804+ fi_read_unlock(file);
11805+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11806+ si_read_unlock(sb);
11807+
11808+ /* do not change the file in kio */
11809+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
11810+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
11811+ if (err > 0 && wbr)
11812+ file_update_time(h_file);
11813+
11814+ return err;
11815+}
11816+
11817+/* ---------------------------------------------------------------------- */
11818+
11819+static int aufs_release_sp(struct inode *inode, struct file *file)
11820+{
11821+ int err;
11822+ struct file *h_file;
11823+
11824+ fi_read_lock(file);
11825+ h_file = au_hf_top(file);
11826+ fi_read_unlock(file);
11827+ /* close this fifo in aufs */
11828+ err = h_file->f_op->release(inode, file); /* ignore */
11829+ aufs_release_nondir(inode, file); /* ignore */
11830+ return err;
11831+}
11832+
11833+/* ---------------------------------------------------------------------- */
11834+
11835+/* currently, support only FIFO */
4f0767ce
JR
11836+enum {
11837+ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
11838+ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
11839+ AuSp_Last
11840+};
4a4d8108
AM
11841+static int aufs_open_sp(struct inode *inode, struct file *file);
11842+static struct au_sp_fop {
11843+ int done;
11844+ struct file_operations fop; /* not 'const' */
11845+ spinlock_t spin;
11846+} au_sp_fop[AuSp_Last] = {
11847+ [AuSp_FIFO] = {
11848+ .fop = {
11849+ .owner = THIS_MODULE,
11850+ .open = aufs_open_sp
11851+ }
11852+ }
11853+};
11854+
11855+static void au_init_fop_sp(struct file *file)
11856+{
11857+ struct au_sp_fop *p;
11858+ int i;
11859+ struct file *h_file;
11860+
11861+ p = au_sp_fop;
11862+ if (unlikely(!p->done)) {
11863+ /* initialize first time only */
11864+ static DEFINE_SPINLOCK(spin);
11865+
11866+ spin_lock(&spin);
11867+ if (!p->done) {
11868+ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
11869+ != AuSp_Last);
11870+ for (i = 0; i < AuSp_Last; i++)
11871+ spin_lock_init(&p[i].spin);
11872+ p->done = 1;
11873+ }
11874+ spin_unlock(&spin);
11875+ }
11876+
11877+ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
11878+ case FMODE_READ:
11879+ i = AuSp_FIFO_R;
11880+ break;
11881+ case FMODE_WRITE:
11882+ i = AuSp_FIFO_W;
11883+ break;
11884+ case FMODE_READ | FMODE_WRITE:
11885+ i = AuSp_FIFO_RW;
11886+ break;
11887+ default:
11888+ BUG();
11889+ }
11890+
11891+ p += i;
11892+ if (unlikely(!p->done)) {
11893+ /* initialize first time only */
11894+ h_file = au_hf_top(file);
11895+ spin_lock(&p->spin);
11896+ if (!p->done) {
11897+ p->fop = *h_file->f_op;
11898+ p->fop.owner = THIS_MODULE;
11899+ if (p->fop.aio_read)
11900+ p->fop.aio_read = aufs_aio_read_sp;
11901+ if (p->fop.aio_write)
11902+ p->fop.aio_write = aufs_aio_write_sp;
11903+ p->fop.release = aufs_release_sp;
11904+ p->done = 1;
11905+ }
11906+ spin_unlock(&p->spin);
11907+ }
11908+ file->f_op = &p->fop;
11909+}
11910+
11911+static int au_cpup_sp(struct dentry *dentry)
11912+{
11913+ int err;
11914+ aufs_bindex_t bcpup;
11915+ struct au_pin pin;
11916+ struct au_wr_dir_args wr_dir_args = {
11917+ .force_btgt = -1,
11918+ .flags = 0
11919+ };
11920+
11921+ AuDbg("%.*s\n", AuDLNPair(dentry));
11922+
11923+ di_read_unlock(dentry, AuLock_IR);
11924+ di_write_lock_child(dentry);
11925+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11926+ if (unlikely(err < 0))
11927+ goto out;
11928+ bcpup = err;
11929+ err = 0;
11930+ if (bcpup == au_dbstart(dentry))
11931+ goto out; /* success */
11932+
11933+ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
11934+ AuPin_MNT_WRITE);
11935+ if (!err) {
11936+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
11937+ au_unpin(&pin);
11938+ }
11939+
4f0767ce 11940+out:
4a4d8108
AM
11941+ di_downgrade_lock(dentry, AuLock_IR);
11942+ return err;
11943+}
11944+
11945+static int au_do_open_sp(struct file *file, int flags)
11946+{
11947+ int err;
11948+ struct dentry *dentry;
11949+ struct super_block *sb;
11950+ struct file *h_file;
11951+ struct inode *h_inode;
11952+
11953+ dentry = file->f_dentry;
11954+ AuDbg("%.*s\n", AuDLNPair(dentry));
11955+
11956+ /*
11957+ * try copying-up.
11958+ * operate on the ro branch is not an error.
11959+ */
11960+ au_cpup_sp(dentry); /* ignore */
11961+
11962+ /* prepare h_file */
11963+ err = au_do_open_nondir(file, vfsub_file_flags(file));
11964+ if (unlikely(err))
11965+ goto out;
11966+
11967+ sb = dentry->d_sb;
11968+ h_file = au_hf_top(file);
11969+ h_inode = h_file->f_dentry->d_inode;
11970+ di_read_unlock(dentry, AuLock_IR);
11971+ fi_write_unlock(file);
11972+ si_read_unlock(sb);
11973+ /* open this fifo in aufs */
11974+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
11975+ si_noflush_read_lock(sb);
11976+ fi_write_lock(file);
11977+ di_read_lock_child(dentry, AuLock_IR);
11978+ if (!err)
11979+ au_init_fop_sp(file);
4a4d8108 11980+
4f0767ce 11981+out:
4a4d8108
AM
11982+ return err;
11983+}
11984+
11985+static int aufs_open_sp(struct inode *inode, struct file *file)
11986+{
11987+ int err;
11988+ struct super_block *sb;
11989+
11990+ sb = file->f_dentry->d_sb;
11991+ si_read_lock(sb, AuLock_FLUSH);
11992+ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
11993+ si_read_unlock(sb);
11994+ return err;
11995+}
11996+
11997+/* ---------------------------------------------------------------------- */
11998+
11999+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
12000+{
12001+ init_special_inode(inode, mode, rdev);
12002+
12003+ switch (mode & S_IFMT) {
12004+ case S_IFIFO:
12005+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
12006+ /*FALLTHROUGH*/
12007+ case S_IFCHR:
12008+ case S_IFBLK:
12009+ case S_IFSOCK:
12010+ break;
12011+ default:
12012+ AuDebugOn(1);
12013+ }
12014+}
12015+
12016+int au_special_file(umode_t mode)
12017+{
12018+ int ret;
12019+
12020+ ret = 0;
12021+ switch (mode & S_IFMT) {
12022+ case S_IFIFO:
12023+#if 0
12024+ case S_IFCHR:
12025+ case S_IFBLK:
12026+ case S_IFSOCK:
12027+#endif
12028+ ret = 1;
12029+ }
12030+
12031+ return ret;
12032+}
7f207e10
AM
12033diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
12034--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
53392da6 12035+++ linux/fs/aufs/fstype.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12036@@ -0,0 +1,497 @@
12037+/*
027c5e7a 12038+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
12039+ *
12040+ * This program, aufs is free software; you can redistribute it and/or modify
12041+ * it under the terms of the GNU General Public License as published by
12042+ * the Free Software Foundation; either version 2 of the License, or
12043+ * (at your option) any later version.
12044+ *
12045+ * This program is distributed in the hope that it will be useful,
12046+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12047+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12048+ * GNU General Public License for more details.
12049+ *
12050+ * You should have received a copy of the GNU General Public License
12051+ * along with this program; if not, write to the Free Software
12052+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12053+ */
12054+
12055+/*
12056+ * judging filesystem type
12057+ */
12058+
12059+#ifndef __AUFS_FSTYPE_H__
12060+#define __AUFS_FSTYPE_H__
12061+
12062+#ifdef __KERNEL__
12063+
12064+#include <linux/fs.h>
12065+#include <linux/magic.h>
12066+#include <linux/romfs_fs.h>
12067+#include <linux/aufs_type.h>
12068+
12069+static inline int au_test_aufs(struct super_block *sb)
12070+{
12071+ return sb->s_magic == AUFS_SUPER_MAGIC;
12072+}
12073+
12074+static inline const char *au_sbtype(struct super_block *sb)
12075+{
12076+ return sb->s_type->name;
12077+}
1308ab2a 12078+
12079+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
12080+{
12081+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
12082+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
12083+#else
12084+ return 0;
12085+#endif /* CONFIG_ISO9660_FS */
12086+}
12087+
1308ab2a 12088+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 12089+{
1308ab2a 12090+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
12091+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
12092+#else
12093+ return 0;
12094+#endif /* CONFIG_ROMFS_FS */
12095+}
12096+
1308ab2a 12097+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 12098+{
1308ab2a 12099+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
12100+ return sb->s_magic == CRAMFS_MAGIC;
12101+#endif
12102+ return 0;
12103+}
12104+
12105+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
12106+{
12107+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
12108+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
12109+#else
12110+ return 0;
12111+#endif
12112+}
12113+
1308ab2a 12114+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 12115+{
1308ab2a 12116+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
12117+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
12118+#else
12119+ return 0;
12120+#endif
12121+}
12122+
1308ab2a 12123+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 12124+{
1308ab2a 12125+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
12126+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
12127+#else
12128+ return 0;
12129+#endif
12130+}
12131+
1308ab2a 12132+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 12133+{
1308ab2a 12134+#ifdef CONFIG_TMPFS
12135+ return sb->s_magic == TMPFS_MAGIC;
12136+#else
12137+ return 0;
dece6358 12138+#endif
dece6358
AM
12139+}
12140+
1308ab2a 12141+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 12142+{
1308ab2a 12143+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
12144+ return !strcmp(au_sbtype(sb), "ecryptfs");
12145+#else
12146+ return 0;
12147+#endif
1facf9fc 12148+}
12149+
1308ab2a 12150+static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
1facf9fc 12151+{
1308ab2a 12152+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
12153+ return sb->s_magic == SMB_SUPER_MAGIC;
12154+#else
12155+ return 0;
1facf9fc 12156+#endif
1facf9fc 12157+}
12158+
1308ab2a 12159+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
1facf9fc 12160+{
1308ab2a 12161+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
12162+ return sb->s_magic == OCFS2_SUPER_MAGIC;
12163+#else
12164+ return 0;
12165+#endif
1facf9fc 12166+}
12167+
1308ab2a 12168+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
1facf9fc 12169+{
1308ab2a 12170+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
12171+ return sb->s_magic == DLMFS_MAGIC;
12172+#else
12173+ return 0;
12174+#endif
1facf9fc 12175+}
12176+
1308ab2a 12177+static inline int au_test_coda(struct super_block *sb __maybe_unused)
1facf9fc 12178+{
1308ab2a 12179+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
12180+ return sb->s_magic == CODA_SUPER_MAGIC;
12181+#else
12182+ return 0;
12183+#endif
12184+}
12185+
12186+static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
12187+{
12188+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
12189+ return sb->s_magic == V9FS_MAGIC;
12190+#else
12191+ return 0;
12192+#endif
12193+}
12194+
12195+static inline int au_test_ext4(struct super_block *sb __maybe_unused)
12196+{
12197+#if defined(CONFIG_EXT4_FS) || defined(CONFIG_EXT4_FS_MODULE)
12198+ return sb->s_magic == EXT4_SUPER_MAGIC;
12199+#else
12200+ return 0;
12201+#endif /* CONFIG_EXT4_FS */
12202+}
12203+
12204+static inline int au_test_sysv(struct super_block *sb __maybe_unused)
12205+{
12206+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
12207+ return !strcmp(au_sbtype(sb), "sysv");
12208+#else
12209+ return 0;
12210+#endif
12211+}
12212+
12213+static inline int au_test_ramfs(struct super_block *sb)
12214+{
12215+ return sb->s_magic == RAMFS_MAGIC;
12216+}
12217+
12218+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
12219+{
12220+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
12221+ return sb->s_magic == UBIFS_SUPER_MAGIC;
12222+#else
12223+ return 0;
12224+#endif
12225+}
12226+
12227+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
12228+{
12229+#ifdef CONFIG_PROC_FS
12230+ return sb->s_magic == PROC_SUPER_MAGIC;
12231+#else
12232+ return 0;
12233+#endif
12234+}
12235+
12236+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
12237+{
12238+#ifdef CONFIG_SYSFS
12239+ return sb->s_magic == SYSFS_MAGIC;
12240+#else
12241+ return 0;
12242+#endif
12243+}
12244+
12245+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
12246+{
12247+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
12248+ return sb->s_magic == CONFIGFS_MAGIC;
12249+#else
12250+ return 0;
12251+#endif
12252+}
12253+
12254+static inline int au_test_minix(struct super_block *sb __maybe_unused)
12255+{
12256+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
12257+ return sb->s_magic == MINIX3_SUPER_MAGIC
12258+ || sb->s_magic == MINIX2_SUPER_MAGIC
12259+ || sb->s_magic == MINIX2_SUPER_MAGIC2
12260+ || sb->s_magic == MINIX_SUPER_MAGIC
12261+ || sb->s_magic == MINIX_SUPER_MAGIC2;
12262+#else
12263+ return 0;
12264+#endif
12265+}
12266+
12267+static inline int au_test_cifs(struct super_block *sb __maybe_unused)
12268+{
12269+#if defined(CONFIG_CIFS_FS) || defined(CONFIG_CIFS_FS_MODULE)
12270+ return sb->s_magic == CIFS_MAGIC_NUMBER;
12271+#else
12272+ return 0;
12273+#endif /* CONFIG_CIFS_FS */
12274+}
12275+
12276+static inline int au_test_fat(struct super_block *sb __maybe_unused)
12277+{
12278+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
12279+ return sb->s_magic == MSDOS_SUPER_MAGIC;
12280+#else
12281+ return 0;
12282+#endif
12283+}
12284+
12285+static inline int au_test_msdos(struct super_block *sb)
12286+{
12287+ return au_test_fat(sb);
12288+}
12289+
12290+static inline int au_test_vfat(struct super_block *sb)
12291+{
12292+ return au_test_fat(sb);
12293+}
12294+
12295+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
12296+{
12297+#ifdef CONFIG_SECURITYFS
12298+ return sb->s_magic == SECURITYFS_MAGIC;
12299+#else
12300+ return 0;
12301+#endif
12302+}
12303+
12304+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
12305+{
12306+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
12307+ return sb->s_magic == SQUASHFS_MAGIC;
12308+#else
12309+ return 0;
12310+#endif
12311+}
12312+
12313+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
12314+{
12315+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
12316+ return sb->s_magic == BTRFS_SUPER_MAGIC;
12317+#else
12318+ return 0;
12319+#endif
12320+}
12321+
12322+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
12323+{
12324+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
12325+ return sb->s_magic == XENFS_SUPER_MAGIC;
12326+#else
12327+ return 0;
12328+#endif
12329+}
12330+
12331+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
12332+{
12333+#ifdef CONFIG_DEBUG_FS
12334+ return sb->s_magic == DEBUGFS_MAGIC;
12335+#else
12336+ return 0;
12337+#endif
12338+}
12339+
12340+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
12341+{
12342+#if defined(CONFIG_NILFS2_FS) || defined(CONFIG_NILFS2_FS_MODULE)
12343+ return sb->s_magic == NILFS_SUPER_MAGIC;
12344+#else
12345+ return 0;
12346+#endif /* CONFIG_NILFS2_FS */
12347+}
12348+
4a4d8108
AM
12349+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
12350+{
12351+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
12352+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
12353+#else
12354+ return 0;
12355+#endif
12356+}
12357+
1308ab2a 12358+/* ---------------------------------------------------------------------- */
12359+/*
12360+ * they can't be an aufs branch.
12361+ */
12362+static inline int au_test_fs_unsuppoted(struct super_block *sb)
12363+{
12364+ return
12365+#ifndef CONFIG_AUFS_BR_RAMFS
12366+ au_test_ramfs(sb) ||
12367+#endif
12368+ au_test_procfs(sb)
12369+ || au_test_sysfs(sb)
12370+ || au_test_configfs(sb)
12371+ || au_test_debugfs(sb)
12372+ || au_test_securityfs(sb)
12373+ || au_test_xenfs(sb)
12374+ || au_test_ecryptfs(sb)
12375+ /* || !strcmp(au_sbtype(sb), "unionfs") */
12376+ || au_test_aufs(sb); /* will be supported in next version */
12377+}
12378+
12379+/*
12380+ * If the filesystem supports NFS-export, then it has to support NULL as
12381+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
12382+ * We can apply this principle when we handle a lower filesystem.
12383+ */
12384+static inline int au_test_fs_null_nd(struct super_block *sb)
12385+{
12386+ return !!sb->s_export_op;
12387+}
12388+
12389+static inline int au_test_fs_remote(struct super_block *sb)
12390+{
12391+ return !au_test_tmpfs(sb)
12392+#ifdef CONFIG_AUFS_BR_RAMFS
12393+ && !au_test_ramfs(sb)
12394+#endif
12395+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
12396+}
12397+
12398+/* ---------------------------------------------------------------------- */
12399+
12400+/*
12401+ * Note: these functions (below) are created after reading ->getattr() in all
12402+ * filesystems under linux/fs. it means we have to do so in every update...
12403+ */
12404+
12405+/*
12406+ * some filesystems require getattr to refresh the inode attributes before
12407+ * referencing.
12408+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
12409+ * and leave the work for d_revalidate()
12410+ */
12411+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
12412+{
12413+ return au_test_nfs(sb)
12414+ || au_test_fuse(sb)
12415+ /* || au_test_smbfs(sb) */ /* untested */
12416+ /* || au_test_ocfs2(sb) */ /* untested */
12417+ /* || au_test_btrfs(sb) */ /* untested */
12418+ /* || au_test_coda(sb) */ /* untested */
12419+ /* || au_test_v9fs(sb) */ /* untested */
12420+ ;
12421+}
12422+
12423+/*
12424+ * filesystems which don't maintain i_size or i_blocks.
12425+ */
12426+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
12427+{
12428+ return au_test_xfs(sb)
4a4d8108
AM
12429+ || au_test_btrfs(sb)
12430+ || au_test_ubifs(sb)
12431+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 12432+ /* || au_test_ext4(sb) */ /* untested */
12433+ /* || au_test_ocfs2(sb) */ /* untested */
12434+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
12435+ /* || au_test_sysv(sb) */ /* untested */
1308ab2a 12436+ /* || au_test_minix(sb) */ /* untested */
12437+ ;
12438+}
12439+
12440+/*
12441+ * filesystems which don't store the correct value in some of their inode
12442+ * attributes.
12443+ */
12444+static inline int au_test_fs_bad_iattr(struct super_block *sb)
12445+{
12446+ return au_test_fs_bad_iattr_size(sb)
12447+ /* || au_test_cifs(sb) */ /* untested */
12448+ || au_test_fat(sb)
12449+ || au_test_msdos(sb)
12450+ || au_test_vfat(sb);
1facf9fc 12451+}
12452+
12453+/* they don't check i_nlink in link(2) */
12454+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
12455+{
12456+ return au_test_tmpfs(sb)
12457+#ifdef CONFIG_AUFS_BR_RAMFS
12458+ || au_test_ramfs(sb)
12459+#endif
4a4d8108
AM
12460+ || au_test_ubifs(sb)
12461+ || au_test_btrfs(sb)
12462+ || au_test_hfsplus(sb);
1facf9fc 12463+}
12464+
12465+/*
12466+ * filesystems which set S_NOATIME and S_NOCMTIME.
12467+ */
12468+static inline int au_test_fs_notime(struct super_block *sb)
12469+{
12470+ return au_test_nfs(sb)
12471+ || au_test_fuse(sb)
dece6358 12472+ || au_test_ubifs(sb)
1facf9fc 12473+ /* || au_test_cifs(sb) */ /* untested */
1facf9fc 12474+ ;
12475+}
12476+
12477+/*
12478+ * filesystems which require replacing i_mapping.
12479+ */
12480+static inline int au_test_fs_bad_mapping(struct super_block *sb)
12481+{
dece6358
AM
12482+ return au_test_fuse(sb)
12483+ || au_test_ubifs(sb);
1facf9fc 12484+}
12485+
12486+/* temporary support for i#1 in cramfs */
12487+static inline int au_test_fs_unique_ino(struct inode *inode)
12488+{
12489+ if (au_test_cramfs(inode->i_sb))
12490+ return inode->i_ino != 1;
12491+ return 1;
12492+}
12493+
12494+/* ---------------------------------------------------------------------- */
12495+
12496+/*
12497+ * the filesystem where the xino files are placed must support i/o after
12498+ * unlink and maintain i_size and i_blocks.
12499+ */
12500+static inline int au_test_fs_bad_xino(struct super_block *sb)
12501+{
12502+ return au_test_fs_remote(sb)
12503+ || au_test_fs_bad_iattr_size(sb)
12504+#ifdef CONFIG_AUFS_BR_RAMFS
12505+ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
12506+#else
12507+ || !au_test_fs_null_nd(sb) /* to keep xino code simple */
12508+#endif
12509+ /* don't want unnecessary work for xino */
12510+ || au_test_aufs(sb)
1308ab2a 12511+ || au_test_ecryptfs(sb)
12512+ || au_test_nilfs(sb);
1facf9fc 12513+}
12514+
12515+static inline int au_test_fs_trunc_xino(struct super_block *sb)
12516+{
12517+ return au_test_tmpfs(sb)
12518+ || au_test_ramfs(sb);
12519+}
12520+
12521+/*
12522+ * test if the @sb is real-readonly.
12523+ */
12524+static inline int au_test_fs_rr(struct super_block *sb)
12525+{
12526+ return au_test_squashfs(sb)
12527+ || au_test_iso9660(sb)
12528+ || au_test_cramfs(sb)
12529+ || au_test_romfs(sb);
12530+}
12531+
12532+#endif /* __KERNEL__ */
12533+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
12534diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
12535--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12536+++ linux/fs/aufs/hfsnotify.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 12537@@ -0,0 +1,247 @@
1facf9fc 12538+/*
027c5e7a 12539+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 12540+ *
12541+ * This program, aufs is free software; you can redistribute it and/or modify
12542+ * it under the terms of the GNU General Public License as published by
12543+ * the Free Software Foundation; either version 2 of the License, or
12544+ * (at your option) any later version.
dece6358
AM
12545+ *
12546+ * This program is distributed in the hope that it will be useful,
12547+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12548+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12549+ * GNU General Public License for more details.
12550+ *
12551+ * You should have received a copy of the GNU General Public License
12552+ * along with this program; if not, write to the Free Software
12553+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 12554+ */
12555+
12556+/*
4a4d8108 12557+ * fsnotify for the lower directories
1facf9fc 12558+ */
12559+
12560+#include "aufs.h"
12561+
4a4d8108
AM
12562+/* FS_IN_IGNORED is unnecessary */
12563+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
12564+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 12565+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
1facf9fc 12566+
0c5527e5 12567+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 12568+{
0c5527e5
AM
12569+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
12570+ hn_mark);
4a4d8108 12571+ AuDbg("here\n");
7f207e10
AM
12572+ hn->hn_mark_dead = 1;
12573+ smp_mb();
12574+ wake_up_all(&au_hfsn_wq);
4a4d8108 12575+}
1facf9fc 12576+
027c5e7a 12577+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 12578+{
027c5e7a
AM
12579+ struct au_hnotify *hn;
12580+ struct super_block *sb;
12581+ struct au_branch *br;
0c5527e5 12582+ struct fsnotify_mark *mark;
027c5e7a 12583+ aufs_bindex_t bindex;
1facf9fc 12584+
027c5e7a
AM
12585+ hn = hinode->hi_notify;
12586+ sb = hn->hn_aufs_inode->i_sb;
12587+ bindex = au_br_index(sb, hinode->hi_id);
12588+ br = au_sbr(sb, bindex);
7f207e10 12589+ hn->hn_mark_dead = 0;
0c5527e5
AM
12590+ mark = &hn->hn_mark;
12591+ fsnotify_init_mark(mark, au_hfsn_free_mark);
12592+ mark->mask = AuHfsnMask;
7f207e10
AM
12593+ /*
12594+ * by udba rename or rmdir, aufs assigns a new inode to the known
12595+ * h_inode, so specify 1 to allow dups.
12596+ */
027c5e7a
AM
12597+ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode,
12598+ /*mnt*/NULL, /*allow_dups*/1);
1facf9fc 12599+}
12600+
027c5e7a 12601+static void au_hfsn_free(struct au_hinode *hinode)
1facf9fc 12602+{
027c5e7a 12603+ struct au_hnotify *hn;
0c5527e5 12604+ struct fsnotify_mark *mark;
953406b4 12605+
027c5e7a 12606+ hn = hinode->hi_notify;
0c5527e5
AM
12607+ mark = &hn->hn_mark;
12608+ fsnotify_destroy_mark(mark);
12609+ fsnotify_put_mark(mark);
7f207e10
AM
12610+
12611+ /* TODO: bad approach */
12612+ wait_event(au_hfsn_wq, hn->hn_mark_dead);
1facf9fc 12613+}
12614+
12615+/* ---------------------------------------------------------------------- */
12616+
4a4d8108 12617+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 12618+{
0c5527e5 12619+ struct fsnotify_mark *mark;
1facf9fc 12620+
0c5527e5
AM
12621+ mark = &hinode->hi_notify->hn_mark;
12622+ spin_lock(&mark->lock);
1facf9fc 12623+ if (do_set) {
0c5527e5
AM
12624+ AuDebugOn(mark->mask & AuHfsnMask);
12625+ mark->mask |= AuHfsnMask;
1facf9fc 12626+ } else {
0c5527e5
AM
12627+ AuDebugOn(!(mark->mask & AuHfsnMask));
12628+ mark->mask &= ~AuHfsnMask;
1facf9fc 12629+ }
0c5527e5 12630+ spin_unlock(&mark->lock);
4a4d8108 12631+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 12632+}
12633+
4a4d8108 12634+/* ---------------------------------------------------------------------- */
1facf9fc 12635+
4a4d8108
AM
12636+/* #define AuDbgHnotify */
12637+#ifdef AuDbgHnotify
12638+static char *au_hfsn_name(u32 mask)
12639+{
12640+#ifdef CONFIG_AUFS_DEBUG
12641+#define test_ret(flag) if (mask & flag) \
12642+ return #flag;
12643+ test_ret(FS_ACCESS);
12644+ test_ret(FS_MODIFY);
12645+ test_ret(FS_ATTRIB);
12646+ test_ret(FS_CLOSE_WRITE);
12647+ test_ret(FS_CLOSE_NOWRITE);
12648+ test_ret(FS_OPEN);
12649+ test_ret(FS_MOVED_FROM);
12650+ test_ret(FS_MOVED_TO);
12651+ test_ret(FS_CREATE);
12652+ test_ret(FS_DELETE);
12653+ test_ret(FS_DELETE_SELF);
12654+ test_ret(FS_MOVE_SELF);
12655+ test_ret(FS_UNMOUNT);
12656+ test_ret(FS_Q_OVERFLOW);
12657+ test_ret(FS_IN_IGNORED);
12658+ test_ret(FS_IN_ISDIR);
12659+ test_ret(FS_IN_ONESHOT);
12660+ test_ret(FS_EVENT_ON_CHILD);
12661+ return "";
12662+#undef test_ret
12663+#else
12664+ return "??";
12665+#endif
1facf9fc 12666+}
4a4d8108 12667+#endif
1facf9fc 12668+
12669+/* ---------------------------------------------------------------------- */
12670+
4a4d8108 12671+static int au_hfsn_handle_event(struct fsnotify_group *group,
0c5527e5
AM
12672+ struct fsnotify_mark *inode_mark,
12673+ struct fsnotify_mark *vfsmount_mark,
4a4d8108 12674+ struct fsnotify_event *event)
1facf9fc 12675+{
12676+ int err;
4a4d8108
AM
12677+ struct au_hnotify *hnotify;
12678+ struct inode *h_dir, *h_inode;
12679+ __u32 mask;
4a4d8108
AM
12680+ struct qstr h_child_qstr = {
12681+ .name = event->file_name,
12682+ .len = event->name_len
12683+ };
12684+
12685+ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 12686+
12687+ err = 0;
0c5527e5 12688+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108
AM
12689+ mask = event->mask;
12690+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 12691+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 12692+ goto out;
1facf9fc 12693+
4a4d8108
AM
12694+ h_dir = event->to_tell;
12695+ h_inode = event->inode;
12696+#ifdef AuDbgHnotify
12697+ au_debug(1);
12698+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
12699+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
12700+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
12701+ h_dir->i_ino, mask, au_hfsn_name(mask),
12702+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
12703+ /* WARN_ON(1); */
1facf9fc 12704+ }
4a4d8108 12705+ au_debug(0);
1facf9fc 12706+#endif
4a4d8108 12707+
0c5527e5
AM
12708+ AuDebugOn(!inode_mark);
12709+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
12710+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 12711+
4a4d8108
AM
12712+out:
12713+ return err;
12714+}
1facf9fc 12715+
027c5e7a 12716+/* isn't it a waste to ask every registered 'group'? */
7f207e10 12717+/* copied from linux/fs/notify/inotify/inotify_fsnotify.c */
4a4d8108 12718+/* it should be exported to modules */
7f207e10
AM
12719+static bool au_hfsn_should_send_event(struct fsnotify_group *group,
12720+ struct inode *h_inode,
0c5527e5
AM
12721+ struct fsnotify_mark *inode_mark,
12722+ struct fsnotify_mark *vfsmount_mark,
12723+ __u32 mask, void *data, int data_type)
4a4d8108 12724+{
4a4d8108 12725+ mask = (mask & ~FS_EVENT_ON_CHILD);
7f207e10 12726+ return inode_mark->mask & mask;
4a4d8108
AM
12727+}
12728+
12729+static struct fsnotify_ops au_hfsn_ops = {
12730+ .should_send_event = au_hfsn_should_send_event,
12731+ .handle_event = au_hfsn_handle_event
12732+};
12733+
12734+/* ---------------------------------------------------------------------- */
12735+
027c5e7a
AM
12736+static void au_hfsn_fin_br(struct au_branch *br)
12737+{
12738+ if (br->br_hfsn_group)
12739+ fsnotify_put_group(br->br_hfsn_group);
12740+}
12741+
12742+static int au_hfsn_init_br(struct au_branch *br, int perm)
12743+{
12744+ br->br_hfsn_group = NULL;
12745+ br->br_hfsn_ops = au_hfsn_ops;
12746+ return 0;
12747+}
12748+
12749+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
4a4d8108
AM
12750+{
12751+ int err;
1facf9fc 12752+
4a4d8108 12753+ err = 0;
027c5e7a
AM
12754+ if (udba != AuOpt_UDBA_HNOTIFY
12755+ || !au_br_hnotifyable(perm)) {
12756+ au_hfsn_fin_br(br);
12757+ br->br_hfsn_group = NULL;
12758+ goto out;
12759+ }
12760+
12761+ if (br->br_hfsn_group)
12762+ goto out;
12763+
12764+ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops);
12765+ if (IS_ERR(br->br_hfsn_group)) {
12766+ err = PTR_ERR(br->br_hfsn_group);
0c5527e5 12767+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
027c5e7a 12768+ br->br_hfsn_group = NULL;
4a4d8108 12769+ }
1facf9fc 12770+
027c5e7a 12771+out:
1facf9fc 12772+ AuTraceErr(err);
12773+ return err;
12774+}
12775+
4a4d8108
AM
12776+const struct au_hnotify_op au_hnotify_op = {
12777+ .ctl = au_hfsn_ctl,
12778+ .alloc = au_hfsn_alloc,
12779+ .free = au_hfsn_free,
1facf9fc 12780+
027c5e7a
AM
12781+ .reset_br = au_hfsn_reset_br,
12782+ .fin_br = au_hfsn_fin_br,
12783+ .init_br = au_hfsn_init_br
4a4d8108 12784+};
7f207e10
AM
12785diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
12786--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12787+++ linux/fs/aufs/hfsplus.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12788@@ -0,0 +1,58 @@
12789+/*
027c5e7a 12790+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
12791+ *
12792+ * This program, aufs is free software; you can redistribute it and/or modify
12793+ * it under the terms of the GNU General Public License as published by
12794+ * the Free Software Foundation; either version 2 of the License, or
12795+ * (at your option) any later version.
12796+ *
12797+ * This program is distributed in the hope that it will be useful,
12798+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12799+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12800+ * GNU General Public License for more details.
12801+ *
12802+ * You should have received a copy of the GNU General Public License
12803+ * along with this program; if not, write to the Free Software
12804+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12805+ */
1facf9fc 12806+
4a4d8108
AM
12807+/*
12808+ * special support for filesystems which acquire an inode mutex
12809+ * at the final close of a file, eg, hfsplus.
12810+ *
12811+ * This trick is very simple and stupid, just to open the file before really
12812+ * necessary open to tell hfsplus that this is not the final closing.
12813+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
12814+ * and au_h_open_post() after releasing it.
12815+ */
1facf9fc 12816+
4a4d8108
AM
12817+#include <linux/file.h>
12818+#include "aufs.h"
1facf9fc 12819+
4a4d8108
AM
12820+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
12821+{
12822+ struct file *h_file;
12823+ struct dentry *h_dentry;
1facf9fc 12824+
4a4d8108
AM
12825+ h_dentry = au_h_dptr(dentry, bindex);
12826+ AuDebugOn(!h_dentry);
12827+ AuDebugOn(!h_dentry->d_inode);
12828+ IMustLock(h_dentry->d_inode);
12829+
12830+ h_file = NULL;
12831+ if (au_test_hfsplus(h_dentry->d_sb)
12832+ && S_ISREG(h_dentry->d_inode->i_mode))
12833+ h_file = au_h_open(dentry, bindex,
12834+ O_RDONLY | O_NOATIME | O_LARGEFILE,
12835+ /*file*/NULL);
12836+ return h_file;
1facf9fc 12837+}
12838+
4a4d8108
AM
12839+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12840+ struct file *h_file)
12841+{
12842+ if (h_file) {
12843+ fput(h_file);
12844+ au_sbr_put(dentry->d_sb, bindex);
12845+ }
12846+}
7f207e10
AM
12847diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
12848--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
12849+++ linux/fs/aufs/hnotify.c 2011-08-24 13:30:24.731313534 +0200
12850@@ -0,0 +1,712 @@
e49829fe 12851+/*
027c5e7a 12852+ * Copyright (C) 2005-2011 Junjiro R. Okajima
e49829fe
JR
12853+ *
12854+ * This program, aufs is free software; you can redistribute it and/or modify
12855+ * it under the terms of the GNU General Public License as published by
12856+ * the Free Software Foundation; either version 2 of the License, or
12857+ * (at your option) any later version.
12858+ *
12859+ * This program is distributed in the hope that it will be useful,
12860+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12861+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12862+ * GNU General Public License for more details.
12863+ *
12864+ * You should have received a copy of the GNU General Public License
12865+ * along with this program; if not, write to the Free Software
12866+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12867+ */
12868+
12869+/*
7f207e10 12870+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
12871+ */
12872+
12873+#include "aufs.h"
12874+
027c5e7a 12875+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
12876+{
12877+ int err;
7f207e10 12878+ struct au_hnotify *hn;
1facf9fc 12879+
4a4d8108
AM
12880+ err = -ENOMEM;
12881+ hn = au_cache_alloc_hnotify();
12882+ if (hn) {
12883+ hn->hn_aufs_inode = inode;
027c5e7a
AM
12884+ hinode->hi_notify = hn;
12885+ err = au_hnotify_op.alloc(hinode);
12886+ AuTraceErr(err);
12887+ if (unlikely(err)) {
12888+ hinode->hi_notify = NULL;
4a4d8108
AM
12889+ au_cache_free_hnotify(hn);
12890+ /*
12891+ * The upper dir was removed by udba, but the same named
12892+ * dir left. In this case, aufs assigns a new inode
12893+ * number and sets the monitor again.
12894+ * For the lower dir, the old monitor is still left.
12895+ */
12896+ if (err == -EEXIST)
12897+ err = 0;
12898+ }
1308ab2a 12899+ }
1308ab2a 12900+
027c5e7a 12901+ AuTraceErr(err);
1308ab2a 12902+ return err;
dece6358 12903+}
1facf9fc 12904+
4a4d8108 12905+void au_hn_free(struct au_hinode *hinode)
dece6358 12906+{
4a4d8108 12907+ struct au_hnotify *hn;
1facf9fc 12908+
4a4d8108
AM
12909+ hn = hinode->hi_notify;
12910+ if (hn) {
027c5e7a 12911+ au_hnotify_op.free(hinode);
4a4d8108
AM
12912+ au_cache_free_hnotify(hn);
12913+ hinode->hi_notify = NULL;
12914+ }
12915+}
dece6358 12916+
4a4d8108 12917+/* ---------------------------------------------------------------------- */
dece6358 12918+
4a4d8108
AM
12919+void au_hn_ctl(struct au_hinode *hinode, int do_set)
12920+{
12921+ if (hinode->hi_notify)
12922+ au_hnotify_op.ctl(hinode, do_set);
12923+}
12924+
12925+void au_hn_reset(struct inode *inode, unsigned int flags)
12926+{
12927+ aufs_bindex_t bindex, bend;
12928+ struct inode *hi;
12929+ struct dentry *iwhdentry;
1facf9fc 12930+
1308ab2a 12931+ bend = au_ibend(inode);
4a4d8108
AM
12932+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
12933+ hi = au_h_iptr(inode, bindex);
12934+ if (!hi)
12935+ continue;
1308ab2a 12936+
4a4d8108
AM
12937+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
12938+ iwhdentry = au_hi_wh(inode, bindex);
12939+ if (iwhdentry)
12940+ dget(iwhdentry);
12941+ au_igrab(hi);
12942+ au_set_h_iptr(inode, bindex, NULL, 0);
12943+ au_set_h_iptr(inode, bindex, au_igrab(hi),
12944+ flags & ~AuHi_XINO);
12945+ iput(hi);
12946+ dput(iwhdentry);
12947+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 12948+ }
1facf9fc 12949+}
12950+
1308ab2a 12951+/* ---------------------------------------------------------------------- */
1facf9fc 12952+
4a4d8108 12953+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 12954+{
4a4d8108
AM
12955+ int err;
12956+ aufs_bindex_t bindex, bend, bfound, bstart;
12957+ struct inode *h_i;
1facf9fc 12958+
4a4d8108
AM
12959+ err = 0;
12960+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
12961+ pr_warning("branch root dir was changed\n");
12962+ goto out;
12963+ }
1facf9fc 12964+
4a4d8108
AM
12965+ bfound = -1;
12966+ bend = au_ibend(inode);
12967+ bstart = au_ibstart(inode);
12968+#if 0 /* reserved for future use */
12969+ if (bindex == bend) {
12970+ /* keep this ino in rename case */
12971+ goto out;
12972+ }
12973+#endif
12974+ for (bindex = bstart; bindex <= bend; bindex++)
12975+ if (au_h_iptr(inode, bindex) == h_inode) {
12976+ bfound = bindex;
12977+ break;
12978+ }
12979+ if (bfound < 0)
1308ab2a 12980+ goto out;
1facf9fc 12981+
4a4d8108
AM
12982+ for (bindex = bstart; bindex <= bend; bindex++) {
12983+ h_i = au_h_iptr(inode, bindex);
12984+ if (!h_i)
12985+ continue;
1facf9fc 12986+
4a4d8108
AM
12987+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
12988+ /* ignore this error */
12989+ /* bad action? */
1facf9fc 12990+ }
1facf9fc 12991+
4a4d8108 12992+ /* children inode number will be broken */
1facf9fc 12993+
4f0767ce 12994+out:
4a4d8108
AM
12995+ AuTraceErr(err);
12996+ return err;
1facf9fc 12997+}
12998+
4a4d8108 12999+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 13000+{
4a4d8108
AM
13001+ int err, i, j, ndentry;
13002+ struct au_dcsub_pages dpages;
13003+ struct au_dpage *dpage;
13004+ struct dentry **dentries;
1facf9fc 13005+
4a4d8108
AM
13006+ err = au_dpages_init(&dpages, GFP_NOFS);
13007+ if (unlikely(err))
13008+ goto out;
13009+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
13010+ if (unlikely(err))
13011+ goto out_dpages;
1facf9fc 13012+
4a4d8108
AM
13013+ for (i = 0; i < dpages.ndpage; i++) {
13014+ dpage = dpages.dpages + i;
13015+ dentries = dpage->dentries;
13016+ ndentry = dpage->ndentry;
13017+ for (j = 0; j < ndentry; j++) {
13018+ struct dentry *d;
13019+
13020+ d = dentries[j];
13021+ if (IS_ROOT(d))
13022+ continue;
13023+
4a4d8108
AM
13024+ au_digen_dec(d);
13025+ if (d->d_inode)
13026+ /* todo: reset children xino?
13027+ cached children only? */
13028+ au_iigen_dec(d->d_inode);
1308ab2a 13029+ }
dece6358 13030+ }
1facf9fc 13031+
4f0767ce 13032+out_dpages:
4a4d8108 13033+ au_dpages_free(&dpages);
dece6358 13034+
027c5e7a 13035+#if 0
4a4d8108
AM
13036+ /* discard children */
13037+ dentry_unhash(dentry);
13038+ dput(dentry);
027c5e7a 13039+#endif
4f0767ce 13040+out:
dece6358
AM
13041+ return err;
13042+}
13043+
1308ab2a 13044+/*
4a4d8108 13045+ * return 0 if processed.
1308ab2a 13046+ */
4a4d8108
AM
13047+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
13048+ const unsigned int isdir)
dece6358 13049+{
1308ab2a 13050+ int err;
4a4d8108
AM
13051+ struct dentry *d;
13052+ struct qstr *dname;
1facf9fc 13053+
4a4d8108
AM
13054+ err = 1; /* non-zero means "not processed" */
13055+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13056+ pr_warning("branch root dir was changed\n");
13057+ err = 0;
13058+ goto out;
13059+ }
dece6358 13060+
4a4d8108
AM
13061+ if (!isdir) {
13062+ AuDebugOn(!name);
13063+ au_iigen_dec(inode);
027c5e7a 13064+ spin_lock(&inode->i_lock);
4a4d8108 13065+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
027c5e7a 13066+ spin_lock(&d->d_lock);
4a4d8108
AM
13067+ dname = &d->d_name;
13068+ if (dname->len != nlen
027c5e7a
AM
13069+ || memcmp(dname->name, name, nlen)) { /* skip unless length and bytes both match */
13070+ spin_unlock(&d->d_lock);
4a4d8108 13071+ continue;
027c5e7a 13072+ }
4a4d8108 13073+ err = 0;
4a4d8108
AM
13074+ au_digen_dec(d);
13075+ spin_unlock(&d->d_lock);
13076+ break;
1facf9fc 13077+ }
027c5e7a 13078+ spin_unlock(&inode->i_lock);
1308ab2a 13079+ } else {
027c5e7a 13080+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13081+ d = d_find_alias(inode);
13082+ if (!d) {
13083+ au_iigen_dec(inode);
13084+ goto out;
13085+ }
1facf9fc 13086+
027c5e7a 13087+ spin_lock(&d->d_lock);
4a4d8108 13088+ dname = &d->d_name;
027c5e7a
AM
13089+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
13090+ spin_unlock(&d->d_lock);
4a4d8108 13091+ err = hn_gen_tree(d);
027c5e7a
AM
13092+ spin_lock(&d->d_lock);
13093+ }
13094+ spin_unlock(&d->d_lock);
4a4d8108
AM
13095+ dput(d);
13096+ }
1facf9fc 13097+
4f0767ce 13098+out:
4a4d8108 13099+ AuTraceErr(err);
1308ab2a 13100+ return err;
13101+}
dece6358 13102+
4a4d8108 13103+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 13104+{
4a4d8108
AM
13105+ int err;
13106+ struct inode *inode;
1facf9fc 13107+
4a4d8108
AM
13108+ inode = dentry->d_inode;
13109+ if (IS_ROOT(dentry)
13110+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
13111+ ) {
13112+ pr_warning("branch root dir was changed\n");
13113+ return 0;
13114+ }
1308ab2a 13115+
4a4d8108
AM
13116+ err = 0;
13117+ if (!isdir) {
4a4d8108
AM
13118+ au_digen_dec(dentry);
13119+ if (inode)
13120+ au_iigen_dec(inode);
13121+ } else {
027c5e7a 13122+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13123+ if (inode)
13124+ err = hn_gen_tree(dentry);
13125+ }
13126+
13127+ AuTraceErr(err);
13128+ return err;
1facf9fc 13129+}
13130+
4a4d8108 13131+/* ---------------------------------------------------------------------- */
1facf9fc 13132+
4a4d8108
AM
13133+/* hnotify job flags */
13134+#define AuHnJob_XINO0 1
13135+#define AuHnJob_GEN (1 << 1)
13136+#define AuHnJob_DIRENT (1 << 2)
13137+#define AuHnJob_ISDIR (1 << 3)
13138+#define AuHnJob_TRYXINO0 (1 << 4)
13139+#define AuHnJob_MNTPNT (1 << 5)
13140+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
13141+#define au_fset_hnjob(flags, name) \
13142+ do { (flags) |= AuHnJob_##name; } while (0)
13143+#define au_fclr_hnjob(flags, name) \
13144+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 13145+
4a4d8108
AM
13146+enum {
13147+ AuHn_CHILD,
13148+ AuHn_PARENT,
13149+ AuHnLast
13150+};
1facf9fc 13151+
4a4d8108
AM
13152+struct au_hnotify_args {
13153+ struct inode *h_dir, *dir, *h_child_inode;
13154+ u32 mask;
13155+ unsigned int flags[AuHnLast];
13156+ unsigned int h_child_nlen;
13157+ char h_child_name[];
13158+};
1facf9fc 13159+
4a4d8108
AM
13160+struct hn_job_args {
13161+ unsigned int flags;
13162+ struct inode *inode, *h_inode, *dir, *h_dir;
13163+ struct dentry *dentry;
13164+ char *h_name;
13165+ int h_nlen;
13166+};
1308ab2a 13167+
4a4d8108
AM
13168+static int hn_job(struct hn_job_args *a)
13169+{
13170+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
1308ab2a 13171+
4a4d8108
AM
13172+ /* reset xino */
13173+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
13174+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 13175+
4a4d8108
AM
13176+ if (au_ftest_hnjob(a->flags, TRYXINO0)
13177+ && a->inode
13178+ && a->h_inode) {
13179+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
13180+ if (!a->h_inode->i_nlink)
13181+ hn_xino(a->inode, a->h_inode); /* ignore this error */
13182+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 13183+ }
1facf9fc 13184+
4a4d8108
AM
13185+ /* make the generation obsolete */
13186+ if (au_ftest_hnjob(a->flags, GEN)) {
13187+ int err = -1;
13188+ if (a->inode)
13189+ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
13190+ isdir);
13191+ if (err && a->dentry)
13192+ hn_gen_by_name(a->dentry, isdir);
13193+ /* ignore this error */
1facf9fc 13194+ }
1facf9fc 13195+
4a4d8108
AM
13196+ /* make dir entries obsolete */
13197+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
13198+ struct au_vdir *vdir;
1facf9fc 13199+
4a4d8108
AM
13200+ vdir = au_ivdir(a->inode);
13201+ if (vdir)
13202+ vdir->vd_jiffy = 0;
13203+ /* IMustLock(a->inode); */
13204+ /* a->inode->i_version++; */
13205+ }
1facf9fc 13206+
4a4d8108
AM
13207+ /* can do nothing but warn */
13208+ if (au_ftest_hnjob(a->flags, MNTPNT)
13209+ && a->dentry
13210+ && d_mountpoint(a->dentry))
13211+ pr_warning("mount-point %.*s is removed or renamed\n",
13212+ AuDLNPair(a->dentry));
1facf9fc 13213+
4a4d8108 13214+ return 0;
1308ab2a 13215+}
1facf9fc 13216+
1308ab2a 13217+/* ---------------------------------------------------------------------- */
1facf9fc 13218+
4a4d8108
AM
13219+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
13220+ struct inode *dir)
1308ab2a 13221+{
4a4d8108
AM
13222+ struct dentry *dentry, *d, *parent;
13223+ struct qstr *dname;
1308ab2a 13224+
4a4d8108
AM
13225+ parent = d_find_alias(dir);
13226+ if (!parent)
13227+ return NULL;
1308ab2a 13228+
4a4d8108 13229+ dentry = NULL;
027c5e7a 13230+ spin_lock(&parent->d_lock);
4a4d8108
AM
13231+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
13232+ /* AuDbg("%.*s\n", AuDLNPair(d)); */
027c5e7a 13233+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
13234+ dname = &d->d_name;
13235+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
13236+ goto cont_unlock;
13237+ if (au_di(d))
13238+ au_digen_dec(d);
13239+ else
13240+ goto cont_unlock;
13241+ if (d->d_count) {
13242+ dentry = dget_dlock(d);
4a4d8108 13243+ spin_unlock(&d->d_lock);
027c5e7a 13244+ break;
dece6358 13245+ }
1facf9fc 13246+
027c5e7a
AM
13247+ cont_unlock:
13248+ spin_unlock(&d->d_lock);
1308ab2a 13249+ }
027c5e7a 13250+ spin_unlock(&parent->d_lock);
4a4d8108 13251+ dput(parent);
1facf9fc 13252+
4a4d8108
AM
13253+ if (dentry)
13254+ di_write_lock_child(dentry);
1308ab2a 13255+
4a4d8108
AM
13256+ return dentry;
13257+}
dece6358 13258+
4a4d8108
AM
13259+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
13260+ aufs_bindex_t bindex, ino_t h_ino)
13261+{
13262+ struct inode *inode;
13263+ ino_t ino;
13264+ int err;
13265+
13266+ inode = NULL;
13267+ err = au_xino_read(sb, bindex, h_ino, &ino);
13268+ if (!err && ino)
13269+ inode = ilookup(sb, ino);
13270+ if (!inode)
13271+ goto out;
13272+
13273+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13274+ pr_warning("wrong root branch\n");
13275+ iput(inode);
13276+ inode = NULL;
13277+ goto out;
1308ab2a 13278+ }
13279+
4a4d8108 13280+ ii_write_lock_child(inode);
1308ab2a 13281+
4f0767ce 13282+out:
4a4d8108 13283+ return inode;
dece6358
AM
13284+}
13285+
4a4d8108 13286+static void au_hn_bh(void *_args)
1facf9fc 13287+{
4a4d8108
AM
13288+ struct au_hnotify_args *a = _args;
13289+ struct super_block *sb;
13290+ aufs_bindex_t bindex, bend, bfound;
13291+ unsigned char xino, try_iput;
1facf9fc 13292+ int err;
1308ab2a 13293+ struct inode *inode;
4a4d8108
AM
13294+ ino_t h_ino;
13295+ struct hn_job_args args;
13296+ struct dentry *dentry;
13297+ struct au_sbinfo *sbinfo;
1facf9fc 13298+
4a4d8108
AM
13299+ AuDebugOn(!_args);
13300+ AuDebugOn(!a->h_dir);
13301+ AuDebugOn(!a->dir);
13302+ AuDebugOn(!a->mask);
13303+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
13304+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
13305+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 13306+
4a4d8108
AM
13307+ inode = NULL;
13308+ dentry = NULL;
13309+ /*
13310+ * do not lock a->dir->i_mutex here
13311+ * because of d_revalidate() may cause a deadlock.
13312+ */
13313+ sb = a->dir->i_sb;
13314+ AuDebugOn(!sb);
13315+ sbinfo = au_sbi(sb);
13316+ AuDebugOn(!sbinfo);
7f207e10 13317+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 13318+
4a4d8108
AM
13319+ ii_read_lock_parent(a->dir);
13320+ bfound = -1;
13321+ bend = au_ibend(a->dir);
13322+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
13323+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
13324+ bfound = bindex;
13325+ break;
13326+ }
13327+ ii_read_unlock(a->dir);
13328+ if (unlikely(bfound < 0))
13329+ goto out;
1facf9fc 13330+
4a4d8108
AM
13331+ xino = !!au_opt_test(au_mntflags(sb), XINO);
13332+ h_ino = 0;
13333+ if (a->h_child_inode)
13334+ h_ino = a->h_child_inode->i_ino;
1facf9fc 13335+
4a4d8108
AM
13336+ if (a->h_child_nlen
13337+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
13338+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
13339+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
13340+ a->dir);
13341+ try_iput = 0;
13342+ if (dentry)
13343+ inode = dentry->d_inode;
13344+ if (xino && !inode && h_ino
13345+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
13346+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
13347+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
13348+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
13349+ try_iput = 1;
13350+ }
1facf9fc 13351+
4a4d8108
AM
13352+ args.flags = a->flags[AuHn_CHILD];
13353+ args.dentry = dentry;
13354+ args.inode = inode;
13355+ args.h_inode = a->h_child_inode;
13356+ args.dir = a->dir;
13357+ args.h_dir = a->h_dir;
13358+ args.h_name = a->h_child_name;
13359+ args.h_nlen = a->h_child_nlen;
13360+ err = hn_job(&args);
13361+ if (dentry) {
027c5e7a 13362+ if (au_di(dentry))
4a4d8108
AM
13363+ di_write_unlock(dentry);
13364+ dput(dentry);
13365+ }
13366+ if (inode && try_iput) {
13367+ ii_write_unlock(inode);
13368+ iput(inode);
13369+ }
1facf9fc 13370+
4a4d8108
AM
13371+ ii_write_lock_parent(a->dir);
13372+ args.flags = a->flags[AuHn_PARENT];
13373+ args.dentry = NULL;
13374+ args.inode = a->dir;
13375+ args.h_inode = a->h_dir;
13376+ args.dir = NULL;
13377+ args.h_dir = NULL;
13378+ args.h_name = NULL;
13379+ args.h_nlen = 0;
13380+ err = hn_job(&args);
13381+ ii_write_unlock(a->dir);
1facf9fc 13382+
4f0767ce 13383+out:
4a4d8108
AM
13384+ iput(a->h_child_inode);
13385+ iput(a->h_dir);
13386+ iput(a->dir);
027c5e7a
AM
13387+ si_write_unlock(sb);
13388+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 13389+ kfree(a);
dece6358 13390+}
1facf9fc 13391+
4a4d8108
AM
13392+/* ---------------------------------------------------------------------- */
13393+
13394+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
13395+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 13396+{
4a4d8108 13397+ int err, len;
53392da6 13398+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
13399+ unsigned char isdir, isroot, wh;
13400+ struct inode *dir;
13401+ struct au_hnotify_args *args;
13402+ char *p, *h_child_name;
dece6358 13403+
1308ab2a 13404+ err = 0;
4a4d8108
AM
13405+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
13406+ dir = igrab(hnotify->hn_aufs_inode);
13407+ if (!dir)
13408+ goto out;
1facf9fc 13409+
4a4d8108
AM
13410+ isroot = (dir->i_ino == AUFS_ROOT_INO);
13411+ wh = 0;
13412+ h_child_name = (void *)h_child_qstr->name;
13413+ len = h_child_qstr->len;
13414+ if (h_child_name) {
13415+ if (len > AUFS_WH_PFX_LEN
13416+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
13417+ h_child_name += AUFS_WH_PFX_LEN;
13418+ len -= AUFS_WH_PFX_LEN;
13419+ wh = 1;
13420+ }
1facf9fc 13421+ }
dece6358 13422+
4a4d8108
AM
13423+ isdir = 0;
13424+ if (h_child_inode)
13425+ isdir = !!S_ISDIR(h_child_inode->i_mode);
13426+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
13427+ flags[AuHn_CHILD] = 0;
13428+ if (isdir)
13429+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
13430+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
13431+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
13432+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
13433+ case FS_MOVED_FROM:
13434+ case FS_MOVED_TO:
13435+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
13436+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13437+ /*FALLTHROUGH*/
13438+ case FS_CREATE:
13439+ AuDebugOn(!h_child_name || !h_child_inode);
13440+ break;
1facf9fc 13441+
4a4d8108
AM
13442+ case FS_DELETE:
13443+ /*
13444+ * aufs never be able to get this child inode.
13445+ * revalidation should be in d_revalidate()
13446+ * by checking i_nlink, i_generation or d_unhashed().
13447+ */
13448+ AuDebugOn(!h_child_name);
13449+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
13450+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13451+ break;
dece6358 13452+
4a4d8108
AM
13453+ default:
13454+ AuDebugOn(1);
13455+ }
1308ab2a 13456+
4a4d8108
AM
13457+ if (wh)
13458+ h_child_inode = NULL;
1308ab2a 13459+
4a4d8108
AM
13460+ err = -ENOMEM;
13461+ /* iput() and kfree() will be called in au_hnotify() */
4a4d8108 13462+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
13463+ if (unlikely(!args)) {
13464+ AuErr1("no memory\n");
13465+ iput(dir);
13466+ goto out;
13467+ }
13468+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
13469+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
13470+ args->mask = mask;
13471+ args->dir = dir;
13472+ args->h_dir = igrab(h_dir);
13473+ if (h_child_inode)
13474+ h_child_inode = igrab(h_child_inode); /* can be NULL */
13475+ args->h_child_inode = h_child_inode;
13476+ args->h_child_nlen = len;
13477+ if (len) {
13478+ p = (void *)args;
13479+ p += sizeof(*args);
13480+ memcpy(p, h_child_name, len);
13481+ p[len] = 0;
1308ab2a 13482+ }
1308ab2a 13483+
53392da6
AM
13484+ f = 0;
13485+ if (!dir->i_nlink)
13486+ f = AuWkq_NEST;
13487+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
13488+ if (unlikely(err)) {
13489+ pr_err("wkq %d\n", err);
13490+ iput(args->h_child_inode);
13491+ iput(args->h_dir);
13492+ iput(args->dir);
13493+ kfree(args);
1facf9fc 13494+ }
1facf9fc 13495+
4a4d8108 13496+out:
1facf9fc 13497+ return err;
13498+}
13499+
027c5e7a
AM
13500+/* ---------------------------------------------------------------------- */
13501+
13502+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
13503+{
13504+ int err;
13505+
13506+ AuDebugOn(!(udba & AuOptMask_UDBA));
13507+
13508+ err = 0;
13509+ if (au_hnotify_op.reset_br)
13510+ err = au_hnotify_op.reset_br(udba, br, perm);
13511+
13512+ return err;
13513+}
13514+
13515+int au_hnotify_init_br(struct au_branch *br, int perm)
13516+{
13517+ int err;
13518+
13519+ err = 0;
13520+ if (au_hnotify_op.init_br)
13521+ err = au_hnotify_op.init_br(br, perm);
13522+
13523+ return err;
13524+}
13525+
13526+void au_hnotify_fin_br(struct au_branch *br)
13527+{
13528+ if (au_hnotify_op.fin_br)
13529+ au_hnotify_op.fin_br(br);
13530+}
13531+
4a4d8108
AM
13532+static void au_hn_destroy_cache(void)
13533+{
13534+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
13535+ au_cachep[AuCache_HNOTIFY] = NULL;
13536+}
1308ab2a 13537+
4a4d8108 13538+int __init au_hnotify_init(void)
1facf9fc 13539+{
1308ab2a 13540+ int err;
1308ab2a 13541+
4a4d8108
AM
13542+ err = -ENOMEM;
13543+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
13544+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
13545+ err = 0;
13546+ if (au_hnotify_op.init)
13547+ err = au_hnotify_op.init();
4a4d8108
AM
13548+ if (unlikely(err))
13549+ au_hn_destroy_cache();
1308ab2a 13550+ }
1308ab2a 13551+ AuTraceErr(err);
4a4d8108 13552+ return err;
1308ab2a 13553+}
13554+
4a4d8108 13555+void au_hnotify_fin(void)
1308ab2a 13556+{
027c5e7a
AM
13557+ if (au_hnotify_op.fin)
13558+ au_hnotify_op.fin();
4a4d8108
AM
13559+ /* cf. au_cache_fin() */
13560+ if (au_cachep[AuCache_HNOTIFY])
13561+ au_hn_destroy_cache();
dece6358 13562+}
7f207e10
AM
13563diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
13564--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13565+++ linux/fs/aufs/iinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13566@@ -0,0 +1,264 @@
dece6358 13567+/*
027c5e7a 13568+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
13569+ *
13570+ * This program, aufs is free software; you can redistribute it and/or modify
13571+ * it under the terms of the GNU General Public License as published by
13572+ * the Free Software Foundation; either version 2 of the License, or
13573+ * (at your option) any later version.
13574+ *
13575+ * This program is distributed in the hope that it will be useful,
13576+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13577+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13578+ * GNU General Public License for more details.
13579+ *
13580+ * You should have received a copy of the GNU General Public License
13581+ * along with this program; if not, write to the Free Software
13582+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13583+ */
1facf9fc 13584+
dece6358 13585+/*
4a4d8108 13586+ * inode private data
dece6358 13587+ */
1facf9fc 13588+
1308ab2a 13589+#include "aufs.h"
1facf9fc 13590+
4a4d8108 13591+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 13592+{
4a4d8108 13593+ struct inode *h_inode;
1facf9fc 13594+
4a4d8108 13595+ IiMustAnyLock(inode);
1facf9fc 13596+
4a4d8108
AM
13597+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
13598+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13599+ return h_inode;
13600+}
1facf9fc 13601+
4a4d8108
AM
13602+/* todo: hard/soft set? */
13603+void au_hiput(struct au_hinode *hinode)
13604+{
13605+ au_hn_free(hinode);
13606+ dput(hinode->hi_whdentry);
13607+ iput(hinode->hi_inode);
13608+}
1facf9fc 13609+
4a4d8108
AM
13610+unsigned int au_hi_flags(struct inode *inode, int isdir)
13611+{
13612+ unsigned int flags;
13613+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 13614+
4a4d8108
AM
13615+ flags = 0;
13616+ if (au_opt_test(mnt_flags, XINO))
13617+ au_fset_hi(flags, XINO);
13618+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
13619+ au_fset_hi(flags, HNOTIFY);
13620+ return flags;
1facf9fc 13621+}
13622+
4a4d8108
AM
13623+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
13624+ struct inode *h_inode, unsigned int flags)
1308ab2a 13625+{
4a4d8108
AM
13626+ struct au_hinode *hinode;
13627+ struct inode *hi;
13628+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 13629+
4a4d8108 13630+ IiMustWriteLock(inode);
dece6358 13631+
4a4d8108
AM
13632+ hinode = iinfo->ii_hinode + bindex;
13633+ hi = hinode->hi_inode;
13634+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13635+
13636+ if (hi)
13637+ au_hiput(hinode);
13638+ hinode->hi_inode = h_inode;
13639+ if (h_inode) {
13640+ int err;
13641+ struct super_block *sb = inode->i_sb;
13642+ struct au_branch *br;
13643+
027c5e7a
AM
13644+ AuDebugOn(inode->i_mode
13645+ && (h_inode->i_mode & S_IFMT)
13646+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
13647+ if (bindex == iinfo->ii_bstart)
13648+ au_cpup_igen(inode, h_inode);
13649+ br = au_sbr(sb, bindex);
13650+ hinode->hi_id = br->br_id;
13651+ if (au_ftest_hi(flags, XINO)) {
13652+ err = au_xino_write(sb, bindex, h_inode->i_ino,
13653+ inode->i_ino);
13654+ if (unlikely(err))
13655+ AuIOErr1("failed au_xino_write() %d\n", err);
13656+ }
13657+
13658+ if (au_ftest_hi(flags, HNOTIFY)
13659+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 13660+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
13661+ if (unlikely(err))
13662+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 13663+ }
13664+ }
4a4d8108 13665+}
dece6358 13666+
4a4d8108
AM
13667+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
13668+ struct dentry *h_wh)
13669+{
13670+ struct au_hinode *hinode;
dece6358 13671+
4a4d8108
AM
13672+ IiMustWriteLock(inode);
13673+
13674+ hinode = au_ii(inode)->ii_hinode + bindex;
13675+ AuDebugOn(hinode->hi_whdentry);
13676+ hinode->hi_whdentry = h_wh;
1facf9fc 13677+}
13678+
4a4d8108 13679+void au_update_iigen(struct inode *inode)
1308ab2a 13680+{
4a4d8108
AM
13681+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
13682+ /* smp_mb(); */ /* atomic_set */
13683+}
1facf9fc 13684+
4a4d8108
AM
13685+/* it may be called at remount time, too */
13686+void au_update_ibrange(struct inode *inode, int do_put_zero)
13687+{
13688+ struct au_iinfo *iinfo;
027c5e7a 13689+ aufs_bindex_t bindex, bend;
1facf9fc 13690+
4a4d8108 13691+ iinfo = au_ii(inode);
027c5e7a 13692+ if (!iinfo)
4a4d8108 13693+ return;
1facf9fc 13694+
4a4d8108 13695+ IiMustWriteLock(inode);
1facf9fc 13696+
027c5e7a 13697+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
13698+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13699+ bindex++) {
13700+ struct inode *h_i;
1facf9fc 13701+
4a4d8108 13702+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
027c5e7a
AM
13703+ if (h_i && !h_i->i_nlink)
13704+ au_set_h_iptr(inode, bindex, NULL, 0);
13705+ }
4a4d8108
AM
13706+ }
13707+
027c5e7a
AM
13708+ iinfo->ii_bstart = -1;
13709+ iinfo->ii_bend = -1;
13710+ bend = au_sbend(inode->i_sb);
13711+ for (bindex = 0; bindex <= bend; bindex++)
13712+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13713+ iinfo->ii_bstart = bindex;
4a4d8108 13714+ break;
027c5e7a
AM
13715+ }
13716+ if (iinfo->ii_bstart >= 0)
13717+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
13718+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13719+ iinfo->ii_bend = bindex;
13720+ break;
13721+ }
13722+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 13723+}
1facf9fc 13724+
dece6358 13725+/* ---------------------------------------------------------------------- */
1facf9fc 13726+
4a4d8108 13727+void au_icntnr_init_once(void *_c)
dece6358 13728+{
4a4d8108
AM
13729+ struct au_icntnr *c = _c;
13730+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 13731+ static struct lock_class_key aufs_ii;
1facf9fc 13732+
4a4d8108 13733+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 13734+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
13735+ inode_init_once(&c->vfs_inode);
13736+}
1facf9fc 13737+
4a4d8108
AM
13738+int au_iinfo_init(struct inode *inode)
13739+{
13740+ struct au_iinfo *iinfo;
13741+ struct super_block *sb;
13742+ int nbr, i;
1facf9fc 13743+
4a4d8108
AM
13744+ sb = inode->i_sb;
13745+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
13746+ nbr = au_sbend(sb) + 1;
13747+ if (unlikely(nbr <= 0))
13748+ nbr = 1;
13749+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
13750+ if (iinfo->ii_hinode) {
7f207e10 13751+ au_ninodes_inc(sb);
4a4d8108
AM
13752+ for (i = 0; i < nbr; i++)
13753+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 13754+
4a4d8108
AM
13755+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
13756+ /* smp_mb(); */ /* atomic_set */
13757+ iinfo->ii_bstart = -1;
13758+ iinfo->ii_bend = -1;
13759+ iinfo->ii_vdir = NULL;
13760+ return 0;
1308ab2a 13761+ }
4a4d8108
AM
13762+ return -ENOMEM;
13763+}
1facf9fc 13764+
4a4d8108
AM
13765+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
13766+{
13767+ int err, sz;
13768+ struct au_hinode *hip;
1facf9fc 13769+
4a4d8108
AM
13770+ AuRwMustWriteLock(&iinfo->ii_rwsem);
13771+
13772+ err = -ENOMEM;
13773+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
13774+ if (!sz)
13775+ sz = sizeof(*hip);
13776+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
13777+ if (hip) {
13778+ iinfo->ii_hinode = hip;
13779+ err = 0;
1308ab2a 13780+ }
4a4d8108 13781+
1308ab2a 13782+ return err;
1facf9fc 13783+}
13784+
4a4d8108 13785+void au_iinfo_fin(struct inode *inode)
1facf9fc 13786+{
4a4d8108
AM
13787+ struct au_iinfo *iinfo;
13788+ struct au_hinode *hi;
13789+ struct super_block *sb;
b752ccd1
AM
13790+ aufs_bindex_t bindex, bend;
13791+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 13792+
4a4d8108
AM
13793+ iinfo = au_ii(inode);
13794+ /* bad_inode case */
13795+ if (!iinfo)
13796+ return;
1308ab2a 13797+
b752ccd1 13798+ sb = inode->i_sb;
7f207e10 13799+ au_ninodes_dec(sb);
b752ccd1
AM
13800+ if (si_pid_test(sb))
13801+ au_xino_delete_inode(inode, unlinked);
13802+ else {
13803+ /*
13804+ * it is safe to hide the dependency between sbinfo and
13805+ * sb->s_umount.
13806+ */
13807+ lockdep_off();
13808+ si_noflush_read_lock(sb);
13809+ au_xino_delete_inode(inode, unlinked);
13810+ si_read_unlock(sb);
13811+ lockdep_on();
13812+ }
13813+
4a4d8108
AM
13814+ if (iinfo->ii_vdir)
13815+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 13816+
b752ccd1
AM
13817+ bindex = iinfo->ii_bstart;
13818+ if (bindex >= 0) {
13819+ hi = iinfo->ii_hinode + bindex;
4a4d8108 13820+ bend = iinfo->ii_bend;
b752ccd1
AM
13821+ while (bindex++ <= bend) {
13822+ if (hi->hi_inode)
4a4d8108 13823+ au_hiput(hi);
4a4d8108
AM
13824+ hi++;
13825+ }
13826+ }
4a4d8108 13827+ kfree(iinfo->ii_hinode);
027c5e7a 13828+ iinfo->ii_hinode = NULL;
4a4d8108 13829+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 13830+}
7f207e10
AM
13831diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
13832--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13833+++ linux/fs/aufs/inode.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13834@@ -0,0 +1,471 @@
4a4d8108 13835+/*
027c5e7a 13836+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
13837+ *
13838+ * This program, aufs is free software; you can redistribute it and/or modify
13839+ * it under the terms of the GNU General Public License as published by
13840+ * the Free Software Foundation; either version 2 of the License, or
13841+ * (at your option) any later version.
13842+ *
13843+ * This program is distributed in the hope that it will be useful,
13844+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13845+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13846+ * GNU General Public License for more details.
13847+ *
13848+ * You should have received a copy of the GNU General Public License
13849+ * along with this program; if not, write to the Free Software
13850+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13851+ */
1facf9fc 13852+
4a4d8108
AM
13853+/*
13854+ * inode functions
13855+ */
1facf9fc 13856+
4a4d8108 13857+#include "aufs.h"
1308ab2a 13858+
4a4d8108
AM
13859+struct inode *au_igrab(struct inode *inode)
13860+{
13861+ if (inode) {
13862+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 13863+ ihold(inode);
1facf9fc 13864+ }
4a4d8108
AM
13865+ return inode;
13866+}
1facf9fc 13867+
4a4d8108
AM
13868+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
13869+{
13870+ au_cpup_attr_all(inode, /*force*/0);
13871+ au_update_iigen(inode);
13872+ if (do_version)
13873+ inode->i_version++;
dece6358 13874+}
1facf9fc 13875+
027c5e7a 13876+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 13877+{
4a4d8108 13878+ int err, e;
027c5e7a 13879+ umode_t type;
4a4d8108 13880+ aufs_bindex_t bindex, new_bindex;
1308ab2a 13881+ struct super_block *sb;
4a4d8108 13882+ struct au_iinfo *iinfo;
027c5e7a 13883+ struct au_hinode *p, *q, tmp;
1facf9fc 13884+
4a4d8108 13885+ IiMustWriteLock(inode);
1facf9fc 13886+
027c5e7a 13887+ *update = 0;
4a4d8108 13888+ sb = inode->i_sb;
027c5e7a 13889+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
13890+ iinfo = au_ii(inode);
13891+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
13892+ if (unlikely(err))
1308ab2a 13893+ goto out;
1facf9fc 13894+
027c5e7a 13895+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 13896+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
13897+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13898+ bindex++, p++) {
13899+ if (!p->hi_inode)
13900+ continue;
1facf9fc 13901+
027c5e7a 13902+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
13903+ new_bindex = au_br_index(sb, p->hi_id);
13904+ if (new_bindex == bindex)
13905+ continue;
1facf9fc 13906+
4a4d8108 13907+ if (new_bindex < 0) {
027c5e7a 13908+ *update = 1;
4a4d8108
AM
13909+ au_hiput(p);
13910+ p->hi_inode = NULL;
13911+ continue;
1308ab2a 13912+ }
4a4d8108
AM
13913+
13914+ if (new_bindex < iinfo->ii_bstart)
13915+ iinfo->ii_bstart = new_bindex;
13916+ if (iinfo->ii_bend < new_bindex)
13917+ iinfo->ii_bend = new_bindex;
13918+ /* swap two lower inode, and loop again */
13919+ q = iinfo->ii_hinode + new_bindex;
13920+ tmp = *q;
13921+ *q = *p;
13922+ *p = tmp;
13923+ if (tmp.hi_inode) {
13924+ bindex--;
13925+ p--;
1308ab2a 13926+ }
13927+ }
4a4d8108
AM
13928+ au_update_ibrange(inode, /*do_put_zero*/0);
13929+ e = au_dy_irefresh(inode);
13930+ if (unlikely(e && !err))
13931+ err = e;
1facf9fc 13932+
4f0767ce 13933+out:
027c5e7a
AM
13934+ AuTraceErr(err);
13935+ return err;
13936+}
13937+
13938+int au_refresh_hinode_self(struct inode *inode)
13939+{
13940+ int err, update;
13941+
13942+ err = au_ii_refresh(inode, &update);
13943+ if (!err)
13944+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
13945+
13946+ AuTraceErr(err);
4a4d8108
AM
13947+ return err;
13948+}
1facf9fc 13949+
4a4d8108
AM
13950+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
13951+{
027c5e7a 13952+ int err, e, update;
4a4d8108 13953+ unsigned int flags;
027c5e7a 13954+ umode_t mode;
4a4d8108 13955+ aufs_bindex_t bindex, bend;
027c5e7a 13956+ unsigned char isdir;
4a4d8108
AM
13957+ struct au_hinode *p;
13958+ struct au_iinfo *iinfo;
1facf9fc 13959+
027c5e7a 13960+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
13961+ if (unlikely(err))
13962+ goto out;
13963+
13964+ update = 0;
13965+ iinfo = au_ii(inode);
13966+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
13967+ mode = (inode->i_mode & S_IFMT);
13968+ isdir = S_ISDIR(mode);
4a4d8108
AM
13969+ flags = au_hi_flags(inode, isdir);
13970+ bend = au_dbend(dentry);
13971+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
13972+ struct inode *h_i;
13973+ struct dentry *h_d;
13974+
13975+ h_d = au_h_dptr(dentry, bindex);
13976+ if (!h_d || !h_d->d_inode)
13977+ continue;
13978+
027c5e7a 13979+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
4a4d8108
AM
13980+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
13981+ h_i = au_h_iptr(inode, bindex);
13982+ if (h_i) {
13983+ if (h_i == h_d->d_inode)
13984+ continue;
13985+ err = -EIO;
13986+ break;
13987+ }
13988+ }
13989+ if (bindex < iinfo->ii_bstart)
13990+ iinfo->ii_bstart = bindex;
13991+ if (iinfo->ii_bend < bindex)
13992+ iinfo->ii_bend = bindex;
13993+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
13994+ update = 1;
1308ab2a 13995+ }
4a4d8108
AM
13996+ au_update_ibrange(inode, /*do_put_zero*/0);
13997+ e = au_dy_irefresh(inode);
13998+ if (unlikely(e && !err))
13999+ err = e;
027c5e7a
AM
14000+ if (!err)
14001+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 14002+
4f0767ce 14003+out:
4a4d8108 14004+ AuTraceErr(err);
1308ab2a 14005+ return err;
dece6358
AM
14006+}
14007+
4a4d8108 14008+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 14009+{
4a4d8108
AM
14010+ int err;
14011+ unsigned int flags;
14012+ umode_t mode;
14013+ aufs_bindex_t bindex, bstart, btail;
14014+ unsigned char isdir;
14015+ struct dentry *h_dentry;
14016+ struct inode *h_inode;
14017+ struct au_iinfo *iinfo;
dece6358 14018+
4a4d8108 14019+ IiMustWriteLock(inode);
dece6358 14020+
4a4d8108
AM
14021+ err = 0;
14022+ isdir = 0;
14023+ bstart = au_dbstart(dentry);
14024+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
14025+ mode = h_inode->i_mode;
14026+ switch (mode & S_IFMT) {
14027+ case S_IFREG:
14028+ btail = au_dbtail(dentry);
14029+ inode->i_op = &aufs_iop;
14030+ inode->i_fop = &aufs_file_fop;
14031+ err = au_dy_iaop(inode, bstart, h_inode);
14032+ if (unlikely(err))
14033+ goto out;
14034+ break;
14035+ case S_IFDIR:
14036+ isdir = 1;
14037+ btail = au_dbtaildir(dentry);
14038+ inode->i_op = &aufs_dir_iop;
14039+ inode->i_fop = &aufs_dir_fop;
14040+ break;
14041+ case S_IFLNK:
14042+ btail = au_dbtail(dentry);
14043+ inode->i_op = &aufs_symlink_iop;
14044+ break;
14045+ case S_IFBLK:
14046+ case S_IFCHR:
14047+ case S_IFIFO:
14048+ case S_IFSOCK:
14049+ btail = au_dbtail(dentry);
14050+ inode->i_op = &aufs_iop;
14051+ au_init_special_fop(inode, mode, h_inode->i_rdev);
14052+ break;
14053+ default:
14054+ AuIOErr("Unknown file type 0%o\n", mode);
14055+ err = -EIO;
1308ab2a 14056+ goto out;
4a4d8108 14057+ }
dece6358 14058+
4a4d8108
AM
14059+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
14060+ flags = au_hi_flags(inode, isdir);
14061+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14062+ && au_ftest_hi(flags, HNOTIFY)
14063+ && dentry->d_name.len > AUFS_WH_PFX_LEN
14064+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14065+ au_fclr_hi(flags, HNOTIFY);
14066+ iinfo = au_ii(inode);
14067+ iinfo->ii_bstart = bstart;
14068+ iinfo->ii_bend = btail;
14069+ for (bindex = bstart; bindex <= btail; bindex++) {
14070+ h_dentry = au_h_dptr(dentry, bindex);
14071+ if (h_dentry)
14072+ au_set_h_iptr(inode, bindex,
14073+ au_igrab(h_dentry->d_inode), flags);
14074+ }
14075+ au_cpup_attr_all(inode, /*force*/1);
dece6358 14076+
4f0767ce 14077+out:
4a4d8108
AM
14078+ return err;
14079+}
dece6358 14080+
027c5e7a
AM
14081+/*
14082+ * successful returns with iinfo write_locked
14083+ * minus: errno
14084+ * zero: success, matched
14085+ * plus: no error, but unmatched
14086+ */
14087+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
14088+{
14089+ int err;
14090+ aufs_bindex_t bindex, bend;
14091+ struct inode *h_inode, *h_dinode;
dece6358 14092+
4a4d8108
AM
14093+ /*
14094+ * before this function, if aufs got any iinfo lock, it must be only
14095+ * one, the parent dir.
14096+ * it can happen by UDBA and the obsoleted inode number.
14097+ */
14098+ err = -EIO;
14099+ if (unlikely(inode->i_ino == parent_ino(dentry)))
14100+ goto out;
14101+
027c5e7a 14102+ err = 1;
4a4d8108
AM
14103+ ii_write_lock_new_child(inode);
14104+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14105+ bend = au_ibend(inode);
14106+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14107+ h_inode = au_h_iptr(inode, bindex);
14108+ if (h_inode && h_inode == h_dinode) {
4a4d8108 14109+ err = 0;
027c5e7a 14110+ if (au_iigen_test(inode, au_digen(dentry)))
4a4d8108
AM
14111+ err = au_refresh_hinode(inode, dentry);
14112+ break;
1308ab2a 14113+ }
1facf9fc 14114+ }
dece6358 14115+
4a4d8108
AM
14116+ if (unlikely(err))
14117+ ii_write_unlock(inode);
4f0767ce 14118+out:
1facf9fc 14119+ return err;
14120+}
1facf9fc 14121+
4a4d8108
AM
14122+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14123+ unsigned int d_type, ino_t *ino)
1facf9fc 14124+{
4a4d8108
AM
14125+ int err;
14126+ struct mutex *mtx;
1facf9fc 14127+
b752ccd1 14128+ /* prevent hardlinked inode number from race condition */
4a4d8108 14129+ mtx = NULL;
b752ccd1 14130+ if (d_type != DT_DIR) {
4a4d8108
AM
14131+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14132+ mutex_lock(mtx);
14133+ }
14134+ err = au_xino_read(sb, bindex, h_ino, ino);
14135+ if (unlikely(err))
14136+ goto out;
1308ab2a 14137+
4a4d8108
AM
14138+ if (!*ino) {
14139+ err = -EIO;
14140+ *ino = au_xino_new_ino(sb);
14141+ if (unlikely(!*ino))
1facf9fc 14142+ goto out;
4a4d8108
AM
14143+ err = au_xino_write(sb, bindex, h_ino, *ino);
14144+ if (unlikely(err))
1308ab2a 14145+ goto out;
1308ab2a 14146+ }
1facf9fc 14147+
4f0767ce 14148+out:
b752ccd1 14149+ if (mtx)
4a4d8108 14150+ mutex_unlock(mtx);
1facf9fc 14151+ return err;
14152+}
14153+
4a4d8108
AM
14154+/* successful returns with iinfo write_locked */
14155+/* todo: return with unlocked? */
14156+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 14157+{
b752ccd1 14158+ struct inode *inode, *h_inode;
4a4d8108
AM
14159+ struct dentry *h_dentry;
14160+ struct super_block *sb;
b752ccd1 14161+ struct mutex *mtx;
4a4d8108 14162+ ino_t h_ino, ino;
027c5e7a 14163+ int err;
4a4d8108 14164+ aufs_bindex_t bstart;
1facf9fc 14165+
4a4d8108
AM
14166+ sb = dentry->d_sb;
14167+ bstart = au_dbstart(dentry);
14168+ h_dentry = au_h_dptr(dentry, bstart);
b752ccd1
AM
14169+ h_inode = h_dentry->d_inode;
14170+ h_ino = h_inode->i_ino;
14171+
14172+ /*
14173+ * stop 'race'-ing between hardlinks under different
14174+ * parents.
14175+ */
14176+ mtx = NULL;
14177+ if (!S_ISDIR(h_inode->i_mode))
14178+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
14179+
4f0767ce 14180+new_ino:
b752ccd1
AM
14181+ if (mtx)
14182+ mutex_lock(mtx);
4a4d8108
AM
14183+ err = au_xino_read(sb, bstart, h_ino, &ino);
14184+ inode = ERR_PTR(err);
14185+ if (unlikely(err))
14186+ goto out;
b752ccd1 14187+
4a4d8108
AM
14188+ if (!ino) {
14189+ ino = au_xino_new_ino(sb);
14190+ if (unlikely(!ino)) {
14191+ inode = ERR_PTR(-EIO);
dece6358
AM
14192+ goto out;
14193+ }
14194+ }
1facf9fc 14195+
4a4d8108
AM
14196+ AuDbg("i%lu\n", (unsigned long)ino);
14197+ inode = au_iget_locked(sb, ino);
14198+ err = PTR_ERR(inode);
14199+ if (IS_ERR(inode))
1facf9fc 14200+ goto out;
1facf9fc 14201+
4a4d8108
AM
14202+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14203+ if (inode->i_state & I_NEW) {
14204+ ii_write_lock_new_child(inode);
14205+ err = set_inode(inode, dentry);
14206+ if (!err) {
14207+ unlock_new_inode(inode);
14208+ goto out; /* success */
14209+ }
1308ab2a 14210+
027c5e7a
AM
14211+ /*
14212+ * iget_failed() calls iput(), but we need to call
14213+ * ii_write_unlock() after iget_failed(). so dirty hack for
14214+ * i_count.
14215+ */
14216+ atomic_inc(&inode->i_count);
4a4d8108 14217+ iget_failed(inode);
027c5e7a
AM
14218+ ii_write_unlock(inode);
14219+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
14220+ /* ignore this error */
14221+ goto out_iput;
14222+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
14223+ /*
14224+ * horrible race condition between lookup, readdir and copyup
14225+ * (or something).
14226+ */
14227+ if (mtx)
14228+ mutex_unlock(mtx);
027c5e7a
AM
14229+ err = reval_inode(inode, dentry);
14230+ if (unlikely(err < 0)) {
14231+ mtx = NULL;
14232+ goto out_iput;
14233+ }
14234+
b752ccd1
AM
14235+ if (!err) {
14236+ mtx = NULL;
4a4d8108 14237+ goto out; /* success */
b752ccd1
AM
14238+ } else if (mtx)
14239+ mutex_lock(mtx);
4a4d8108
AM
14240+ }
14241+
14242+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14243+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14244+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
14245+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14246+ (unsigned long)h_ino, (unsigned long)ino);
14247+ ino = 0;
14248+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
14249+ if (!err) {
14250+ iput(inode);
b752ccd1
AM
14251+ if (mtx)
14252+ mutex_unlock(mtx);
4a4d8108
AM
14253+ goto new_ino;
14254+ }
1308ab2a 14255+
4f0767ce 14256+out_iput:
4a4d8108 14257+ iput(inode);
4a4d8108 14258+ inode = ERR_PTR(err);
4f0767ce 14259+out:
b752ccd1
AM
14260+ if (mtx)
14261+ mutex_unlock(mtx);
4a4d8108 14262+ return inode;
1facf9fc 14263+}
14264+
4a4d8108 14265+/* ---------------------------------------------------------------------- */
1facf9fc 14266+
4a4d8108
AM
14267+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14268+ struct inode *inode)
14269+{
14270+ int err;
1facf9fc 14271+
4a4d8108 14272+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 14273+
4a4d8108
AM
14274+ /* pseudo-link after flushed may happen out of bounds */
14275+ if (!err
14276+ && inode
14277+ && au_ibstart(inode) <= bindex
14278+ && bindex <= au_ibend(inode)) {
14279+ /*
14280+ * permission check is unnecessary since vfsub routine
14281+ * will be called later
14282+ */
14283+ struct inode *hi = au_h_iptr(inode, bindex);
14284+ if (hi)
14285+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 14286+ }
14287+
4a4d8108
AM
14288+ return err;
14289+}
dece6358 14290+
4a4d8108
AM
/*
 * Permission test on the lower inode @h_inode.
 * Returns 0 without testing when the current fsuid is zero, otherwise the
 * result of inode_permission().
 */
int au_test_h_perm(struct inode *h_inode, int mask)
{
	if (current_fsuid())
		return inode_permission(h_inode, mask);

	return 0;
}
1facf9fc 14297+
4a4d8108
AM
14298+int au_test_h_perm_sio(struct inode *h_inode, int mask)
14299+{
14300+ if (au_test_nfs(h_inode->i_sb)
14301+ && (mask & MAY_WRITE)
14302+ && S_ISDIR(h_inode->i_mode))
14303+ mask |= MAY_READ; /* force permission check */
14304+ return au_test_h_perm(h_inode, mask);
1facf9fc 14305+}
7f207e10
AM
14306diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
14307--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
14308+++ linux/fs/aufs/inode.h 2011-08-24 13:30:24.734646739 +0200
14309@@ -0,0 +1,556 @@
4a4d8108 14310+/*
027c5e7a 14311+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14312+ *
14313+ * This program, aufs is free software; you can redistribute it and/or modify
14314+ * it under the terms of the GNU General Public License as published by
14315+ * the Free Software Foundation; either version 2 of the License, or
14316+ * (at your option) any later version.
14317+ *
14318+ * This program is distributed in the hope that it will be useful,
14319+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14320+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14321+ * GNU General Public License for more details.
14322+ *
14323+ * You should have received a copy of the GNU General Public License
14324+ * along with this program; if not, write to the Free Software
14325+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14326+ */
1facf9fc 14327+
1308ab2a 14328+/*
4a4d8108 14329+ * inode operations
1308ab2a 14330+ */
dece6358 14331+
4a4d8108
AM
14332+#ifndef __AUFS_INODE_H__
14333+#define __AUFS_INODE_H__
dece6358 14334+
4a4d8108 14335+#ifdef __KERNEL__
1308ab2a 14336+
4a4d8108
AM
14337+#include <linux/fs.h>
14338+#include <linux/fsnotify.h>
14339+#include <linux/aufs_type.h>
14340+#include "rwsem.h"
1308ab2a 14341+
4a4d8108 14342+struct vfsmount;
1facf9fc 14343+
4a4d8108
AM
14344+struct au_hnotify {
14345+#ifdef CONFIG_AUFS_HNOTIFY
14346+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 14347+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 14348+ struct fsnotify_mark hn_mark;
7f207e10 14349+ int hn_mark_dead;
4a4d8108 14350+#endif
7f207e10 14351+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
14352+#endif
14353+} ____cacheline_aligned_in_smp;
1facf9fc 14354+
4a4d8108
AM
14355+struct au_hinode {
14356+ struct inode *hi_inode;
14357+ aufs_bindex_t hi_id;
14358+#ifdef CONFIG_AUFS_HNOTIFY
14359+ struct au_hnotify *hi_notify;
14360+#endif
dece6358 14361+
4a4d8108
AM
14362+ /* reference to the copied-up whiteout with get/put */
14363+ struct dentry *hi_whdentry;
14364+};
dece6358 14365+
4a4d8108
AM
14366+struct au_vdir;
14367+struct au_iinfo {
14368+ atomic_t ii_generation;
14369+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 14370+
4a4d8108
AM
14371+ struct au_rwsem ii_rwsem;
14372+ aufs_bindex_t ii_bstart, ii_bend;
14373+ __u32 ii_higen;
14374+ struct au_hinode *ii_hinode;
14375+ struct au_vdir *ii_vdir;
14376+};
1facf9fc 14377+
4a4d8108
AM
14378+struct au_icntnr {
14379+ struct au_iinfo iinfo;
14380+ struct inode vfs_inode;
14381+} ____cacheline_aligned_in_smp;
1308ab2a 14382+
4a4d8108
AM
14383+/* au_pin flags */
14384+#define AuPin_DI_LOCKED 1
14385+#define AuPin_MNT_WRITE (1 << 1)
14386+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
14387+#define au_fset_pin(flags, name) \
14388+ do { (flags) |= AuPin_##name; } while (0)
14389+#define au_fclr_pin(flags, name) \
14390+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
14391+
14392+struct au_pin {
14393+ /* input */
14394+ struct dentry *dentry;
14395+ unsigned int udba;
14396+ unsigned char lsc_di, lsc_hi, flags;
14397+ aufs_bindex_t bindex;
14398+
14399+ /* output */
14400+ struct dentry *parent;
14401+ struct au_hinode *hdir;
14402+ struct vfsmount *h_mnt;
14403+};
1facf9fc 14404+
1308ab2a 14405+/* ---------------------------------------------------------------------- */
14406+
4a4d8108 14407+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 14408+{
4a4d8108 14409+ struct au_iinfo *iinfo;
1facf9fc 14410+
4a4d8108
AM
14411+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14412+ if (iinfo->ii_hinode)
14413+ return iinfo;
14414+ return NULL; /* debugging bad_inode case */
14415+}
1facf9fc 14416+
4a4d8108 14417+/* ---------------------------------------------------------------------- */
1facf9fc 14418+
4a4d8108
AM
14419+/* inode.c */
14420+struct inode *au_igrab(struct inode *inode);
027c5e7a 14421+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
14422+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
14423+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14424+ unsigned int d_type, ino_t *ino);
14425+struct inode *au_new_inode(struct dentry *dentry, int must_new);
14426+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14427+ struct inode *inode);
14428+int au_test_h_perm(struct inode *h_inode, int mask);
14429+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 14430+
4a4d8108
AM
14431+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
14432+ ino_t h_ino, unsigned int d_type, ino_t *ino)
14433+{
14434+#ifdef CONFIG_AUFS_SHWH
14435+ return au_ino(sb, bindex, h_ino, d_type, ino);
14436+#else
14437+ return 0;
14438+#endif
14439+}
1facf9fc 14440+
4a4d8108
AM
14441+/* i_op.c */
14442+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
1308ab2a 14443+
4a4d8108
AM
14444+/* au_wr_dir flags */
14445+#define AuWrDir_ADD_ENTRY 1
14446+#define AuWrDir_ISDIR (1 << 1)
14447+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
14448+#define au_fset_wrdir(flags, name) \
14449+ do { (flags) |= AuWrDir_##name; } while (0)
14450+#define au_fclr_wrdir(flags, name) \
14451+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 14452+
4a4d8108
AM
14453+struct au_wr_dir_args {
14454+ aufs_bindex_t force_btgt;
14455+ unsigned char flags;
14456+};
14457+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
14458+ struct au_wr_dir_args *args);
dece6358 14459+
4a4d8108
AM
14460+struct dentry *au_pinned_h_parent(struct au_pin *pin);
14461+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
14462+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
14463+ unsigned int udba, unsigned char flags);
14464+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
14465+ unsigned int udba, unsigned char flags) __must_check;
14466+int au_do_pin(struct au_pin *pin) __must_check;
14467+void au_unpin(struct au_pin *pin);
1facf9fc 14468+
4a4d8108
AM
14469+/* i_op_add.c */
14470+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
14471+ struct dentry *h_parent, int isdir);
14472+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
14473+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
14474+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
14475+ struct nameidata *nd);
14476+int aufs_link(struct dentry *src_dentry, struct inode *dir,
14477+ struct dentry *dentry);
14478+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
1facf9fc 14479+
4a4d8108
AM
14480+/* i_op_del.c */
14481+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
14482+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
14483+ struct dentry *h_parent, int isdir);
14484+int aufs_unlink(struct inode *dir, struct dentry *dentry);
14485+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 14486+
4a4d8108
AM
14487+/* i_op_ren.c */
14488+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
14489+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
14490+ struct inode *dir, struct dentry *dentry);
1facf9fc 14491+
4a4d8108
AM
14492+/* iinfo.c */
14493+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
14494+void au_hiput(struct au_hinode *hinode);
14495+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14496+ struct dentry *h_wh);
14497+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 14498+
4a4d8108
AM
14499+/* hinode flags */
14500+#define AuHi_XINO 1
14501+#define AuHi_HNOTIFY (1 << 1)
14502+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
14503+#define au_fset_hi(flags, name) \
14504+ do { (flags) |= AuHi_##name; } while (0)
14505+#define au_fclr_hi(flags, name) \
14506+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 14507+
4a4d8108
AM
14508+#ifndef CONFIG_AUFS_HNOTIFY
14509+#undef AuHi_HNOTIFY
14510+#define AuHi_HNOTIFY 0
14511+#endif
1facf9fc 14512+
4a4d8108
AM
14513+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14514+ struct inode *h_inode, unsigned int flags);
1facf9fc 14515+
4a4d8108
AM
14516+void au_update_iigen(struct inode *inode);
14517+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 14518+
4a4d8108
AM
14519+void au_icntnr_init_once(void *_c);
14520+int au_iinfo_init(struct inode *inode);
14521+void au_iinfo_fin(struct inode *inode);
14522+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 14523+
e49829fe 14524+#ifdef CONFIG_PROC_FS
4a4d8108 14525+/* plink.c */
e49829fe
JR
14526+int au_plink_maint(struct super_block *sb, int flags);
14527+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
14528+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
14529+#ifdef CONFIG_AUFS_DEBUG
14530+void au_plink_list(struct super_block *sb);
14531+#else
14532+AuStubVoid(au_plink_list, struct super_block *sb)
14533+#endif
14534+int au_plink_test(struct inode *inode);
14535+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
14536+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
14537+ struct dentry *h_dentry);
e49829fe
JR
14538+void au_plink_put(struct super_block *sb, int verbose);
14539+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 14540+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
14541+#else
14542+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
14543+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
14544+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
14545+AuStubVoid(au_plink_list, struct super_block *sb);
14546+AuStubInt0(au_plink_test, struct inode *inode);
14547+AuStub(struct dentry *, au_plink_lkup, return NULL,
14548+ struct inode *inode, aufs_bindex_t bindex);
14549+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
14550+ struct dentry *h_dentry);
14551+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
14552+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
14553+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
14554+#endif /* CONFIG_PROC_FS */
1facf9fc 14555+
4a4d8108 14556+/* ---------------------------------------------------------------------- */
1308ab2a 14557+
4a4d8108
AM
14558+/* lock subclass for iinfo */
14559+enum {
14560+ AuLsc_II_CHILD, /* child first */
14561+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
14562+ AuLsc_II_CHILD3, /* copyup dirs */
14563+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
14564+ AuLsc_II_PARENT2,
14565+ AuLsc_II_PARENT3, /* copyup dirs */
14566+ AuLsc_II_NEW_CHILD
14567+};
1308ab2a 14568+
1facf9fc 14569+/*
4a4d8108
AM
14570+ * ii_read_lock_child, ii_write_lock_child,
14571+ * ii_read_lock_child2, ii_write_lock_child2,
14572+ * ii_read_lock_child3, ii_write_lock_child3,
14573+ * ii_read_lock_parent, ii_write_lock_parent,
14574+ * ii_read_lock_parent2, ii_write_lock_parent2,
14575+ * ii_read_lock_parent3, ii_write_lock_parent3,
14576+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 14577+ */
4a4d8108
AM
14578+#define AuReadLockFunc(name, lsc) \
14579+static inline void ii_read_lock_##name(struct inode *i) \
14580+{ \
14581+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14582+}
14583+
14584+#define AuWriteLockFunc(name, lsc) \
14585+static inline void ii_write_lock_##name(struct inode *i) \
14586+{ \
14587+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14588+}
14589+
14590+#define AuRWLockFuncs(name, lsc) \
14591+ AuReadLockFunc(name, lsc) \
14592+ AuWriteLockFunc(name, lsc)
14593+
14594+AuRWLockFuncs(child, CHILD);
14595+AuRWLockFuncs(child2, CHILD2);
14596+AuRWLockFuncs(child3, CHILD3);
14597+AuRWLockFuncs(parent, PARENT);
14598+AuRWLockFuncs(parent2, PARENT2);
14599+AuRWLockFuncs(parent3, PARENT3);
14600+AuRWLockFuncs(new_child, NEW_CHILD);
14601+
14602+#undef AuReadLockFunc
14603+#undef AuWriteLockFunc
14604+#undef AuRWLockFuncs
1facf9fc 14605+
14606+/*
4a4d8108 14607+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 14608+ */
4a4d8108 14609+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 14610+
4a4d8108
AM
14611+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
14612+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
14613+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 14614+
4a4d8108 14615+/* ---------------------------------------------------------------------- */
1308ab2a 14616+
027c5e7a
AM
14617+static inline void au_icntnr_init(struct au_icntnr *c)
14618+{
14619+#ifdef CONFIG_AUFS_DEBUG
14620+ c->vfs_inode.i_mode = 0;
14621+#endif
14622+}
14623+
4a4d8108
AM
14624+static inline unsigned int au_iigen(struct inode *inode)
14625+{
14626+ return atomic_read(&au_ii(inode)->ii_generation);
14627+}
1308ab2a 14628+
4a4d8108
AM
14629+/* tiny test for inode number */
14630+/* tmpfs generation is too rough */
14631+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
14632+{
14633+ struct au_iinfo *iinfo;
1308ab2a 14634+
4a4d8108
AM
14635+ iinfo = au_ii(inode);
14636+ AuRwMustAnyLock(&iinfo->ii_rwsem);
14637+ return !(iinfo->ii_hsb1 == h_inode->i_sb
14638+ && iinfo->ii_higen == h_inode->i_generation);
14639+}
1308ab2a 14640+
4a4d8108
AM
14641+static inline void au_iigen_dec(struct inode *inode)
14642+{
e49829fe 14643+ atomic_dec(&au_ii(inode)->ii_generation);
027c5e7a
AM
14644+}
14645+
14646+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
14647+{
14648+ int err;
14649+
14650+ err = 0;
14651+ if (unlikely(inode && au_iigen(inode) != sigen))
14652+ err = -EIO;
14653+
14654+ return err;
4a4d8108 14655+}
1308ab2a 14656+
4a4d8108 14657+/* ---------------------------------------------------------------------- */
1308ab2a 14658+
4a4d8108
AM
14659+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
14660+ aufs_bindex_t bindex)
14661+{
14662+ IiMustAnyLock(inode);
14663+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
14664+}
1308ab2a 14665+
4a4d8108
AM
14666+static inline aufs_bindex_t au_ibstart(struct inode *inode)
14667+{
14668+ IiMustAnyLock(inode);
14669+ return au_ii(inode)->ii_bstart;
14670+}
1308ab2a 14671+
4a4d8108
AM
14672+static inline aufs_bindex_t au_ibend(struct inode *inode)
14673+{
14674+ IiMustAnyLock(inode);
14675+ return au_ii(inode)->ii_bend;
14676+}
1308ab2a 14677+
4a4d8108
AM
14678+static inline struct au_vdir *au_ivdir(struct inode *inode)
14679+{
14680+ IiMustAnyLock(inode);
14681+ return au_ii(inode)->ii_vdir;
14682+}
1308ab2a 14683+
4a4d8108
AM
14684+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
14685+{
14686+ IiMustAnyLock(inode);
14687+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
14688+}
1308ab2a 14689+
4a4d8108 14690+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14691+{
4a4d8108
AM
14692+ IiMustWriteLock(inode);
14693+ au_ii(inode)->ii_bstart = bindex;
14694+}
1308ab2a 14695+
4a4d8108
AM
14696+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
14697+{
14698+ IiMustWriteLock(inode);
14699+ au_ii(inode)->ii_bend = bindex;
1308ab2a 14700+}
14701+
4a4d8108
AM
14702+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
14703+{
14704+ IiMustWriteLock(inode);
14705+ au_ii(inode)->ii_vdir = vdir;
14706+}
1facf9fc 14707+
4a4d8108 14708+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14709+{
4a4d8108
AM
14710+ IiMustAnyLock(inode);
14711+ return au_ii(inode)->ii_hinode + bindex;
14712+}
dece6358 14713+
4a4d8108 14714+/* ---------------------------------------------------------------------- */
1facf9fc 14715+
4a4d8108
AM
14716+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
14717+{
14718+ if (pin)
14719+ return pin->parent;
14720+ return NULL;
1facf9fc 14721+}
14722+
4a4d8108 14723+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 14724+{
4a4d8108
AM
14725+ if (pin && pin->hdir)
14726+ return pin->hdir->hi_inode;
14727+ return NULL;
1308ab2a 14728+}
1facf9fc 14729+
4a4d8108
AM
14730+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
14731+{
14732+ if (pin)
14733+ return pin->hdir;
14734+ return NULL;
14735+}
1facf9fc 14736+
4a4d8108 14737+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 14738+{
4a4d8108
AM
14739+ if (pin)
14740+ pin->dentry = dentry;
14741+}
1308ab2a 14742+
4a4d8108
AM
14743+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
14744+ unsigned char lflag)
14745+{
14746+ if (pin) {
7f207e10 14747+ if (lflag)
4a4d8108 14748+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 14749+ else
4a4d8108 14750+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 14751+ }
4a4d8108
AM
14752+}
14753+
14754+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
14755+{
14756+ if (pin) {
14757+ dput(pin->parent);
14758+ pin->parent = dget(parent);
1facf9fc 14759+ }
4a4d8108 14760+}
1facf9fc 14761+
4a4d8108
AM
14762+/* ---------------------------------------------------------------------- */
14763+
027c5e7a 14764+struct au_branch;
4a4d8108
AM
14765+#ifdef CONFIG_AUFS_HNOTIFY
14766+struct au_hnotify_op {
14767+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a
AM
14768+ int (*alloc)(struct au_hinode *hinode);
14769+ void (*free)(struct au_hinode *hinode);
4a4d8108
AM
14770+
14771+ void (*fin)(void);
14772+ int (*init)(void);
027c5e7a
AM
14773+
14774+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
14775+ void (*fin_br)(struct au_branch *br);
14776+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
14777+};
14778+
14779+/* hnotify.c */
027c5e7a 14780+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
14781+void au_hn_free(struct au_hinode *hinode);
14782+void au_hn_ctl(struct au_hinode *hinode, int do_set);
14783+void au_hn_reset(struct inode *inode, unsigned int flags);
14784+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
14785+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
14786+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
14787+int au_hnotify_init_br(struct au_branch *br, int perm);
14788+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
14789+int __init au_hnotify_init(void);
14790+void au_hnotify_fin(void);
14791+
7f207e10 14792+/* hfsnotify.c */
4a4d8108
AM
14793+extern const struct au_hnotify_op au_hnotify_op;
14794+
14795+static inline
14796+void au_hn_init(struct au_hinode *hinode)
14797+{
14798+ hinode->hi_notify = NULL;
1308ab2a 14799+}
14800+
53392da6
AM
14801+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14802+{
14803+ return hinode->hi_notify;
14804+}
14805+
4a4d8108
AM
14806+#else
14807+static inline
14808+int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
027c5e7a 14809+ struct inode *inode __maybe_unused)
1308ab2a 14810+{
4a4d8108
AM
14811+ return -EOPNOTSUPP;
14812+}
1308ab2a 14813+
53392da6
AM
14814+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14815+{
14816+ return NULL;
14817+}
14818+
4a4d8108
AM
14819+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
14820+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
14821+ int do_set __maybe_unused)
14822+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
14823+ unsigned int flags __maybe_unused)
027c5e7a
AM
14824+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
14825+ struct au_branch *br __maybe_unused,
14826+ int perm __maybe_unused)
14827+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
14828+ int perm __maybe_unused)
14829+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
14830+AuStubInt0(__init au_hnotify_init, void)
14831+AuStubVoid(au_hnotify_fin, void)
14832+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
14833+#endif /* CONFIG_AUFS_HNOTIFY */
14834+
14835+static inline void au_hn_suspend(struct au_hinode *hdir)
14836+{
14837+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 14838+}
14839+
4a4d8108 14840+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 14841+{
4a4d8108
AM
14842+ au_hn_ctl(hdir, /*do_set*/1);
14843+}
1308ab2a 14844+
4a4d8108
AM
14845+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
14846+{
14847+ mutex_lock(&hdir->hi_inode->i_mutex);
14848+ au_hn_suspend(hdir);
14849+}
dece6358 14850+
4a4d8108
AM
14851+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
14852+ unsigned int sc __maybe_unused)
14853+{
14854+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
14855+ au_hn_suspend(hdir);
1facf9fc 14856+}
1facf9fc 14857+
4a4d8108
AM
14858+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
14859+{
14860+ au_hn_resume(hdir);
14861+ mutex_unlock(&hdir->hi_inode->i_mutex);
14862+}
14863+
14864+#endif /* __KERNEL__ */
14865+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
14866diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
14867--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
14868+++ linux/fs/aufs/ioctl.c 2011-10-24 20:51:51.580466925 +0200
14869@@ -0,0 +1,197 @@
4a4d8108 14870+/*
027c5e7a 14871+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14872+ *
14873+ * This program, aufs is free software; you can redistribute it and/or modify
14874+ * it under the terms of the GNU General Public License as published by
14875+ * the Free Software Foundation; either version 2 of the License, or
14876+ * (at your option) any later version.
14877+ *
14878+ * This program is distributed in the hope that it will be useful,
14879+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14880+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14881+ * GNU General Public License for more details.
14882+ *
14883+ * You should have received a copy of the GNU General Public License
14884+ * along with this program; if not, write to the Free Software
14885+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14886+ */
14887+
14888+/*
14889+ * ioctl
14890+ * plink-management and readdir in userspace.
14891+ * assist the pathconf(3) wrapper library.
14892+ */
14893+
14894+#include <linux/file.h>
14895+#include "aufs.h"
14896+
1e00d052 14897+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
14898+{
14899+ int err, fd;
14900+ aufs_bindex_t wbi, bindex, bend;
14901+ struct file *h_file;
14902+ struct super_block *sb;
14903+ struct dentry *root;
1e00d052
AM
14904+ struct au_branch *br;
14905+ struct aufs_wbr_fd wbrfd = {
14906+ .oflags = au_dir_roflags,
14907+ .brid = -1
14908+ };
14909+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
14910+ | O_NOATIME | O_CLOEXEC;
4a4d8108 14911+
1e00d052
AM
14912+ AuDebugOn(wbrfd.oflags & ~valid);
14913+
14914+ if (arg) {
14915+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
14916+ if (unlikely(err)) {
14917+ err = -EFAULT;
14918+ goto out;
14919+ }
14920+
14921+ err = -EINVAL;
14922+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
14923+ wbrfd.oflags |= au_dir_roflags;
14924+ AuDbg("0%o\n", wbrfd.oflags);
14925+ if (unlikely(wbrfd.oflags & ~valid))
14926+ goto out;
14927+ }
14928+
14929+ fd = get_unused_fd();
14930+ err = fd;
14931+ if (unlikely(fd < 0))
4a4d8108 14932+ goto out;
4a4d8108 14933+
1e00d052 14934+ h_file = ERR_PTR(-EINVAL);
4a4d8108 14935+ wbi = 0;
1e00d052 14936+ br = NULL;
4a4d8108
AM
14937+ sb = path->dentry->d_sb;
14938+ root = sb->s_root;
14939+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
14940+ bend = au_sbend(sb);
14941+ if (wbrfd.brid >= 0) {
14942+ wbi = au_br_index(sb, wbrfd.brid);
14943+ if (unlikely(wbi < 0 || wbi > bend))
14944+ goto out_unlock;
14945+ }
14946+
14947+ h_file = ERR_PTR(-ENOENT);
14948+ br = au_sbr(sb, wbi);
14949+ if (!au_br_writable(br->br_perm)) {
14950+ if (arg)
14951+ goto out_unlock;
14952+
14953+ bindex = wbi + 1;
14954+ wbi = -1;
14955+ for (; bindex <= bend; bindex++) {
14956+ br = au_sbr(sb, bindex);
14957+ if (au_br_writable(br->br_perm)) {
4a4d8108 14958+ wbi = bindex;
1e00d052 14959+ br = au_sbr(sb, wbi);
4a4d8108
AM
14960+ break;
14961+ }
14962+ }
4a4d8108
AM
14963+ }
14964+ AuDbg("wbi %d\n", wbi);
1e00d052
AM
14965+ if (wbi >= 0)
14966+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL);
14967+
14968+out_unlock:
4a4d8108
AM
14969+ aufs_read_unlock(root, AuLock_IR);
14970+ err = PTR_ERR(h_file);
14971+ if (IS_ERR(h_file))
14972+ goto out_fd;
14973+
1e00d052 14974+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
14975+ fd_install(fd, h_file);
14976+ err = fd;
14977+ goto out; /* success */
14978+
4f0767ce 14979+out_fd:
4a4d8108 14980+ put_unused_fd(fd);
4f0767ce 14981+out:
1e00d052 14982+ AuTraceErr(err);
4a4d8108
AM
14983+ return err;
14984+}
14985+
14986+/* ---------------------------------------------------------------------- */
14987+
14988+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
14989+{
14990+ long err;
14991+
14992+ switch (cmd) {
4a4d8108
AM
14993+ case AUFS_CTL_RDU:
14994+ case AUFS_CTL_RDU_INO:
14995+ err = au_rdu_ioctl(file, cmd, arg);
14996+ break;
14997+
14998+ case AUFS_CTL_WBR_FD:
1e00d052 14999+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
15000+ break;
15001+
027c5e7a
AM
15002+ case AUFS_CTL_IBUSY:
15003+ err = au_ibusy_ioctl(file, arg);
15004+ break;
15005+
4a4d8108
AM
15006+ default:
15007+ /* do not call the lower */
15008+ AuDbg("0x%x\n", cmd);
15009+ err = -ENOTTY;
15010+ }
15011+
15012+ AuTraceErr(err);
15013+ return err;
15014+}
15015+
15016+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
15017+{
15018+ long err;
15019+
15020+ switch (cmd) {
15021+ case AUFS_CTL_WBR_FD:
1e00d052 15022+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
15023+ break;
15024+
15025+ default:
15026+ /* do not call the lower */
15027+ AuDbg("0x%x\n", cmd);
15028+ err = -ENOTTY;
15029+ }
15030+
15031+ AuTraceErr(err);
15032+ return err;
15033+}
b752ccd1
AM
15034+
15035+#ifdef CONFIG_COMPAT
15036+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
15037+ unsigned long arg)
15038+{
15039+ long err;
15040+
15041+ switch (cmd) {
15042+ case AUFS_CTL_RDU:
15043+ case AUFS_CTL_RDU_INO:
15044+ err = au_rdu_compat_ioctl(file, cmd, arg);
15045+ break;
15046+
027c5e7a
AM
15047+ case AUFS_CTL_IBUSY:
15048+ err = au_ibusy_compat_ioctl(file, arg);
15049+ break;
15050+
b752ccd1
AM
15051+ default:
15052+ err = aufs_ioctl_dir(file, cmd, arg);
15053+ }
15054+
15055+ AuTraceErr(err);
15056+ return err;
15057+}
15058+
15059+#if 0 /* unused yet */
15060+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15061+ unsigned long arg)
15062+{
15063+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
15064+}
15065+#endif
15066+#endif
7f207e10
AM
15067diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
15068--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
1e00d052 15069+++ linux/fs/aufs/i_op_add.c 2011-10-24 20:51:51.580466925 +0200
2cbb1c4b 15070@@ -0,0 +1,711 @@
4a4d8108 15071+/*
027c5e7a 15072+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15073+ *
15074+ * This program, aufs is free software; you can redistribute it and/or modify
15075+ * it under the terms of the GNU General Public License as published by
15076+ * the Free Software Foundation; either version 2 of the License, or
15077+ * (at your option) any later version.
15078+ *
15079+ * This program is distributed in the hope that it will be useful,
15080+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15081+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15082+ * GNU General Public License for more details.
15083+ *
15084+ * You should have received a copy of the GNU General Public License
15085+ * along with this program; if not, write to the Free Software
15086+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15087+ */
15088+
15089+/*
15090+ * inode operations (add entry)
15091+ */
15092+
15093+#include "aufs.h"
15094+
15095+/*
15096+ * final procedure of adding a new entry, except link(2).
15097+ * remove whiteout, instantiate, copyup the parent dir's times and size
15098+ * and update version.
15099+ * if it failed, re-create the removed whiteout.
15100+ */
15101+static int epilog(struct inode *dir, aufs_bindex_t bindex,
15102+ struct dentry *wh_dentry, struct dentry *dentry)
15103+{
15104+ int err, rerr;
15105+ aufs_bindex_t bwh;
15106+ struct path h_path;
15107+ struct inode *inode, *h_dir;
15108+ struct dentry *wh;
15109+
15110+ bwh = -1;
15111+ if (wh_dentry) {
15112+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
15113+ IMustLock(h_dir);
15114+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
15115+ bwh = au_dbwh(dentry);
15116+ h_path.dentry = wh_dentry;
15117+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
15118+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
15119+ dentry);
15120+ if (unlikely(err))
15121+ goto out;
15122+ }
15123+
15124+ inode = au_new_inode(dentry, /*must_new*/1);
15125+ if (!IS_ERR(inode)) {
15126+ d_instantiate(dentry, inode);
15127+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
15128+ IMustLock(dir);
15129+ if (au_ibstart(dir) == au_dbstart(dentry))
15130+ au_cpup_attr_timesizes(dir);
15131+ dir->i_version++;
15132+ return 0; /* success */
15133+ }
15134+
15135+ err = PTR_ERR(inode);
15136+ if (!wh_dentry)
15137+ goto out;
15138+
15139+ /* revert */
15140+ /* dir inode is locked */
15141+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
15142+ rerr = PTR_ERR(wh);
15143+ if (IS_ERR(wh)) {
15144+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
15145+ AuDLNPair(dentry), err, rerr);
15146+ err = -EIO;
15147+ } else
15148+ dput(wh);
15149+
4f0767ce 15150+out:
4a4d8108
AM
15151+ return err;
15152+}
15153+
027c5e7a
AM
15154+static int au_d_may_add(struct dentry *dentry)
15155+{
15156+ int err;
15157+
15158+ err = 0;
15159+ if (unlikely(d_unhashed(dentry)))
15160+ err = -ENOENT;
15161+ if (unlikely(dentry->d_inode))
15162+ err = -EEXIST;
15163+ return err;
15164+}
15165+
4a4d8108
AM
15166+/*
15167+ * simple tests for the adding inode operations.
15168+ * following the checks in vfs, plus the parent-child relationship.
15169+ */
15170+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15171+ struct dentry *h_parent, int isdir)
15172+{
15173+ int err;
15174+ umode_t h_mode;
15175+ struct dentry *h_dentry;
15176+ struct inode *h_inode;
15177+
15178+ err = -ENAMETOOLONG;
15179+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15180+ goto out;
15181+
15182+ h_dentry = au_h_dptr(dentry, bindex);
15183+ h_inode = h_dentry->d_inode;
15184+ if (!dentry->d_inode) {
15185+ err = -EEXIST;
15186+ if (unlikely(h_inode))
15187+ goto out;
15188+ } else {
15189+ /* rename(2) case */
15190+ err = -EIO;
15191+ if (unlikely(!h_inode || !h_inode->i_nlink))
15192+ goto out;
15193+
15194+ h_mode = h_inode->i_mode;
15195+ if (!isdir) {
15196+ err = -EISDIR;
15197+ if (unlikely(S_ISDIR(h_mode)))
15198+ goto out;
15199+ } else if (unlikely(!S_ISDIR(h_mode))) {
15200+ err = -ENOTDIR;
15201+ goto out;
15202+ }
15203+ }
15204+
15205+ err = 0;
15206+ /* expected parent dir is locked */
15207+ if (unlikely(h_parent != h_dentry->d_parent))
15208+ err = -EIO;
15209+
4f0767ce 15210+out:
4a4d8108
AM
15211+ AuTraceErr(err);
15212+ return err;
15213+}
15214+
15215+/*
15216+ * initial procedure of adding a new entry.
15217+ * prepare writable branch and the parent dir, lock it,
15218+ * and lookup whiteout for the new entry.
15219+ */
15220+static struct dentry*
15221+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
15222+ struct dentry *src_dentry, struct au_pin *pin,
15223+ struct au_wr_dir_args *wr_dir_args)
15224+{
15225+ struct dentry *wh_dentry, *h_parent;
15226+ struct super_block *sb;
15227+ struct au_branch *br;
15228+ int err;
15229+ unsigned int udba;
15230+ aufs_bindex_t bcpup;
15231+
15232+ AuDbg("%.*s\n", AuDLNPair(dentry));
15233+
15234+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
15235+ bcpup = err;
15236+ wh_dentry = ERR_PTR(err);
15237+ if (unlikely(err < 0))
15238+ goto out;
15239+
15240+ sb = dentry->d_sb;
15241+ udba = au_opt_udba(sb);
15242+ err = au_pin(pin, dentry, bcpup, udba,
15243+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15244+ wh_dentry = ERR_PTR(err);
15245+ if (unlikely(err))
15246+ goto out;
15247+
15248+ h_parent = au_pinned_h_parent(pin);
15249+ if (udba != AuOpt_UDBA_NONE
15250+ && au_dbstart(dentry) == bcpup)
15251+ err = au_may_add(dentry, bcpup, h_parent,
15252+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
15253+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15254+ err = -ENAMETOOLONG;
15255+ wh_dentry = ERR_PTR(err);
15256+ if (unlikely(err))
15257+ goto out_unpin;
15258+
15259+ br = au_sbr(sb, bcpup);
15260+ if (dt) {
15261+ struct path tmp = {
15262+ .dentry = h_parent,
15263+ .mnt = br->br_mnt
15264+ };
15265+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
15266+ }
15267+
15268+ wh_dentry = NULL;
15269+ if (bcpup != au_dbwh(dentry))
15270+ goto out; /* success */
15271+
15272+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
15273+
4f0767ce 15274+out_unpin:
4a4d8108
AM
15275+ if (IS_ERR(wh_dentry))
15276+ au_unpin(pin);
4f0767ce 15277+out:
4a4d8108
AM
15278+ return wh_dentry;
15279+}
15280+
15281+/* ---------------------------------------------------------------------- */
15282+
/* the kind of entry add_simple() is asked to create */
enum { Mknod, Symlink, Creat };

/*
 * arguments for add_simple().
 * 'type' is one of Mknod, Symlink and Creat, and selects which member of
 * the union 'u' is valid.
 */
struct simple_arg {
	int type;
	union {
		/* Creat */
		struct {
			int			mode;
			struct nameidata	*nd;
		} c;
		/* Symlink */
		struct {
			const char		*symname;
		} s;
		/* Mknod */
		struct {
			int			mode;
			dev_t			dev;
		} m;
	} u;
};
15300+
15301+static int add_simple(struct inode *dir, struct dentry *dentry,
15302+ struct simple_arg *arg)
15303+{
15304+ int err;
15305+ aufs_bindex_t bstart;
15306+ unsigned char created;
15307+ struct au_dtime dt;
15308+ struct au_pin pin;
15309+ struct path h_path;
15310+ struct dentry *wh_dentry, *parent;
15311+ struct inode *h_dir;
15312+ struct au_wr_dir_args wr_dir_args = {
15313+ .force_btgt = -1,
15314+ .flags = AuWrDir_ADD_ENTRY
15315+ };
15316+
15317+ AuDbg("%.*s\n", AuDLNPair(dentry));
15318+ IMustLock(dir);
15319+
15320+ parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15321+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15322+ if (unlikely(err))
15323+ goto out;
15324+ err = au_d_may_add(dentry);
15325+ if (unlikely(err))
15326+ goto out_unlock;
4a4d8108
AM
15327+ di_write_lock_parent(parent);
15328+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
15329+ &wr_dir_args);
15330+ err = PTR_ERR(wh_dentry);
15331+ if (IS_ERR(wh_dentry))
027c5e7a 15332+ goto out_parent;
4a4d8108
AM
15333+
15334+ bstart = au_dbstart(dentry);
15335+ h_path.dentry = au_h_dptr(dentry, bstart);
15336+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
15337+ h_dir = au_pinned_h_dir(&pin);
15338+ switch (arg->type) {
15339+ case Creat:
15340+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
15341+ break;
15342+ case Symlink:
15343+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
15344+ break;
15345+ case Mknod:
15346+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
15347+ break;
15348+ default:
15349+ BUG();
15350+ }
15351+ created = !err;
15352+ if (!err)
15353+ err = epilog(dir, bstart, wh_dentry, dentry);
15354+
15355+ /* revert */
15356+ if (unlikely(created && err && h_path.dentry->d_inode)) {
15357+ int rerr;
15358+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
15359+ if (rerr) {
15360+ AuIOErr("%.*s revert failure(%d, %d)\n",
15361+ AuDLNPair(dentry), err, rerr);
15362+ err = -EIO;
15363+ }
15364+ au_dtime_revert(&dt);
4a4d8108
AM
15365+ }
15366+
15367+ au_unpin(&pin);
15368+ dput(wh_dentry);
15369+
027c5e7a
AM
15370+out_parent:
15371+ di_write_unlock(parent);
15372+out_unlock:
4a4d8108
AM
15373+ if (unlikely(err)) {
15374+ au_update_dbstart(dentry);
15375+ d_drop(dentry);
15376+ }
4a4d8108 15377+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15378+out:
4a4d8108
AM
15379+ return err;
15380+}
15381+
15382+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
15383+{
15384+ struct simple_arg arg = {
15385+ .type = Mknod,
15386+ .u.m = {
15387+ .mode = mode,
15388+ .dev = dev
15389+ }
15390+ };
15391+ return add_simple(dir, dentry, &arg);
15392+}
15393+
15394+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
15395+{
15396+ struct simple_arg arg = {
15397+ .type = Symlink,
15398+ .u.s.symname = symname
15399+ };
15400+ return add_simple(dir, dentry, &arg);
15401+}
15402+
15403+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15404+ struct nameidata *nd)
15405+{
15406+ struct simple_arg arg = {
15407+ .type = Creat,
15408+ .u.c = {
15409+ .mode = mode,
15410+ .nd = nd
15411+ }
15412+ };
15413+ return add_simple(dir, dentry, &arg);
15414+}
15415+
15416+/* ---------------------------------------------------------------------- */
15417+
15418+struct au_link_args {
15419+ aufs_bindex_t bdst, bsrc;
15420+ struct au_pin pin;
15421+ struct path h_path;
15422+ struct dentry *src_parent, *parent;
15423+};
15424+
15425+static int au_cpup_before_link(struct dentry *src_dentry,
15426+ struct au_link_args *a)
15427+{
15428+ int err;
15429+ struct dentry *h_src_dentry;
15430+ struct mutex *h_mtx;
15431+ struct file *h_file;
15432+
15433+ di_read_lock_parent(a->src_parent, AuLock_IR);
15434+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
15435+ if (unlikely(err))
15436+ goto out;
15437+
15438+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
15439+ h_mtx = &h_src_dentry->d_inode->i_mutex;
15440+ err = au_pin(&a->pin, src_dentry, a->bdst,
15441+ au_opt_udba(src_dentry->d_sb),
15442+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15443+ if (unlikely(err))
15444+ goto out;
15445+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15446+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15447+ if (IS_ERR(h_file)) {
15448+ err = PTR_ERR(h_file);
15449+ h_file = NULL;
15450+ } else
1e00d052 15451+ err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
4a4d8108
AM
15452+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
15453+ mutex_unlock(h_mtx);
15454+ au_h_open_post(src_dentry, a->bsrc, h_file);
15455+ au_unpin(&a->pin);
15456+
4f0767ce 15457+out:
4a4d8108
AM
15458+ di_read_unlock(a->src_parent, AuLock_IR);
15459+ return err;
15460+}
15461+
15462+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
15463+{
15464+ int err;
15465+ unsigned char plink;
15466+ struct inode *h_inode, *inode;
15467+ struct dentry *h_src_dentry;
15468+ struct super_block *sb;
15469+ struct file *h_file;
15470+
15471+ plink = 0;
15472+ h_inode = NULL;
15473+ sb = src_dentry->d_sb;
15474+ inode = src_dentry->d_inode;
15475+ if (au_ibstart(inode) <= a->bdst)
15476+ h_inode = au_h_iptr(inode, a->bdst);
15477+ if (!h_inode || !h_inode->i_nlink) {
15478+ /* copyup src_dentry as the name of dentry. */
15479+ au_set_dbstart(src_dentry, a->bdst);
15480+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
15481+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
15482+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
15483+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15484+ if (IS_ERR(h_file)) {
15485+ err = PTR_ERR(h_file);
15486+ h_file = NULL;
15487+ } else
15488+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
15489+ -1, AuCpup_KEEPLINO,
15490+ a->parent);
15491+ mutex_unlock(&h_inode->i_mutex);
15492+ au_h_open_post(src_dentry, a->bsrc, h_file);
15493+ au_set_h_dptr(src_dentry, a->bdst, NULL);
15494+ au_set_dbstart(src_dentry, a->bsrc);
15495+ } else {
15496+ /* the inode of src_dentry already exists on a.bdst branch */
15497+ h_src_dentry = d_find_alias(h_inode);
15498+ if (!h_src_dentry && au_plink_test(inode)) {
15499+ plink = 1;
15500+ h_src_dentry = au_plink_lkup(inode, a->bdst);
15501+ err = PTR_ERR(h_src_dentry);
15502+ if (IS_ERR(h_src_dentry))
15503+ goto out;
15504+
15505+ if (unlikely(!h_src_dentry->d_inode)) {
15506+ dput(h_src_dentry);
15507+ h_src_dentry = NULL;
15508+ }
15509+
15510+ }
15511+ if (h_src_dentry) {
15512+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15513+ &a->h_path);
15514+ dput(h_src_dentry);
15515+ } else {
15516+ AuIOErr("no dentry found for hi%lu on b%d\n",
15517+ h_inode->i_ino, a->bdst);
15518+ err = -EIO;
15519+ }
15520+ }
15521+
15522+ if (!err && !plink)
15523+ au_plink_append(inode, a->bdst, a->h_path.dentry);
15524+
15525+out:
2cbb1c4b 15526+ AuTraceErr(err);
4a4d8108
AM
15527+ return err;
15528+}
15529+
15530+int aufs_link(struct dentry *src_dentry, struct inode *dir,
15531+ struct dentry *dentry)
15532+{
15533+ int err, rerr;
15534+ struct au_dtime dt;
15535+ struct au_link_args *a;
15536+ struct dentry *wh_dentry, *h_src_dentry;
15537+ struct inode *inode;
15538+ struct super_block *sb;
15539+ struct au_wr_dir_args wr_dir_args = {
15540+ /* .force_btgt = -1, */
15541+ .flags = AuWrDir_ADD_ENTRY
15542+ };
15543+
15544+ IMustLock(dir);
15545+ inode = src_dentry->d_inode;
15546+ IMustLock(inode);
15547+
4a4d8108
AM
15548+ err = -ENOMEM;
15549+ a = kzalloc(sizeof(*a), GFP_NOFS);
15550+ if (unlikely(!a))
15551+ goto out;
15552+
15553+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15554+ err = aufs_read_and_write_lock2(dentry, src_dentry,
15555+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
15556+ if (unlikely(err))
15557+ goto out_kfree;
027c5e7a
AM
15558+ err = au_d_hashed_positive(src_dentry);
15559+ if (unlikely(err))
15560+ goto out_unlock;
15561+ err = au_d_may_add(dentry);
15562+ if (unlikely(err))
15563+ goto out_unlock;
e49829fe 15564+
4a4d8108 15565+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 15566+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
15567+
15568+ di_write_lock_parent(a->parent);
15569+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
15570+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
15571+ &wr_dir_args);
15572+ err = PTR_ERR(wh_dentry);
15573+ if (IS_ERR(wh_dentry))
027c5e7a 15574+ goto out_parent;
4a4d8108
AM
15575+
15576+ err = 0;
15577+ sb = dentry->d_sb;
15578+ a->bdst = au_dbstart(dentry);
15579+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
15580+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
15581+ a->bsrc = au_ibstart(inode);
15582+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15583+ if (!h_src_dentry) {
15584+ a->bsrc = au_dbstart(src_dentry);
15585+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15586+ AuDebugOn(!h_src_dentry);
15587+ } else if (IS_ERR(h_src_dentry))
15588+ goto out_parent;
15589+
4a4d8108
AM
15590+ if (au_opt_test(au_mntflags(sb), PLINK)) {
15591+ if (a->bdst < a->bsrc
15592+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
15593+ err = au_cpup_or_link(src_dentry, a);
2cbb1c4b 15594+ else
4a4d8108
AM
15595+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15596+ &a->h_path);
2cbb1c4b 15597+ dput(h_src_dentry);
4a4d8108
AM
15598+ } else {
15599+ /*
15600+ * copyup src_dentry to the branch we process,
15601+ * and then link(2) to it.
15602+ */
2cbb1c4b 15603+ dput(h_src_dentry);
4a4d8108
AM
15604+ if (a->bdst < a->bsrc
15605+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
15606+ au_unpin(&a->pin);
15607+ di_write_unlock(a->parent);
15608+ err = au_cpup_before_link(src_dentry, a);
15609+ di_write_lock_parent(a->parent);
15610+ if (!err)
15611+ err = au_pin(&a->pin, dentry, a->bdst,
15612+ au_opt_udba(sb),
15613+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15614+ if (unlikely(err))
15615+ goto out_wh;
15616+ }
15617+ if (!err) {
15618+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
15619+ err = -ENOENT;
15620+ if (h_src_dentry && h_src_dentry->d_inode)
15621+ err = vfsub_link(h_src_dentry,
15622+ au_pinned_h_dir(&a->pin),
15623+ &a->h_path);
15624+ }
15625+ }
15626+ if (unlikely(err))
15627+ goto out_unpin;
15628+
15629+ if (wh_dentry) {
15630+ a->h_path.dentry = wh_dentry;
15631+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
15632+ dentry);
15633+ if (unlikely(err))
15634+ goto out_revert;
15635+ }
15636+
15637+ dir->i_version++;
15638+ if (au_ibstart(dir) == au_dbstart(dentry))
15639+ au_cpup_attr_timesizes(dir);
15640+ inc_nlink(inode);
15641+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
15642+ d_instantiate(dentry, au_igrab(inode));
15643+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
15644+ /* some filesystem calls d_drop() */
15645+ d_drop(dentry);
15646+ goto out_unpin; /* success */
15647+
4f0767ce 15648+out_revert:
4a4d8108 15649+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
027c5e7a
AM
15650+ if (unlikely(rerr)) {
15651+ AuIOErr("%.*s reverting failed(%d, %d)\n",
15652+ AuDLNPair(dentry), err, rerr);
15653+ err = -EIO;
15654+ }
4a4d8108 15655+ au_dtime_revert(&dt);
4f0767ce 15656+out_unpin:
4a4d8108 15657+ au_unpin(&a->pin);
4f0767ce 15658+out_wh:
4a4d8108 15659+ dput(wh_dentry);
027c5e7a
AM
15660+out_parent:
15661+ di_write_unlock(a->parent);
15662+ dput(a->src_parent);
4f0767ce 15663+out_unlock:
4a4d8108
AM
15664+ if (unlikely(err)) {
15665+ au_update_dbstart(dentry);
15666+ d_drop(dentry);
15667+ }
4a4d8108 15668+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 15669+out_kfree:
4a4d8108 15670+ kfree(a);
4f0767ce 15671+out:
4a4d8108
AM
15672+ return err;
15673+}
15674+
15675+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
15676+{
15677+ int err, rerr;
15678+ aufs_bindex_t bindex;
15679+ unsigned char diropq;
15680+ struct path h_path;
15681+ struct dentry *wh_dentry, *parent, *opq_dentry;
15682+ struct mutex *h_mtx;
15683+ struct super_block *sb;
15684+ struct {
15685+ struct au_pin pin;
15686+ struct au_dtime dt;
15687+ } *a; /* reduce the stack usage */
15688+ struct au_wr_dir_args wr_dir_args = {
15689+ .force_btgt = -1,
15690+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
15691+ };
15692+
15693+ IMustLock(dir);
15694+
15695+ err = -ENOMEM;
15696+ a = kmalloc(sizeof(*a), GFP_NOFS);
15697+ if (unlikely(!a))
15698+ goto out;
15699+
027c5e7a
AM
15700+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15701+ if (unlikely(err))
15702+ goto out_free;
15703+ err = au_d_may_add(dentry);
15704+ if (unlikely(err))
15705+ goto out_unlock;
15706+
4a4d8108
AM
15707+ parent = dentry->d_parent; /* dir inode is locked */
15708+ di_write_lock_parent(parent);
15709+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
15710+ &a->pin, &wr_dir_args);
15711+ err = PTR_ERR(wh_dentry);
15712+ if (IS_ERR(wh_dentry))
027c5e7a 15713+ goto out_parent;
4a4d8108
AM
15714+
15715+ sb = dentry->d_sb;
15716+ bindex = au_dbstart(dentry);
15717+ h_path.dentry = au_h_dptr(dentry, bindex);
15718+ h_path.mnt = au_sbr_mnt(sb, bindex);
15719+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
15720+ if (unlikely(err))
027c5e7a 15721+ goto out_unpin;
4a4d8108
AM
15722+
15723+ /* make the dir opaque */
15724+ diropq = 0;
15725+ h_mtx = &h_path.dentry->d_inode->i_mutex;
15726+ if (wh_dentry
15727+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
15728+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15729+ opq_dentry = au_diropq_create(dentry, bindex);
15730+ mutex_unlock(h_mtx);
15731+ err = PTR_ERR(opq_dentry);
15732+ if (IS_ERR(opq_dentry))
15733+ goto out_dir;
15734+ dput(opq_dentry);
15735+ diropq = 1;
15736+ }
15737+
15738+ err = epilog(dir, bindex, wh_dentry, dentry);
15739+ if (!err) {
15740+ inc_nlink(dir);
027c5e7a 15741+ goto out_unpin; /* success */
4a4d8108
AM
15742+ }
15743+
15744+ /* revert */
15745+ if (diropq) {
15746+ AuLabel(revert opq);
15747+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15748+ rerr = au_diropq_remove(dentry, bindex);
15749+ mutex_unlock(h_mtx);
15750+ if (rerr) {
15751+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
15752+ AuDLNPair(dentry), err, rerr);
15753+ err = -EIO;
15754+ }
15755+ }
15756+
4f0767ce 15757+out_dir:
4a4d8108
AM
15758+ AuLabel(revert dir);
15759+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
15760+ if (rerr) {
15761+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
15762+ AuDLNPair(dentry), err, rerr);
15763+ err = -EIO;
15764+ }
4a4d8108 15765+ au_dtime_revert(&a->dt);
027c5e7a 15766+out_unpin:
4a4d8108
AM
15767+ au_unpin(&a->pin);
15768+ dput(wh_dentry);
027c5e7a
AM
15769+out_parent:
15770+ di_write_unlock(parent);
15771+out_unlock:
4a4d8108
AM
15772+ if (unlikely(err)) {
15773+ au_update_dbstart(dentry);
15774+ d_drop(dentry);
15775+ }
4a4d8108 15776+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15777+out_free:
4a4d8108 15778+ kfree(a);
4f0767ce 15779+out:
4a4d8108
AM
15780+ return err;
15781+}
7f207e10
AM
15782diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
15783--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
15784+++ linux/fs/aufs/i_op.c 2011-10-24 20:52:23.677857076 +0200
15785@@ -0,0 +1,974 @@
4a4d8108 15786+/*
027c5e7a 15787+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15788+ *
15789+ * This program, aufs is free software; you can redistribute it and/or modify
15790+ * it under the terms of the GNU General Public License as published by
15791+ * the Free Software Foundation; either version 2 of the License, or
15792+ * (at your option) any later version.
15793+ *
15794+ * This program is distributed in the hope that it will be useful,
15795+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15796+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15797+ * GNU General Public License for more details.
15798+ *
15799+ * You should have received a copy of the GNU General Public License
15800+ * along with this program; if not, write to the Free Software
15801+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15802+ */
1facf9fc 15803+
1308ab2a 15804+/*
4a4d8108 15805+ * inode operations (except add/del/rename)
1308ab2a 15806+ */
4a4d8108
AM
15807+
15808+#include <linux/device_cgroup.h>
15809+#include <linux/fs_stack.h>
15810+#include <linux/mm.h>
15811+#include <linux/namei.h>
15812+#include <linux/security.h>
15813+#include <linux/uaccess.h>
15814+#include "aufs.h"
15815+
1e00d052 15816+static int h_permission(struct inode *h_inode, int mask,
4a4d8108 15817+ struct vfsmount *h_mnt, int brperm)
1facf9fc 15818+{
1308ab2a 15819+ int err;
4a4d8108 15820+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 15821+
4a4d8108
AM
15822+ err = -EACCES;
15823+ if ((write_mask && IS_IMMUTABLE(h_inode))
15824+ || ((mask & MAY_EXEC)
15825+ && S_ISREG(h_inode->i_mode)
15826+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
15827+ || !(h_inode->i_mode & S_IXUGO))))
15828+ goto out;
15829+
15830+ /*
15831+ * - skip the lower fs test in the case of write to ro branch.
15832+ * - nfs dir permission write check is optimized, but a policy for
15833+ * link/rename requires a real check.
15834+ */
15835+ if ((write_mask && !au_br_writable(brperm))
15836+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
15837+ && write_mask && !(mask & MAY_READ))
15838+ || !h_inode->i_op->permission) {
15839+ /* AuLabel(generic_permission); */
1e00d052 15840+ err = generic_permission(h_inode, mask);
1308ab2a 15841+ } else {
4a4d8108 15842+ /* AuLabel(h_inode->permission); */
1e00d052 15843+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
15844+ AuTraceErr(err);
15845+ }
1facf9fc 15846+
4a4d8108
AM
15847+ if (!err)
15848+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 15849+ if (!err)
4a4d8108 15850+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
15851+
15852+#if 0
15853+ if (!err) {
15854+ /* todo: do we need to call ima_path_check()? */
15855+ struct path h_path = {
15856+ .dentry =
15857+ .mnt = h_mnt
15858+ };
15859+ err = ima_path_check(&h_path,
15860+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
15861+ IMA_COUNT_LEAVE);
1308ab2a 15862+ }
4a4d8108 15863+#endif
dece6358 15864+
4f0767ce 15865+out:
1308ab2a 15866+ return err;
15867+}
dece6358 15868+
1e00d052 15869+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 15870+{
15871+ int err;
4a4d8108
AM
15872+ aufs_bindex_t bindex, bend;
15873+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
15874+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
15875+ struct inode *h_inode;
15876+ struct super_block *sb;
15877+ struct au_branch *br;
1facf9fc 15878+
027c5e7a 15879+ /* todo: support rcu-walk? */
1e00d052 15880+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
15881+ return -ECHILD;
15882+
4a4d8108
AM
15883+ sb = inode->i_sb;
15884+ si_read_lock(sb, AuLock_FLUSH);
15885+ ii_read_lock_child(inode);
027c5e7a
AM
15886+#if 0
15887+ err = au_iigen_test(inode, au_sigen(sb));
15888+ if (unlikely(err))
15889+ goto out;
15890+#endif
dece6358 15891+
4a4d8108
AM
15892+ if (!isdir || write_mask) {
15893+ err = au_busy_or_stale();
15894+ h_inode = au_h_iptr(inode, au_ibstart(inode));
15895+ if (unlikely(!h_inode
15896+ || (h_inode->i_mode & S_IFMT)
15897+ != (inode->i_mode & S_IFMT)))
15898+ goto out;
1facf9fc 15899+
4a4d8108
AM
15900+ err = 0;
15901+ bindex = au_ibstart(inode);
15902+ br = au_sbr(sb, bindex);
1e00d052 15903+ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
4a4d8108
AM
15904+ if (write_mask
15905+ && !err
15906+ && !special_file(h_inode->i_mode)) {
15907+ /* test whether the upper writable branch exists */
15908+ err = -EROFS;
15909+ for (; bindex >= 0; bindex--)
15910+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
15911+ err = 0;
15912+ break;
15913+ }
15914+ }
15915+ goto out;
15916+ }
dece6358 15917+
4a4d8108 15918+ /* non-write to dir */
1308ab2a 15919+ err = 0;
4a4d8108
AM
15920+ bend = au_ibend(inode);
15921+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
15922+ h_inode = au_h_iptr(inode, bindex);
15923+ if (h_inode) {
15924+ err = au_busy_or_stale();
15925+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
15926+ break;
15927+
15928+ br = au_sbr(sb, bindex);
1e00d052 15929+ err = h_permission(h_inode, mask, br->br_mnt,
4a4d8108
AM
15930+ br->br_perm);
15931+ }
15932+ }
1308ab2a 15933+
4f0767ce 15934+out:
4a4d8108
AM
15935+ ii_read_unlock(inode);
15936+ si_read_unlock(sb);
1308ab2a 15937+ return err;
15938+}
15939+
4a4d8108 15940+/* ---------------------------------------------------------------------- */
1facf9fc 15941+
4a4d8108
AM
15942+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
15943+ struct nameidata *nd)
15944+{
15945+ struct dentry *ret, *parent;
b752ccd1 15946+ struct inode *inode;
4a4d8108
AM
15947+ struct super_block *sb;
15948+ int err, npositive;
dece6358 15949+
4a4d8108 15950+ IMustLock(dir);
1308ab2a 15951+
4a4d8108 15952+ sb = dir->i_sb;
7f207e10
AM
15953+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
15954+ ret = ERR_PTR(err);
15955+ if (unlikely(err))
15956+ goto out;
15957+
4a4d8108
AM
15958+ ret = ERR_PTR(-ENAMETOOLONG);
15959+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
7f207e10 15960+ goto out_si;
4a4d8108
AM
15961+ err = au_di_init(dentry);
15962+ ret = ERR_PTR(err);
15963+ if (unlikely(err))
7f207e10 15964+ goto out_si;
1308ab2a 15965+
027c5e7a 15966+ npositive = 0; /* suppress a warning */
4a4d8108
AM
15967+ parent = dentry->d_parent; /* dir inode is locked */
15968+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
15969+ err = au_alive_dir(parent);
15970+ if (!err)
15971+ err = au_digen_test(parent, au_sigen(sb));
15972+ if (!err) {
15973+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
15974+ /*type*/0, nd);
15975+ err = npositive;
15976+ }
4a4d8108 15977+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
15978+ ret = ERR_PTR(err);
15979+ if (unlikely(err < 0))
15980+ goto out_unlock;
1308ab2a 15981+
4a4d8108
AM
15982+ inode = NULL;
15983+ if (npositive) {
b752ccd1 15984+ inode = au_new_inode(dentry, /*must_new*/0);
4a4d8108 15985+ ret = (void *)inode;
1facf9fc 15986+ }
4a4d8108
AM
15987+ if (IS_ERR(inode))
15988+ goto out_unlock;
15989+
15990+ ret = d_splice_alias(inode, dentry);
7f207e10 15991+ if (unlikely(IS_ERR(ret) && inode)) {
4a4d8108 15992+ ii_write_unlock(inode);
7f207e10
AM
15993+ iput(inode);
15994+ }
1facf9fc 15995+
4f0767ce 15996+out_unlock:
4a4d8108 15997+ di_write_unlock(dentry);
7f207e10 15998+out_si:
4a4d8108 15999+ si_read_unlock(sb);
7f207e10 16000+out:
4a4d8108
AM
16001+ return ret;
16002+}
1facf9fc 16003+
4a4d8108 16004+/* ---------------------------------------------------------------------- */
1facf9fc 16005+
4a4d8108
AM
16006+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
16007+ const unsigned char add_entry, aufs_bindex_t bcpup,
16008+ aufs_bindex_t bstart)
16009+{
16010+ int err;
16011+ struct dentry *h_parent;
16012+ struct inode *h_dir;
1facf9fc 16013+
027c5e7a 16014+ if (add_entry)
4a4d8108 16015+ IMustLock(parent->d_inode);
027c5e7a 16016+ else
4a4d8108
AM
16017+ di_write_lock_parent(parent);
16018+
16019+ err = 0;
16020+ if (!au_h_dptr(parent, bcpup)) {
16021+ if (bstart < bcpup)
16022+ err = au_cpdown_dirs(dentry, bcpup);
16023+ else
16024+ err = au_cpup_dirs(dentry, bcpup);
16025+ }
16026+ if (!err && add_entry) {
16027+ h_parent = au_h_dptr(parent, bcpup);
16028+ h_dir = h_parent->d_inode;
16029+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
16030+ err = au_lkup_neg(dentry, bcpup);
16031+ /* todo: no unlock here */
16032+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
16033+
16034+ AuDbg("bcpup %d\n", bcpup);
16035+ if (!err) {
16036+ if (!dentry->d_inode)
16037+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
16038+ au_update_dbrange(dentry, /*do_put_zero*/0);
16039+ }
1308ab2a 16040+ }
1facf9fc 16041+
4a4d8108
AM
16042+ if (!add_entry)
16043+ di_write_unlock(parent);
16044+ if (!err)
16045+ err = bcpup; /* success */
1308ab2a 16046+
027c5e7a 16047+ AuTraceErr(err);
4a4d8108
AM
16048+ return err;
16049+}
1facf9fc 16050+
4a4d8108
AM
16051+/*
16052+ * decide the branch and the parent dir where we will create a new entry.
16053+ * returns new bindex or an error.
16054+ * copyup the parent dir if needed.
16055+ */
16056+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16057+ struct au_wr_dir_args *args)
16058+{
16059+ int err;
16060+ aufs_bindex_t bcpup, bstart, src_bstart;
16061+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
16062+ ADD_ENTRY);
16063+ struct super_block *sb;
16064+ struct dentry *parent;
16065+ struct au_sbinfo *sbinfo;
1facf9fc 16066+
4a4d8108
AM
16067+ sb = dentry->d_sb;
16068+ sbinfo = au_sbi(sb);
16069+ parent = dget_parent(dentry);
16070+ bstart = au_dbstart(dentry);
16071+ bcpup = bstart;
16072+ if (args->force_btgt < 0) {
16073+ if (src_dentry) {
16074+ src_bstart = au_dbstart(src_dentry);
16075+ if (src_bstart < bstart)
16076+ bcpup = src_bstart;
16077+ } else if (add_entry) {
16078+ err = AuWbrCreate(sbinfo, dentry,
16079+ au_ftest_wrdir(args->flags, ISDIR));
16080+ bcpup = err;
16081+ }
1facf9fc 16082+
4a4d8108
AM
16083+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
16084+ if (add_entry)
16085+ err = AuWbrCopyup(sbinfo, dentry);
16086+ else {
16087+ if (!IS_ROOT(dentry)) {
16088+ di_read_lock_parent(parent, !AuLock_IR);
16089+ err = AuWbrCopyup(sbinfo, dentry);
16090+ di_read_unlock(parent, !AuLock_IR);
16091+ } else
16092+ err = AuWbrCopyup(sbinfo, dentry);
16093+ }
16094+ bcpup = err;
16095+ if (unlikely(err < 0))
16096+ goto out;
16097+ }
16098+ } else {
16099+ bcpup = args->force_btgt;
16100+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1308ab2a 16101+ }
027c5e7a 16102+
4a4d8108
AM
16103+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
16104+ err = bcpup;
16105+ if (bcpup == bstart)
16106+ goto out; /* success */
4a4d8108
AM
16107+
16108+ /* copyup the new parent into the branch we process */
16109+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a
AM
16110+ if (err >= 0) {
16111+ if (!dentry->d_inode) {
16112+ au_set_h_dptr(dentry, bstart, NULL);
16113+ au_set_dbstart(dentry, bcpup);
16114+ au_set_dbend(dentry, bcpup);
16115+ }
16116+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
16117+ }
4a4d8108 16118+
4f0767ce 16119+out:
4a4d8108 16120+ dput(parent);
dece6358
AM
16121+ return err;
16122+}
1facf9fc 16123+
1308ab2a 16124+/* ---------------------------------------------------------------------- */
16125+
4a4d8108 16126+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 16127+{
4a4d8108
AM
16128+ if (pin && pin->parent)
16129+ return au_h_dptr(pin->parent, pin->bindex);
16130+ return NULL;
dece6358 16131+}
1facf9fc 16132+
4a4d8108 16133+void au_unpin(struct au_pin *p)
dece6358 16134+{
e49829fe 16135+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
4a4d8108
AM
16136+ mnt_drop_write(p->h_mnt);
16137+ if (!p->hdir)
16138+ return;
1facf9fc 16139+
4a4d8108
AM
16140+ au_hn_imtx_unlock(p->hdir);
16141+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16142+ di_read_unlock(p->parent, AuLock_IR);
16143+ iput(p->hdir->hi_inode);
16144+ dput(p->parent);
16145+ p->parent = NULL;
16146+ p->hdir = NULL;
16147+ p->h_mnt = NULL;
16148+}
1308ab2a 16149+
4a4d8108
AM
16150+int au_do_pin(struct au_pin *p)
16151+{
16152+ int err;
16153+ struct super_block *sb;
16154+ struct dentry *h_dentry, *h_parent;
16155+ struct au_branch *br;
16156+ struct inode *h_dir;
16157+
16158+ err = 0;
16159+ sb = p->dentry->d_sb;
16160+ br = au_sbr(sb, p->bindex);
16161+ if (IS_ROOT(p->dentry)) {
16162+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16163+ p->h_mnt = br->br_mnt;
16164+ err = mnt_want_write(p->h_mnt);
16165+ if (unlikely(err)) {
16166+ au_fclr_pin(p->flags, MNT_WRITE);
16167+ goto out_err;
16168+ }
16169+ }
dece6358 16170+ goto out;
1facf9fc 16171+ }
16172+
4a4d8108
AM
16173+ h_dentry = NULL;
16174+ if (p->bindex <= au_dbend(p->dentry))
16175+ h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 16176+
4a4d8108
AM
16177+ p->parent = dget_parent(p->dentry);
16178+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16179+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 16180+
4a4d8108
AM
16181+ h_dir = NULL;
16182+ h_parent = au_h_dptr(p->parent, p->bindex);
16183+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
16184+ if (p->hdir)
16185+ h_dir = p->hdir->hi_inode;
dece6358 16186+
b752ccd1
AM
16187+ /*
16188+ * udba case, or
16189+ * if DI_LOCKED is not set, then p->parent may be different
16190+ * and h_parent can be NULL.
16191+ */
16192+ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
e49829fe 16193+ err = -EBUSY;
4a4d8108
AM
16194+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16195+ di_read_unlock(p->parent, AuLock_IR);
16196+ dput(p->parent);
16197+ p->parent = NULL;
16198+ goto out_err;
16199+ }
1308ab2a 16200+
4a4d8108
AM
16201+ au_igrab(h_dir);
16202+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
1308ab2a 16203+
4a4d8108
AM
16204+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
16205+ err = -EBUSY;
16206+ goto out_unpin;
16207+ }
16208+ if (h_dentry) {
16209+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
16210+ if (unlikely(err)) {
16211+ au_fclr_pin(p->flags, MNT_WRITE);
16212+ goto out_unpin;
16213+ }
1facf9fc 16214+ }
dece6358 16215+
4a4d8108
AM
16216+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16217+ p->h_mnt = br->br_mnt;
16218+ err = mnt_want_write(p->h_mnt);
dece6358 16219+ if (unlikely(err)) {
4a4d8108
AM
16220+ au_fclr_pin(p->flags, MNT_WRITE);
16221+ goto out_unpin;
dece6358
AM
16222+ }
16223+ }
4a4d8108
AM
16224+ goto out; /* success */
16225+
4f0767ce 16226+out_unpin:
4a4d8108 16227+ au_unpin(p);
4f0767ce 16228+out_err:
4a4d8108
AM
16229+ pr_err("err %d\n", err);
16230+ err = au_busy_or_stale();
4f0767ce 16231+out:
1facf9fc 16232+ return err;
16233+}
16234+
4a4d8108
AM
16235+void au_pin_init(struct au_pin *p, struct dentry *dentry,
16236+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16237+ unsigned int udba, unsigned char flags)
16238+{
16239+ p->dentry = dentry;
16240+ p->udba = udba;
16241+ p->lsc_di = lsc_di;
16242+ p->lsc_hi = lsc_hi;
16243+ p->flags = flags;
16244+ p->bindex = bindex;
16245+
16246+ p->parent = NULL;
16247+ p->hdir = NULL;
16248+ p->h_mnt = NULL;
16249+}
16250+
16251+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16252+ unsigned int udba, unsigned char flags)
16253+{
16254+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
16255+ udba, flags);
16256+ return au_do_pin(pin);
16257+}
16258+
dece6358
AM
16259+/* ---------------------------------------------------------------------- */
16260+
1308ab2a 16261+/*
4a4d8108
AM
16262+ * ->setattr() and ->getattr() are called in various cases.
16263+ * chmod, stat: dentry is revalidated.
16264+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
16265+ * unhashed.
16266+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 16267+ */
027c5e7a 16268+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
4a4d8108 16269+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 16270+{
4a4d8108
AM
16271+ int err;
16272+ struct inode *inode;
16273+ struct dentry *parent;
1facf9fc 16274+
1308ab2a 16275+ err = 0;
4a4d8108 16276+ inode = dentry->d_inode;
027c5e7a 16277+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
16278+ parent = dget_parent(dentry);
16279+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 16280+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
16281+ di_read_unlock(parent, AuLock_IR);
16282+ dput(parent);
dece6358 16283+ }
1facf9fc 16284+
4a4d8108 16285+ AuTraceErr(err);
1308ab2a 16286+ return err;
16287+}
dece6358 16288+
4a4d8108
AM
16289+#define AuIcpup_DID_CPUP 1
16290+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
7f207e10
AM
16291+#define au_fset_icpup(flags, name) \
16292+ do { (flags) |= AuIcpup_##name; } while (0)
16293+#define au_fclr_icpup(flags, name) \
16294+ do { (flags) &= ~AuIcpup_##name; } while (0)
1308ab2a 16295+
4a4d8108
AM
16296+struct au_icpup_args {
16297+ unsigned char flags;
16298+ unsigned char pin_flags;
16299+ aufs_bindex_t btgt;
16300+ unsigned int udba;
16301+ struct au_pin pin;
16302+ struct path h_path;
16303+ struct inode *h_inode;
16304+};
1308ab2a 16305+
4a4d8108
AM
16306+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16307+ struct au_icpup_args *a)
1308ab2a 16308+{
16309+ int err;
4a4d8108 16310+ loff_t sz;
e49829fe 16311+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
16312+ struct dentry *hi_wh, *parent;
16313+ struct inode *inode;
16314+ struct file *h_file;
16315+ struct au_wr_dir_args wr_dir_args = {
16316+ .force_btgt = -1,
16317+ .flags = 0
16318+ };
16319+
16320+ bstart = au_dbstart(dentry);
16321+ inode = dentry->d_inode;
16322+ if (S_ISDIR(inode->i_mode))
16323+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
16324+ /* plink or hi_wh() case */
e49829fe 16325+ ibstart = au_ibstart(inode);
027c5e7a 16326+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 16327+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
16328+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
16329+ if (unlikely(err < 0))
16330+ goto out;
16331+ a->btgt = err;
16332+ if (err != bstart)
16333+ au_fset_icpup(a->flags, DID_CPUP);
16334+
16335+ err = 0;
16336+ a->pin_flags = AuPin_MNT_WRITE;
16337+ parent = NULL;
16338+ if (!IS_ROOT(dentry)) {
16339+ au_fset_pin(a->pin_flags, DI_LOCKED);
16340+ parent = dget_parent(dentry);
16341+ di_write_lock_parent(parent);
16342+ }
16343+
16344+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
16345+ if (unlikely(err))
16346+ goto out_parent;
16347+
16348+ a->h_path.dentry = au_h_dptr(dentry, bstart);
16349+ a->h_inode = a->h_path.dentry->d_inode;
16350+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16351+ sz = -1;
16352+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
16353+ sz = ia->ia_size;
16354+
16355+ h_file = NULL;
16356+ hi_wh = NULL;
027c5e7a 16357+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
16358+ hi_wh = au_hi_wh(inode, a->btgt);
16359+ if (!hi_wh) {
16360+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
16361+ if (unlikely(err))
16362+ goto out_unlock;
16363+ hi_wh = au_hi_wh(inode, a->btgt);
16364+ /* todo: revalidate hi_wh? */
16365+ }
16366+ }
16367+
16368+ if (parent) {
16369+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
16370+ di_downgrade_lock(parent, AuLock_IR);
16371+ dput(parent);
16372+ parent = NULL;
16373+ }
16374+ if (!au_ftest_icpup(a->flags, DID_CPUP))
16375+ goto out; /* success */
16376+
16377+ if (!d_unhashed(dentry)) {
16378+ h_file = au_h_open_pre(dentry, bstart);
16379+ if (IS_ERR(h_file)) {
16380+ err = PTR_ERR(h_file);
16381+ h_file = NULL;
16382+ } else
16383+ err = au_sio_cpup_simple(dentry, a->btgt, sz,
16384+ AuCpup_DTIME);
16385+ if (!err)
16386+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16387+ } else if (!hi_wh)
16388+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16389+ else
16390+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 16391+
4f0767ce 16392+out_unlock:
4a4d8108
AM
16393+ mutex_unlock(&a->h_inode->i_mutex);
16394+ au_h_open_post(dentry, bstart, h_file);
16395+ a->h_inode = a->h_path.dentry->d_inode;
dece6358 16396+ if (!err) {
4a4d8108 16397+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
dece6358 16398+ goto out; /* success */
1facf9fc 16399+ }
dece6358 16400+
4a4d8108 16401+ au_unpin(&a->pin);
4f0767ce 16402+out_parent:
4a4d8108
AM
16403+ if (parent) {
16404+ di_write_unlock(parent);
16405+ dput(parent);
16406+ }
4f0767ce 16407+out:
1facf9fc 16408+ return err;
16409+}
16410+
4a4d8108 16411+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 16412+{
4a4d8108
AM
16413+ int err;
16414+ struct inode *inode;
16415+ struct super_block *sb;
16416+ struct file *file;
16417+ struct au_icpup_args *a;
1facf9fc 16418+
4a4d8108
AM
16419+ inode = dentry->d_inode;
16420+ IMustLock(inode);
dece6358 16421+
4a4d8108
AM
16422+ err = -ENOMEM;
16423+ a = kzalloc(sizeof(*a), GFP_NOFS);
16424+ if (unlikely(!a))
16425+ goto out;
1facf9fc 16426+
4a4d8108
AM
16427+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
16428+ ia->ia_valid &= ~ATTR_MODE;
dece6358 16429+
4a4d8108
AM
16430+ file = NULL;
16431+ sb = dentry->d_sb;
e49829fe
JR
16432+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16433+ if (unlikely(err))
16434+ goto out_kfree;
16435+
4a4d8108
AM
16436+ if (ia->ia_valid & ATTR_FILE) {
16437+ /* currently ftruncate(2) only */
16438+ AuDebugOn(!S_ISREG(inode->i_mode));
16439+ file = ia->ia_file;
16440+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
16441+ if (unlikely(err))
16442+ goto out_si;
16443+ ia->ia_file = au_hf_top(file);
16444+ a->udba = AuOpt_UDBA_NONE;
16445+ } else {
16446+ /* fchmod() doesn't pass ia_file */
16447+ a->udba = au_opt_udba(sb);
027c5e7a
AM
16448+ di_write_lock_child(dentry);
16449+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
16450+ if (d_unhashed(dentry))
16451+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
16452+ if (a->udba != AuOpt_UDBA_NONE) {
16453+ AuDebugOn(IS_ROOT(dentry));
16454+ err = au_reval_for_attr(dentry, au_sigen(sb));
16455+ if (unlikely(err))
16456+ goto out_dentry;
16457+ }
dece6358 16458+ }
dece6358 16459+
4a4d8108
AM
16460+ err = au_pin_and_icpup(dentry, ia, a);
16461+ if (unlikely(err < 0))
16462+ goto out_dentry;
16463+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
16464+ ia->ia_file = NULL;
16465+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 16466+ }
dece6358 16467+
4a4d8108
AM
16468+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
16469+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
16470+ == (ATTR_MODE | ATTR_CTIME)) {
16471+ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
16472+ ia->ia_mode);
16473+ if (unlikely(err))
16474+ goto out_unlock;
16475+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
16476+ && (ia->ia_valid & ATTR_CTIME)) {
16477+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
16478+ if (unlikely(err))
16479+ goto out_unlock;
16480+ }
dece6358 16481+
4a4d8108
AM
16482+ if (ia->ia_valid & ATTR_SIZE) {
16483+ struct file *f;
1308ab2a 16484+
953406b4 16485+ if (ia->ia_size < i_size_read(inode))
4a4d8108 16486+ /* unmap only */
953406b4 16487+ truncate_setsize(inode, ia->ia_size);
1308ab2a 16488+
4a4d8108
AM
16489+ f = NULL;
16490+ if (ia->ia_valid & ATTR_FILE)
16491+ f = ia->ia_file;
16492+ mutex_unlock(&a->h_inode->i_mutex);
16493+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
16494+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16495+ } else
16496+ err = vfsub_notify_change(&a->h_path, ia);
16497+ if (!err)
16498+ au_cpup_attr_changeable(inode);
1308ab2a 16499+
4f0767ce 16500+out_unlock:
4a4d8108
AM
16501+ mutex_unlock(&a->h_inode->i_mutex);
16502+ au_unpin(&a->pin);
027c5e7a
AM
16503+ if (unlikely(err))
16504+ au_update_dbstart(dentry);
4f0767ce 16505+out_dentry:
4a4d8108
AM
16506+ di_write_unlock(dentry);
16507+ if (file) {
16508+ fi_write_unlock(file);
16509+ ia->ia_file = file;
16510+ ia->ia_valid |= ATTR_FILE;
16511+ }
4f0767ce 16512+out_si:
4a4d8108 16513+ si_read_unlock(sb);
e49829fe 16514+out_kfree:
4a4d8108 16515+ kfree(a);
4f0767ce 16516+out:
4a4d8108
AM
16517+ AuTraceErr(err);
16518+ return err;
1facf9fc 16519+}
16520+
4a4d8108
AM
16521+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
16522+ unsigned int nlink)
1facf9fc 16523+{
4a4d8108
AM
16524+ inode->i_mode = st->mode;
16525+ inode->i_uid = st->uid;
16526+ inode->i_gid = st->gid;
16527+ inode->i_atime = st->atime;
16528+ inode->i_mtime = st->mtime;
16529+ inode->i_ctime = st->ctime;
1facf9fc 16530+
4a4d8108
AM
16531+ au_cpup_attr_nlink(inode, /*force*/0);
16532+ if (S_ISDIR(inode->i_mode)) {
16533+ inode->i_nlink -= nlink;
16534+ inode->i_nlink += st->nlink;
16535+ }
1facf9fc 16536+
4a4d8108
AM
16537+ spin_lock(&inode->i_lock);
16538+ inode->i_blocks = st->blocks;
16539+ i_size_write(inode, st->size);
16540+ spin_unlock(&inode->i_lock);
1facf9fc 16541+}
16542+
4a4d8108
AM
16543+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
16544+ struct dentry *dentry, struct kstat *st)
1facf9fc 16545+{
4a4d8108
AM
16546+ int err;
16547+ unsigned int mnt_flags;
16548+ aufs_bindex_t bindex;
16549+ unsigned char udba_none, positive;
16550+ struct super_block *sb, *h_sb;
16551+ struct inode *inode;
16552+ struct vfsmount *h_mnt;
16553+ struct dentry *h_dentry;
1facf9fc 16554+
4a4d8108
AM
16555+ sb = dentry->d_sb;
16556+ inode = dentry->d_inode;
7f207e10
AM
16557+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16558+ if (unlikely(err))
16559+ goto out;
4a4d8108
AM
16560+ mnt_flags = au_mntflags(sb);
16561+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 16562+
4a4d8108 16563+ /* support fstat(2) */
027c5e7a 16564+ if (!d_unlinked(dentry) && !udba_none) {
4a4d8108 16565+ unsigned int sigen = au_sigen(sb);
027c5e7a
AM
16566+ err = au_digen_test(dentry, sigen);
16567+ if (!err) {
4a4d8108 16568+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a
AM
16569+ err = au_dbrange_test(dentry);
16570+ if (unlikely(err))
16571+ goto out_unlock;
16572+ } else {
4a4d8108
AM
16573+ AuDebugOn(IS_ROOT(dentry));
16574+ di_write_lock_child(dentry);
027c5e7a
AM
16575+ err = au_dbrange_test(dentry);
16576+ if (!err)
16577+ err = au_reval_for_attr(dentry, sigen);
4a4d8108
AM
16578+ di_downgrade_lock(dentry, AuLock_IR);
16579+ if (unlikely(err))
7f207e10 16580+ goto out_unlock;
4a4d8108
AM
16581+ }
16582+ } else
16583+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 16584+
4a4d8108
AM
16585+ bindex = au_ibstart(inode);
16586+ h_mnt = au_sbr_mnt(sb, bindex);
16587+ h_sb = h_mnt->mnt_sb;
16588+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
16589+ goto out_fill; /* success */
1facf9fc 16590+
4a4d8108
AM
16591+ h_dentry = NULL;
16592+ if (au_dbstart(dentry) == bindex)
16593+ h_dentry = dget(au_h_dptr(dentry, bindex));
16594+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
16595+ h_dentry = au_plink_lkup(inode, bindex);
16596+ if (IS_ERR(h_dentry))
16597+ goto out_fill; /* pretending success */
16598+ }
16599+ /* illegally overlapped or something */
16600+ if (unlikely(!h_dentry))
16601+ goto out_fill; /* pretending success */
16602+
16603+ positive = !!h_dentry->d_inode;
16604+ if (positive)
16605+ err = vfs_getattr(h_mnt, h_dentry, st);
16606+ dput(h_dentry);
16607+ if (!err) {
16608+ if (positive)
16609+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
16610+ goto out_fill; /* success */
1facf9fc 16611+ }
7f207e10
AM
16612+ AuTraceErr(err);
16613+ goto out_unlock;
4a4d8108 16614+
4f0767ce 16615+out_fill:
4a4d8108 16616+ generic_fillattr(inode, st);
7f207e10 16617+out_unlock:
4a4d8108
AM
16618+ di_read_unlock(dentry, AuLock_IR);
16619+ si_read_unlock(sb);
7f207e10
AM
16620+out:
16621+ AuTraceErr(err);
4a4d8108 16622+ return err;
1facf9fc 16623+}
16624+
16625+/* ---------------------------------------------------------------------- */
16626+
4a4d8108
AM
16627+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
16628+ int bufsiz)
1facf9fc 16629+{
16630+ int err;
4a4d8108
AM
16631+ struct super_block *sb;
16632+ struct dentry *h_dentry;
1facf9fc 16633+
4a4d8108
AM
16634+ err = -EINVAL;
16635+ h_dentry = au_h_dptr(dentry, bindex);
16636+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
16637+ goto out;
1facf9fc 16638+
4a4d8108
AM
16639+ err = security_inode_readlink(h_dentry);
16640+ if (unlikely(err))
dece6358 16641+ goto out;
1facf9fc 16642+
4a4d8108
AM
16643+ sb = dentry->d_sb;
16644+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
16645+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
16646+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
1facf9fc 16647+ }
4a4d8108 16648+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 16649+
4f0767ce 16650+out:
4a4d8108
AM
16651+ return err;
16652+}
1facf9fc 16653+
4a4d8108
AM
16654+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
16655+{
16656+ int err;
1facf9fc 16657+
027c5e7a
AM
16658+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16659+ if (unlikely(err))
16660+ goto out;
16661+ err = au_d_hashed_positive(dentry);
16662+ if (!err)
16663+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 16664+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16665+
027c5e7a 16666+out:
4a4d8108
AM
16667+ return err;
16668+}
1facf9fc 16669+
4a4d8108
AM
16670+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
16671+{
16672+ int err;
4a4d8108 16673+ mm_segment_t old_fs;
b752ccd1
AM
16674+ union {
16675+ char *k;
16676+ char __user *u;
16677+ } buf;
1facf9fc 16678+
4a4d8108 16679+ err = -ENOMEM;
b752ccd1
AM
16680+ buf.k = __getname_gfp(GFP_NOFS);
16681+ if (unlikely(!buf.k))
4a4d8108 16682+ goto out;
1facf9fc 16683+
027c5e7a
AM
16684+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16685+ if (unlikely(err))
16686+ goto out_name;
16687+
16688+ err = au_d_hashed_positive(dentry);
16689+ if (!err) {
16690+ old_fs = get_fs();
16691+ set_fs(KERNEL_DS);
16692+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
16693+ set_fs(old_fs);
16694+ }
4a4d8108 16695+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16696+
4a4d8108 16697+ if (err >= 0) {
b752ccd1 16698+ buf.k[err] = 0;
4a4d8108 16699+ /* will be freed by put_link */
b752ccd1 16700+ nd_set_link(nd, buf.k);
4a4d8108 16701+ return NULL; /* success */
1308ab2a 16702+ }
1facf9fc 16703+
027c5e7a
AM
16704+out_name:
16705+ __putname(buf.k);
4f0767ce 16706+out:
4a4d8108
AM
16707+ path_put(&nd->path);
16708+ AuTraceErr(err);
16709+ return ERR_PTR(err);
16710+}
1facf9fc 16711+
4a4d8108
AM
16712+static void aufs_put_link(struct dentry *dentry __maybe_unused,
16713+ struct nameidata *nd, void *cookie __maybe_unused)
16714+{
16715+ __putname(nd_get_link(nd));
16716+}
1facf9fc 16717+
4a4d8108 16718+/* ---------------------------------------------------------------------- */
1facf9fc 16719+
4a4d8108
AM
16720+static void aufs_truncate_range(struct inode *inode __maybe_unused,
16721+ loff_t start __maybe_unused,
16722+ loff_t end __maybe_unused)
16723+{
16724+ AuUnsupport();
16725+}
1facf9fc 16726+
4a4d8108 16727+/* ---------------------------------------------------------------------- */
1308ab2a 16728+
4a4d8108
AM
16729+struct inode_operations aufs_symlink_iop = {
16730+ .permission = aufs_permission,
16731+ .setattr = aufs_setattr,
16732+ .getattr = aufs_getattr,
16733+ .readlink = aufs_readlink,
16734+ .follow_link = aufs_follow_link,
16735+ .put_link = aufs_put_link
16736+};
16737+
16738+struct inode_operations aufs_dir_iop = {
16739+ .create = aufs_create,
16740+ .lookup = aufs_lookup,
16741+ .link = aufs_link,
16742+ .unlink = aufs_unlink,
16743+ .symlink = aufs_symlink,
16744+ .mkdir = aufs_mkdir,
16745+ .rmdir = aufs_rmdir,
16746+ .mknod = aufs_mknod,
16747+ .rename = aufs_rename,
16748+
16749+ .permission = aufs_permission,
16750+ .setattr = aufs_setattr,
16751+ .getattr = aufs_getattr
16752+};
16753+
16754+struct inode_operations aufs_iop = {
16755+ .permission = aufs_permission,
16756+ .setattr = aufs_setattr,
16757+ .getattr = aufs_getattr,
16758+ .truncate_range = aufs_truncate_range
16759+};
7f207e10
AM
16760diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
16761--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
16762+++ linux/fs/aufs/i_op_del.c 2011-08-24 13:30:24.731313534 +0200
16763@@ -0,0 +1,478 @@
1facf9fc 16764+/*
027c5e7a 16765+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 16766+ *
16767+ * This program, aufs is free software; you can redistribute it and/or modify
16768+ * it under the terms of the GNU General Public License as published by
16769+ * the Free Software Foundation; either version 2 of the License, or
16770+ * (at your option) any later version.
dece6358
AM
16771+ *
16772+ * This program is distributed in the hope that it will be useful,
16773+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16774+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16775+ * GNU General Public License for more details.
16776+ *
16777+ * You should have received a copy of the GNU General Public License
16778+ * along with this program; if not, write to the Free Software
16779+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 16780+ */
16781+
16782+/*
4a4d8108 16783+ * inode operations (del entry)
1308ab2a 16784+ */
dece6358 16785+
1308ab2a 16786+#include "aufs.h"
dece6358 16787+
4a4d8108
AM
16788+/*
16789+ * decide if a new whiteout for @dentry is necessary or not.
16790+ * when it is necessary, prepare the parent dir for the upper branch whose
16791+ * branch index is @bcpup for creation. the actual creation of the whiteout will
16792+ * be done by caller.
16793+ * return value:
16794+ * 0: wh is unnecessary
16795+ * plus: wh is necessary
16796+ * minus: error
16797+ */
16798+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 16799+{
4a4d8108
AM
16800+ int need_wh, err;
16801+ aufs_bindex_t bstart;
16802+ struct super_block *sb;
dece6358 16803+
4a4d8108
AM
16804+ sb = dentry->d_sb;
16805+ bstart = au_dbstart(dentry);
16806+ if (*bcpup < 0) {
16807+ *bcpup = bstart;
16808+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
16809+ err = AuWbrCopyup(au_sbi(sb), dentry);
16810+ *bcpup = err;
16811+ if (unlikely(err < 0))
16812+ goto out;
16813+ }
16814+ } else
16815+ AuDebugOn(bstart < *bcpup
16816+ || au_test_ro(sb, *bcpup, dentry->d_inode));
16817+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 16818+
4a4d8108
AM
16819+ if (*bcpup != bstart) {
16820+ err = au_cpup_dirs(dentry, *bcpup);
16821+ if (unlikely(err))
16822+ goto out;
16823+ need_wh = 1;
16824+ } else {
027c5e7a 16825+ struct au_dinfo *dinfo, *tmp;
4a4d8108 16826+
027c5e7a
AM
16827+ need_wh = -ENOMEM;
16828+ dinfo = au_di(dentry);
16829+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
16830+ if (tmp) {
16831+ au_di_cp(tmp, dinfo);
16832+ au_di_swap(tmp, dinfo);
16833+ /* returns the number of positive dentries */
16834+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
16835+ /*nd*/NULL);
16836+ au_di_swap(tmp, dinfo);
16837+ au_rw_write_unlock(&tmp->di_rwsem);
16838+ au_di_free(tmp);
4a4d8108
AM
16839+ }
16840+ }
16841+ AuDbg("need_wh %d\n", need_wh);
16842+ err = need_wh;
16843+
4f0767ce 16844+out:
4a4d8108 16845+ return err;
1facf9fc 16846+}
16847+
4a4d8108
AM
16848+/*
16849+ * simple tests for the del-entry operations.
16850+ * following the checks in vfs, plus the parent-child relationship.
16851+ */
16852+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16853+ struct dentry *h_parent, int isdir)
1facf9fc 16854+{
4a4d8108
AM
16855+ int err;
16856+ umode_t h_mode;
16857+ struct dentry *h_dentry, *h_latest;
1308ab2a 16858+ struct inode *h_inode;
1facf9fc 16859+
4a4d8108
AM
16860+ h_dentry = au_h_dptr(dentry, bindex);
16861+ h_inode = h_dentry->d_inode;
16862+ if (dentry->d_inode) {
16863+ err = -ENOENT;
16864+ if (unlikely(!h_inode || !h_inode->i_nlink))
16865+ goto out;
1facf9fc 16866+
4a4d8108
AM
16867+ h_mode = h_inode->i_mode;
16868+ if (!isdir) {
16869+ err = -EISDIR;
16870+ if (unlikely(S_ISDIR(h_mode)))
16871+ goto out;
16872+ } else if (unlikely(!S_ISDIR(h_mode))) {
16873+ err = -ENOTDIR;
16874+ goto out;
16875+ }
16876+ } else {
16877+ /* rename(2) case */
16878+ err = -EIO;
16879+ if (unlikely(h_inode))
16880+ goto out;
16881+ }
1facf9fc 16882+
4a4d8108
AM
16883+ err = -ENOENT;
16884+ /* expected parent dir is locked */
16885+ if (unlikely(h_parent != h_dentry->d_parent))
16886+ goto out;
16887+ err = 0;
16888+
16889+ /*
16890+ * rmdir a dir may break the consistency on some filesystem.
16891+ * let's try heavy test.
16892+ */
16893+ err = -EACCES;
16894+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
16895+ goto out;
16896+
16897+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
16898+ au_sbr(dentry->d_sb, bindex));
16899+ err = -EIO;
16900+ if (IS_ERR(h_latest))
16901+ goto out;
16902+ if (h_latest == h_dentry)
16903+ err = 0;
16904+ dput(h_latest);
16905+
4f0767ce 16906+out:
4a4d8108 16907+ return err;
1308ab2a 16908+}
1facf9fc 16909+
4a4d8108
AM
16910+/*
16911+ * decide the branch where we operate for @dentry. the branch index will be set
16912+ * to @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
16913+ * dir for reverting.
16914+ * when a new whiteout is necessary, create it.
16915+ */
16916+static struct dentry*
16917+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
16918+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 16919+{
4a4d8108
AM
16920+ struct dentry *wh_dentry;
16921+ struct super_block *sb;
16922+ struct path h_path;
16923+ int err, need_wh;
16924+ unsigned int udba;
16925+ aufs_bindex_t bcpup;
dece6358 16926+
4a4d8108
AM
16927+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
16928+ wh_dentry = ERR_PTR(need_wh);
16929+ if (unlikely(need_wh < 0))
16930+ goto out;
16931+
16932+ sb = dentry->d_sb;
16933+ udba = au_opt_udba(sb);
16934+ bcpup = *rbcpup;
16935+ err = au_pin(pin, dentry, bcpup, udba,
16936+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
16937+ wh_dentry = ERR_PTR(err);
16938+ if (unlikely(err))
16939+ goto out;
16940+
16941+ h_path.dentry = au_pinned_h_parent(pin);
16942+ if (udba != AuOpt_UDBA_NONE
16943+ && au_dbstart(dentry) == bcpup) {
16944+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
16945+ wh_dentry = ERR_PTR(err);
16946+ if (unlikely(err))
16947+ goto out_unpin;
16948+ }
16949+
16950+ h_path.mnt = au_sbr_mnt(sb, bcpup);
16951+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
16952+ wh_dentry = NULL;
16953+ if (!need_wh)
16954+ goto out; /* success, no need to create whiteout */
16955+
16956+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
16957+ if (IS_ERR(wh_dentry))
16958+ goto out_unpin;
16959+
16960+ /* returns with the parent is locked and wh_dentry is dget-ed */
16961+ goto out; /* success */
16962+
4f0767ce 16963+out_unpin:
4a4d8108 16964+ au_unpin(pin);
4f0767ce 16965+out:
4a4d8108 16966+ return wh_dentry;
1facf9fc 16967+}
16968+
4a4d8108
AM
16969+/*
16970+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
16971+ * in order to be revertible and save time for removing many child whiteouts
16972+ * under the dir.
16973+ * returns 1 when there are too many child whiteouts and caller should remove
16974+ * them asynchronously. returns 0 when the number of children is small enough to
16975+ * remove now or the branch fs is a remote fs.
16976+ * otherwise return an error.
16977+ */
16978+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
16979+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 16980+{
4a4d8108
AM
16981+ int rmdir_later, err, dirwh;
16982+ struct dentry *h_dentry;
16983+ struct super_block *sb;
16984+
16985+ sb = dentry->d_sb;
16986+ SiMustAnyLock(sb);
16987+ h_dentry = au_h_dptr(dentry, bindex);
16988+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
16989+ if (unlikely(err))
16990+ goto out;
16991+
16992+ /* stop monitoring */
16993+ au_hn_free(au_hi(dentry->d_inode, bindex));
16994+
16995+ if (!au_test_fs_remote(h_dentry->d_sb)) {
16996+ dirwh = au_sbi(sb)->si_dirwh;
16997+ rmdir_later = (dirwh <= 1);
16998+ if (!rmdir_later)
16999+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
17000+ dirwh);
17001+ if (rmdir_later)
17002+ return rmdir_later;
17003+ }
1facf9fc 17004+
4a4d8108
AM
17005+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
17006+ if (unlikely(err)) {
17007+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
17008+ AuDLNPair(h_dentry), bindex, err);
17009+ err = 0;
17010+ }
dece6358 17011+
4f0767ce 17012+out:
4a4d8108
AM
17013+ AuTraceErr(err);
17014+ return err;
17015+}
1308ab2a 17016+
4a4d8108
AM
17017+/*
17018+ * final procedure for deleting an entry.
17019+ * maintain dentry and iattr.
17020+ */
17021+static void epilog(struct inode *dir, struct dentry *dentry,
17022+ aufs_bindex_t bindex)
17023+{
17024+ struct inode *inode;
1308ab2a 17025+
4a4d8108
AM
17026+ inode = dentry->d_inode;
17027+ d_drop(dentry);
17028+ inode->i_ctime = dir->i_ctime;
1308ab2a 17029+
4a4d8108
AM
17030+ if (au_ibstart(dir) == bindex)
17031+ au_cpup_attr_timesizes(dir);
17032+ dir->i_version++;
1facf9fc 17033+}
17034+
4a4d8108
AM
17035+/*
17036+ * when an error happened, remove the created whiteout and revert everything.
17037+ */
7f207e10
AM
17038+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
17039+ aufs_bindex_t bwh, struct dentry *wh_dentry,
17040+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 17041+{
4a4d8108
AM
17042+ int rerr;
17043+ struct path h_path = {
17044+ .dentry = wh_dentry,
7f207e10 17045+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 17046+ };
dece6358 17047+
7f207e10 17048+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
17049+ if (!rerr) {
17050+ au_set_dbwh(dentry, bwh);
17051+ au_dtime_revert(dt);
17052+ return 0;
17053+ }
dece6358 17054+
4a4d8108
AM
17055+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
17056+ AuDLNPair(dentry), err, rerr);
17057+ return -EIO;
1facf9fc 17058+}
17059+
4a4d8108 17060+/* ---------------------------------------------------------------------- */
1facf9fc 17061+
4a4d8108 17062+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 17063+{
4a4d8108
AM
17064+ int err;
17065+ aufs_bindex_t bwh, bindex, bstart;
17066+ struct au_dtime dt;
17067+ struct au_pin pin;
17068+ struct path h_path;
17069+ struct inode *inode, *h_dir;
17070+ struct dentry *parent, *wh_dentry;
1facf9fc 17071+
4a4d8108 17072+ IMustLock(dir);
027c5e7a
AM
17073+
17074+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17075+ if (unlikely(err))
17076+ goto out;
17077+ err = au_d_hashed_positive(dentry);
17078+ if (unlikely(err))
17079+ goto out_unlock;
4a4d8108 17080+ inode = dentry->d_inode;
4a4d8108 17081+ IMustLock(inode);
027c5e7a
AM
17082+ err = -EISDIR;
17083+ if (unlikely(S_ISDIR(inode->i_mode)))
17084+ goto out_unlock; /* possible? */
1facf9fc 17085+
4a4d8108
AM
17086+ bstart = au_dbstart(dentry);
17087+ bwh = au_dbwh(dentry);
17088+ bindex = -1;
027c5e7a
AM
17089+ parent = dentry->d_parent; /* dir inode is locked */
17090+ di_write_lock_parent(parent);
4a4d8108
AM
17091+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
17092+ err = PTR_ERR(wh_dentry);
17093+ if (IS_ERR(wh_dentry))
027c5e7a 17094+ goto out_parent;
1facf9fc 17095+
4a4d8108
AM
17096+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
17097+ h_path.dentry = au_h_dptr(dentry, bstart);
17098+ dget(h_path.dentry);
17099+ if (bindex == bstart) {
17100+ h_dir = au_pinned_h_dir(&pin);
17101+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17102+ } else {
17103+ /* dir inode is locked */
17104+ h_dir = wh_dentry->d_parent->d_inode;
17105+ IMustLock(h_dir);
17106+ err = 0;
17107+ }
dece6358 17108+
4a4d8108 17109+ if (!err) {
7f207e10 17110+ vfsub_drop_nlink(inode);
4a4d8108
AM
17111+ epilog(dir, dentry, bindex);
17112+
17113+ /* update target timestamps */
17114+ if (bindex == bstart) {
17115+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
17116+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
17117+ } else
17118+ /* todo: this timestamp may be reverted later */
17119+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 17120+ goto out_unpin; /* success */
1facf9fc 17121+ }
17122+
4a4d8108
AM
17123+ /* revert */
17124+ if (wh_dentry) {
17125+ int rerr;
17126+
7f207e10 17127+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17128+ if (rerr)
17129+ err = rerr;
dece6358 17130+ }
1facf9fc 17131+
027c5e7a 17132+out_unpin:
4a4d8108
AM
17133+ au_unpin(&pin);
17134+ dput(wh_dentry);
17135+ dput(h_path.dentry);
027c5e7a 17136+out_parent:
4a4d8108 17137+ di_write_unlock(parent);
027c5e7a 17138+out_unlock:
4a4d8108 17139+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 17140+out:
4a4d8108 17141+ return err;
dece6358
AM
17142+}
17143+
4a4d8108 17144+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 17145+{
4a4d8108
AM
17146+ int err, rmdir_later;
17147+ aufs_bindex_t bwh, bindex, bstart;
17148+ struct au_dtime dt;
17149+ struct au_pin pin;
17150+ struct inode *inode;
17151+ struct dentry *parent, *wh_dentry, *h_dentry;
17152+ struct au_whtmp_rmdir *args;
1facf9fc 17153+
4a4d8108 17154+ IMustLock(dir);
027c5e7a
AM
17155+
17156+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
17157+ if (unlikely(err))
4a4d8108 17158+ goto out;
53392da6
AM
17159+ err = au_alive_dir(dentry);
17160+ if (unlikely(err))
027c5e7a 17161+ goto out_unlock;
53392da6 17162+ inode = dentry->d_inode;
4a4d8108 17163+ IMustLock(inode);
027c5e7a
AM
17164+ err = -ENOTDIR;
17165+ if (unlikely(!S_ISDIR(inode->i_mode)))
17166+ goto out_unlock; /* possible? */
dece6358 17167+
4a4d8108
AM
17168+ err = -ENOMEM;
17169+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
17170+ if (unlikely(!args))
17171+ goto out_unlock;
dece6358 17172+
4a4d8108
AM
17173+ parent = dentry->d_parent; /* dir inode is locked */
17174+ di_write_lock_parent(parent);
17175+ err = au_test_empty(dentry, &args->whlist);
17176+ if (unlikely(err))
027c5e7a 17177+ goto out_parent;
1facf9fc 17178+
4a4d8108
AM
17179+ bstart = au_dbstart(dentry);
17180+ bwh = au_dbwh(dentry);
17181+ bindex = -1;
17182+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
17183+ err = PTR_ERR(wh_dentry);
17184+ if (IS_ERR(wh_dentry))
027c5e7a 17185+ goto out_parent;
1facf9fc 17186+
4a4d8108
AM
17187+ h_dentry = au_h_dptr(dentry, bstart);
17188+ dget(h_dentry);
17189+ rmdir_later = 0;
17190+ if (bindex == bstart) {
17191+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
17192+ if (err > 0) {
17193+ rmdir_later = err;
17194+ err = 0;
17195+ }
17196+ } else {
17197+ /* stop monitoring */
17198+ au_hn_free(au_hi(inode, bstart));
17199+
17200+ /* dir inode is locked */
17201+ IMustLock(wh_dentry->d_parent->d_inode);
1facf9fc 17202+ err = 0;
17203+ }
17204+
4a4d8108 17205+ if (!err) {
027c5e7a 17206+ vfsub_dead_dir(inode);
4a4d8108
AM
17207+ au_set_dbdiropq(dentry, -1);
17208+ epilog(dir, dentry, bindex);
1308ab2a 17209+
4a4d8108
AM
17210+ if (rmdir_later) {
17211+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
17212+ args = NULL;
17213+ }
1308ab2a 17214+
4a4d8108 17215+ goto out_unpin; /* success */
1facf9fc 17216+ }
17217+
4a4d8108
AM
17218+ /* revert */
17219+ AuLabel(revert);
17220+ if (wh_dentry) {
17221+ int rerr;
1308ab2a 17222+
7f207e10 17223+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17224+ if (rerr)
17225+ err = rerr;
1facf9fc 17226+ }
17227+
4f0767ce 17228+out_unpin:
4a4d8108
AM
17229+ au_unpin(&pin);
17230+ dput(wh_dentry);
17231+ dput(h_dentry);
027c5e7a 17232+out_parent:
4a4d8108
AM
17233+ di_write_unlock(parent);
17234+ if (args)
17235+ au_whtmp_rmdir_free(args);
4f0767ce 17236+out_unlock:
4a4d8108 17237+ aufs_read_unlock(dentry, AuLock_DW);
4f0767ce 17238+out:
4a4d8108
AM
17239+ AuTraceErr(err);
17240+ return err;
dece6358 17241+}
7f207e10
AM
17242diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
17243--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
53392da6 17244+++ linux/fs/aufs/i_op_ren.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 17245@@ -0,0 +1,1017 @@
1facf9fc 17246+/*
027c5e7a 17247+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 17248+ *
17249+ * This program, aufs is free software; you can redistribute it and/or modify
17250+ * it under the terms of the GNU General Public License as published by
17251+ * the Free Software Foundation; either version 2 of the License, or
17252+ * (at your option) any later version.
dece6358
AM
17253+ *
17254+ * This program is distributed in the hope that it will be useful,
17255+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17256+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17257+ * GNU General Public License for more details.
17258+ *
17259+ * You should have received a copy of the GNU General Public License
17260+ * along with this program; if not, write to the Free Software
17261+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17262+ */
17263+
17264+/*
4a4d8108
AM
17265+ * inode operation (rename entry)
17266+ * todo: this is crazy monster
1facf9fc 17267+ */
17268+
17269+#include "aufs.h"
17270+
4a4d8108
AM
17271+enum { AuSRC, AuDST, AuSrcDst };
17272+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 17273+
4a4d8108
AM
17274+#define AuRen_ISDIR 1
17275+#define AuRen_ISSAMEDIR (1 << 1)
17276+#define AuRen_WHSRC (1 << 2)
17277+#define AuRen_WHDST (1 << 3)
17278+#define AuRen_MNT_WRITE (1 << 4)
17279+#define AuRen_DT_DSTDIR (1 << 5)
17280+#define AuRen_DIROPQ (1 << 6)
17281+#define AuRen_CPUP (1 << 7)
17282+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
17283+#define au_fset_ren(flags, name) \
17284+ do { (flags) |= AuRen_##name; } while (0)
17285+#define au_fclr_ren(flags, name) \
17286+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 17287+
4a4d8108
AM
17288+struct au_ren_args {
17289+ struct {
17290+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
17291+ *wh_dentry;
17292+ struct inode *dir, *inode;
17293+ struct au_hinode *hdir;
17294+ struct au_dtime dt[AuParentChild];
17295+ aufs_bindex_t bstart;
17296+ } sd[AuSrcDst];
1facf9fc 17297+
4a4d8108
AM
17298+#define src_dentry sd[AuSRC].dentry
17299+#define src_dir sd[AuSRC].dir
17300+#define src_inode sd[AuSRC].inode
17301+#define src_h_dentry sd[AuSRC].h_dentry
17302+#define src_parent sd[AuSRC].parent
17303+#define src_h_parent sd[AuSRC].h_parent
17304+#define src_wh_dentry sd[AuSRC].wh_dentry
17305+#define src_hdir sd[AuSRC].hdir
17306+#define src_h_dir sd[AuSRC].hdir->hi_inode
17307+#define src_dt sd[AuSRC].dt
17308+#define src_bstart sd[AuSRC].bstart
1facf9fc 17309+
4a4d8108
AM
17310+#define dst_dentry sd[AuDST].dentry
17311+#define dst_dir sd[AuDST].dir
17312+#define dst_inode sd[AuDST].inode
17313+#define dst_h_dentry sd[AuDST].h_dentry
17314+#define dst_parent sd[AuDST].parent
17315+#define dst_h_parent sd[AuDST].h_parent
17316+#define dst_wh_dentry sd[AuDST].wh_dentry
17317+#define dst_hdir sd[AuDST].hdir
17318+#define dst_h_dir sd[AuDST].hdir->hi_inode
17319+#define dst_dt sd[AuDST].dt
17320+#define dst_bstart sd[AuDST].bstart
17321+
17322+ struct dentry *h_trap;
17323+ struct au_branch *br;
17324+ struct au_hinode *src_hinode;
17325+ struct path h_path;
17326+ struct au_nhash whlist;
027c5e7a 17327+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 17328+
1308ab2a 17329+ unsigned int flags;
1facf9fc 17330+
4a4d8108
AM
17331+ struct au_whtmp_rmdir *thargs;
17332+ struct dentry *h_dst;
17333+};
1308ab2a 17334+
4a4d8108 17335+/* ---------------------------------------------------------------------- */
1308ab2a 17336+
4a4d8108
AM
17337+/*
17338+ * functions for reverting.
17339+ * when an error happened in a single rename systemcall, we should revert
17340+ * everything as if nothing happened.
17341+ * we don't need to revert the copied-up/down parent dirs since they are
17342+ * harmless.
17343+ */
1facf9fc 17344+
4a4d8108
AM
17345+#define RevertFailure(fmt, ...) do { \
17346+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
17347+ ##__VA_ARGS__, err, rerr); \
17348+ err = -EIO; \
17349+} while (0)
1facf9fc 17350+
4a4d8108 17351+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 17352+{
4a4d8108 17353+ int rerr;
1facf9fc 17354+
4a4d8108
AM
17355+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17356+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
17357+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 17358+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108
AM
17359+ if (rerr)
17360+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
17361+}
1facf9fc 17362+
4a4d8108
AM
17363+static void au_ren_rev_rename(int err, struct au_ren_args *a)
17364+{
17365+ int rerr;
1facf9fc 17366+
4a4d8108
AM
17367+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
17368+ a->br, /*nd*/NULL);
17369+ rerr = PTR_ERR(a->h_path.dentry);
17370+ if (IS_ERR(a->h_path.dentry)) {
17371+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
17372+ return;
1facf9fc 17373+ }
17374+
4a4d8108
AM
17375+ rerr = vfsub_rename(a->dst_h_dir,
17376+ au_h_dptr(a->src_dentry, a->btgt),
17377+ a->src_h_dir, &a->h_path);
17378+ d_drop(a->h_path.dentry);
17379+ dput(a->h_path.dentry);
17380+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
17381+ if (rerr)
17382+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
1facf9fc 17383+}
17384+
4a4d8108 17385+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
1facf9fc 17386+{
4a4d8108 17387+ int rerr;
1facf9fc 17388+
4a4d8108
AM
17389+ a->h_path.dentry = a->dst_h_dentry;
17390+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
17391+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
17392+ au_set_dbstart(a->src_dentry, a->src_bstart);
17393+ if (rerr)
17394+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
1facf9fc 17395+}
17396+
4a4d8108 17397+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 17398+{
4a4d8108 17399+ int rerr;
dece6358 17400+
4a4d8108
AM
17401+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
17402+ a->br, /*nd*/NULL);
17403+ rerr = PTR_ERR(a->h_path.dentry);
17404+ if (IS_ERR(a->h_path.dentry)) {
17405+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
17406+ return;
17407+ }
17408+ if (a->h_path.dentry->d_inode) {
17409+ d_drop(a->h_path.dentry);
17410+ dput(a->h_path.dentry);
17411+ return;
dece6358
AM
17412+ }
17413+
4a4d8108
AM
17414+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
17415+ d_drop(a->h_path.dentry);
17416+ dput(a->h_path.dentry);
17417+ if (!rerr)
17418+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
17419+ else
17420+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
17421+}
1308ab2a 17422+
4a4d8108
AM
17423+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
17424+{
17425+ int rerr;
1308ab2a 17426+
4a4d8108
AM
17427+ a->h_path.dentry = a->src_wh_dentry;
17428+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 17429+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108
AM
17430+ if (rerr)
17431+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
17432+}
4a4d8108 17433+#undef RevertFailure
1facf9fc 17434+
1308ab2a 17435+/* ---------------------------------------------------------------------- */
17436+
4a4d8108
AM
17437+/*
17438+ * when we have to copyup the renaming entry, do it with the rename-target name
17439+ * in order to minimize the cost (the later actual rename is unnecessary).
17440+ * otherwise rename it on the target branch.
17441+ */
17442+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 17443+{
dece6358 17444+ int err;
4a4d8108 17445+ struct dentry *d;
1facf9fc 17446+
4a4d8108
AM
17447+ d = a->src_dentry;
17448+ if (au_dbstart(d) == a->btgt) {
17449+ a->h_path.dentry = a->dst_h_dentry;
17450+ if (au_ftest_ren(a->flags, DIROPQ)
17451+ && au_dbdiropq(d) == a->btgt)
17452+ au_fclr_ren(a->flags, DIROPQ);
17453+ AuDebugOn(au_dbstart(d) != a->btgt);
17454+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
17455+ a->dst_h_dir, &a->h_path);
17456+ } else {
17457+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17458+ struct file *h_file;
1308ab2a 17459+
4a4d8108
AM
17460+ au_fset_ren(a->flags, CPUP);
17461+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17462+ au_set_dbstart(d, a->btgt);
17463+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
17464+ h_file = au_h_open_pre(d, a->src_bstart);
17465+ if (IS_ERR(h_file)) {
17466+ err = PTR_ERR(h_file);
17467+ h_file = NULL;
17468+ } else
17469+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
17470+ !AuCpup_DTIME, a->dst_parent);
17471+ mutex_unlock(h_mtx);
17472+ au_h_open_post(d, a->src_bstart, h_file);
17473+ if (!err) {
17474+ d = a->dst_dentry;
17475+ au_set_h_dptr(d, a->btgt, NULL);
17476+ au_update_dbstart(d);
17477+ } else {
17478+ au_set_h_dptr(d, a->btgt, NULL);
17479+ au_set_dbstart(d, a->src_bstart);
17480+ }
1308ab2a 17481+ }
027c5e7a
AM
17482+ if (!err && a->h_dst)
17483+ /* it will be set to dinfo later */
17484+ dget(a->h_dst);
1facf9fc 17485+
dece6358
AM
17486+ return err;
17487+}
1facf9fc 17488+
4a4d8108
AM
17489+/* cf. aufs_rmdir() */
17490+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 17491+{
4a4d8108
AM
17492+ int err;
17493+ struct inode *dir;
1facf9fc 17494+
4a4d8108
AM
17495+ dir = a->dst_dir;
17496+ SiMustAnyLock(dir->i_sb);
17497+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
17498+ au_sbi(dir->i_sb)->si_dirwh)
17499+ || au_test_fs_remote(a->h_dst->d_sb)) {
17500+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
17501+ if (unlikely(err))
17502+ pr_warning("failed removing whtmp dir %.*s (%d), "
17503+ "ignored.\n", AuDLNPair(a->h_dst), err);
17504+ } else {
17505+ au_nhash_wh_free(&a->thargs->whlist);
17506+ a->thargs->whlist = a->whlist;
17507+ a->whlist.nh_num = 0;
17508+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
17509+ dput(a->h_dst);
17510+ a->thargs = NULL;
17511+ }
17512+
17513+ return 0;
1308ab2a 17514+}
1facf9fc 17515+
4a4d8108
AM
17516+/* make it 'opaque' dir. */
17517+static int au_ren_diropq(struct au_ren_args *a)
17518+{
17519+ int err;
17520+ struct dentry *diropq;
1facf9fc 17521+
4a4d8108 17522+ err = 0;
027c5e7a 17523+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
17524+ a->src_hinode = au_hi(a->src_inode, a->btgt);
17525+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17526+ diropq = au_diropq_create(a->src_dentry, a->btgt);
17527+ au_hn_imtx_unlock(a->src_hinode);
17528+ if (IS_ERR(diropq))
17529+ err = PTR_ERR(diropq);
17530+ dput(diropq);
1facf9fc 17531+
4a4d8108
AM
17532+ return err;
17533+}
1facf9fc 17534+
4a4d8108
AM
17535+static int do_rename(struct au_ren_args *a)
17536+{
17537+ int err;
17538+ struct dentry *d, *h_d;
1facf9fc 17539+
4a4d8108
AM
17540+ /* prepare workqueue args for asynchronous rmdir */
17541+ h_d = a->dst_h_dentry;
17542+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
17543+ err = -ENOMEM;
17544+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
17545+ if (unlikely(!a->thargs))
17546+ goto out;
17547+ a->h_dst = dget(h_d);
17548+ }
1facf9fc 17549+
4a4d8108
AM
17550+ /* create whiteout for src_dentry */
17551+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
17552+ a->src_bwh = au_dbwh(a->src_dentry);
17553+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
17554+ a->src_wh_dentry
17555+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
17556+ err = PTR_ERR(a->src_wh_dentry);
17557+ if (IS_ERR(a->src_wh_dentry))
17558+ goto out_thargs;
17559+ }
1facf9fc 17560+
4a4d8108
AM
17561+ /* lookup whiteout for dentry */
17562+ if (au_ftest_ren(a->flags, WHDST)) {
17563+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
17564+ a->br);
17565+ err = PTR_ERR(h_d);
17566+ if (IS_ERR(h_d))
17567+ goto out_whsrc;
17568+ if (!h_d->d_inode)
17569+ dput(h_d);
17570+ else
17571+ a->dst_wh_dentry = h_d;
17572+ }
1facf9fc 17573+
4a4d8108
AM
17574+ /* rename dentry to tmpwh */
17575+ if (a->thargs) {
17576+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
17577+ if (unlikely(err))
17578+ goto out_whdst;
dece6358 17579+
4a4d8108
AM
17580+ d = a->dst_dentry;
17581+ au_set_h_dptr(d, a->btgt, NULL);
17582+ err = au_lkup_neg(d, a->btgt);
17583+ if (unlikely(err))
17584+ goto out_whtmp;
17585+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
17586+ }
1facf9fc 17587+
4a4d8108
AM
17588+ /* cpup src */
17589+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
17590+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17591+ struct file *h_file;
1facf9fc 17592+
4a4d8108
AM
17593+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17594+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
17595+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
17596+ if (IS_ERR(h_file)) {
17597+ err = PTR_ERR(h_file);
17598+ h_file = NULL;
17599+ } else
17600+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
17601+ !AuCpup_DTIME);
17602+ mutex_unlock(h_mtx);
17603+ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
17604+ if (unlikely(err))
17605+ goto out_whtmp;
17606+ }
1facf9fc 17607+
4a4d8108
AM
17608+ /* rename by vfs_rename or cpup */
17609+ d = a->dst_dentry;
17610+ if (au_ftest_ren(a->flags, ISDIR)
17611+ && (a->dst_wh_dentry
17612+ || au_dbdiropq(d) == a->btgt
17613+ /* hide the lower to keep xino */
17614+ || a->btgt < au_dbend(d)
17615+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
17616+ au_fset_ren(a->flags, DIROPQ);
17617+ err = au_ren_or_cpup(a);
17618+ if (unlikely(err))
17619+ /* leave the copied-up one */
17620+ goto out_whtmp;
1308ab2a 17621+
4a4d8108
AM
17622+ /* make dir opaque */
17623+ if (au_ftest_ren(a->flags, DIROPQ)) {
17624+ err = au_ren_diropq(a);
17625+ if (unlikely(err))
17626+ goto out_rename;
17627+ }
1308ab2a 17628+
4a4d8108
AM
17629+ /* update target timestamps */
17630+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
17631+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
17632+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
17633+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
1facf9fc 17634+
4a4d8108
AM
17635+ /* remove whiteout for dentry */
17636+ if (a->dst_wh_dentry) {
17637+ a->h_path.dentry = a->dst_wh_dentry;
17638+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
17639+ a->dst_dentry);
17640+ if (unlikely(err))
17641+ goto out_diropq;
17642+ }
1facf9fc 17643+
4a4d8108
AM
17644+ /* remove whtmp */
17645+ if (a->thargs)
17646+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 17647+
4a4d8108
AM
17648+ err = 0;
17649+ goto out_success;
17650+
4f0767ce 17651+out_diropq:
4a4d8108
AM
17652+ if (au_ftest_ren(a->flags, DIROPQ))
17653+ au_ren_rev_diropq(err, a);
4f0767ce 17654+out_rename:
4a4d8108
AM
17655+ if (!au_ftest_ren(a->flags, CPUP))
17656+ au_ren_rev_rename(err, a);
17657+ else
17658+ au_ren_rev_cpup(err, a);
027c5e7a 17659+ dput(a->h_dst);
4f0767ce 17660+out_whtmp:
4a4d8108
AM
17661+ if (a->thargs)
17662+ au_ren_rev_whtmp(err, a);
4f0767ce 17663+out_whdst:
4a4d8108
AM
17664+ dput(a->dst_wh_dentry);
17665+ a->dst_wh_dentry = NULL;
4f0767ce 17666+out_whsrc:
4a4d8108
AM
17667+ if (a->src_wh_dentry)
17668+ au_ren_rev_whsrc(err, a);
4f0767ce 17669+out_success:
4a4d8108
AM
17670+ dput(a->src_wh_dentry);
17671+ dput(a->dst_wh_dentry);
4f0767ce 17672+out_thargs:
4a4d8108
AM
17673+ if (a->thargs) {
17674+ dput(a->h_dst);
17675+ au_whtmp_rmdir_free(a->thargs);
17676+ a->thargs = NULL;
17677+ }
4f0767ce 17678+out:
4a4d8108 17679+ return err;
dece6358 17680+}
1facf9fc 17681+
1308ab2a 17682+/* ---------------------------------------------------------------------- */
1facf9fc 17683+
4a4d8108
AM
17684+/*
17685+ * test if @dentry dir can be rename destination or not.
17686+ * success means, it is a logically empty dir.
17687+ */
17688+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 17689+{
4a4d8108 17690+ return au_test_empty(dentry, whlist);
1308ab2a 17691+}
1facf9fc 17692+
4a4d8108
AM
17693+/*
17694+ * test if @dentry dir can be rename source or not.
17695+ * if it can, return 0.
17696+ * success means,
17697+ * - it is a logically empty dir.
17698+ * - or, it exists on writable branch and has no children including whiteouts
17699+ * on the lower branch.
17700+ */
17701+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
17702+{
17703+ int err;
17704+ unsigned int rdhash;
17705+ aufs_bindex_t bstart;
1facf9fc 17706+
4a4d8108
AM
17707+ bstart = au_dbstart(dentry);
17708+ if (bstart != btgt) {
17709+ struct au_nhash whlist;
dece6358 17710+
4a4d8108
AM
17711+ SiMustAnyLock(dentry->d_sb);
17712+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
17713+ if (!rdhash)
17714+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
17715+ dentry));
17716+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
17717+ if (unlikely(err))
17718+ goto out;
17719+ err = au_test_empty(dentry, &whlist);
17720+ au_nhash_wh_free(&whlist);
17721+ goto out;
17722+ }
dece6358 17723+
4a4d8108
AM
17724+ if (bstart == au_dbtaildir(dentry))
17725+ return 0; /* success */
dece6358 17726+
4a4d8108 17727+ err = au_test_empty_lower(dentry);
1facf9fc 17728+
4f0767ce 17729+out:
4a4d8108
AM
17730+ if (err == -ENOTEMPTY) {
17731+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
17732+ " is not supported\n");
17733+ err = -EXDEV;
17734+ }
17735+ return err;
17736+}
1308ab2a 17737+
4a4d8108
AM
17738+/* side effect: sets whlist and h_dentry */
17739+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 17740+{
4a4d8108
AM
17741+ int err;
17742+ unsigned int rdhash;
17743+ struct dentry *d;
1facf9fc 17744+
4a4d8108
AM
17745+ d = a->dst_dentry;
17746+ SiMustAnyLock(d->d_sb);
1facf9fc 17747+
4a4d8108
AM
17748+ err = 0;
17749+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
17750+ rdhash = au_sbi(d->d_sb)->si_rdhash;
17751+ if (!rdhash)
17752+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
17753+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
17754+ if (unlikely(err))
17755+ goto out;
1308ab2a 17756+
4a4d8108
AM
17757+ au_set_dbstart(d, a->dst_bstart);
17758+ err = may_rename_dstdir(d, &a->whlist);
17759+ au_set_dbstart(d, a->btgt);
17760+ }
17761+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
17762+ if (unlikely(err))
17763+ goto out;
17764+
17765+ d = a->src_dentry;
17766+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
17767+ if (au_ftest_ren(a->flags, ISDIR)) {
17768+ err = may_rename_srcdir(d, a->btgt);
17769+ if (unlikely(err)) {
17770+ au_nhash_wh_free(&a->whlist);
17771+ a->whlist.nh_num = 0;
17772+ }
17773+ }
4f0767ce 17774+out:
4a4d8108 17775+ return err;
1facf9fc 17776+}
17777+
4a4d8108 17778+/* ---------------------------------------------------------------------- */
1facf9fc 17779+
4a4d8108
AM
17780+/*
17781+ * simple tests for rename.
17782+ * following the checks in vfs, plus the parent-child relationship.
17783+ */
17784+static int au_may_ren(struct au_ren_args *a)
17785+{
17786+ int err, isdir;
17787+ struct inode *h_inode;
1facf9fc 17788+
4a4d8108
AM
17789+ if (a->src_bstart == a->btgt) {
17790+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
17791+ au_ftest_ren(a->flags, ISDIR));
17792+ if (unlikely(err))
17793+ goto out;
17794+ err = -EINVAL;
17795+ if (unlikely(a->src_h_dentry == a->h_trap))
17796+ goto out;
17797+ }
1facf9fc 17798+
4a4d8108
AM
17799+ err = 0;
17800+ if (a->dst_bstart != a->btgt)
17801+ goto out;
1facf9fc 17802+
027c5e7a
AM
17803+ err = -ENOTEMPTY;
17804+ if (unlikely(a->dst_h_dentry == a->h_trap))
17805+ goto out;
17806+
4a4d8108
AM
17807+ err = -EIO;
17808+ h_inode = a->dst_h_dentry->d_inode;
17809+ isdir = !!au_ftest_ren(a->flags, ISDIR);
17810+ if (!a->dst_dentry->d_inode) {
17811+ if (unlikely(h_inode))
17812+ goto out;
17813+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
17814+ isdir);
17815+ } else {
17816+ if (unlikely(!h_inode || !h_inode->i_nlink))
17817+ goto out;
17818+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
17819+ isdir);
17820+ if (unlikely(err))
17821+ goto out;
4a4d8108 17822+ }
1facf9fc 17823+
4f0767ce 17824+out:
4a4d8108
AM
17825+ if (unlikely(err == -ENOENT || err == -EEXIST))
17826+ err = -EIO;
17827+ AuTraceErr(err);
17828+ return err;
17829+}
1facf9fc 17830+
1308ab2a 17831+/* ---------------------------------------------------------------------- */
1facf9fc 17832+
4a4d8108
AM
17833+/*
17834+ * locking order
17835+ * (VFS)
17836+ * - src_dir and dir by lock_rename()
17837+ * - inode if it exists
17838+ * (aufs)
17839+ * - lock all
17840+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
17841+ * + si_read_lock
17842+ * + di_write_lock2_child()
17843+ * + di_write_lock_child()
17844+ * + ii_write_lock_child()
17845+ * + di_write_lock_child2()
17846+ * + ii_write_lock_child2()
17847+ * + src_parent and parent
17848+ * + di_write_lock_parent()
17849+ * + ii_write_lock_parent()
17850+ * + di_write_lock_parent2()
17851+ * + ii_write_lock_parent2()
17852+ * + lower src_dir and dir by vfsub_lock_rename()
17853+ * + verify every relationship between child and parent. if any
17854+ * of them fails, unlock all and return -EBUSY.
17855+ */
17856+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 17857+{
4a4d8108
AM
17858+ struct super_block *sb;
17859+
17860+ sb = a->dst_dentry->d_sb;
17861+ if (au_ftest_ren(a->flags, MNT_WRITE))
17862+ mnt_drop_write(a->br->br_mnt);
17863+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
17864+ a->dst_h_parent, a->dst_hdir);
1308ab2a 17865+}
17866+
4a4d8108 17867+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 17868+{
4a4d8108
AM
17869+ int err;
17870+ unsigned int udba;
1308ab2a 17871+
4a4d8108
AM
17872+ err = 0;
17873+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
17874+ a->src_hdir = au_hi(a->src_dir, a->btgt);
17875+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
17876+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
17877+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
17878+ a->dst_h_parent, a->dst_hdir);
17879+ udba = au_opt_udba(a->src_dentry->d_sb);
17880+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
17881+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
17882+ err = au_busy_or_stale();
17883+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
17884+ err = au_h_verify(a->src_h_dentry, udba,
17885+ a->src_h_parent->d_inode, a->src_h_parent,
17886+ a->br);
17887+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
17888+ err = au_h_verify(a->dst_h_dentry, udba,
17889+ a->dst_h_parent->d_inode, a->dst_h_parent,
17890+ a->br);
17891+ if (!err) {
17892+ err = mnt_want_write(a->br->br_mnt);
17893+ if (unlikely(err))
17894+ goto out_unlock;
17895+ au_fset_ren(a->flags, MNT_WRITE);
17896+ goto out; /* success */
17897+ }
17898+
17899+ err = au_busy_or_stale();
17900+
4f0767ce 17901+out_unlock:
4a4d8108 17902+ au_ren_unlock(a);
4f0767ce 17903+out:
4a4d8108 17904+ return err;
1facf9fc 17905+}
17906+
17907+/* ---------------------------------------------------------------------- */
17908+
4a4d8108 17909+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 17910+{
4a4d8108 17911+ struct inode *dir;
dece6358 17912+
4a4d8108
AM
17913+ dir = a->dst_dir;
17914+ dir->i_version++;
17915+ if (au_ftest_ren(a->flags, ISDIR)) {
17916+ /* is this updating defined in POSIX? */
17917+ au_cpup_attr_timesizes(a->src_inode);
17918+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 17919+ }
027c5e7a 17920+
4a4d8108
AM
17921+ if (au_ibstart(dir) == a->btgt)
17922+ au_cpup_attr_timesizes(dir);
dece6358 17923+
4a4d8108
AM
17924+ if (au_ftest_ren(a->flags, ISSAMEDIR))
17925+ return;
dece6358 17926+
4a4d8108
AM
17927+ dir = a->src_dir;
17928+ dir->i_version++;
17929+ if (au_ftest_ren(a->flags, ISDIR))
17930+ au_cpup_attr_nlink(dir, /*force*/1);
17931+ if (au_ibstart(dir) == a->btgt)
17932+ au_cpup_attr_timesizes(dir);
1facf9fc 17933+}
17934+
4a4d8108 17935+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 17936+{
4a4d8108
AM
17937+ aufs_bindex_t bend, bindex;
17938+ struct dentry *d, *h_d;
17939+ struct inode *i, *h_i;
17940+ struct super_block *sb;
dece6358 17941+
027c5e7a
AM
17942+ d = a->dst_dentry;
17943+ d_drop(d);
17944+ if (a->h_dst)
17945+ /* already dget-ed by au_ren_or_cpup() */
17946+ au_set_h_dptr(d, a->btgt, a->h_dst);
17947+
17948+ i = a->dst_inode;
17949+ if (i) {
17950+ if (!au_ftest_ren(a->flags, ISDIR))
17951+ vfsub_drop_nlink(i);
17952+ else {
17953+ vfsub_dead_dir(i);
17954+ au_cpup_attr_timesizes(i);
17955+ }
17956+ au_update_dbrange(d, /*do_put_zero*/1);
17957+ } else {
17958+ bend = a->btgt;
17959+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
17960+ au_set_h_dptr(d, bindex, NULL);
17961+ bend = au_dbend(d);
17962+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
17963+ au_set_h_dptr(d, bindex, NULL);
17964+ au_update_dbrange(d, /*do_put_zero*/0);
17965+ }
17966+
4a4d8108
AM
17967+ d = a->src_dentry;
17968+ au_set_dbwh(d, -1);
17969+ bend = au_dbend(d);
17970+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17971+ h_d = au_h_dptr(d, bindex);
17972+ if (h_d)
17973+ au_set_h_dptr(d, bindex, NULL);
17974+ }
17975+ au_set_dbend(d, a->btgt);
17976+
17977+ sb = d->d_sb;
17978+ i = a->src_inode;
17979+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
17980+ return; /* success */
17981+
17982+ bend = au_ibend(i);
17983+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17984+ h_i = au_h_iptr(i, bindex);
17985+ if (h_i) {
17986+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
17987+ /* ignore this error */
17988+ au_set_h_iptr(i, bindex, NULL, 0);
17989+ }
17990+ }
17991+ au_set_ibend(i, a->btgt);
1308ab2a 17992+}
dece6358 17993+
4a4d8108
AM
17994+/* ---------------------------------------------------------------------- */
17995+
17996+/* mainly for link(2) and rename(2) */
17997+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 17998+{
4a4d8108
AM
17999+ aufs_bindex_t bdiropq, bwh;
18000+ struct dentry *parent;
18001+ struct au_branch *br;
18002+
18003+ parent = dentry->d_parent;
18004+ IMustLock(parent->d_inode); /* dir is locked */
18005+
18006+ bdiropq = au_dbdiropq(parent);
18007+ bwh = au_dbwh(dentry);
18008+ br = au_sbr(dentry->d_sb, btgt);
18009+ if (au_br_rdonly(br)
18010+ || (0 <= bdiropq && bdiropq < btgt)
18011+ || (0 <= bwh && bwh < btgt))
18012+ btgt = -1;
18013+
18014+ AuDbg("btgt %d\n", btgt);
18015+ return btgt;
1facf9fc 18016+}
18017+
4a4d8108
AM
18018+/* sets src_bstart, dst_bstart and btgt */
18019+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 18020+{
4a4d8108
AM
18021+ int err;
18022+ struct au_wr_dir_args wr_dir_args = {
18023+ /* .force_btgt = -1, */
18024+ .flags = AuWrDir_ADD_ENTRY
18025+ };
dece6358 18026+
4a4d8108
AM
18027+ a->src_bstart = au_dbstart(a->src_dentry);
18028+ a->dst_bstart = au_dbstart(a->dst_dentry);
18029+ if (au_ftest_ren(a->flags, ISDIR))
18030+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
18031+ wr_dir_args.force_btgt = a->src_bstart;
18032+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
18033+ wr_dir_args.force_btgt = a->dst_bstart;
18034+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
18035+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
18036+ a->btgt = err;
dece6358 18037+
4a4d8108 18038+ return err;
1facf9fc 18039+}
18040+
4a4d8108 18041+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 18042+{
4a4d8108
AM
18043+ a->h_path.dentry = a->src_h_parent;
18044+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
18045+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
18046+ a->h_path.dentry = a->dst_h_parent;
18047+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
18048+ }
1facf9fc 18049+
4a4d8108
AM
18050+ au_fclr_ren(a->flags, DT_DSTDIR);
18051+ if (!au_ftest_ren(a->flags, ISDIR))
18052+ return;
dece6358 18053+
4a4d8108
AM
18054+ a->h_path.dentry = a->src_h_dentry;
18055+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
18056+ if (a->dst_h_dentry->d_inode) {
18057+ au_fset_ren(a->flags, DT_DSTDIR);
18058+ a->h_path.dentry = a->dst_h_dentry;
18059+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
18060+ }
1308ab2a 18061+}
dece6358 18062+
4a4d8108 18063+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 18064+{
4a4d8108
AM
18065+ struct dentry *h_d;
18066+ struct mutex *h_mtx;
18067+
18068+ au_dtime_revert(a->src_dt + AuPARENT);
18069+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
18070+ au_dtime_revert(a->dst_dt + AuPARENT);
18071+
18072+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
18073+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
18074+ h_mtx = &h_d->d_inode->i_mutex;
18075+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18076+ au_dtime_revert(a->src_dt + AuCHILD);
18077+ mutex_unlock(h_mtx);
18078+
18079+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
18080+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
18081+ h_mtx = &h_d->d_inode->i_mutex;
18082+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18083+ au_dtime_revert(a->dst_dt + AuCHILD);
18084+ mutex_unlock(h_mtx);
1facf9fc 18085+ }
18086+ }
18087+}
18088+
4a4d8108
AM
18089+/* ---------------------------------------------------------------------- */
18090+
18091+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
18092+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 18093+{
e49829fe 18094+ int err, flags;
4a4d8108
AM
18095+ /* reduce stack space */
18096+ struct au_ren_args *a;
18097+
18098+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
18099+ IMustLock(_src_dir);
18100+ IMustLock(_dst_dir);
18101+
18102+ err = -ENOMEM;
18103+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
18104+ a = kzalloc(sizeof(*a), GFP_NOFS);
18105+ if (unlikely(!a))
18106+ goto out;
18107+
18108+ a->src_dir = _src_dir;
18109+ a->src_dentry = _src_dentry;
18110+ a->src_inode = a->src_dentry->d_inode;
18111+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
18112+ a->dst_dir = _dst_dir;
18113+ a->dst_dentry = _dst_dentry;
18114+ a->dst_inode = a->dst_dentry->d_inode;
18115+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
18116+ if (a->dst_inode) {
18117+ IMustLock(a->dst_inode);
18118+ au_igrab(a->dst_inode);
1facf9fc 18119+ }
1facf9fc 18120+
4a4d8108 18121+ err = -ENOTDIR;
027c5e7a 18122+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
4a4d8108
AM
18123+ if (S_ISDIR(a->src_inode->i_mode)) {
18124+ au_fset_ren(a->flags, ISDIR);
18125+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
18126+ goto out_free;
e49829fe
JR
18127+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18128+ AuLock_DIR | flags);
4a4d8108 18129+ } else
e49829fe
JR
18130+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18131+ flags);
18132+ if (unlikely(err))
18133+ goto out_free;
1facf9fc 18134+
027c5e7a
AM
18135+ err = au_d_hashed_positive(a->src_dentry);
18136+ if (unlikely(err))
18137+ goto out_unlock;
18138+ err = -ENOENT;
18139+ if (a->dst_inode) {
18140+ /*
18141+ * If it is a dir, VFS unhashes dst_dentry before this
18142+ * function. It means we cannot rely upon d_unhashed().
18143+ */
18144+ if (unlikely(!a->dst_inode->i_nlink))
18145+ goto out_unlock;
18146+ if (!S_ISDIR(a->dst_inode->i_mode)) {
18147+ err = au_d_hashed_positive(a->dst_dentry);
18148+ if (unlikely(err))
18149+ goto out_unlock;
18150+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
18151+ goto out_unlock;
18152+ } else if (unlikely(d_unhashed(a->dst_dentry)))
18153+ goto out_unlock;
18154+
4a4d8108
AM
18155+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
18156+ di_write_lock_parent(a->dst_parent);
1facf9fc 18157+
4a4d8108
AM
18158+ /* which branch we process */
18159+ err = au_ren_wbr(a);
18160+ if (unlikely(err < 0))
027c5e7a 18161+ goto out_parent;
4a4d8108
AM
18162+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
18163+ a->h_path.mnt = a->br->br_mnt;
1facf9fc 18164+
4a4d8108
AM
18165+ /* are they available to be renamed */
18166+ err = au_ren_may_dir(a);
18167+ if (unlikely(err))
18168+ goto out_children;
1facf9fc 18169+
4a4d8108
AM
18170+ /* prepare the writable parent dir on the same branch */
18171+ if (a->dst_bstart == a->btgt) {
18172+ au_fset_ren(a->flags, WHDST);
18173+ } else {
18174+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
18175+ if (unlikely(err))
18176+ goto out_children;
18177+ }
1facf9fc 18178+
4a4d8108
AM
18179+ if (a->src_dir != a->dst_dir) {
18180+ /*
18181+ * this temporary unlock is safe,
18182+ * because both dir->i_mutex are locked.
18183+ */
18184+ di_write_unlock(a->dst_parent);
18185+ di_write_lock_parent(a->src_parent);
18186+ err = au_wr_dir_need_wh(a->src_dentry,
18187+ au_ftest_ren(a->flags, ISDIR),
18188+ &a->btgt);
18189+ di_write_unlock(a->src_parent);
18190+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
18191+ au_fclr_ren(a->flags, ISSAMEDIR);
18192+ } else
18193+ err = au_wr_dir_need_wh(a->src_dentry,
18194+ au_ftest_ren(a->flags, ISDIR),
18195+ &a->btgt);
18196+ if (unlikely(err < 0))
18197+ goto out_children;
18198+ if (err)
18199+ au_fset_ren(a->flags, WHSRC);
1facf9fc 18200+
4a4d8108
AM
18201+ /* lock them all */
18202+ err = au_ren_lock(a);
18203+ if (unlikely(err))
18204+ goto out_children;
1facf9fc 18205+
4a4d8108
AM
18206+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
18207+ err = au_may_ren(a);
18208+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
18209+ err = -ENAMETOOLONG;
18210+ if (unlikely(err))
18211+ goto out_hdir;
1facf9fc 18212+
4a4d8108
AM
18213+ /* store timestamps to be revertible */
18214+ au_ren_dt(a);
1facf9fc 18215+
4a4d8108
AM
18216+ /* here we go */
18217+ err = do_rename(a);
18218+ if (unlikely(err))
18219+ goto out_dt;
18220+
18221+ /* update dir attributes */
18222+ au_ren_refresh_dir(a);
18223+
18224+ /* dput/iput all lower dentries */
18225+ au_ren_refresh(a);
18226+
18227+ goto out_hdir; /* success */
18228+
4f0767ce 18229+out_dt:
4a4d8108 18230+ au_ren_rev_dt(err, a);
4f0767ce 18231+out_hdir:
4a4d8108 18232+ au_ren_unlock(a);
4f0767ce 18233+out_children:
4a4d8108 18234+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
18235+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
18236+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
18237+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
18238+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 18239+ }
027c5e7a 18240+out_parent:
4a4d8108
AM
18241+ if (!err)
18242+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
18243+ else {
18244+ au_update_dbstart(a->dst_dentry);
18245+ if (!a->dst_inode)
18246+ d_drop(a->dst_dentry);
18247+ }
4a4d8108
AM
18248+ if (au_ftest_ren(a->flags, ISSAMEDIR))
18249+ di_write_unlock(a->dst_parent);
18250+ else
18251+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 18252+out_unlock:
4a4d8108 18253+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 18254+out_free:
4a4d8108
AM
18255+ iput(a->dst_inode);
18256+ if (a->thargs)
18257+ au_whtmp_rmdir_free(a->thargs);
18258+ kfree(a);
4f0767ce 18259+out:
4a4d8108
AM
18260+ AuTraceErr(err);
18261+ return err;
1308ab2a 18262+}
7f207e10
AM
18263diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
18264--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
53392da6 18265+++ linux/fs/aufs/Kconfig 2011-08-24 13:30:24.727980364 +0200
2cbb1c4b 18266@@ -0,0 +1,203 @@
4a4d8108
AM
18267+config AUFS_FS
18268+ tristate "Aufs (Advanced multi layered unification filesystem) support"
18269+ depends on EXPERIMENTAL
18270+ help
18271+ Aufs is a stackable unification filesystem such as Unionfs,
18272+ which unifies several directories and provides a merged single
18273+ directory.
18274+ In the early days, aufs was entirely re-designed and
18275+ re-implemented Unionfs Version 1.x series. Introducing many
18276+ original ideas, approaches and improvements, it becomes totally
18277+ different from Unionfs while keeping the basic features.
1facf9fc 18278+
4a4d8108
AM
18279+if AUFS_FS
18280+choice
18281+ prompt "Maximum number of branches"
18282+ default AUFS_BRANCH_MAX_127
18283+ help
18284+ Specifies the maximum number of branches (or member directories)
18285+ in a single aufs. The larger value consumes more system
18286+ resources and has a minor impact to performance.
18287+config AUFS_BRANCH_MAX_127
18288+ bool "127"
18289+ help
18290+ Specifies the maximum number of branches (or member directories)
18291+ in a single aufs. The larger value consumes more system
18292+ resources and has a minor impact to performance.
18293+config AUFS_BRANCH_MAX_511
18294+ bool "511"
18295+ help
18296+ Specifies the maximum number of branches (or member directories)
18297+ in a single aufs. The larger value consumes more system
18298+ resources and has a minor impact to performance.
18299+config AUFS_BRANCH_MAX_1023
18300+ bool "1023"
18301+ help
18302+ Specifies the maximum number of branches (or member directories)
18303+ in a single aufs. The larger value consumes more system
18304+ resources and has a minor impact to performance.
18305+config AUFS_BRANCH_MAX_32767
18306+ bool "32767"
18307+ help
18308+ Specifies the maximum number of branches (or member directories)
18309+ in a single aufs. The larger value consumes more system
18310+ resources and has a minor impact to performance.
18311+endchoice
1facf9fc 18312+
e49829fe
JR
18313+config AUFS_SBILIST
18314+ bool
18315+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
18316+ default y
18317+ help
18318+ Automatic configuration for internal use.
18319+ When aufs supports Magic SysRq or /proc, enabled automatically.
18320+
4a4d8108
AM
18321+config AUFS_HNOTIFY
18322+ bool "Detect direct branch access (bypassing aufs)"
18323+ help
18324+ If you want to modify files on branches directly, eg. bypassing aufs,
18325+ and want aufs to detect the changes of them fully, then enable this
18326+ option and use 'udba=notify' mount option.
7f207e10 18327+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
18328+ It will have a negative impact to the performance.
18329+ See detail in aufs.5.
dece6358 18330+
4a4d8108
AM
18331+choice
18332+ prompt "method" if AUFS_HNOTIFY
18333+ default AUFS_HFSNOTIFY
18334+config AUFS_HFSNOTIFY
18335+ bool "fsnotify"
18336+ select FSNOTIFY
4a4d8108 18337+endchoice
1facf9fc 18338+
4a4d8108
AM
18339+config AUFS_EXPORT
18340+ bool "NFS-exportable aufs"
2cbb1c4b 18341+ depends on EXPORTFS
4a4d8108
AM
18342+ help
18343+ If you want to export your mounted aufs via NFS, then enable this
18344+ option. There are several requirements for this configuration.
18345+ See detail in aufs.5.
1facf9fc 18346+
4a4d8108
AM
18347+config AUFS_INO_T_64
18348+ bool
18349+ depends on AUFS_EXPORT
18350+ depends on 64BIT && !(ALPHA || S390)
18351+ default y
18352+ help
18353+ Automatic configuration for internal use.
18354+ /* typedef unsigned long/int __kernel_ino_t */
18355+ /* alpha and s390x are int */
1facf9fc 18356+
4a4d8108
AM
18357+config AUFS_RDU
18358+ bool "Readdir in userspace"
18359+ help
18360+ Aufs has two methods to provide a merged view for a directory,
18361+ by a user-space library and by kernel-space natively. The latter
18362+ is always enabled but sometimes large and slow.
18363+ If you enable this option, install the library in aufs2-util
18364+ package, and set some environment variables for your readdir(3),
18365+ then the work will be handled in user-space which generally
18366+ shows better performance in most cases.
18367+ See detail in aufs.5.
1facf9fc 18368+
2cbb1c4b
JR
18369+config AUFS_PROC_MAP
18370+ bool "support for /proc/maps and lsof(1)"
18371+ depends on PROC_FS
18372+ help
18373+ When you issue mmap(2) in aufs, it is actually a direct mmap(2)
18374+ call to the file on the branch fs since the file in aufs is
18375+ purely virtual. And the file path printed in /proc/maps (and
18376+ others) will be the path on the branch fs. In most cases, it
18377+ does no harm. But some utilities like lsof(1) may be confused since
18378+ the utility or user may expect the file path in aufs to be
18379+ printed.
18380+ To address this issue, aufs provides a patch which introduces a
18381+ new member called vm_prfile into struct vm_area_struct. The patch
18382+ is meaningless without enabling this configuration since nobody
18383+ sets the new vm_prfile member.
18384+ If you don't apply the patch, then enabling this configuration
18385+ will cause a compile error.
18386+ This approach is fragile since if someone else makes some changes
18387+ around vm_file, then vm_prfile may not work anymore. As a
18388+ workaround for such a case, aufs provides this configuration. If you
18389+ disable it, then lsof(1) may produce an incorrect result but the
18390+ problem will be gone even if the aufs patch is applied (I hope).
18391+
4a4d8108
AM
18392+config AUFS_SP_IATTR
18393+ bool "Respect the attributes (mtime/ctime mainly) of special files"
18394+ help
18395+ When you write something to a special file, some attributes of it
18396+ (mtime/ctime mainly) may be updated. Generally such updates are
18397+ less important (actually some device drivers and NFS ignore
18398+ it). But some applications (such as test programs) require
18399+ such updates. If you need these updates, then enable this
18400+ configuration which introduces some overhead.
18401+ Currently this configuration handles FIFO only.
1facf9fc 18402+
4a4d8108
AM
18403+config AUFS_SHWH
18404+ bool "Show whiteouts"
18405+ help
18406+ If you want to make the whiteouts in aufs visible, then enable
18407+ this option and specify 'shwh' mount option. Although it may
18408+ sound like philosophy or something, technically it simply
18409+ shows the name of the whiteout while keeping its behaviour.
1facf9fc 18410+
4a4d8108
AM
18411+config AUFS_BR_RAMFS
18412+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
18413+ help
18414+ If you want to use ramfs as an aufs branch fs, then enable this
18415+ option. Generally tmpfs is recommended.
18416+ Aufs prohibits them from being a branch fs by default, because
18417+ initramfs generally becomes unusable after switch_root or
18418+ similar. If you set initramfs as an aufs branch and boot your
18419+ system by switch_root, you will meet a problem easily since the
18420+ files in initramfs may be inaccessible.
18421+ Unless you are going to use ramfs as an aufs branch fs without
18422+ switch_root or something, leave it N.
1facf9fc 18423+
4a4d8108
AM
18424+config AUFS_BR_FUSE
18425+ bool "Fuse fs as an aufs branch"
18426+ depends on FUSE_FS
18427+ select AUFS_POLL
18428+ help
18429+ If you want to use fuse-based userspace filesystem as an aufs
18430+ branch fs, then enable this option.
18431+ It implements the internal poll(2) operation which is
18432+ implemented by fuse only (currently).
1facf9fc 18433+
4a4d8108
AM
18434+config AUFS_POLL
18435+ bool
18436+ help
18437+ Automatic configuration for internal use.
1facf9fc 18438+
4a4d8108
AM
18439+config AUFS_BR_HFSPLUS
18440+ bool "Hfsplus as an aufs branch"
18441+ depends on HFSPLUS_FS
18442+ default y
18443+ help
18444+ If you want to use hfsplus fs as an aufs branch fs, then enable
18445+ this option. This option introduces a small overhead at
18446+ copying-up a file on hfsplus.
1facf9fc 18447+
4a4d8108
AM
18448+config AUFS_BDEV_LOOP
18449+ bool
18450+ depends on BLK_DEV_LOOP
18451+ default y
18452+ help
18453+ Automatic configuration for internal use.
18454+ Convert =[ym] into =y.
1308ab2a 18455+
4a4d8108
AM
18456+config AUFS_DEBUG
18457+ bool "Debug aufs"
18458+ help
18459+ Enable this to compile aufs internal debug code.
18460+ It will have a negative impact to the performance.
18461+
18462+config AUFS_MAGIC_SYSRQ
18463+ bool
18464+ depends on AUFS_DEBUG && MAGIC_SYSRQ
18465+ default y
18466+ help
18467+ Automatic configuration for internal use.
18468+ When aufs supports Magic SysRq, enabled automatically.
18469+endif
7f207e10
AM
18470diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
18471--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18472+++ linux/fs/aufs/loop.c 2011-08-24 13:30:24.734646739 +0200
87a755f4 18473@@ -0,0 +1,133 @@
1facf9fc 18474+/*
027c5e7a 18475+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18476+ *
18477+ * This program, aufs is free software; you can redistribute it and/or modify
18478+ * it under the terms of the GNU General Public License as published by
18479+ * the Free Software Foundation; either version 2 of the License, or
18480+ * (at your option) any later version.
dece6358
AM
18481+ *
18482+ * This program is distributed in the hope that it will be useful,
18483+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18484+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18485+ * GNU General Public License for more details.
18486+ *
18487+ * You should have received a copy of the GNU General Public License
18488+ * along with this program; if not, write to the Free Software
18489+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18490+ */
18491+
18492+/*
18493+ * support for loopback block device as a branch
18494+ */
18495+
18496+#include <linux/loop.h>
18497+#include "aufs.h"
18498+
18499+/*
18500+ * test if two lower dentries have overlapping branches.
18501+ */
b752ccd1 18502+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 18503+{
b752ccd1 18504+ struct super_block *h_sb;
1facf9fc 18505+ struct loop_device *l;
18506+
b752ccd1
AM
18507+ h_sb = h_adding->d_sb;
18508+ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
1facf9fc 18509+ return 0;
18510+
b752ccd1
AM
18511+ l = h_sb->s_bdev->bd_disk->private_data;
18512+ h_adding = l->lo_backing_file->f_dentry;
18513+ /*
18514+ * h_adding can be local NFS.
18515+ * in this case aufs cannot detect the loop.
18516+ */
18517+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 18518+ return 1;
b752ccd1 18519+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 18520+}
18521+
18522+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
18523+int au_test_loopback_kthread(void)
18524+{
b752ccd1
AM
18525+ int ret;
18526+ struct task_struct *tsk = current;
18527+
18528+ ret = 0;
18529+ if (tsk->flags & PF_KTHREAD) {
18530+ const char c = tsk->comm[4];
18531+ ret = ('0' <= c && c <= '9'
18532+ && !strncmp(tsk->comm, "loop", 4));
18533+ }
1facf9fc 18534+
b752ccd1 18535+ return ret;
1facf9fc 18536+}
87a755f4
AM
18537+
18538+/* ---------------------------------------------------------------------- */
18539+
18540+#define au_warn_loopback_step 16
18541+static int au_warn_loopback_nelem = au_warn_loopback_step;
18542+static unsigned long *au_warn_loopback_array;
18543+
18544+void au_warn_loopback(struct super_block *h_sb)
18545+{
18546+ int i, new_nelem;
18547+ unsigned long *a, magic;
18548+ static DEFINE_SPINLOCK(spin);
18549+
18550+ magic = h_sb->s_magic;
18551+ spin_lock(&spin);
18552+ a = au_warn_loopback_array;
18553+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
18554+ if (a[i] == magic) {
18555+ spin_unlock(&spin);
18556+ return;
18557+ }
18558+
18559+ /* h_sb is new to us, print it */
18560+ if (i < au_warn_loopback_nelem) {
18561+ a[i] = magic;
18562+ goto pr;
18563+ }
18564+
18565+ /* expand the array */
18566+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
18567+ a = au_kzrealloc(au_warn_loopback_array,
18568+ au_warn_loopback_nelem * sizeof(unsigned long),
18569+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
18570+ if (a) {
18571+ au_warn_loopback_nelem = new_nelem;
18572+ au_warn_loopback_array = a;
18573+ a[i] = magic;
18574+ goto pr;
18575+ }
18576+
18577+ spin_unlock(&spin);
18578+ AuWarn1("realloc failed, ignored\n");
18579+ return;
18580+
18581+pr:
18582+ spin_unlock(&spin);
18583+ pr_warning("you may want to try another patch for loopback file "
18584+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
18585+}
18586+
18587+int au_loopback_init(void)
18588+{
18589+ int err;
18590+ struct super_block *sb __maybe_unused;
18591+
18592+ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long));
18593+
18594+ err = 0;
18595+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
18596+ sizeof(unsigned long), GFP_NOFS);
18597+ if (unlikely(!au_warn_loopback_array))
18598+ err = -ENOMEM;
18599+
18600+ return err;
18601+}
18602+
18603+void au_loopback_fin(void)
18604+{
18605+ kfree(au_warn_loopback_array);
18606+}
7f207e10
AM
18607diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
18608--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18609+++ linux/fs/aufs/loop.h 2011-08-24 13:30:24.734646739 +0200
87a755f4 18610@@ -0,0 +1,50 @@
1facf9fc 18611+/*
027c5e7a 18612+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18613+ *
18614+ * This program, aufs is free software; you can redistribute it and/or modify
18615+ * it under the terms of the GNU General Public License as published by
18616+ * the Free Software Foundation; either version 2 of the License, or
18617+ * (at your option) any later version.
dece6358
AM
18618+ *
18619+ * This program is distributed in the hope that it will be useful,
18620+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18621+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18622+ * GNU General Public License for more details.
18623+ *
18624+ * You should have received a copy of the GNU General Public License
18625+ * along with this program; if not, write to the Free Software
18626+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18627+ */
18628+
18629+/*
18630+ * support for loopback mount as a branch
18631+ */
18632+
18633+#ifndef __AUFS_LOOP_H__
18634+#define __AUFS_LOOP_H__
18635+
18636+#ifdef __KERNEL__
18637+
dece6358
AM
18638+struct dentry;
18639+struct super_block;
1facf9fc 18640+
18641+#ifdef CONFIG_AUFS_BDEV_LOOP
18642+/* loop.c */
b752ccd1 18643+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 18644+int au_test_loopback_kthread(void);
87a755f4
AM
18645+void au_warn_loopback(struct super_block *h_sb);
18646+
18647+int au_loopback_init(void);
18648+void au_loopback_fin(void);
1facf9fc 18649+#else
4a4d8108 18650+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 18651+ struct dentry *h_adding)
4a4d8108 18652+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
18653+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
18654+
18655+AuStubInt0(au_loopback_init, void)
18656+AuStubVoid(au_loopback_fin, void)
1facf9fc 18657+#endif /* CONFIG_AUFS_BDEV_LOOP */
18658+
18659+#endif /* __KERNEL__ */
18660+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
18661diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
18662--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 18663+++ linux/fs/aufs/magic.mk 2011-08-24 13:30:24.734646739 +0200
4a4d8108 18664@@ -0,0 +1,54 @@
1facf9fc 18665+
18666+# defined in ${srctree}/fs/fuse/inode.c
18667+# tristate
18668+ifdef CONFIG_FUSE_FS
18669+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
18670+endif
18671+
18672+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
18673+# tristate
18674+ifdef CONFIG_OCFS2_FS
18675+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
18676+endif
18677+
18678+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
18679+# tristate
18680+ifdef CONFIG_OCFS2_FS_O2CB
18681+ccflags-y += -DDLMFS_MAGIC=0x76a9f425
18682+endif
18683+
1facf9fc 18684+# defined in ${srctree}/fs/cifs/cifsfs.c
18685+# tristate
18686+ifdef CONFIG_CIFS_FS
18687+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
18688+endif
18689+
18690+# defined in ${srctree}/fs/xfs/xfs_sb.h
18691+# tristate
18692+ifdef CONFIG_XFS_FS
18693+ccflags-y += -DXFS_SB_MAGIC=0x58465342
18694+endif
18695+
18696+# defined in ${srctree}/fs/configfs/mount.c
18697+# tristate
18698+ifdef CONFIG_CONFIGFS_FS
18699+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
18700+endif
18701+
18702+# defined in ${srctree}/fs/9p/v9fs.h
18703+# tristate
18704+ifdef CONFIG_9P_FS
18705+ccflags-y += -DV9FS_MAGIC=0x01021997
18706+endif
18707+
18708+# defined in ${srctree}/fs/ubifs/ubifs.h
18709+# tristate
18710+ifdef CONFIG_UBIFS_FS
18711+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
18712+endif
4a4d8108
AM
18713+
18714+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
18715+# tristate
18716+ifdef CONFIG_HFSPLUS_FS
18717+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
18718+endif
7f207e10
AM
18719diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
18720--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
53392da6 18721+++ linux/fs/aufs/Makefile 2011-08-24 13:30:24.727980364 +0200
7f207e10 18722@@ -0,0 +1,38 @@
4a4d8108
AM
18723+
18724+include ${src}/magic.mk
18725+ifeq (${CONFIG_AUFS_FS},m)
18726+include ${src}/conf.mk
18727+endif
18728+-include ${src}/priv_def.mk
18729+
18730+# cf. include/linux/kernel.h
18731+# enable pr_debug
18732+ccflags-y += -DDEBUG
7f207e10
AM
18733+# sparse doesn't allow spaces
18734+ccflags-y += -D'pr_fmt(fmt)=AUFS_NAME"\040%s:%d:%s[%d]:\040"fmt,__func__,__LINE__,current->comm,current->pid'
4a4d8108
AM
18735+
18736+obj-$(CONFIG_AUFS_FS) += aufs.o
18737+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
18738+ wkq.o vfsub.o dcsub.o \
e49829fe 18739+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
18740+ dinfo.o dentry.o \
18741+ dynop.o \
18742+ finfo.o file.o f_op.o \
18743+ dir.o vdir.o \
18744+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
18745+ ioctl.o
18746+
18747+# all are boolean
e49829fe 18748+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
18749+aufs-$(CONFIG_SYSFS) += sysfs.o
18750+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
18751+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
18752+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
18753+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108
AM
18754+aufs-$(CONFIG_AUFS_EXPORT) += export.o
18755+aufs-$(CONFIG_AUFS_POLL) += poll.o
18756+aufs-$(CONFIG_AUFS_RDU) += rdu.o
18757+aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
18758+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
18759+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
18760+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
18761diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
18762--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
18763+++ linux/fs/aufs/module.c 2011-10-24 20:52:23.677857076 +0200
18764@@ -0,0 +1,193 @@
1facf9fc 18765+/*
027c5e7a 18766+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18767+ *
18768+ * This program, aufs is free software; you can redistribute it and/or modify
18769+ * it under the terms of the GNU General Public License as published by
18770+ * the Free Software Foundation; either version 2 of the License, or
18771+ * (at your option) any later version.
dece6358
AM
18772+ *
18773+ * This program is distributed in the hope that it will be useful,
18774+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18775+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18776+ * GNU General Public License for more details.
18777+ *
18778+ * You should have received a copy of the GNU General Public License
18779+ * along with this program; if not, write to the Free Software
18780+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18781+ */
18782+
18783+/*
18784+ * module global variables and operations
18785+ */
18786+
18787+#include <linux/module.h>
18788+#include <linux/seq_file.h>
18789+#include "aufs.h"
18790+
18791+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
18792+{
18793+ if (new_sz <= nused)
18794+ return p;
18795+
18796+ p = krealloc(p, new_sz, gfp);
18797+ if (p)
18798+ memset(p + nused, 0, new_sz - nused);
18799+ return p;
18800+}
18801+
18802+/* ---------------------------------------------------------------------- */
18803+
18804+/*
18805+ * aufs caches
18806+ */
18807+struct kmem_cache *au_cachep[AuCache_Last];
18808+static int __init au_cache_init(void)
18809+{
4a4d8108 18810+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 18811+ if (au_cachep[AuCache_DINFO])
027c5e7a 18812+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
18813+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
18814+ au_icntnr_init_once);
1facf9fc 18815+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
18816+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
18817+ au_fi_init_once);
1facf9fc 18818+ if (au_cachep[AuCache_FINFO])
18819+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
18820+ if (au_cachep[AuCache_VDIR])
18821+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
18822+ if (au_cachep[AuCache_DEHSTR])
18823+ return 0;
18824+
18825+ return -ENOMEM;
18826+}
18827+
18828+static void au_cache_fin(void)
18829+{
18830+ int i;
4a4d8108
AM
18831+
18832+ /* including AuCache_HNOTIFY */
1facf9fc 18833+ for (i = 0; i < AuCache_Last; i++)
18834+ if (au_cachep[i]) {
18835+ kmem_cache_destroy(au_cachep[i]);
18836+ au_cachep[i] = NULL;
18837+ }
18838+}
18839+
18840+/* ---------------------------------------------------------------------- */
18841+
18842+int au_dir_roflags;
18843+
e49829fe 18844+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
18845+/*
18846+ * iterate_supers_type() doesn't protect us from
18847+ * remounting (branch management)
18848+ */
e49829fe
JR
18849+struct au_splhead au_sbilist;
18850+#endif
18851+
1facf9fc 18852+/*
18853+ * functions for module interface.
18854+ */
18855+MODULE_LICENSE("GPL");
18856+/* MODULE_LICENSE("GPL v2"); */
dece6358 18857+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 18858+MODULE_DESCRIPTION(AUFS_NAME
18859+ " -- Advanced multi layered unification filesystem");
18860+MODULE_VERSION(AUFS_VERSION);
18861+
1facf9fc 18862+/* this module parameter has no meaning when SYSFS is disabled */
18863+int sysaufs_brs = 1;
18864+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
18865+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
18866+
18867+/* ---------------------------------------------------------------------- */
18868+
18869+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
18870+
18871+int au_seq_path(struct seq_file *seq, struct path *path)
18872+{
18873+ return seq_path(seq, path, au_esc_chars);
18874+}
18875+
18876+/* ---------------------------------------------------------------------- */
18877+
18878+static int __init aufs_init(void)
18879+{
18880+ int err, i;
18881+ char *p;
18882+
18883+ p = au_esc_chars;
18884+ for (i = 1; i <= ' '; i++)
18885+ *p++ = i;
18886+ *p++ = '\\';
18887+ *p++ = '\x7f';
18888+ *p = 0;
18889+
18890+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
18891+
e49829fe 18892+ au_sbilist_init();
1facf9fc 18893+ sysaufs_brs_init();
18894+ au_debug_init();
4a4d8108 18895+ au_dy_init();
1facf9fc 18896+ err = sysaufs_init();
18897+ if (unlikely(err))
18898+ goto out;
e49829fe 18899+ err = au_procfs_init();
4f0767ce 18900+ if (unlikely(err))
953406b4 18901+ goto out_sysaufs;
e49829fe
JR
18902+ err = au_wkq_init();
18903+ if (unlikely(err))
18904+ goto out_procfs;
87a755f4 18905+ err = au_loopback_init();
1facf9fc 18906+ if (unlikely(err))
18907+ goto out_wkq;
87a755f4
AM
18908+ err = au_hnotify_init();
18909+ if (unlikely(err))
18910+ goto out_loopback;
1facf9fc 18911+ err = au_sysrq_init();
18912+ if (unlikely(err))
18913+ goto out_hin;
18914+ err = au_cache_init();
18915+ if (unlikely(err))
18916+ goto out_sysrq;
18917+ err = register_filesystem(&aufs_fs_type);
18918+ if (unlikely(err))
18919+ goto out_cache;
4a4d8108
AM
18920+ /* since we define pr_fmt, call printk directly */
18921+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 18922+ goto out; /* success */
18923+
4f0767ce 18924+out_cache:
1facf9fc 18925+ au_cache_fin();
4f0767ce 18926+out_sysrq:
1facf9fc 18927+ au_sysrq_fin();
4f0767ce 18928+out_hin:
4a4d8108 18929+ au_hnotify_fin();
87a755f4
AM
18930+out_loopback:
18931+ au_loopback_fin();
4f0767ce 18932+out_wkq:
1facf9fc 18933+ au_wkq_fin();
e49829fe
JR
18934+out_procfs:
18935+ au_procfs_fin();
4f0767ce 18936+out_sysaufs:
1facf9fc 18937+ sysaufs_fin();
4a4d8108 18938+ au_dy_fin();
4f0767ce 18939+out:
1facf9fc 18940+ return err;
18941+}
18942+
18943+static void __exit aufs_exit(void)
18944+{
18945+ unregister_filesystem(&aufs_fs_type);
18946+ au_cache_fin();
18947+ au_sysrq_fin();
4a4d8108 18948+ au_hnotify_fin();
87a755f4 18949+ au_loopback_fin();
1facf9fc 18950+ au_wkq_fin();
e49829fe 18951+ au_procfs_fin();
1facf9fc 18952+ sysaufs_fin();
4a4d8108 18953+ au_dy_fin();
1facf9fc 18954+}
18955+
18956+module_init(aufs_init);
18957+module_exit(aufs_exit);
7f207e10
AM
18958diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
18959--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18960+++ linux/fs/aufs/module.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 18961@@ -0,0 +1,91 @@
1facf9fc 18962+/*
027c5e7a 18963+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18964+ *
18965+ * This program, aufs is free software; you can redistribute it and/or modify
18966+ * it under the terms of the GNU General Public License as published by
18967+ * the Free Software Foundation; either version 2 of the License, or
18968+ * (at your option) any later version.
dece6358
AM
18969+ *
18970+ * This program is distributed in the hope that it will be useful,
18971+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18972+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18973+ * GNU General Public License for more details.
18974+ *
18975+ * You should have received a copy of the GNU General Public License
18976+ * along with this program; if not, write to the Free Software
18977+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18978+ */
18979+
18980+/*
18981+ * module initialization and module-global
18982+ */
18983+
18984+#ifndef __AUFS_MODULE_H__
18985+#define __AUFS_MODULE_H__
18986+
18987+#ifdef __KERNEL__
18988+
18989+#include <linux/slab.h>
18990+
dece6358
AM
18991+struct path;
18992+struct seq_file;
18993+
1facf9fc 18994+/* module parameters */
1facf9fc 18995+extern int sysaufs_brs;
18996+
18997+/* ---------------------------------------------------------------------- */
18998+
18999+extern int au_dir_roflags;
19000+
19001+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
19002+int au_seq_path(struct seq_file *seq, struct path *path);
19003+
e49829fe
JR
19004+#ifdef CONFIG_PROC_FS
19005+/* procfs.c */
19006+int __init au_procfs_init(void);
19007+void au_procfs_fin(void);
19008+#else
19009+AuStubInt0(au_procfs_init, void);
19010+AuStubVoid(au_procfs_fin, void);
19011+#endif
19012+
4f0767ce
JR
19013+/* ---------------------------------------------------------------------- */
19014+
19015+/* kmem cache */
1facf9fc 19016+enum {
19017+ AuCache_DINFO,
19018+ AuCache_ICNTNR,
19019+ AuCache_FINFO,
19020+ AuCache_VDIR,
19021+ AuCache_DEHSTR,
4a4d8108
AM
19022+#ifdef CONFIG_AUFS_HNOTIFY
19023+ AuCache_HNOTIFY,
1facf9fc 19024+#endif
19025+ AuCache_Last
19026+};
19027+
4a4d8108
AM
19028+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
19029+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
19030+#define AuCacheCtor(type, ctor) \
19031+ kmem_cache_create(#type, sizeof(struct type), \
19032+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 19033+
19034+extern struct kmem_cache *au_cachep[];
19035+
19036+#define AuCacheFuncs(name, index) \
4a4d8108 19037+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 19038+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 19039+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 19040+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
19041+
19042+AuCacheFuncs(dinfo, DINFO);
19043+AuCacheFuncs(icntnr, ICNTNR);
19044+AuCacheFuncs(finfo, FINFO);
19045+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
19046+AuCacheFuncs(vdir_dehstr, DEHSTR);
19047+#ifdef CONFIG_AUFS_HNOTIFY
19048+AuCacheFuncs(hnotify, HNOTIFY);
19049+#endif
1facf9fc 19050+
4a4d8108
AM
19051+#endif /* __KERNEL__ */
19052+#endif /* __AUFS_MODULE_H__ */
7f207e10
AM
19053diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
19054--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
19055+++ linux/fs/aufs/opts.c 2011-10-24 20:51:51.580466925 +0200
19056@@ -0,0 +1,1679 @@
1facf9fc 19057+/*
027c5e7a 19058+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 19059+ *
19060+ * This program, aufs is free software; you can redistribute it and/or modify
19061+ * it under the terms of the GNU General Public License as published by
19062+ * the Free Software Foundation; either version 2 of the License, or
19063+ * (at your option) any later version.
dece6358
AM
19064+ *
19065+ * This program is distributed in the hope that it will be useful,
19066+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19067+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19068+ * GNU General Public License for more details.
19069+ *
19070+ * You should have received a copy of the GNU General Public License
19071+ * along with this program; if not, write to the Free Software
19072+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19073+ */
19074+
19075+/*
19076+ * mount options/flags
19077+ */
19078+
dece6358 19079+#include <linux/file.h>
e49829fe 19080+#include <linux/jiffies.h>
dece6358 19081+#include <linux/namei.h>
1facf9fc 19082+#include <linux/types.h> /* a distribution requires */
19083+#include <linux/parser.h>
19084+#include "aufs.h"
19085+
19086+/* ---------------------------------------------------------------------- */
19087+
/*
 * Tokens of the mount option parser.
 * A parsed option stores its token in au_opt.type; Opt_tail terminates
 * the parsed array and Opt_err is what the options table returns when
 * nothing matches.
 * The order of the enumerators is kept as is.
 */
enum {
	/* branch management */
	Opt_br,
	Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend,
	Opt_idel, Opt_imod, Opt_ireorder,

	/* directory handling */
	Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir,
	Opt_rdblk_def, Opt_rdhash_def,

	/* xino */
	Opt_xino, Opt_zxino, Opt_noxino,
	Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
	Opt_trunc_xino_path, Opt_itrunc_xino,
	Opt_trunc_xib, Opt_notrunc_xib,

	/* simple switches and policies */
	Opt_shwh, Opt_noshwh,
	Opt_plink, Opt_noplink, Opt_list_plink,
	Opt_udba,
	Opt_dio, Opt_nodio,
	/* Opt_lock, Opt_unlock, */
	Opt_cmd, Opt_cmd_args,
	Opt_diropq_a, Opt_diropq_w,
	Opt_warn_perm, Opt_nowarn_perm,
	Opt_wbr_copyup, Opt_wbr_create,
	Opt_refrof, Opt_norefrof,
	Opt_verbose, Opt_noverbose,
	Opt_sum, Opt_nosum, Opt_wsum,

	/* parser internals */
	Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
};
19112+
19113+static match_table_t options = {
19114+ {Opt_br, "br=%s"},
19115+ {Opt_br, "br:%s"},
19116+
19117+ {Opt_add, "add=%d:%s"},
19118+ {Opt_add, "add:%d:%s"},
19119+ {Opt_add, "ins=%d:%s"},
19120+ {Opt_add, "ins:%d:%s"},
19121+ {Opt_append, "append=%s"},
19122+ {Opt_append, "append:%s"},
19123+ {Opt_prepend, "prepend=%s"},
19124+ {Opt_prepend, "prepend:%s"},
19125+
19126+ {Opt_del, "del=%s"},
19127+ {Opt_del, "del:%s"},
19128+ /* {Opt_idel, "idel:%d"}, */
19129+ {Opt_mod, "mod=%s"},
19130+ {Opt_mod, "mod:%s"},
19131+ /* {Opt_imod, "imod:%d:%s"}, */
19132+
19133+ {Opt_dirwh, "dirwh=%d"},
19134+
19135+ {Opt_xino, "xino=%s"},
19136+ {Opt_noxino, "noxino"},
19137+ {Opt_trunc_xino, "trunc_xino"},
19138+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
19139+ {Opt_notrunc_xino, "notrunc_xino"},
19140+ {Opt_trunc_xino_path, "trunc_xino=%s"},
19141+ {Opt_itrunc_xino, "itrunc_xino=%d"},
19142+ /* {Opt_zxino, "zxino=%s"}, */
19143+ {Opt_trunc_xib, "trunc_xib"},
19144+ {Opt_notrunc_xib, "notrunc_xib"},
19145+
e49829fe 19146+#ifdef CONFIG_PROC_FS
1facf9fc 19147+ {Opt_plink, "plink"},
e49829fe
JR
19148+#else
19149+ {Opt_ignore_silent, "plink"},
19150+#endif
19151+
1facf9fc 19152+ {Opt_noplink, "noplink"},
e49829fe 19153+
1facf9fc 19154+#ifdef CONFIG_AUFS_DEBUG
19155+ {Opt_list_plink, "list_plink"},
19156+#endif
19157+
19158+ {Opt_udba, "udba=%s"},
19159+
4a4d8108
AM
19160+ {Opt_dio, "dio"},
19161+ {Opt_nodio, "nodio"},
19162+
1facf9fc 19163+ {Opt_diropq_a, "diropq=always"},
19164+ {Opt_diropq_a, "diropq=a"},
19165+ {Opt_diropq_w, "diropq=whiteouted"},
19166+ {Opt_diropq_w, "diropq=w"},
19167+
19168+ {Opt_warn_perm, "warn_perm"},
19169+ {Opt_nowarn_perm, "nowarn_perm"},
19170+
19171+ /* keep them temporary */
19172+ {Opt_ignore_silent, "coo=%s"},
19173+ {Opt_ignore_silent, "nodlgt"},
19174+ {Opt_ignore_silent, "nodirperm1"},
1facf9fc 19175+ {Opt_ignore_silent, "clean_plink"},
19176+
dece6358
AM
19177+#ifdef CONFIG_AUFS_SHWH
19178+ {Opt_shwh, "shwh"},
19179+#endif
19180+ {Opt_noshwh, "noshwh"},
19181+
1facf9fc 19182+ {Opt_rendir, "rendir=%d"},
19183+
19184+ {Opt_refrof, "refrof"},
19185+ {Opt_norefrof, "norefrof"},
19186+
19187+ {Opt_verbose, "verbose"},
19188+ {Opt_verbose, "v"},
19189+ {Opt_noverbose, "noverbose"},
19190+ {Opt_noverbose, "quiet"},
19191+ {Opt_noverbose, "q"},
19192+ {Opt_noverbose, "silent"},
19193+
19194+ {Opt_sum, "sum"},
19195+ {Opt_nosum, "nosum"},
19196+ {Opt_wsum, "wsum"},
19197+
19198+ {Opt_rdcache, "rdcache=%d"},
19199+ {Opt_rdblk, "rdblk=%d"},
dece6358 19200+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 19201+ {Opt_rdhash, "rdhash=%d"},
dece6358 19202+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 19203+
19204+ {Opt_wbr_create, "create=%s"},
19205+ {Opt_wbr_create, "create_policy=%s"},
19206+ {Opt_wbr_copyup, "cpup=%s"},
19207+ {Opt_wbr_copyup, "copyup=%s"},
19208+ {Opt_wbr_copyup, "copyup_policy=%s"},
19209+
19210+ /* internal use for the scripts */
19211+ {Opt_ignore_silent, "si=%s"},
19212+
19213+ {Opt_br, "dirs=%s"},
19214+ {Opt_ignore, "debug=%d"},
19215+ {Opt_ignore, "delete=whiteout"},
19216+ {Opt_ignore, "delete=all"},
19217+ {Opt_ignore, "imap=%s"},
19218+
1308ab2a 19219+ /* temporary workaround, due to old mount(8)? */
19220+ {Opt_ignore_silent, "relatime"},
19221+
1facf9fc 19222+ {Opt_err, NULL}
19223+};
19224+
19225+/* ---------------------------------------------------------------------- */
19226+
19227+static const char *au_parser_pattern(int val, struct match_token *token)
19228+{
19229+ while (token->pattern) {
19230+ if (token->token == val)
19231+ return token->pattern;
19232+ token++;
19233+ }
19234+ BUG();
19235+ return "??";
19236+}
19237+
19238+/* ---------------------------------------------------------------------- */
19239+
1e00d052 19240+static match_table_t brperm = {
1facf9fc 19241+ {AuBrPerm_RO, AUFS_BRPERM_RO},
19242+ {AuBrPerm_RR, AUFS_BRPERM_RR},
19243+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
19244+ {0, NULL}
19245+};
1facf9fc 19246+
1e00d052
AM
19247+static match_table_t brrattr = {
19248+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
19249+ {0, NULL}
19250+};
1facf9fc 19251+
1e00d052
AM
19252+static match_table_t brwattr = {
19253+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
19254+ {0, NULL}
1facf9fc 19255+};
19256+
1e00d052
AM
19257+#define AuBrStr_LONGEST AUFS_BRPERM_RW "+" AUFS_BRWATTR_NLWH
19258+
19259+static int br_attr_val(char *str, match_table_t table, substring_t args[])
19260+{
19261+ int attr, v;
19262+ char *p;
19263+
19264+ attr = 0;
19265+ do {
19266+ p = strchr(str, '+');
19267+ if (p)
19268+ *p = 0;
19269+ v = match_token(str, table, args);
19270+ if (v)
19271+ attr |= v;
19272+ else {
19273+ if (p)
19274+ *p = '+';
19275+ pr_warning("ignored branch attribute %s\n", str);
19276+ break;
19277+ }
19278+ if (p)
19279+ str = p + 1;
19280+ } while (p);
19281+
19282+ return attr;
19283+}
19284+
4a4d8108 19285+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 19286+{
19287+ int val;
1e00d052 19288+ char *p;
1facf9fc 19289+ substring_t args[MAX_OPT_ARGS];
19290+
1e00d052
AM
19291+ p = strchr(perm, '+');
19292+ if (p)
19293+ *p = 0;
19294+ val = match_token(perm, brperm, args);
19295+ if (!val) {
19296+ if (p)
19297+ *p = '+';
19298+ pr_warning("ignored branch permission %s\n", perm);
19299+ val = AuBrPerm_RO;
19300+ goto out;
19301+ }
19302+ if (!p)
19303+ goto out;
19304+
19305+ switch (val) {
19306+ case AuBrPerm_RO:
19307+ case AuBrPerm_RR:
19308+ val |= br_attr_val(p + 1, brrattr, args);
19309+ break;
19310+ case AuBrPerm_RW:
19311+ val |= br_attr_val(p + 1, brwattr, args);
19312+ break;
19313+ }
19314+
19315+out:
1facf9fc 19316+ return val;
19317+}
19318+
1e00d052
AM
19319+/* Caller should free the return value */
19320+char *au_optstr_br_perm(int brperm)
1facf9fc 19321+{
1e00d052
AM
19322+ char *p, a[sizeof(AuBrStr_LONGEST)];
19323+ int sz;
19324+
19325+#define SetPerm(str) do { \
19326+ sz = sizeof(str); \
19327+ memcpy(a, str, sz); \
19328+ p = a + sz - 1; \
19329+ } while (0)
19330+
19331+#define AppendAttr(flag, str) do { \
19332+ if (brperm & flag) { \
19333+ sz = sizeof(str); \
19334+ *p++ = '+'; \
19335+ memcpy(p, str, sz); \
19336+ p += sz - 1; \
19337+ } \
19338+ } while (0)
19339+
19340+ switch (brperm & AuBrPerm_Mask) {
19341+ case AuBrPerm_RO:
19342+ SetPerm(AUFS_BRPERM_RO);
19343+ break;
19344+ case AuBrPerm_RR:
19345+ SetPerm(AUFS_BRPERM_RR);
19346+ break;
19347+ case AuBrPerm_RW:
19348+ SetPerm(AUFS_BRPERM_RW);
19349+ break;
19350+ default:
19351+ AuDebugOn(1);
19352+ }
19353+
19354+ AppendAttr(AuBrRAttr_WH, AUFS_BRRATTR_WH);
19355+ AppendAttr(AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH);
19356+
19357+ AuDebugOn(strlen(a) >= sizeof(a));
19358+ return kstrdup(a, GFP_NOFS);
19359+#undef SetPerm
19360+#undef AppendAttr
1facf9fc 19361+}
19362+
19363+/* ---------------------------------------------------------------------- */
19364+
19365+static match_table_t udbalevel = {
19366+ {AuOpt_UDBA_REVAL, "reval"},
19367+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
19368+#ifdef CONFIG_AUFS_HNOTIFY
19369+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
19370+#ifdef CONFIG_AUFS_HFSNOTIFY
19371+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 19372+#endif
1facf9fc 19373+#endif
19374+ {-1, NULL}
19375+};
19376+
4a4d8108 19377+static int noinline_for_stack udba_val(char *str)
1facf9fc 19378+{
19379+ substring_t args[MAX_OPT_ARGS];
19380+
7f207e10 19381+ return match_token(str, udbalevel, args);
1facf9fc 19382+}
19383+
19384+const char *au_optstr_udba(int udba)
19385+{
19386+ return au_parser_pattern(udba, (void *)udbalevel);
19387+}
19388+
19389+/* ---------------------------------------------------------------------- */
19390+
19391+static match_table_t au_wbr_create_policy = {
19392+ {AuWbrCreate_TDP, "tdp"},
19393+ {AuWbrCreate_TDP, "top-down-parent"},
19394+ {AuWbrCreate_RR, "rr"},
19395+ {AuWbrCreate_RR, "round-robin"},
19396+ {AuWbrCreate_MFS, "mfs"},
19397+ {AuWbrCreate_MFS, "most-free-space"},
19398+ {AuWbrCreate_MFSV, "mfs:%d"},
19399+ {AuWbrCreate_MFSV, "most-free-space:%d"},
19400+
19401+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
19402+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
19403+ {AuWbrCreate_PMFS, "pmfs"},
19404+ {AuWbrCreate_PMFSV, "pmfs:%d"},
19405+
19406+ {-1, NULL}
19407+};
19408+
dece6358
AM
19409+/*
19410+ * cf. linux/lib/parser.c and cmdline.c
19411+ * gave up calling memparse() since it uses simple_strtoull() instead of
19412+ * strict_...().
19413+ */
4a4d8108
AM
19414+static int noinline_for_stack
19415+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 19416+{
19417+ int err;
19418+ unsigned int len;
19419+ char a[32];
19420+
19421+ err = -ERANGE;
19422+ len = s->to - s->from;
19423+ if (len + 1 <= sizeof(a)) {
19424+ memcpy(a, s->from, len);
19425+ a[len] = '\0';
19426+ err = strict_strtoull(a, 0, result);
19427+ }
19428+ return err;
19429+}
19430+
19431+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
19432+ struct au_opt_wbr_create *create)
19433+{
19434+ int err;
19435+ unsigned long long ull;
19436+
19437+ err = 0;
19438+ if (!au_match_ull(arg, &ull))
19439+ create->mfsrr_watermark = ull;
19440+ else {
4a4d8108 19441+ pr_err("bad integer in %s\n", str);
1facf9fc 19442+ err = -EINVAL;
19443+ }
19444+
19445+ return err;
19446+}
19447+
19448+static int au_wbr_mfs_sec(substring_t *arg, char *str,
19449+ struct au_opt_wbr_create *create)
19450+{
19451+ int n, err;
19452+
19453+ err = 0;
027c5e7a 19454+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 19455+ create->mfs_second = n;
19456+ else {
4a4d8108 19457+ pr_err("bad integer in %s\n", str);
1facf9fc 19458+ err = -EINVAL;
19459+ }
19460+
19461+ return err;
19462+}
19463+
4a4d8108
AM
19464+static int noinline_for_stack
19465+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 19466+{
19467+ int err, e;
19468+ substring_t args[MAX_OPT_ARGS];
19469+
19470+ err = match_token(str, au_wbr_create_policy, args);
19471+ create->wbr_create = err;
19472+ switch (err) {
19473+ case AuWbrCreate_MFSRRV:
19474+ e = au_wbr_mfs_wmark(&args[0], str, create);
19475+ if (!e)
19476+ e = au_wbr_mfs_sec(&args[1], str, create);
19477+ if (unlikely(e))
19478+ err = e;
19479+ break;
19480+ case AuWbrCreate_MFSRR:
19481+ e = au_wbr_mfs_wmark(&args[0], str, create);
19482+ if (unlikely(e)) {
19483+ err = e;
19484+ break;
19485+ }
19486+ /*FALLTHROUGH*/
19487+ case AuWbrCreate_MFS:
19488+ case AuWbrCreate_PMFS:
027c5e7a 19489+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 19490+ break;
19491+ case AuWbrCreate_MFSV:
19492+ case AuWbrCreate_PMFSV:
19493+ e = au_wbr_mfs_sec(&args[0], str, create);
19494+ if (unlikely(e))
19495+ err = e;
19496+ break;
19497+ }
19498+
19499+ return err;
19500+}
19501+
19502+const char *au_optstr_wbr_create(int wbr_create)
19503+{
19504+ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
19505+}
19506+
19507+static match_table_t au_wbr_copyup_policy = {
19508+ {AuWbrCopyup_TDP, "tdp"},
19509+ {AuWbrCopyup_TDP, "top-down-parent"},
19510+ {AuWbrCopyup_BUP, "bup"},
19511+ {AuWbrCopyup_BUP, "bottom-up-parent"},
19512+ {AuWbrCopyup_BU, "bu"},
19513+ {AuWbrCopyup_BU, "bottom-up"},
19514+ {-1, NULL}
19515+};
19516+
4a4d8108 19517+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 19518+{
19519+ substring_t args[MAX_OPT_ARGS];
19520+
19521+ return match_token(str, au_wbr_copyup_policy, args);
19522+}
19523+
19524+const char *au_optstr_wbr_copyup(int wbr_copyup)
19525+{
19526+ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
19527+}
19528+
19529+/* ---------------------------------------------------------------------- */
19530+
19531+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
19532+
/*
 * Print every parsed option in "opts", up to the Opt_tail terminator,
 * through AuDbg()/AuLabel().
 * The body exists only with CONFIG_AUFS_DEBUG; otherwise this function
 * does nothing.  An entry of an unexpected type is a BUG().
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch management */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* options carrying a plain number */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino */
		case Opt_xino:
			AuDbg("xino {%s %.*s}\n",
			      opt->xino.path,
			      AuDLNPair(opt->xino.file->f_dentry));
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;

		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* simple switches */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* writable branch policies */
		case Opt_wbr_create:
			AuDbg("create %d, %s\n", opt->wbr_create.wbr_create,
			      au_optstr_wbr_create(opt->wbr_create.wbr_create));
			/* the numbers which belong to the policy */
			switch (opt->wbr_create.wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", opt->wbr_create.mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      opt->wbr_create.mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      opt->wbr_create.mfsrr_watermark,
				      opt->wbr_create.mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;
		default:
			BUG();
		}
	}
#endif
}
19714+
19715+void au_opts_free(struct au_opts *opts)
19716+{
19717+ struct au_opt *opt;
19718+
19719+ opt = opts->opt;
19720+ while (opt->type != Opt_tail) {
19721+ switch (opt->type) {
19722+ case Opt_add:
19723+ case Opt_append:
19724+ case Opt_prepend:
19725+ path_put(&opt->add.path);
19726+ break;
19727+ case Opt_del:
19728+ case Opt_idel:
19729+ path_put(&opt->del.h_path);
19730+ break;
19731+ case Opt_mod:
19732+ case Opt_imod:
19733+ dput(opt->mod.h_root);
19734+ break;
19735+ case Opt_xino:
19736+ fput(opt->xino.file);
19737+ break;
19738+ }
19739+ opt++;
19740+ }
19741+}
19742+
19743+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
19744+ aufs_bindex_t bindex)
19745+{
19746+ int err;
19747+ struct au_opt_add *add = &opt->add;
19748+ char *p;
19749+
19750+ add->bindex = bindex;
1e00d052 19751+ add->perm = AuBrPerm_RO;
1facf9fc 19752+ add->pathname = opt_str;
19753+ p = strchr(opt_str, '=');
19754+ if (p) {
19755+ *p++ = 0;
19756+ if (*p)
19757+ add->perm = br_perm_val(p);
19758+ }
19759+
19760+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
19761+ if (!err) {
19762+ if (!p) {
19763+ add->perm = AuBrPerm_RO;
19764+ if (au_test_fs_rr(add->path.dentry->d_sb))
19765+ add->perm = AuBrPerm_RR;
19766+ else if (!bindex && !(sb_flags & MS_RDONLY))
19767+ add->perm = AuBrPerm_RW;
19768+ }
19769+ opt->type = Opt_add;
19770+ goto out;
19771+ }
4a4d8108 19772+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 19773+ err = -EINVAL;
19774+
4f0767ce 19775+out:
1facf9fc 19776+ return err;
19777+}
19778+
19779+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
19780+{
19781+ int err;
19782+
19783+ del->pathname = args[0].from;
19784+ AuDbg("del path %s\n", del->pathname);
19785+
19786+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
19787+ if (unlikely(err))
4a4d8108 19788+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 19789+
19790+ return err;
19791+}
19792+
#if 0 /* reserved for future use */
/*
 * Indexed variant of au_opts_parse_del(): take the branch at "bindex"
 * instead of looking a path up.  Compiled out.
 * Returns 0, or -EINVAL when "bindex" is out of bounds.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	} else {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19817+
4a4d8108
AM
19818+static int noinline_for_stack
19819+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 19820+{
19821+ int err;
19822+ struct path path;
19823+ char *p;
19824+
19825+ err = -EINVAL;
19826+ mod->path = args[0].from;
19827+ p = strchr(mod->path, '=');
19828+ if (unlikely(!p)) {
4a4d8108 19829+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 19830+ goto out;
19831+ }
19832+
19833+ *p++ = 0;
19834+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
19835+ if (unlikely(err)) {
4a4d8108 19836+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 19837+ goto out;
19838+ }
19839+
19840+ mod->perm = br_perm_val(p);
19841+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
19842+ mod->h_root = dget(path.dentry);
19843+ path_put(&path);
19844+
4f0767ce 19845+out:
1facf9fc 19846+ return err;
19847+}
19848+
#if 0 /* reserved for future use */
/*
 * Indexed variant of au_opts_parse_mod(): take the branch at "bindex"
 * and the permission from args[1].  Compiled out.
 * Returns 0, or -EINVAL when "bindex" is out of bounds.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	} else {
		err = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19875+
19876+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
19877+ substring_t args[])
19878+{
19879+ int err;
19880+ struct file *file;
19881+
19882+ file = au_xino_create(sb, args[0].from, /*silent*/0);
19883+ err = PTR_ERR(file);
19884+ if (IS_ERR(file))
19885+ goto out;
19886+
19887+ err = -EINVAL;
19888+ if (unlikely(file->f_dentry->d_sb == sb)) {
19889+ fput(file);
4a4d8108 19890+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 19891+ goto out;
19892+ }
19893+
19894+ err = 0;
19895+ xino->file = file;
19896+ xino->path = args[0].from;
19897+
4f0767ce 19898+out:
1facf9fc 19899+ return err;
19900+}
19901+
4a4d8108
AM
19902+static int noinline_for_stack
19903+au_opts_parse_xino_itrunc_path(struct super_block *sb,
19904+ struct au_opt_xino_itrunc *xino_itrunc,
19905+ substring_t args[])
1facf9fc 19906+{
19907+ int err;
19908+ aufs_bindex_t bend, bindex;
19909+ struct path path;
19910+ struct dentry *root;
19911+
19912+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
19913+ if (unlikely(err)) {
4a4d8108 19914+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 19915+ goto out;
19916+ }
19917+
19918+ xino_itrunc->bindex = -1;
19919+ root = sb->s_root;
19920+ aufs_read_lock(root, AuLock_FLUSH);
19921+ bend = au_sbend(sb);
19922+ for (bindex = 0; bindex <= bend; bindex++) {
19923+ if (au_h_dptr(root, bindex) == path.dentry) {
19924+ xino_itrunc->bindex = bindex;
19925+ break;
19926+ }
19927+ }
19928+ aufs_read_unlock(root, !AuLock_IR);
19929+ path_put(&path);
19930+
19931+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 19932+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 19933+ err = -EINVAL;
19934+ }
19935+
4f0767ce 19936+out:
1facf9fc 19937+ return err;
19938+}
19939+
19940+/* called without aufs lock */
19941+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
19942+{
19943+ int err, n, token;
19944+ aufs_bindex_t bindex;
19945+ unsigned char skipped;
19946+ struct dentry *root;
19947+ struct au_opt *opt, *opt_tail;
19948+ char *opt_str;
19949+ /* reduce the stack space */
19950+ union {
19951+ struct au_opt_xino_itrunc *xino_itrunc;
19952+ struct au_opt_wbr_create *create;
19953+ } u;
19954+ struct {
19955+ substring_t args[MAX_OPT_ARGS];
19956+ } *a;
19957+
19958+ err = -ENOMEM;
19959+ a = kmalloc(sizeof(*a), GFP_NOFS);
19960+ if (unlikely(!a))
19961+ goto out;
19962+
19963+ root = sb->s_root;
19964+ err = 0;
19965+ bindex = 0;
19966+ opt = opts->opt;
19967+ opt_tail = opt + opts->max_opt - 1;
19968+ opt->type = Opt_tail;
19969+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
19970+ err = -EINVAL;
19971+ skipped = 0;
19972+ token = match_token(opt_str, options, a->args);
19973+ switch (token) {
19974+ case Opt_br:
19975+ err = 0;
19976+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
19977+ && *opt_str) {
19978+ err = opt_add(opt, opt_str, opts->sb_flags,
19979+ bindex++);
19980+ if (unlikely(!err && ++opt > opt_tail)) {
19981+ err = -E2BIG;
19982+ break;
19983+ }
19984+ opt->type = Opt_tail;
19985+ skipped = 1;
19986+ }
19987+ break;
19988+ case Opt_add:
19989+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19990+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19991+ break;
19992+ }
19993+ bindex = n;
19994+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
19995+ bindex);
19996+ if (!err)
19997+ opt->type = token;
19998+ break;
19999+ case Opt_append:
20000+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
20001+ /*dummy bindex*/1);
20002+ if (!err)
20003+ opt->type = token;
20004+ break;
20005+ case Opt_prepend:
20006+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
20007+ /*bindex*/0);
20008+ if (!err)
20009+ opt->type = token;
20010+ break;
20011+ case Opt_del:
20012+ err = au_opts_parse_del(&opt->del, a->args);
20013+ if (!err)
20014+ opt->type = token;
20015+ break;
20016+#if 0 /* reserved for future use */
20017+ case Opt_idel:
20018+ del->pathname = "(indexed)";
20019+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 20020+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20021+ break;
20022+ }
20023+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
20024+ if (!err)
20025+ opt->type = token;
20026+ break;
20027+#endif
20028+ case Opt_mod:
20029+ err = au_opts_parse_mod(&opt->mod, a->args);
20030+ if (!err)
20031+ opt->type = token;
20032+ break;
20033+#ifdef IMOD /* reserved for future use */
20034+ case Opt_imod:
20035+ u.mod->path = "(indexed)";
20036+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 20037+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20038+ break;
20039+ }
20040+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
20041+ if (!err)
20042+ opt->type = token;
20043+ break;
20044+#endif
20045+ case Opt_xino:
20046+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
20047+ if (!err)
20048+ opt->type = token;
20049+ break;
20050+
20051+ case Opt_trunc_xino_path:
20052+ err = au_opts_parse_xino_itrunc_path
20053+ (sb, &opt->xino_itrunc, a->args);
20054+ if (!err)
20055+ opt->type = token;
20056+ break;
20057+
20058+ case Opt_itrunc_xino:
20059+ u.xino_itrunc = &opt->xino_itrunc;
20060+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 20061+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20062+ break;
20063+ }
20064+ u.xino_itrunc->bindex = n;
20065+ aufs_read_lock(root, AuLock_FLUSH);
20066+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 20067+ pr_err("out of bounds, %d\n", n);
1facf9fc 20068+ aufs_read_unlock(root, !AuLock_IR);
20069+ break;
20070+ }
20071+ aufs_read_unlock(root, !AuLock_IR);
20072+ err = 0;
20073+ opt->type = token;
20074+ break;
20075+
20076+ case Opt_dirwh:
20077+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
20078+ break;
20079+ err = 0;
20080+ opt->type = token;
20081+ break;
20082+
20083+ case Opt_rdcache:
027c5e7a
AM
20084+ if (unlikely(match_int(&a->args[0], &n))) {
20085+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20086+ break;
027c5e7a
AM
20087+ }
20088+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
20089+ pr_err("rdcache must be smaller than %d\n",
20090+ AUFS_RDCACHE_MAX);
20091+ break;
20092+ }
20093+ opt->rdcache = n;
1facf9fc 20094+ err = 0;
20095+ opt->type = token;
20096+ break;
20097+ case Opt_rdblk:
20098+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 20099+ || n < 0
1facf9fc 20100+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 20101+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20102+ break;
20103+ }
1308ab2a 20104+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
20105+ pr_err("rdblk must be larger than %d\n",
20106+ NAME_MAX);
1facf9fc 20107+ break;
20108+ }
20109+ opt->rdblk = n;
20110+ err = 0;
20111+ opt->type = token;
20112+ break;
20113+ case Opt_rdhash:
20114+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 20115+ || n < 0
1facf9fc 20116+ || n * sizeof(struct hlist_head)
20117+ > KMALLOC_MAX_SIZE)) {
4a4d8108 20118+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 20119+ break;
20120+ }
20121+ opt->rdhash = n;
20122+ err = 0;
20123+ opt->type = token;
20124+ break;
20125+
20126+ case Opt_trunc_xino:
20127+ case Opt_notrunc_xino:
20128+ case Opt_noxino:
20129+ case Opt_trunc_xib:
20130+ case Opt_notrunc_xib:
dece6358
AM
20131+ case Opt_shwh:
20132+ case Opt_noshwh:
1facf9fc 20133+ case Opt_plink:
20134+ case Opt_noplink:
20135+ case Opt_list_plink:
4a4d8108
AM
20136+ case Opt_dio:
20137+ case Opt_nodio:
1facf9fc 20138+ case Opt_diropq_a:
20139+ case Opt_diropq_w:
20140+ case Opt_warn_perm:
20141+ case Opt_nowarn_perm:
20142+ case Opt_refrof:
20143+ case Opt_norefrof:
20144+ case Opt_verbose:
20145+ case Opt_noverbose:
20146+ case Opt_sum:
20147+ case Opt_nosum:
20148+ case Opt_wsum:
dece6358
AM
20149+ case Opt_rdblk_def:
20150+ case Opt_rdhash_def:
1facf9fc 20151+ err = 0;
20152+ opt->type = token;
20153+ break;
20154+
20155+ case Opt_udba:
20156+ opt->udba = udba_val(a->args[0].from);
20157+ if (opt->udba >= 0) {
20158+ err = 0;
20159+ opt->type = token;
20160+ } else
4a4d8108 20161+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20162+ break;
20163+
20164+ case Opt_wbr_create:
20165+ u.create = &opt->wbr_create;
20166+ u.create->wbr_create
20167+ = au_wbr_create_val(a->args[0].from, u.create);
20168+ if (u.create->wbr_create >= 0) {
20169+ err = 0;
20170+ opt->type = token;
20171+ } else
4a4d8108 20172+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20173+ break;
20174+ case Opt_wbr_copyup:
20175+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
20176+ if (opt->wbr_copyup >= 0) {
20177+ err = 0;
20178+ opt->type = token;
20179+ } else
4a4d8108 20180+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20181+ break;
20182+
20183+ case Opt_ignore:
4a4d8108 20184+ pr_warning("ignored %s\n", opt_str);
1facf9fc 20185+ /*FALLTHROUGH*/
20186+ case Opt_ignore_silent:
20187+ skipped = 1;
20188+ err = 0;
20189+ break;
20190+ case Opt_err:
4a4d8108 20191+ pr_err("unknown option %s\n", opt_str);
1facf9fc 20192+ break;
20193+ }
20194+
20195+ if (!err && !skipped) {
20196+ if (unlikely(++opt > opt_tail)) {
20197+ err = -E2BIG;
20198+ opt--;
20199+ opt->type = Opt_tail;
20200+ break;
20201+ }
20202+ opt->type = Opt_tail;
20203+ }
20204+ }
20205+
20206+ kfree(a);
20207+ dump_opts(opts);
20208+ if (unlikely(err))
20209+ au_opts_free(opts);
20210+
4f0767ce 20211+out:
1facf9fc 20212+ return err;
20213+}
20214+
20215+static int au_opt_wbr_create(struct super_block *sb,
20216+ struct au_opt_wbr_create *create)
20217+{
20218+ int err;
20219+ struct au_sbinfo *sbinfo;
20220+
dece6358
AM
20221+ SiMustWriteLock(sb);
20222+
1facf9fc 20223+ err = 1; /* handled */
20224+ sbinfo = au_sbi(sb);
20225+ if (sbinfo->si_wbr_create_ops->fin) {
20226+ err = sbinfo->si_wbr_create_ops->fin(sb);
20227+ if (!err)
20228+ err = 1;
20229+ }
20230+
20231+ sbinfo->si_wbr_create = create->wbr_create;
20232+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
20233+ switch (create->wbr_create) {
20234+ case AuWbrCreate_MFSRRV:
20235+ case AuWbrCreate_MFSRR:
20236+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
20237+ /*FALLTHROUGH*/
20238+ case AuWbrCreate_MFS:
20239+ case AuWbrCreate_MFSV:
20240+ case AuWbrCreate_PMFS:
20241+ case AuWbrCreate_PMFSV:
e49829fe
JR
20242+ sbinfo->si_wbr_mfs.mfs_expire
20243+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 20244+ break;
20245+ }
20246+
20247+ if (sbinfo->si_wbr_create_ops->init)
20248+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
20249+
20250+ return err;
20251+}
20252+
20253+/*
20254+ * returns,
20255+ * plus: processed without an error
20256+ * zero: unprocessed
20257+ */
20258+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
20259+ struct au_opts *opts)
20260+{
20261+ int err;
20262+ struct au_sbinfo *sbinfo;
20263+
dece6358
AM
20264+ SiMustWriteLock(sb);
20265+
1facf9fc 20266+ err = 1; /* handled */
20267+ sbinfo = au_sbi(sb);
20268+ switch (opt->type) {
20269+ case Opt_udba:
20270+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
20271+ sbinfo->si_mntflags |= opt->udba;
20272+ opts->given_udba |= opt->udba;
20273+ break;
20274+
20275+ case Opt_plink:
20276+ au_opt_set(sbinfo->si_mntflags, PLINK);
20277+ break;
20278+ case Opt_noplink:
20279+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 20280+ au_plink_put(sb, /*verbose*/1);
1facf9fc 20281+ au_opt_clr(sbinfo->si_mntflags, PLINK);
20282+ break;
20283+ case Opt_list_plink:
20284+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
20285+ au_plink_list(sb);
20286+ break;
20287+
4a4d8108
AM
20288+ case Opt_dio:
20289+ au_opt_set(sbinfo->si_mntflags, DIO);
20290+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20291+ break;
20292+ case Opt_nodio:
20293+ au_opt_clr(sbinfo->si_mntflags, DIO);
20294+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20295+ break;
20296+
1facf9fc 20297+ case Opt_diropq_a:
20298+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20299+ break;
20300+ case Opt_diropq_w:
20301+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20302+ break;
20303+
20304+ case Opt_warn_perm:
20305+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
20306+ break;
20307+ case Opt_nowarn_perm:
20308+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
20309+ break;
20310+
20311+ case Opt_refrof:
20312+ au_opt_set(sbinfo->si_mntflags, REFROF);
20313+ break;
20314+ case Opt_norefrof:
20315+ au_opt_clr(sbinfo->si_mntflags, REFROF);
20316+ break;
20317+
20318+ case Opt_verbose:
20319+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
20320+ break;
20321+ case Opt_noverbose:
20322+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
20323+ break;
20324+
20325+ case Opt_sum:
20326+ au_opt_set(sbinfo->si_mntflags, SUM);
20327+ break;
20328+ case Opt_wsum:
20329+ au_opt_clr(sbinfo->si_mntflags, SUM);
20330+ au_opt_set(sbinfo->si_mntflags, SUM_W);
20331+ case Opt_nosum:
20332+ au_opt_clr(sbinfo->si_mntflags, SUM);
20333+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
20334+ break;
20335+
20336+ case Opt_wbr_create:
20337+ err = au_opt_wbr_create(sb, &opt->wbr_create);
20338+ break;
20339+ case Opt_wbr_copyup:
20340+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
20341+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
20342+ break;
20343+
20344+ case Opt_dirwh:
20345+ sbinfo->si_dirwh = opt->dirwh;
20346+ break;
20347+
20348+ case Opt_rdcache:
e49829fe
JR
20349+ sbinfo->si_rdcache
20350+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 20351+ break;
20352+ case Opt_rdblk:
20353+ sbinfo->si_rdblk = opt->rdblk;
20354+ break;
dece6358
AM
20355+ case Opt_rdblk_def:
20356+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
20357+ break;
1facf9fc 20358+ case Opt_rdhash:
20359+ sbinfo->si_rdhash = opt->rdhash;
20360+ break;
dece6358
AM
20361+ case Opt_rdhash_def:
20362+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
20363+ break;
20364+
20365+ case Opt_shwh:
20366+ au_opt_set(sbinfo->si_mntflags, SHWH);
20367+ break;
20368+ case Opt_noshwh:
20369+ au_opt_clr(sbinfo->si_mntflags, SHWH);
20370+ break;
1facf9fc 20371+
20372+ case Opt_trunc_xino:
20373+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
20374+ break;
20375+ case Opt_notrunc_xino:
20376+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
20377+ break;
20378+
20379+ case Opt_trunc_xino_path:
20380+ case Opt_itrunc_xino:
20381+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
20382+ if (!err)
20383+ err = 1;
20384+ break;
20385+
20386+ case Opt_trunc_xib:
20387+ au_fset_opts(opts->flags, TRUNC_XIB);
20388+ break;
20389+ case Opt_notrunc_xib:
20390+ au_fclr_opts(opts->flags, TRUNC_XIB);
20391+ break;
20392+
20393+ default:
20394+ err = 0;
20395+ break;
20396+ }
20397+
20398+ return err;
20399+}
20400+
20401+/*
20402+ * returns tri-state.
20403+ * plus: processed without an error
20404+ * zero: unprocessed
20405+ * minus: error
20406+ */
20407+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
20408+ struct au_opts *opts)
20409+{
20410+ int err, do_refresh;
20411+
20412+ err = 0;
20413+ switch (opt->type) {
20414+ case Opt_append:
20415+ opt->add.bindex = au_sbend(sb) + 1;
20416+ if (opt->add.bindex < 0)
20417+ opt->add.bindex = 0;
20418+ goto add;
20419+ case Opt_prepend:
20420+ opt->add.bindex = 0;
20421+ add:
20422+ case Opt_add:
20423+ err = au_br_add(sb, &opt->add,
20424+ au_ftest_opts(opts->flags, REMOUNT));
20425+ if (!err) {
20426+ err = 1;
027c5e7a 20427+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20428+ }
20429+ break;
20430+
20431+ case Opt_del:
20432+ case Opt_idel:
20433+ err = au_br_del(sb, &opt->del,
20434+ au_ftest_opts(opts->flags, REMOUNT));
20435+ if (!err) {
20436+ err = 1;
20437+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 20438+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20439+ }
20440+ break;
20441+
20442+ case Opt_mod:
20443+ case Opt_imod:
20444+ err = au_br_mod(sb, &opt->mod,
20445+ au_ftest_opts(opts->flags, REMOUNT),
20446+ &do_refresh);
20447+ if (!err) {
20448+ err = 1;
027c5e7a
AM
20449+ if (do_refresh)
20450+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20451+ }
20452+ break;
20453+ }
20454+
20455+ return err;
20456+}
20457+
20458+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
20459+ struct au_opt_xino **opt_xino,
20460+ struct au_opts *opts)
20461+{
20462+ int err;
20463+ aufs_bindex_t bend, bindex;
20464+ struct dentry *root, *parent, *h_root;
20465+
20466+ err = 0;
20467+ switch (opt->type) {
20468+ case Opt_xino:
20469+ err = au_xino_set(sb, &opt->xino,
20470+ !!au_ftest_opts(opts->flags, REMOUNT));
20471+ if (unlikely(err))
20472+ break;
20473+
20474+ *opt_xino = &opt->xino;
20475+ au_xino_brid_set(sb, -1);
20476+
20477+ /* safe d_parent access */
20478+ parent = opt->xino.file->f_dentry->d_parent;
20479+ root = sb->s_root;
20480+ bend = au_sbend(sb);
20481+ for (bindex = 0; bindex <= bend; bindex++) {
20482+ h_root = au_h_dptr(root, bindex);
20483+ if (h_root == parent) {
20484+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
20485+ break;
20486+ }
20487+ }
20488+ break;
20489+
20490+ case Opt_noxino:
20491+ au_xino_clr(sb);
20492+ au_xino_brid_set(sb, -1);
20493+ *opt_xino = (void *)-1;
20494+ break;
20495+ }
20496+
20497+ return err;
20498+}
20499+
20500+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20501+ unsigned int pending)
20502+{
20503+ int err;
20504+ aufs_bindex_t bindex, bend;
20505+ unsigned char do_plink, skip, do_free;
20506+ struct au_branch *br;
20507+ struct au_wbr *wbr;
20508+ struct dentry *root;
20509+ struct inode *dir, *h_dir;
20510+ struct au_sbinfo *sbinfo;
20511+ struct au_hinode *hdir;
20512+
dece6358
AM
20513+ SiMustAnyLock(sb);
20514+
1facf9fc 20515+ sbinfo = au_sbi(sb);
20516+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
20517+
dece6358
AM
20518+ if (!(sb_flags & MS_RDONLY)) {
20519+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
4a4d8108 20520+ pr_warning("first branch should be rw\n");
dece6358 20521+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
4a4d8108 20522+ pr_warning("shwh should be used with ro\n");
dece6358 20523+ }
1facf9fc 20524+
4a4d8108 20525+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 20526+ && !au_opt_test(sbinfo->si_mntflags, XINO))
4a4d8108 20527+ pr_warning("udba=*notify requires xino\n");
1facf9fc 20528+
20529+ err = 0;
20530+ root = sb->s_root;
4a4d8108 20531+ dir = root->d_inode;
1facf9fc 20532+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
20533+ bend = au_sbend(sb);
20534+ for (bindex = 0; !err && bindex <= bend; bindex++) {
20535+ skip = 0;
20536+ h_dir = au_h_iptr(dir, bindex);
20537+ br = au_sbr(sb, bindex);
20538+ do_free = 0;
20539+
20540+ wbr = br->br_wbr;
20541+ if (wbr)
20542+ wbr_wh_read_lock(wbr);
20543+
1e00d052 20544+ if (!au_br_writable(br->br_perm)) {
1facf9fc 20545+ do_free = !!wbr;
20546+ skip = (!wbr
20547+ || (!wbr->wbr_whbase
20548+ && !wbr->wbr_plink
20549+ && !wbr->wbr_orph));
1e00d052 20550+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 20551+ /* skip = (!br->br_whbase && !br->br_orph); */
20552+ skip = (!wbr || !wbr->wbr_whbase);
20553+ if (skip && wbr) {
20554+ if (do_plink)
20555+ skip = !!wbr->wbr_plink;
20556+ else
20557+ skip = !wbr->wbr_plink;
20558+ }
1e00d052 20559+ } else {
1facf9fc 20560+ /* skip = (br->br_whbase && br->br_ohph); */
20561+ skip = (wbr && wbr->wbr_whbase);
20562+ if (skip) {
20563+ if (do_plink)
20564+ skip = !!wbr->wbr_plink;
20565+ else
20566+ skip = !wbr->wbr_plink;
20567+ }
1facf9fc 20568+ }
20569+ if (wbr)
20570+ wbr_wh_read_unlock(wbr);
20571+
20572+ if (skip)
20573+ continue;
20574+
20575+ hdir = au_hi(dir, bindex);
4a4d8108 20576+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 20577+ if (wbr)
20578+ wbr_wh_write_lock(wbr);
20579+ err = au_wh_init(au_h_dptr(root, bindex), br, sb);
20580+ if (wbr)
20581+ wbr_wh_write_unlock(wbr);
4a4d8108 20582+ au_hn_imtx_unlock(hdir);
1facf9fc 20583+
20584+ if (!err && do_free) {
20585+ kfree(wbr);
20586+ br->br_wbr = NULL;
20587+ }
20588+ }
20589+
20590+ return err;
20591+}
20592+
20593+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
20594+{
20595+ int err;
20596+ unsigned int tmp;
027c5e7a 20597+ aufs_bindex_t bindex, bend;
1facf9fc 20598+ struct au_opt *opt;
20599+ struct au_opt_xino *opt_xino, xino;
20600+ struct au_sbinfo *sbinfo;
027c5e7a 20601+ struct au_branch *br;
1facf9fc 20602+
dece6358
AM
20603+ SiMustWriteLock(sb);
20604+
1facf9fc 20605+ err = 0;
20606+ opt_xino = NULL;
20607+ opt = opts->opt;
20608+ while (err >= 0 && opt->type != Opt_tail)
20609+ err = au_opt_simple(sb, opt++, opts);
20610+ if (err > 0)
20611+ err = 0;
20612+ else if (unlikely(err < 0))
20613+ goto out;
20614+
20615+ /* disable xino and udba temporary */
20616+ sbinfo = au_sbi(sb);
20617+ tmp = sbinfo->si_mntflags;
20618+ au_opt_clr(sbinfo->si_mntflags, XINO);
20619+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
20620+
20621+ opt = opts->opt;
20622+ while (err >= 0 && opt->type != Opt_tail)
20623+ err = au_opt_br(sb, opt++, opts);
20624+ if (err > 0)
20625+ err = 0;
20626+ else if (unlikely(err < 0))
20627+ goto out;
20628+
20629+ bend = au_sbend(sb);
20630+ if (unlikely(bend < 0)) {
20631+ err = -EINVAL;
4a4d8108 20632+ pr_err("no branches\n");
1facf9fc 20633+ goto out;
20634+ }
20635+
20636+ if (au_opt_test(tmp, XINO))
20637+ au_opt_set(sbinfo->si_mntflags, XINO);
20638+ opt = opts->opt;
20639+ while (!err && opt->type != Opt_tail)
20640+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
20641+ if (unlikely(err))
20642+ goto out;
20643+
20644+ err = au_opts_verify(sb, sb->s_flags, tmp);
20645+ if (unlikely(err))
20646+ goto out;
20647+
20648+ /* restore xino */
20649+ if (au_opt_test(tmp, XINO) && !opt_xino) {
20650+ xino.file = au_xino_def(sb);
20651+ err = PTR_ERR(xino.file);
20652+ if (IS_ERR(xino.file))
20653+ goto out;
20654+
20655+ err = au_xino_set(sb, &xino, /*remount*/0);
20656+ fput(xino.file);
20657+ if (unlikely(err))
20658+ goto out;
20659+ }
20660+
20661+ /* restore udba */
027c5e7a 20662+ tmp &= AuOptMask_UDBA;
1facf9fc 20663+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
20664+ sbinfo->si_mntflags |= tmp;
20665+ bend = au_sbend(sb);
20666+ for (bindex = 0; bindex <= bend; bindex++) {
20667+ br = au_sbr(sb, bindex);
20668+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
20669+ if (unlikely(err))
20670+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
20671+ bindex, err);
20672+ /* go on even if err */
20673+ }
4a4d8108 20674+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
1facf9fc 20675+ struct inode *dir = sb->s_root->d_inode;
4a4d8108 20676+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 20677+ }
20678+
4f0767ce 20679+out:
1facf9fc 20680+ return err;
20681+}
20682+
20683+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
20684+{
20685+ int err, rerr;
20686+ struct inode *dir;
20687+ struct au_opt_xino *opt_xino;
20688+ struct au_opt *opt;
20689+ struct au_sbinfo *sbinfo;
20690+
dece6358
AM
20691+ SiMustWriteLock(sb);
20692+
1facf9fc 20693+ dir = sb->s_root->d_inode;
20694+ sbinfo = au_sbi(sb);
20695+ err = 0;
20696+ opt_xino = NULL;
20697+ opt = opts->opt;
20698+ while (err >= 0 && opt->type != Opt_tail) {
20699+ err = au_opt_simple(sb, opt, opts);
20700+ if (!err)
20701+ err = au_opt_br(sb, opt, opts);
20702+ if (!err)
20703+ err = au_opt_xino(sb, opt, &opt_xino, opts);
20704+ opt++;
20705+ }
20706+ if (err > 0)
20707+ err = 0;
20708+ AuTraceErr(err);
20709+ /* go on even err */
20710+
20711+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
20712+ if (unlikely(rerr && !err))
20713+ err = rerr;
20714+
20715+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
20716+ rerr = au_xib_trunc(sb);
20717+ if (unlikely(rerr && !err))
20718+ err = rerr;
20719+ }
20720+
20721+ /* will be handled by the caller */
027c5e7a 20722+ if (!au_ftest_opts(opts->flags, REFRESH)
1facf9fc 20723+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
027c5e7a 20724+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20725+
20726+ AuDbg("status 0x%x\n", opts->flags);
20727+ return err;
20728+}
20729+
20730+/* ---------------------------------------------------------------------- */
20731+
20732+unsigned int au_opt_udba(struct super_block *sb)
20733+{
20734+ return au_mntflags(sb) & AuOptMask_UDBA;
20735+}
7f207e10
AM
20736diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
20737--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
1e00d052 20738+++ linux/fs/aufs/opts.h 2011-10-24 20:51:51.580466925 +0200
027c5e7a 20739@@ -0,0 +1,210 @@
1facf9fc 20740+/*
027c5e7a 20741+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20742+ *
20743+ * This program, aufs is free software; you can redistribute it and/or modify
20744+ * it under the terms of the GNU General Public License as published by
20745+ * the Free Software Foundation; either version 2 of the License, or
20746+ * (at your option) any later version.
dece6358
AM
20747+ *
20748+ * This program is distributed in the hope that it will be useful,
20749+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20750+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20751+ * GNU General Public License for more details.
20752+ *
20753+ * You should have received a copy of the GNU General Public License
20754+ * along with this program; if not, write to the Free Software
20755+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20756+ */
20757+
20758+/*
20759+ * mount options/flags
20760+ */
20761+
20762+#ifndef __AUFS_OPTS_H__
20763+#define __AUFS_OPTS_H__
20764+
20765+#ifdef __KERNEL__
20766+
dece6358 20767+#include <linux/path.h>
1facf9fc 20768+#include <linux/aufs_type.h>
20769+
dece6358
AM
20770+struct file;
20771+struct super_block;
20772+
1facf9fc 20773+/* ---------------------------------------------------------------------- */
20774+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* unimplemented */
#define AuOpt_REFROF		(1 << 8)	/* unimplemented */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/* a feature that is configured out gets a zero bit, so its tests are false */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* default mount flags */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all UDBA bits; au_opt_set_udba() keeps at most one of them set */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test/set/clear a single AuOpt_<name> bit in @flags.
 * @flags is parenthesized so that an expression argument such as "a | b"
 * is masked as a whole.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
/* UDBA bits must go through au_opt_set_udba(); enforced at build time */
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* replace whichever UDBA bit is set with AuOpt_<name> */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
1facf9fc 20823+
e49829fe
JR
20824+static inline unsigned int au_opts_plink(unsigned int mntflags)
20825+{
20826+#ifdef CONFIG_PROC_FS
20827+ return mntflags;
20828+#else
20829+ return mntflags & ~AuOpt_PLINK;
20830+#endif
20831+}
20832+
1facf9fc 20833+/* ---------------------------------------------------------------------- */
20834+
/*
 * policies to select one among multiple writable branches.
 * the values are written out since they are used as array indices.
 */
enum {
	AuWbrCreate_TDP		= 0,	/* top down parent */
	AuWbrCreate_RR		= 1,	/* round robin */
	AuWbrCreate_MFS		= 2,	/* most free space */
	AuWbrCreate_MFSV	= 3,	/* mfs with seconds */
	AuWbrCreate_MFSRR	= 4,	/* mfs then rr */
	AuWbrCreate_MFSRRV	= 5,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS	= 6,	/* parent and mfs */
	AuWbrCreate_PMFSV	= 7,	/* parent and mfs with seconds */

	/* the default policy */
	AuWbrCreate_Def		= AuWbrCreate_TDP
};
20848+
/* copy-up policies */
enum {
	AuWbrCopyup_TDP	= 0,	/* top down parent */
	AuWbrCopyup_BUP	= 1,	/* bottom up parent */
	AuWbrCopyup_BU	= 2,	/* bottom up */

	/* the default policy */
	AuWbrCopyup_Def	= AuWbrCopyup_TDP
};
20856+
20857+/* ---------------------------------------------------------------------- */
20858+
/* payload of the add/append/prepend options */
struct au_opt_add {
	/* target index; au_opt_br() overwrites it for append and prepend */
	aufs_bindex_t	bindex;
	char		*pathname;
	int		perm;
	struct path	path;
};

/* payload of the del/idel options */
struct au_opt_del {
	char		*pathname;
	struct path	h_path;
};

/* payload of the mod/imod options */
struct au_opt_mod {
	char		*path;
	int		perm;
	struct dentry	*h_root;
};

/* payload of the xino option */
struct au_opt_xino {
	char		*path;
	/* au_opt_xino() compares this file's parent dentry to branch roots */
	struct file	*file;
};

/* payload of the xino truncation options */
struct au_opt_xino_itrunc {
	/* passed to au_xino_trunc() */
	aufs_bindex_t	bindex;
};

/* payload of the create= option */
struct au_opt_wbr_create {
	/* one of AuWbrCreate_*; used as an index into au_wbr_create_ops */
	int			wbr_create;
	/* in seconds; converted to jiffies when applied */
	int			mfs_second;
	unsigned long long	mfsrr_watermark;
};

/*
 * a single parsed option.
 * arrays of these are terminated by an entry whose type is Opt_tail.
 */
struct au_opt {
	int		type;	/* Opt_xxx token selecting the union member */
	union {
		struct au_opt_xino	xino;
		struct au_opt_xino_itrunc xino_itrunc;
		struct au_opt_add	add;
		struct au_opt_del	del;
		struct au_opt_mod	mod;
		int			dirwh;
		int			rdcache;	/* in seconds */
		unsigned int		rdblk;
		unsigned int		rdhash;
		int			udba;		/* an AuOpt_UDBA_* bit */
		struct au_opt_wbr_create wbr_create;
		int			wbr_copyup;	/* AuWbrCopyup_* */
	};
};
20909+
/* opts flags: bits kept in struct au_opts.flags */
#define AuOpts_REMOUNT		(1 << 0)
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)

/* test/set/clear a single AuOpts_<name> bit in @flags */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) do { \
	(flags) |= AuOpts_##name; \
} while (0)
#define au_fclr_opts(flags, name) do { \
	(flags) &= ~AuOpts_##name; \
} while (0)
1facf9fc 20920+
/* the whole set of options handed to the mount/remount code */
struct au_opts {
	struct au_opt	*opt;		/* array terminated by Opt_tail */
	int		max_opt;	/* presumably the capacity of @opt; TODO confirm */

	unsigned int	given_udba;	/* OR of the udba values given */
	unsigned int	flags;		/* AuOpts_xxx bits */
	unsigned long	sb_flags;	/* passed to au_opts_verify() on remount */
};
20929+
20930+/* ---------------------------------------------------------------------- */
20931+
/* option value to string conversions */
char *au_optstr_br_perm(int brperm);
const char *au_optstr_udba(int udba);
const char *au_optstr_wbr_copyup(int wbr_copyup);
const char *au_optstr_wbr_create(int wbr_create);

/* parsing, verification and application of mount options */
void au_opts_free(struct au_opts *opts);
int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
		   unsigned int pending);
int au_opts_mount(struct super_block *sb, struct au_opts *opts);
int au_opts_remount(struct super_block *sb, struct au_opts *opts);

/* the UDBA bits of the current mount flags */
unsigned int au_opt_udba(struct super_block *sb);
20945+
20946+/* ---------------------------------------------------------------------- */
20947+
20948+#endif /* __KERNEL__ */
20949+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
20950diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
20951--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
53392da6 20952+++ linux/fs/aufs/plink.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 20953@@ -0,0 +1,515 @@
1facf9fc 20954+/*
027c5e7a 20955+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20956+ *
20957+ * This program, aufs is free software; you can redistribute it and/or modify
20958+ * it under the terms of the GNU General Public License as published by
20959+ * the Free Software Foundation; either version 2 of the License, or
20960+ * (at your option) any later version.
dece6358
AM
20961+ *
20962+ * This program is distributed in the hope that it will be useful,
20963+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20964+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20965+ * GNU General Public License for more details.
20966+ *
20967+ * You should have received a copy of the GNU General Public License
20968+ * along with this program; if not, write to the Free Software
20969+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20970+ */
20971+
20972+/*
20973+ * pseudo-link
20974+ */
20975+
20976+#include "aufs.h"
20977+
20978+/*
e49829fe 20979+ * the pseudo-link maintenance mode.
1facf9fc 20980+ * during a user process maintains the pseudo-links,
20981+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
20982+ *
20983+ * Flags
20984+ * NOPLM:
20985+ * For entry functions which will handle plink, and i_mutex is already held
20986+ * in VFS.
20987+ * They cannot wait and should return an error at once.
20988+ * Callers has to check the error.
20989+ * NOPLMW:
20990+ * For entry functions which will handle plink, but i_mutex is not held
20991+ * in VFS.
20992+ * They can wait the plink maintenance mode to finish.
20993+ *
20994+ * They behave like F_SETLK and F_SETLKW.
20995+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 20996+ */
e49829fe
JR
20997+
20998+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 20999+{
e49829fe
JR
21000+ int err;
21001+ pid_t pid, ppid;
21002+ struct au_sbinfo *sbi;
dece6358
AM
21003+
21004+ SiMustAnyLock(sb);
21005+
e49829fe
JR
21006+ err = 0;
21007+ if (!au_opt_test(au_mntflags(sb), PLINK))
21008+ goto out;
21009+
21010+ sbi = au_sbi(sb);
21011+ pid = sbi->si_plink_maint_pid;
21012+ if (!pid || pid == current->pid)
21013+ goto out;
21014+
21015+ /* todo: it highly depends upon /sbin/mount.aufs */
21016+ rcu_read_lock();
21017+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
21018+ rcu_read_unlock();
21019+ if (pid == ppid)
21020+ goto out;
21021+
21022+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
21023+ /* if there is no i_mutex lock in VFS, we don't need to wait */
21024+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
21025+ while (sbi->si_plink_maint_pid) {
21026+ si_read_unlock(sb);
21027+ /* gave up wake_up_bit() */
21028+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
21029+
21030+ if (au_ftest_lock(flags, FLUSH))
21031+ au_nwt_flush(&sbi->si_nowait);
21032+ si_noflush_read_lock(sb);
21033+ }
21034+ } else if (au_ftest_lock(flags, NOPLM)) {
21035+ AuDbg("ppid %d, pid %d\n", ppid, pid);
21036+ err = -EAGAIN;
21037+ }
21038+
21039+out:
21040+ return err;
4a4d8108
AM
21041+}
21042+
/*
 * leave the pseudo-link maintenance mode: clear the maintainer's pid under
 * si_plink_maint_lock, then wake up everybody sleeping on si_plink_wq.
 */
void au_plink_maint_leave(struct au_sbinfo *sbinfo)
{
	spin_lock(&sbinfo->si_plink_maint_lock);
	sbinfo->si_plink_maint_pid = 0;
	spin_unlock(&sbinfo->si_plink_maint_lock);
	/* au_plink_maint() waits on this queue for the pid to become zero */
	wake_up_all(&sbinfo->si_plink_wq);
}
21050+
e49829fe 21051+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
21052+{
21053+ int err;
4a4d8108
AM
21054+ struct au_sbinfo *sbinfo;
21055+
21056+ err = 0;
4a4d8108
AM
21057+ sbinfo = au_sbi(sb);
21058+ /* make sure i am the only one in this fs */
e49829fe
JR
21059+ si_write_lock(sb, AuLock_FLUSH);
21060+ if (au_opt_test(au_mntflags(sb), PLINK)) {
21061+ spin_lock(&sbinfo->si_plink_maint_lock);
21062+ if (!sbinfo->si_plink_maint_pid)
21063+ sbinfo->si_plink_maint_pid = current->pid;
21064+ else
21065+ err = -EBUSY;
21066+ spin_unlock(&sbinfo->si_plink_maint_lock);
21067+ }
4a4d8108
AM
21068+ si_write_unlock(sb);
21069+
21070+ return err;
1facf9fc 21071+}
21072+
21073+/* ---------------------------------------------------------------------- */
21074+
/* an entry of the per-superblock pseudo-link list (si_plink.head) */
struct pseudo_link {
	union {
		struct list_head list;	/* traversed under rcu_read_lock() */
		struct rcu_head rcu;	/* presumably for deferred freeing; TODO confirm */
	};
	struct inode *inode;		/* the pseudo-linked inode */
};
21082+
#ifdef CONFIG_AUFS_DEBUG
/* debug only: print the inode number of every pseudo-linked inode */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbi;
	struct pseudo_link *entry;

	SiMustAnyLock(sb);

	sbi = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &sbi->si_plink.head, list)
		AuDbg("%lu\n", entry->inode->i_ino);
	rcu_read_unlock();
}
#endif
21103+
21104+/* is the inode pseudo-linked? */
21105+int au_plink_test(struct inode *inode)
21106+{
21107+ int found;
21108+ struct au_sbinfo *sbinfo;
21109+ struct list_head *plink_list;
21110+ struct pseudo_link *plink;
21111+
21112+ sbinfo = au_sbi(inode->i_sb);
dece6358 21113+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 21114+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 21115+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 21116+
21117+ found = 0;
21118+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21119+ rcu_read_lock();
21120+ list_for_each_entry_rcu(plink, plink_list, list)
1facf9fc 21121+ if (plink->inode == inode) {
21122+ found = 1;
21123+ break;
21124+ }
4a4d8108 21125+ rcu_read_unlock();
1facf9fc 21126+ return found;
21127+}
21128+
21129+/* ---------------------------------------------------------------------- */
21130+
21131+/*
21132+ * generate a name for plink.
21133+ * the file will be stored under AUFS_WH_PLINKDIR.
21134+ */
21135+/* 20 is max digits length of ulong 64 */
21136+#define PLINK_NAME_LEN ((20 + 1) * 2)
21137+
21138+static int plink_name(char *name, int len, struct inode *inode,
21139+ aufs_bindex_t bindex)
21140+{
21141+ int rlen;
21142+ struct inode *h_inode;
21143+
21144+ h_inode = au_h_iptr(inode, bindex);
21145+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
21146+ return rlen;
21147+}
21148+
7f207e10
AM
21149+struct au_do_plink_lkup_args {
21150+ struct dentry **errp;
21151+ struct qstr *tgtname;
21152+ struct dentry *h_parent;
21153+ struct au_branch *br;
21154+};
21155+
21156+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
21157+ struct dentry *h_parent,
21158+ struct au_branch *br)
21159+{
21160+ struct dentry *h_dentry;
21161+ struct mutex *h_mtx;
21162+
21163+ h_mtx = &h_parent->d_inode->i_mutex;
21164+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
21165+ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL);
21166+ mutex_unlock(h_mtx);
21167+ return h_dentry;
21168+}
21169+
21170+static void au_call_do_plink_lkup(void *args)
21171+{
21172+ struct au_do_plink_lkup_args *a = args;
21173+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
21174+}
21175+
1facf9fc 21176+/* lookup the plink-ed @inode under the branch at @bindex */
21177+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
21178+{
21179+ struct dentry *h_dentry, *h_parent;
21180+ struct au_branch *br;
21181+ struct inode *h_dir;
7f207e10 21182+ int wkq_err;
1facf9fc 21183+ char a[PLINK_NAME_LEN];
21184+ struct qstr tgtname = {
21185+ .name = a
21186+ };
21187+
e49829fe
JR
21188+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
21189+
1facf9fc 21190+ br = au_sbr(inode->i_sb, bindex);
21191+ h_parent = br->br_wbr->wbr_plink;
21192+ h_dir = h_parent->d_inode;
21193+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21194+
7f207e10
AM
21195+ if (current_fsuid()) {
21196+ struct au_do_plink_lkup_args args = {
21197+ .errp = &h_dentry,
21198+ .tgtname = &tgtname,
21199+ .h_parent = h_parent,
21200+ .br = br
21201+ };
21202+
21203+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
21204+ if (unlikely(wkq_err))
21205+ h_dentry = ERR_PTR(wkq_err);
21206+ } else
21207+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
21208+
1facf9fc 21209+ return h_dentry;
21210+}
21211+
21212+/* create a pseudo-link */
21213+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
21214+ struct dentry *h_dentry, struct au_branch *br)
21215+{
21216+ int err;
21217+ struct path h_path = {
21218+ .mnt = br->br_mnt
21219+ };
21220+ struct inode *h_dir;
21221+
21222+ h_dir = h_parent->d_inode;
7f207e10 21223+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 21224+again:
1facf9fc 21225+ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
21226+ err = PTR_ERR(h_path.dentry);
21227+ if (IS_ERR(h_path.dentry))
21228+ goto out;
21229+
21230+ err = 0;
21231+ /* wh.plink dir is not monitored */
7f207e10 21232+ /* todo: is it really safe? */
1facf9fc 21233+ if (h_path.dentry->d_inode
21234+ && h_path.dentry->d_inode != h_dentry->d_inode) {
21235+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
21236+ dput(h_path.dentry);
21237+ h_path.dentry = NULL;
21238+ if (!err)
21239+ goto again;
21240+ }
21241+ if (!err && !h_path.dentry->d_inode)
21242+ err = vfsub_link(h_dentry, h_dir, &h_path);
21243+ dput(h_path.dentry);
21244+
4f0767ce 21245+out:
7f207e10 21246+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 21247+ return err;
21248+}
21249+
/* argument pack for call_do_whplink(), mirrors the do_whplink() parameters */
struct do_whplink_args {
	int			*errp;		/* receives the result */
	struct qstr		*tgt;
	struct dentry		*h_parent;
	struct dentry		*h_dentry;
	struct au_branch	*br;
};
21257+
21258+static void call_do_whplink(void *args)
21259+{
21260+ struct do_whplink_args *a = args;
21261+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
21262+}
21263+
21264+static int whplink(struct dentry *h_dentry, struct inode *inode,
21265+ aufs_bindex_t bindex, struct au_branch *br)
21266+{
21267+ int err, wkq_err;
21268+ struct au_wbr *wbr;
21269+ struct dentry *h_parent;
21270+ struct inode *h_dir;
21271+ char a[PLINK_NAME_LEN];
21272+ struct qstr tgtname = {
21273+ .name = a
21274+ };
21275+
21276+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
21277+ h_parent = wbr->wbr_plink;
21278+ h_dir = h_parent->d_inode;
21279+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21280+
21281+ /* always superio. */
b752ccd1 21282+ if (current_fsuid()) {
1facf9fc 21283+ struct do_whplink_args args = {
21284+ .errp = &err,
21285+ .tgt = &tgtname,
21286+ .h_parent = h_parent,
21287+ .h_dentry = h_dentry,
21288+ .br = br
21289+ };
21290+ wkq_err = au_wkq_wait(call_do_whplink, &args);
21291+ if (unlikely(wkq_err))
21292+ err = wkq_err;
21293+ } else
21294+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 21295+
21296+ return err;
21297+}
21298+
21299+/* free a single plink */
21300+static void do_put_plink(struct pseudo_link *plink, int do_del)
21301+{
1facf9fc 21302+ if (do_del)
21303+ list_del(&plink->list);
4a4d8108
AM
21304+ iput(plink->inode);
21305+ kfree(plink);
21306+}
21307+
21308+static void do_put_plink_rcu(struct rcu_head *rcu)
21309+{
21310+ struct pseudo_link *plink;
21311+
21312+ plink = container_of(rcu, struct pseudo_link, rcu);
21313+ iput(plink->inode);
1facf9fc 21314+ kfree(plink);
21315+}
21316+
21317+/*
21318+ * create a new pseudo-link for @h_dentry on @bindex.
21319+ * the linked inode is held in aufs @inode.
21320+ */
21321+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
21322+ struct dentry *h_dentry)
21323+{
21324+ struct super_block *sb;
21325+ struct au_sbinfo *sbinfo;
21326+ struct list_head *plink_list;
4a4d8108 21327+ struct pseudo_link *plink, *tmp;
1facf9fc 21328+ int found, err, cnt;
21329+
21330+ sb = inode->i_sb;
21331+ sbinfo = au_sbi(sb);
21332+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21333+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21334+
1facf9fc 21335+ cnt = 0;
21336+ found = 0;
21337+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21338+ rcu_read_lock();
21339+ list_for_each_entry_rcu(plink, plink_list, list) {
1facf9fc 21340+ cnt++;
21341+ if (plink->inode == inode) {
21342+ found = 1;
21343+ break;
21344+ }
21345+ }
4a4d8108
AM
21346+ rcu_read_unlock();
21347+ if (found)
1facf9fc 21348+ return;
4a4d8108
AM
21349+
21350+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
21351+ if (tmp)
21352+ tmp->inode = au_igrab(inode);
21353+ else {
21354+ err = -ENOMEM;
21355+ goto out;
1facf9fc 21356+ }
21357+
4a4d8108
AM
21358+ spin_lock(&sbinfo->si_plink.spin);
21359+ list_for_each_entry(plink, plink_list, list) {
21360+ if (plink->inode == inode) {
21361+ found = 1;
21362+ break;
21363+ }
1facf9fc 21364+ }
4a4d8108
AM
21365+ if (!found)
21366+ list_add_rcu(&tmp->list, plink_list);
1facf9fc 21367+ spin_unlock(&sbinfo->si_plink.spin);
4a4d8108
AM
21368+ if (!found) {
21369+ cnt++;
21370+ WARN_ONCE(cnt > AUFS_PLINK_WARN,
21371+ "unexpectedly many pseudo links, %d\n", cnt);
1facf9fc 21372+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
21373+ } else {
21374+ do_put_plink(tmp, 0);
21375+ return;
1facf9fc 21376+ }
21377+
4a4d8108 21378+out:
1facf9fc 21379+ if (unlikely(err)) {
4a4d8108
AM
21380+ pr_warning("err %d, damaged pseudo link.\n", err);
21381+ if (tmp) {
21382+ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink);
21383+ call_rcu(&tmp->rcu, do_put_plink_rcu);
21384+ }
1facf9fc 21385+ }
21386+}
21387+
21388+/* free all plinks */
e49829fe 21389+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 21390+{
21391+ struct au_sbinfo *sbinfo;
21392+ struct list_head *plink_list;
21393+ struct pseudo_link *plink, *tmp;
21394+
dece6358
AM
21395+ SiMustWriteLock(sb);
21396+
1facf9fc 21397+ sbinfo = au_sbi(sb);
21398+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21399+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21400+
21401+ plink_list = &sbinfo->si_plink.head;
21402+ /* no spin_lock since sbinfo is write-locked */
e49829fe 21403+ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed");
1facf9fc 21404+ list_for_each_entry_safe(plink, tmp, plink_list, list)
21405+ do_put_plink(plink, 0);
21406+ INIT_LIST_HEAD(plink_list);
21407+}
21408+
e49829fe
JR
21409+void au_plink_clean(struct super_block *sb, int verbose)
21410+{
21411+ struct dentry *root;
21412+
21413+ root = sb->s_root;
21414+ aufs_write_lock(root);
21415+ if (au_opt_test(au_mntflags(sb), PLINK))
21416+ au_plink_put(sb, verbose);
21417+ aufs_write_unlock(root);
21418+}
21419+
1facf9fc 21420+/* free the plinks on a branch specified by @br_id */
21421+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
21422+{
21423+ struct au_sbinfo *sbinfo;
21424+ struct list_head *plink_list;
21425+ struct pseudo_link *plink, *tmp;
21426+ struct inode *inode;
21427+ aufs_bindex_t bstart, bend, bindex;
21428+ unsigned char do_put;
21429+
dece6358
AM
21430+ SiMustWriteLock(sb);
21431+
1facf9fc 21432+ sbinfo = au_sbi(sb);
21433+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21434+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21435+
21436+ plink_list = &sbinfo->si_plink.head;
21437+ /* no spin_lock since sbinfo is write-locked */
21438+ list_for_each_entry_safe(plink, tmp, plink_list, list) {
21439+ do_put = 0;
21440+ inode = au_igrab(plink->inode);
21441+ ii_write_lock_child(inode);
21442+ bstart = au_ibstart(inode);
21443+ bend = au_ibend(inode);
21444+ if (bstart >= 0) {
21445+ for (bindex = bstart; bindex <= bend; bindex++) {
21446+ if (!au_h_iptr(inode, bindex)
21447+ || au_ii_br_id(inode, bindex) != br_id)
21448+ continue;
21449+ au_set_h_iptr(inode, bindex, NULL, 0);
21450+ do_put = 1;
21451+ break;
21452+ }
21453+ } else
21454+ do_put_plink(plink, 1);
21455+
dece6358
AM
21456+ if (do_put) {
21457+ for (bindex = bstart; bindex <= bend; bindex++)
21458+ if (au_h_iptr(inode, bindex)) {
21459+ do_put = 0;
21460+ break;
21461+ }
21462+ if (do_put)
21463+ do_put_plink(plink, 1);
21464+ }
21465+ ii_write_unlock(inode);
21466+ iput(inode);
21467+ }
21468+}
7f207e10
AM
21469diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
21470--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21471+++ linux/fs/aufs/poll.c 2011-08-24 13:30:24.734646739 +0200
dece6358
AM
21472@@ -0,0 +1,56 @@
21473+/*
027c5e7a 21474+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
21475+ *
21476+ * This program, aufs is free software; you can redistribute it and/or modify
21477+ * it under the terms of the GNU General Public License as published by
21478+ * the Free Software Foundation; either version 2 of the License, or
21479+ * (at your option) any later version.
21480+ *
21481+ * This program is distributed in the hope that it will be useful,
21482+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21483+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21484+ * GNU General Public License for more details.
21485+ *
21486+ * You should have received a copy of the GNU General Public License
21487+ * along with this program; if not, write to the Free Software
21488+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21489+ */
21490+
1308ab2a 21491+/*
21492+ * poll operation
21493+ * There is only one filesystem which implements ->poll operation, currently.
21494+ */
21495+
21496+#include "aufs.h"
21497+
21498+unsigned int aufs_poll(struct file *file, poll_table *wait)
21499+{
21500+ unsigned int mask;
21501+ int err;
21502+ struct file *h_file;
21503+ struct dentry *dentry;
21504+ struct super_block *sb;
21505+
21506+ /* We should pretend an error happened. */
21507+ mask = POLLERR /* | POLLIN | POLLOUT */;
21508+ dentry = file->f_dentry;
21509+ sb = dentry->d_sb;
e49829fe 21510+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
1308ab2a 21511+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
21512+ if (unlikely(err))
21513+ goto out;
21514+
21515+ /* it is not an error if h_file has no operation */
21516+ mask = DEFAULT_POLLMASK;
4a4d8108 21517+ h_file = au_hf_top(file);
1308ab2a 21518+ if (h_file->f_op && h_file->f_op->poll)
21519+ mask = h_file->f_op->poll(h_file, wait);
21520+
21521+ di_read_unlock(dentry, AuLock_IR);
21522+ fi_read_unlock(file);
21523+
4f0767ce 21524+out:
1308ab2a 21525+ si_read_unlock(sb);
21526+ AuTraceErr((int)mask);
21527+ return mask;
21528+}
7f207e10
AM
21529diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
21530--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
21531+++ linux/fs/aufs/procfs.c 2011-08-24 13:30:24.734646739 +0200
21532@@ -0,0 +1,170 @@
e49829fe 21533+/*
027c5e7a 21534+ * Copyright (C) 2010-2011 Junjiro R. Okajima
e49829fe
JR
21535+ *
21536+ * This program, aufs is free software; you can redistribute it and/or modify
21537+ * it under the terms of the GNU General Public License as published by
21538+ * the Free Software Foundation; either version 2 of the License, or
21539+ * (at your option) any later version.
21540+ *
21541+ * This program is distributed in the hope that it will be useful,
21542+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21543+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21544+ * GNU General Public License for more details.
21545+ *
21546+ * You should have received a copy of the GNU General Public License
21547+ * along with this program; if not, write to the Free Software
21548+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21549+ */
21550+
21551+/*
21552+ * procfs interfaces
21553+ */
21554+
21555+#include <linux/proc_fs.h>
21556+#include "aufs.h"
21557+
21558+static int au_procfs_plm_release(struct inode *inode, struct file *file)
21559+{
21560+ struct au_sbinfo *sbinfo;
21561+
21562+ sbinfo = file->private_data;
21563+ if (sbinfo) {
21564+ au_plink_maint_leave(sbinfo);
21565+ kobject_put(&sbinfo->si_kobj);
21566+ }
21567+
21568+ return 0;
21569+}
21570+
21571+static void au_procfs_plm_write_clean(struct file *file)
21572+{
21573+ struct au_sbinfo *sbinfo;
21574+
21575+ sbinfo = file->private_data;
21576+ if (sbinfo)
21577+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
21578+}
21579+
21580+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
21581+{
21582+ int err;
21583+ struct super_block *sb;
21584+ struct au_sbinfo *sbinfo;
21585+
21586+ err = -EBUSY;
21587+ if (unlikely(file->private_data))
21588+ goto out;
21589+
21590+ sb = NULL;
53392da6 21591+ /* don't use au_sbilist_lock() here */
e49829fe
JR
21592+ spin_lock(&au_sbilist.spin);
21593+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
21594+ if (id == sysaufs_si_id(sbinfo)) {
21595+ kobject_get(&sbinfo->si_kobj);
21596+ sb = sbinfo->si_sb;
21597+ break;
21598+ }
21599+ spin_unlock(&au_sbilist.spin);
21600+
21601+ err = -EINVAL;
21602+ if (unlikely(!sb))
21603+ goto out;
21604+
21605+ err = au_plink_maint_enter(sb);
21606+ if (!err)
21607+ /* keep kobject_get() */
21608+ file->private_data = sbinfo;
21609+ else
21610+ kobject_put(&sbinfo->si_kobj);
21611+out:
21612+ return err;
21613+}
21614+
21615+/*
21616+ * Accept a valid "si=xxxx" only.
21617+ * Once it is accepted successfully, accept "clean" too.
21618+ */
21619+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
21620+ size_t count, loff_t *ppos)
21621+{
21622+ ssize_t err;
21623+ unsigned long id;
21624+ /* last newline is allowed */
21625+ char buf[3 + sizeof(unsigned long) * 2 + 1];
21626+
21627+ err = -EACCES;
21628+ if (unlikely(!capable(CAP_SYS_ADMIN)))
21629+ goto out;
21630+
21631+ err = -EINVAL;
21632+ if (unlikely(count > sizeof(buf)))
21633+ goto out;
21634+
21635+ err = copy_from_user(buf, ubuf, count);
21636+ if (unlikely(err)) {
21637+ err = -EFAULT;
21638+ goto out;
21639+ }
21640+ buf[count] = 0;
21641+
21642+ err = -EINVAL;
21643+ if (!strcmp("clean", buf)) {
21644+ au_procfs_plm_write_clean(file);
21645+ goto out_success;
21646+ } else if (unlikely(strncmp("si=", buf, 3)))
21647+ goto out;
21648+
21649+ err = strict_strtoul(buf + 3, 16, &id);
21650+ if (unlikely(err))
21651+ goto out;
21652+
21653+ err = au_procfs_plm_write_si(file, id);
21654+ if (unlikely(err))
21655+ goto out;
21656+
21657+out_success:
21658+ err = count; /* success */
21659+out:
21660+ return err;
21661+}
21662+
21663+static const struct file_operations au_procfs_plm_fop = {
21664+ .write = au_procfs_plm_write,
21665+ .release = au_procfs_plm_release,
21666+ .owner = THIS_MODULE
21667+};
21668+
21669+/* ---------------------------------------------------------------------- */
21670+
21671+static struct proc_dir_entry *au_procfs_dir;
21672+
21673+void au_procfs_fin(void)
21674+{
21675+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
21676+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21677+}
21678+
21679+int __init au_procfs_init(void)
21680+{
21681+ int err;
21682+ struct proc_dir_entry *entry;
21683+
21684+ err = -ENOMEM;
21685+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
21686+ if (unlikely(!au_procfs_dir))
21687+ goto out;
21688+
21689+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
21690+ au_procfs_dir, &au_procfs_plm_fop);
21691+ if (unlikely(!entry))
21692+ goto out_dir;
21693+
21694+ err = 0;
21695+ goto out; /* success */
21696+
21697+
21698+out_dir:
21699+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21700+out:
21701+ return err;
21702+}
7f207e10
AM
21703diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
21704--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21705+++ linux/fs/aufs/rdu.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 21706@@ -0,0 +1,383 @@
1308ab2a 21707+/*
027c5e7a 21708+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 21709+ *
21710+ * This program, aufs is free software; you can redistribute it and/or modify
21711+ * it under the terms of the GNU General Public License as published by
21712+ * the Free Software Foundation; either version 2 of the License, or
21713+ * (at your option) any later version.
21714+ *
21715+ * This program is distributed in the hope that it will be useful,
21716+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21717+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21718+ * GNU General Public License for more details.
21719+ *
21720+ * You should have received a copy of the GNU General Public License
21721+ * along with this program; if not, write to the Free Software
21722+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21723+ */
21724+
21725+/*
21726+ * readdir in userspace.
21727+ */
21728+
#include <linux/compat.h>
#include <linux/fs_stack.h>
#include <linux/security.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/aufs_type.h>
#include "aufs.h"
21735+
/* bits for struct aufs_rdu.flags */
#define AuRdu_CALLED		1
#define AuRdu_CONT		(1 << 1)
#define AuRdu_FULL		(1 << 2)

/* test, set and clear one of the bits above, @name is the AuRdu_ suffix */
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
	do { (flags) |= AuRdu_##name; } while (0)
#define au_fclr_rdu(flags, name) \
	do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 21745+
21746+struct au_rdu_arg {
21747+ struct aufs_rdu *rdu;
21748+ union au_rdu_ent_ul ent;
21749+ unsigned long end;
21750+
21751+ struct super_block *sb;
21752+ int err;
21753+};
21754+
21755+static int au_rdu_fill(void *__arg, const char *name, int nlen,
21756+ loff_t offset, u64 h_ino, unsigned int d_type)
21757+{
21758+ int err, len;
21759+ struct au_rdu_arg *arg = __arg;
21760+ struct aufs_rdu *rdu = arg->rdu;
21761+ struct au_rdu_ent ent;
21762+
21763+ err = 0;
21764+ arg->err = 0;
21765+ au_fset_rdu(rdu->cookie.flags, CALLED);
21766+ len = au_rdu_len(nlen);
21767+ if (arg->ent.ul + len < arg->end) {
21768+ ent.ino = h_ino;
21769+ ent.bindex = rdu->cookie.bindex;
21770+ ent.type = d_type;
21771+ ent.nlen = nlen;
4a4d8108
AM
21772+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
21773+ ent.type = DT_UNKNOWN;
1308ab2a 21774+
21775+ err = -EFAULT;
21776+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
21777+ goto out;
21778+ if (copy_to_user(arg->ent.e->name, name, nlen))
21779+ goto out;
21780+ /* the terminating NULL */
21781+ if (__put_user(0, arg->ent.e->name + nlen))
21782+ goto out;
21783+ err = 0;
21784+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
21785+ arg->ent.ul += len;
21786+ rdu->rent++;
21787+ } else {
21788+ err = -EFAULT;
21789+ au_fset_rdu(rdu->cookie.flags, FULL);
21790+ rdu->full = 1;
21791+ rdu->tail = arg->ent;
21792+ }
21793+
4f0767ce 21794+out:
1308ab2a 21795+ /* AuTraceErr(err); */
21796+ return err;
21797+}
21798+
21799+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
21800+{
21801+ int err;
21802+ loff_t offset;
21803+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
21804+
21805+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
21806+ err = offset;
21807+ if (unlikely(offset != cookie->h_pos))
21808+ goto out;
21809+
21810+ err = 0;
21811+ do {
21812+ arg->err = 0;
21813+ au_fclr_rdu(cookie->flags, CALLED);
21814+ /* smp_mb(); */
21815+ err = vfsub_readdir(h_file, au_rdu_fill, arg);
21816+ if (err >= 0)
21817+ err = arg->err;
21818+ } while (!err
21819+ && au_ftest_rdu(cookie->flags, CALLED)
21820+ && !au_ftest_rdu(cookie->flags, FULL));
21821+ cookie->h_pos = h_file->f_pos;
21822+
4f0767ce 21823+out:
1308ab2a 21824+ AuTraceErr(err);
21825+ return err;
21826+}
21827+
21828+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
21829+{
21830+ int err;
21831+ aufs_bindex_t bend;
21832+ struct au_rdu_arg arg;
21833+ struct dentry *dentry;
21834+ struct inode *inode;
21835+ struct file *h_file;
21836+ struct au_rdu_cookie *cookie = &rdu->cookie;
21837+
21838+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
21839+ if (unlikely(err)) {
21840+ err = -EFAULT;
21841+ AuTraceErr(err);
21842+ goto out;
21843+ }
21844+ rdu->rent = 0;
21845+ rdu->tail = rdu->ent;
21846+ rdu->full = 0;
21847+ arg.rdu = rdu;
21848+ arg.ent = rdu->ent;
21849+ arg.end = arg.ent.ul;
21850+ arg.end += rdu->sz;
21851+
21852+ err = -ENOTDIR;
21853+ if (unlikely(!file->f_op || !file->f_op->readdir))
21854+ goto out;
21855+
21856+ err = security_file_permission(file, MAY_READ);
21857+ AuTraceErr(err);
21858+ if (unlikely(err))
21859+ goto out;
21860+
21861+ dentry = file->f_dentry;
21862+ inode = dentry->d_inode;
21863+#if 1
21864+ mutex_lock(&inode->i_mutex);
21865+#else
21866+ err = mutex_lock_killable(&inode->i_mutex);
21867+ AuTraceErr(err);
21868+ if (unlikely(err))
21869+ goto out;
21870+#endif
1308ab2a 21871+
21872+ arg.sb = inode->i_sb;
e49829fe
JR
21873+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
21874+ if (unlikely(err))
21875+ goto out_mtx;
027c5e7a
AM
21876+ err = au_alive_dir(dentry);
21877+ if (unlikely(err))
21878+ goto out_si;
e49829fe 21879+ /* todo: reval? */
1308ab2a 21880+ fi_read_lock(file);
21881+
21882+ err = -EAGAIN;
21883+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
21884+ && cookie->generation != au_figen(file)))
21885+ goto out_unlock;
21886+
21887+ err = 0;
21888+ if (!rdu->blk) {
21889+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
21890+ if (!rdu->blk)
21891+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
21892+ }
21893+ bend = au_fbstart(file);
21894+ if (cookie->bindex < bend)
21895+ cookie->bindex = bend;
4a4d8108 21896+ bend = au_fbend_dir(file);
1308ab2a 21897+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
21898+ for (; !err && cookie->bindex <= bend;
21899+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 21900+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 21901+ if (!h_file)
21902+ continue;
21903+
21904+ au_fclr_rdu(cookie->flags, FULL);
21905+ err = au_rdu_do(h_file, &arg);
21906+ AuTraceErr(err);
21907+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
21908+ break;
21909+ }
21910+ AuDbg("rent %llu\n", rdu->rent);
21911+
21912+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
21913+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
21914+ au_fset_rdu(cookie->flags, CONT);
21915+ cookie->generation = au_figen(file);
21916+ }
21917+
21918+ ii_read_lock_child(inode);
21919+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
21920+ ii_read_unlock(inode);
21921+
4f0767ce 21922+out_unlock:
1308ab2a 21923+ fi_read_unlock(file);
027c5e7a 21924+out_si:
1308ab2a 21925+ si_read_unlock(arg.sb);
4f0767ce 21926+out_mtx:
1308ab2a 21927+ mutex_unlock(&inode->i_mutex);
4f0767ce 21928+out:
1308ab2a 21929+ AuTraceErr(err);
21930+ return err;
21931+}
21932+
21933+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
21934+{
21935+ int err;
21936+ ino_t ino;
21937+ unsigned long long nent;
21938+ union au_rdu_ent_ul *u;
21939+ struct au_rdu_ent ent;
21940+ struct super_block *sb;
21941+
21942+ err = 0;
21943+ nent = rdu->nent;
21944+ u = &rdu->ent;
21945+ sb = file->f_dentry->d_sb;
21946+ si_read_lock(sb, AuLock_FLUSH);
21947+ while (nent-- > 0) {
1308ab2a 21948+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
21949+ if (!err)
21950+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 21951+ if (unlikely(err)) {
21952+ err = -EFAULT;
21953+ AuTraceErr(err);
21954+ break;
21955+ }
21956+
21957+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
21958+ if (!ent.wh)
21959+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
21960+ else
21961+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
21962+ &ino);
21963+ if (unlikely(err)) {
21964+ AuTraceErr(err);
21965+ break;
21966+ }
21967+
21968+ err = __put_user(ino, &u->e->ino);
21969+ if (unlikely(err)) {
21970+ err = -EFAULT;
21971+ AuTraceErr(err);
21972+ break;
21973+ }
21974+ u->ul += au_rdu_len(ent.nlen);
21975+ }
21976+ si_read_unlock(sb);
21977+
21978+ return err;
21979+}
21980+
21981+/* ---------------------------------------------------------------------- */
21982+
21983+static int au_rdu_verify(struct aufs_rdu *rdu)
21984+{
b752ccd1 21985+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 21986+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 21987+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 21988+ rdu->blk,
21989+ rdu->rent, rdu->shwh, rdu->full,
21990+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
21991+ rdu->cookie.generation);
dece6358 21992+
b752ccd1 21993+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 21994+ return 0;
dece6358 21995+
b752ccd1
AM
21996+ AuDbg("%u:%u\n",
21997+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 21998+ return -EINVAL;
21999+}
22000+
22001+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 22002+{
1308ab2a 22003+ long err, e;
22004+ struct aufs_rdu rdu;
22005+ void __user *p = (void __user *)arg;
dece6358 22006+
1308ab2a 22007+ err = copy_from_user(&rdu, p, sizeof(rdu));
22008+ if (unlikely(err)) {
22009+ err = -EFAULT;
22010+ AuTraceErr(err);
22011+ goto out;
22012+ }
22013+ err = au_rdu_verify(&rdu);
dece6358
AM
22014+ if (unlikely(err))
22015+ goto out;
22016+
1308ab2a 22017+ switch (cmd) {
22018+ case AUFS_CTL_RDU:
22019+ err = au_rdu(file, &rdu);
22020+ if (unlikely(err))
22021+ break;
dece6358 22022+
1308ab2a 22023+ e = copy_to_user(p, &rdu, sizeof(rdu));
22024+ if (unlikely(e)) {
22025+ err = -EFAULT;
22026+ AuTraceErr(err);
22027+ }
22028+ break;
22029+ case AUFS_CTL_RDU_INO:
22030+ err = au_rdu_ino(file, &rdu);
22031+ break;
22032+
22033+ default:
4a4d8108 22034+ /* err = -ENOTTY; */
1308ab2a 22035+ err = -EINVAL;
22036+ }
dece6358 22037+
4f0767ce 22038+out:
1308ab2a 22039+ AuTraceErr(err);
22040+ return err;
1facf9fc 22041+}
b752ccd1
AM
22042+
#ifdef CONFIG_COMPAT
/*
 * compat flavour of au_rdu_ioctl(): the user pointers are converted with
 * compat_ptr() on the way in and with ptr_to_compat() before the struct is
 * copied back for AUFS_CTL_RDU.
 */
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long err;
	struct aufs_rdu rdu;
	void __user *p = compat_ptr(arg);

	/* todo: get_user()? */
	if (unlikely(copy_from_user(&rdu, p, sizeof(rdu)))) {
		err = -EFAULT;
		AuTraceErr(err);
		goto out;
	}
	rdu.ent.e = compat_ptr(rdu.ent.ul);
	err = au_rdu_verify(&rdu);
	if (unlikely(err))
		goto out;

	switch (cmd) {
	case AUFS_CTL_RDU:
		err = au_rdu(file, &rdu);
		if (unlikely(err))
			break;

		rdu.ent.ul = ptr_to_compat(rdu.ent.e);
		rdu.tail.ul = ptr_to_compat(rdu.tail.e);
		if (unlikely(copy_to_user(p, &rdu, sizeof(rdu)))) {
			err = -EFAULT;
			AuTraceErr(err);
		}
		break;

	case AUFS_CTL_RDU_INO:
		err = au_rdu_ino(file, &rdu);
		break;

	default:
		/* err = -ENOTTY; */
		err = -EINVAL;
	}

out:
	AuTraceErr(err);
	return err;
}
#endif
7f207e10
AM
22090diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
22091--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
53392da6 22092+++ linux/fs/aufs/rwsem.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 22093@@ -0,0 +1,189 @@
1facf9fc 22094+/*
027c5e7a 22095+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22096+ *
22097+ * This program, aufs is free software; you can redistribute it and/or modify
22098+ * it under the terms of the GNU General Public License as published by
22099+ * the Free Software Foundation; either version 2 of the License, or
22100+ * (at your option) any later version.
dece6358
AM
22101+ *
22102+ * This program is distributed in the hope that it will be useful,
22103+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22104+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22105+ * GNU General Public License for more details.
22106+ *
22107+ * You should have received a copy of the GNU General Public License
22108+ * along with this program; if not, write to the Free Software
22109+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22110+ */
22111+
22112+/*
22113+ * simple read-write semaphore wrappers
22114+ */
22115+
22116+#ifndef __AUFS_RWSEM_H__
22117+#define __AUFS_RWSEM_H__
22118+
22119+#ifdef __KERNEL__
22120+
dece6358 22121+#include <linux/rwsem.h>
4a4d8108 22122+#include "debug.h"
dece6358
AM
22123+
22124+struct au_rwsem {
22125+ struct rw_semaphore rwsem;
22126+#ifdef CONFIG_AUFS_DEBUG
22127+ /* just for debugging, not almighty counter */
22128+ atomic_t rcnt, wcnt;
22129+#endif
22130+};
22131+
/*
 * helpers maintaining the debug counters of struct au_rwsem.
 * All of them expand to nothing unless CONFIG_AUFS_DEBUG is set.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuDbgCntInit(rw) do { \
	atomic_set(&(rw)->rcnt, 0); \
	atomic_set(&(rw)->wcnt, 0); \
	smp_mb(); /* atomic set */ \
} while (0)

#define AuDbgRcntInc(rw)	atomic_inc(&(rw)->rcnt)
#define AuDbgRcntDec(rw)	WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
#define AuDbgWcntInc(rw)	atomic_inc(&(rw)->wcnt)
#define AuDbgWcntDec(rw)	WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
#else
#define AuDbgCntInit(rw)	do {} while (0)
#define AuDbgRcntInc(rw)	do {} while (0)
#define AuDbgRcntDec(rw)	do {} while (0)
#define AuDbgWcntInc(rw)	do {} while (0)
#define AuDbgWcntDec(rw)	do {} while (0)
#endif /* CONFIG_AUFS_DEBUG */
22150+
/* to debug easier, do not make them inlined functions */
#define AuRwMustNoWaiters(rw)	AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
/* rwsem_is_locked() is unusable */
#define AuRwMustReadLock(rw)	AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
#define AuRwMustWriteLock(rw)	AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
#define AuRwMustAnyLock(rw)	AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
					&& atomic_read(&(rw)->wcnt) <= 0)
#define AuRwDestroy(rw)		AuDebugOn(atomic_read(&(rw)->rcnt) \
					|| atomic_read(&(rw)->wcnt))

/* assign the lockdep class @key to the embedded rw_semaphore */
#define au_rw_class(rw, key)	lockdep_set_class(&(rw)->rwsem, key)
22162+
dece6358
AM
22163+static inline void au_rw_init(struct au_rwsem *rw)
22164+{
22165+ AuDbgCntInit(rw);
22166+ init_rwsem(&rw->rwsem);
22167+}
22168+
22169+static inline void au_rw_init_wlock(struct au_rwsem *rw)
22170+{
22171+ au_rw_init(rw);
22172+ down_write(&rw->rwsem);
22173+ AuDbgWcntInc(rw);
22174+}
22175+
22176+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
22177+ unsigned int lsc)
22178+{
22179+ au_rw_init(rw);
22180+ down_write_nested(&rw->rwsem, lsc);
22181+ AuDbgWcntInc(rw);
22182+}
22183+
22184+static inline void au_rw_read_lock(struct au_rwsem *rw)
22185+{
22186+ down_read(&rw->rwsem);
22187+ AuDbgRcntInc(rw);
22188+}
22189+
22190+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
22191+{
22192+ down_read_nested(&rw->rwsem, lsc);
22193+ AuDbgRcntInc(rw);
22194+}
22195+
22196+static inline void au_rw_read_unlock(struct au_rwsem *rw)
22197+{
22198+ AuRwMustReadLock(rw);
22199+ AuDbgRcntDec(rw);
22200+ up_read(&rw->rwsem);
22201+}
22202+
22203+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
22204+{
22205+ AuRwMustWriteLock(rw);
22206+ AuDbgRcntInc(rw);
22207+ AuDbgWcntDec(rw);
22208+ downgrade_write(&rw->rwsem);
22209+}
22210+
22211+static inline void au_rw_write_lock(struct au_rwsem *rw)
22212+{
22213+ down_write(&rw->rwsem);
22214+ AuDbgWcntInc(rw);
22215+}
22216+
22217+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
22218+ unsigned int lsc)
22219+{
22220+ down_write_nested(&rw->rwsem, lsc);
22221+ AuDbgWcntInc(rw);
22222+}
1facf9fc 22223+
dece6358
AM
22224+static inline void au_rw_write_unlock(struct au_rwsem *rw)
22225+{
22226+ AuRwMustWriteLock(rw);
22227+ AuDbgWcntDec(rw);
22228+ up_write(&rw->rwsem);
22229+}
22230+
22231+/* why is not _nested version defined */
22232+static inline int au_rw_read_trylock(struct au_rwsem *rw)
22233+{
22234+ int ret = down_read_trylock(&rw->rwsem);
22235+ if (ret)
22236+ AuDbgRcntInc(rw);
22237+ return ret;
22238+}
22239+
22240+static inline int au_rw_write_trylock(struct au_rwsem *rw)
22241+{
22242+ int ret = down_write_trylock(&rw->rwsem);
22243+ if (ret)
22244+ AuDbgWcntInc(rw);
22245+ return ret;
22246+}
22247+
22248+#undef AuDbgCntInit
22249+#undef AuDbgRcntInc
22250+#undef AuDbgRcntDec
22251+#undef AuDbgWcntInc
22252+#undef AuDbgWcntDec
1facf9fc 22253+
/*
 * generate prefix##_{read,write}_{lock,trylock}() taking @param and
 * operating on the au_rwsem expression @rwsem.
 */
#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_lock(param) \
{ \
	au_rw_read_lock(rwsem); \
} \
static inline void prefix##_write_lock(param) \
{ \
	au_rw_write_lock(rwsem); \
} \
static inline int prefix##_read_trylock(param) \
{ \
	return au_rw_read_trylock(rwsem); \
} \
static inline int prefix##_write_trylock(param) \
{ \
	return au_rw_write_trylock(rwsem); \
}
1facf9fc 22263+/* why is not _nested version defined */
22264+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 22265+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 22266+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 22267+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 22268+
/*
 * Generate the unlocking wrappers prefix##_read_unlock(),
 * prefix##_write_unlock() and prefix##_downgrade_lock().
 * @param and @rwsem have the same meaning as in AuSimpleLockRwsemFuncs().
 */
#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_unlock(param) \
{ \
	au_rw_read_unlock(rwsem); \
} \
static inline void prefix##_write_unlock(param) \
{ \
	au_rw_write_unlock(rwsem); \
} \
static inline void prefix##_downgrade_lock(param) \
{ \
	au_rw_dgrade_lock(rwsem); \
}
1facf9fc 22276+
/* generate both the locking and the unlocking wrappers in one go */
#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
	AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
	AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
22280+
22281+#endif /* __KERNEL__ */
22282+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
22283diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
22284--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 22285+++ linux/fs/aufs/sbinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 22286@@ -0,0 +1,344 @@
1facf9fc 22287+/*
027c5e7a 22288+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22289+ *
22290+ * This program, aufs is free software; you can redistribute it and/or modify
22291+ * it under the terms of the GNU General Public License as published by
22292+ * the Free Software Foundation; either version 2 of the License, or
22293+ * (at your option) any later version.
dece6358
AM
22294+ *
22295+ * This program is distributed in the hope that it will be useful,
22296+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22297+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22298+ * GNU General Public License for more details.
22299+ *
22300+ * You should have received a copy of the GNU General Public License
22301+ * along with this program; if not, write to the Free Software
22302+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22303+ */
22304+
22305+/*
22306+ * superblock private data
22307+ */
22308+
e49829fe 22309+#include <linux/jiffies.h>
1facf9fc 22310+#include "aufs.h"
22311+
22312+/*
22313+ * they are necessary regardless sysfs is disabled.
22314+ */
22315+void au_si_free(struct kobject *kobj)
22316+{
22317+ struct au_sbinfo *sbinfo;
b752ccd1 22318+ char *locked __maybe_unused; /* debug only */
1facf9fc 22319+
22320+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
22321+ AuDebugOn(!list_empty(&sbinfo->si_plink.head));
e49829fe 22322+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 22323+
e49829fe 22324+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 22325+ au_br_free(sbinfo);
e49829fe 22326+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
22327+
22328+ AuDebugOn(radix_tree_gang_lookup
22329+ (&sbinfo->au_si_pid.tree, (void **)&locked,
22330+ /*first_index*/PID_MAX_DEFAULT - 1,
22331+ /*max_items*/sizeof(locked)/sizeof(*locked)));
22332+
1facf9fc 22333+ kfree(sbinfo->si_branch);
b752ccd1 22334+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 22335+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 22336+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 22337+
22338+ kfree(sbinfo);
22339+}
22340+
22341+int au_si_alloc(struct super_block *sb)
22342+{
22343+ int err;
22344+ struct au_sbinfo *sbinfo;
e49829fe 22345+ static struct lock_class_key aufs_si;
1facf9fc 22346+
22347+ err = -ENOMEM;
4a4d8108 22348+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 22349+ if (unlikely(!sbinfo))
22350+ goto out;
22351+
b752ccd1
AM
22352+ BUILD_BUG_ON(sizeof(unsigned long) !=
22353+ sizeof(*sbinfo->au_si_pid.bitmap));
22354+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
22355+ sizeof(*sbinfo->au_si_pid.bitmap),
22356+ GFP_NOFS);
22357+ if (unlikely(!sbinfo->au_si_pid.bitmap))
22358+ goto out_sbinfo;
22359+
1facf9fc 22360+ /* will be reallocated separately */
22361+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
22362+ if (unlikely(!sbinfo->si_branch))
b752ccd1 22363+ goto out_pidmap;
1facf9fc 22364+
1facf9fc 22365+ err = sysaufs_si_init(sbinfo);
22366+ if (unlikely(err))
22367+ goto out_br;
22368+
22369+ au_nwt_init(&sbinfo->si_nowait);
dece6358 22370+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 22371+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
22372+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
22373+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
22374+
7f207e10 22375+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
22376+ atomic_long_set(&sbinfo->si_nfiles, 0);
22377+
1facf9fc 22378+ sbinfo->si_bend = -1;
1facf9fc 22379+
22380+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
22381+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
22382+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
22383+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 22384+
e49829fe 22385+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 22386+
1facf9fc 22387+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 22388+ sbinfo->si_xino_brid = -1;
22389+ /* leave si_xib_last_pindex and si_xib_next_bit */
22390+
e49829fe 22391+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 22392+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
22393+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
22394+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
22395+
22396+ au_spl_init(&sbinfo->si_plink);
22397+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 22398+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 22399+
22400+ /* leave other members for sysaufs and si_mnt. */
22401+ sbinfo->si_sb = sb;
22402+ sb->s_fs_info = sbinfo;
b752ccd1 22403+ si_pid_set(sb);
1facf9fc 22404+ au_debug_sbinfo_init(sbinfo);
22405+ return 0; /* success */
22406+
4f0767ce 22407+out_br:
1facf9fc 22408+ kfree(sbinfo->si_branch);
4f0767ce 22409+out_pidmap:
b752ccd1 22410+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 22411+out_sbinfo:
1facf9fc 22412+ kfree(sbinfo);
4f0767ce 22413+out:
1facf9fc 22414+ return err;
22415+}
22416+
22417+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
22418+{
22419+ int err, sz;
22420+ struct au_branch **brp;
22421+
dece6358
AM
22422+ AuRwMustWriteLock(&sbinfo->si_rwsem);
22423+
1facf9fc 22424+ err = -ENOMEM;
22425+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
22426+ if (unlikely(!sz))
22427+ sz = sizeof(*brp);
22428+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
22429+ if (brp) {
22430+ sbinfo->si_branch = brp;
22431+ err = 0;
22432+ }
22433+
22434+ return err;
22435+}
22436+
22437+/* ---------------------------------------------------------------------- */
22438+
22439+unsigned int au_sigen_inc(struct super_block *sb)
22440+{
22441+ unsigned int gen;
22442+
dece6358
AM
22443+ SiMustWriteLock(sb);
22444+
1facf9fc 22445+ gen = ++au_sbi(sb)->si_generation;
22446+ au_update_digen(sb->s_root);
22447+ au_update_iigen(sb->s_root->d_inode);
22448+ sb->s_root->d_inode->i_version++;
22449+ return gen;
22450+}
22451+
22452+aufs_bindex_t au_new_br_id(struct super_block *sb)
22453+{
22454+ aufs_bindex_t br_id;
22455+ int i;
22456+ struct au_sbinfo *sbinfo;
22457+
dece6358
AM
22458+ SiMustWriteLock(sb);
22459+
1facf9fc 22460+ sbinfo = au_sbi(sb);
22461+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
22462+ br_id = ++sbinfo->si_last_br_id;
7f207e10 22463+ AuDebugOn(br_id < 0);
1facf9fc 22464+ if (br_id && au_br_index(sb, br_id) < 0)
22465+ return br_id;
22466+ }
22467+
22468+ return -1;
22469+}
22470+
22471+/* ---------------------------------------------------------------------- */
22472+
e49829fe
JR
22473+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
22474+int si_read_lock(struct super_block *sb, int flags)
22475+{
22476+ int err;
22477+
22478+ err = 0;
22479+ if (au_ftest_lock(flags, FLUSH))
22480+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22481+
22482+ si_noflush_read_lock(sb);
22483+ err = au_plink_maint(sb, flags);
22484+ if (unlikely(err))
22485+ si_read_unlock(sb);
22486+
22487+ return err;
22488+}
22489+
22490+int si_write_lock(struct super_block *sb, int flags)
22491+{
22492+ int err;
22493+
22494+ if (au_ftest_lock(flags, FLUSH))
22495+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22496+
22497+ si_noflush_write_lock(sb);
22498+ err = au_plink_maint(sb, flags);
22499+ if (unlikely(err))
22500+ si_write_unlock(sb);
22501+
22502+ return err;
22503+}
22504+
1facf9fc 22505+/* dentry and super_block lock. call at entry point */
e49829fe 22506+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 22507+{
e49829fe 22508+ int err;
027c5e7a 22509+ struct super_block *sb;
e49829fe 22510+
027c5e7a
AM
22511+ sb = dentry->d_sb;
22512+ err = si_read_lock(sb, flags);
22513+ if (unlikely(err))
22514+ goto out;
22515+
22516+ if (au_ftest_lock(flags, DW))
22517+ di_write_lock_child(dentry);
22518+ else
22519+ di_read_lock_child(dentry, flags);
22520+
22521+ if (au_ftest_lock(flags, GEN)) {
22522+ err = au_digen_test(dentry, au_sigen(sb));
22523+ AuDebugOn(!err && au_dbrange_test(dentry));
22524+ if (unlikely(err))
22525+ aufs_read_unlock(dentry, flags);
e49829fe
JR
22526+ }
22527+
027c5e7a 22528+out:
e49829fe 22529+ return err;
1facf9fc 22530+}
22531+
22532+void aufs_read_unlock(struct dentry *dentry, int flags)
22533+{
22534+ if (au_ftest_lock(flags, DW))
22535+ di_write_unlock(dentry);
22536+ else
22537+ di_read_unlock(dentry, flags);
22538+ si_read_unlock(dentry->d_sb);
22539+}
22540+
22541+void aufs_write_lock(struct dentry *dentry)
22542+{
e49829fe 22543+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 22544+ di_write_lock_child(dentry);
22545+}
22546+
22547+void aufs_write_unlock(struct dentry *dentry)
22548+{
22549+ di_write_unlock(dentry);
22550+ si_write_unlock(dentry->d_sb);
22551+}
22552+
e49829fe 22553+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 22554+{
e49829fe 22555+ int err;
027c5e7a
AM
22556+ unsigned int sigen;
22557+ struct super_block *sb;
e49829fe 22558+
027c5e7a
AM
22559+ sb = d1->d_sb;
22560+ err = si_read_lock(sb, flags);
22561+ if (unlikely(err))
22562+ goto out;
22563+
22564+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
22565+
22566+ if (au_ftest_lock(flags, GEN)) {
22567+ sigen = au_sigen(sb);
22568+ err = au_digen_test(d1, sigen);
22569+ AuDebugOn(!err && au_dbrange_test(d1));
22570+ if (!err) {
22571+ err = au_digen_test(d2, sigen);
22572+ AuDebugOn(!err && au_dbrange_test(d2));
22573+ }
22574+ if (unlikely(err))
22575+ aufs_read_and_write_unlock2(d1, d2);
22576+ }
22577+
22578+out:
e49829fe 22579+ return err;
1facf9fc 22580+}
22581+
22582+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
22583+{
22584+ di_write_unlock2(d1, d2);
22585+ si_read_unlock(d1->d_sb);
22586+}
b752ccd1
AM
22587+
22588+/* ---------------------------------------------------------------------- */
22589+
22590+int si_pid_test_slow(struct super_block *sb)
22591+{
22592+ void *p;
22593+
22594+ rcu_read_lock();
22595+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
22596+ rcu_read_unlock();
22597+
027c5e7a 22598+ return (long)!!p;
b752ccd1
AM
22599+}
22600+
22601+void si_pid_set_slow(struct super_block *sb)
22602+{
22603+ int err;
22604+ struct au_sbinfo *sbinfo;
22605+
22606+ AuDebugOn(si_pid_test_slow(sb));
22607+
22608+ sbinfo = au_sbi(sb);
22609+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
22610+ AuDebugOn(err);
22611+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22612+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 22613+ /*any valid ptr*/sb);
b752ccd1
AM
22614+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
22615+ AuDebugOn(err);
22616+ radix_tree_preload_end();
22617+}
22618+
22619+void si_pid_clr_slow(struct super_block *sb)
22620+{
22621+ void *p;
22622+ struct au_sbinfo *sbinfo;
22623+
22624+ AuDebugOn(!si_pid_test_slow(sb));
22625+
22626+ sbinfo = au_sbi(sb);
22627+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22628+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
22629+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 22630+}
7f207e10
AM
22631diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
22632--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
53392da6 22633+++ linux/fs/aufs/spl.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 22634@@ -0,0 +1,66 @@
1facf9fc 22635+/*
027c5e7a 22636+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22637+ *
22638+ * This program, aufs is free software; you can redistribute it and/or modify
22639+ * it under the terms of the GNU General Public License as published by
22640+ * the Free Software Foundation; either version 2 of the License, or
22641+ * (at your option) any later version.
dece6358
AM
22642+ *
22643+ * This program is distributed in the hope that it will be useful,
22644+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22645+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22646+ * GNU General Public License for more details.
22647+ *
22648+ * You should have received a copy of the GNU General Public License
22649+ * along with this program; if not, write to the Free Software
22650+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22651+ */
22652+
22653+/*
22654+ * simple list protected by a spinlock
22655+ */
22656+
22657+#ifndef __AUFS_SPL_H__
22658+#define __AUFS_SPL_H__
22659+
22660+#ifdef __KERNEL__
22661+
dece6358
AM
22662+#include <linux/spinlock.h>
22663+#include <linux/list.h>
4a4d8108 22664+#include <linux/rculist.h>
1facf9fc 22665+
22666+struct au_splhead {
22667+ spinlock_t spin;
22668+ struct list_head head;
22669+};
22670+
22671+static inline void au_spl_init(struct au_splhead *spl)
22672+{
22673+ spin_lock_init(&spl->spin);
22674+ INIT_LIST_HEAD(&spl->head);
22675+}
22676+
22677+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
22678+{
22679+ spin_lock(&spl->spin);
22680+ list_add(list, &spl->head);
22681+ spin_unlock(&spl->spin);
22682+}
22683+
22684+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
22685+{
22686+ spin_lock(&spl->spin);
22687+ list_del(list);
22688+ spin_unlock(&spl->spin);
22689+}
22690+
4a4d8108
AM
22691+static inline void au_spl_del_rcu(struct list_head *list,
22692+ struct au_splhead *spl)
22693+{
22694+ spin_lock(&spl->spin);
22695+ list_del_rcu(list);
22696+ spin_unlock(&spl->spin);
22697+}
22698+
1facf9fc 22699+#endif /* __KERNEL__ */
22700+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
22701diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
22702--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
22703+++ linux/fs/aufs/super.c 2011-10-24 20:51:51.583800333 +0200
22704@@ -0,0 +1,939 @@
1facf9fc 22705+/*
027c5e7a 22706+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22707+ *
22708+ * This program, aufs is free software; you can redistribute it and/or modify
22709+ * it under the terms of the GNU General Public License as published by
22710+ * the Free Software Foundation; either version 2 of the License, or
22711+ * (at your option) any later version.
dece6358
AM
22712+ *
22713+ * This program is distributed in the hope that it will be useful,
22714+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22715+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22716+ * GNU General Public License for more details.
22717+ *
22718+ * You should have received a copy of the GNU General Public License
22719+ * along with this program; if not, write to the Free Software
22720+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22721+ */
22722+
22723+/*
22724+ * mount and super_block operations
22725+ */
22726+
22727+#include <linux/buffer_head.h>
e49829fe 22728+#include <linux/jiffies.h>
dece6358 22729+#include <linux/module.h>
1facf9fc 22730+#include <linux/seq_file.h>
22731+#include <linux/statfs.h>
7f207e10
AM
22732+#include <linux/vmalloc.h>
22733+#include <linux/writeback.h>
1facf9fc 22734+#include "aufs.h"
22735+
22736+/*
22737+ * super_operations
22738+ */
22739+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
22740+{
22741+ struct au_icntnr *c;
22742+
22743+ c = au_cache_alloc_icntnr();
22744+ if (c) {
027c5e7a 22745+ au_icntnr_init(c);
1facf9fc 22746+ c->vfs_inode.i_version = 1; /* sigen(sb); */
22747+ c->iinfo.ii_hinode = NULL;
22748+ return &c->vfs_inode;
22749+ }
22750+ return NULL;
22751+}
22752+
027c5e7a
AM
22753+static void aufs_destroy_inode_cb(struct rcu_head *head)
22754+{
22755+ struct inode *inode = container_of(head, struct inode, i_rcu);
22756+
22757+ INIT_LIST_HEAD(&inode->i_dentry);
22758+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
22759+}
22760+
1facf9fc 22761+static void aufs_destroy_inode(struct inode *inode)
22762+{
22763+ au_iinfo_fin(inode);
027c5e7a 22764+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 22765+}
22766+
22767+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
22768+{
22769+ struct inode *inode;
22770+ int err;
22771+
22772+ inode = iget_locked(sb, ino);
22773+ if (unlikely(!inode)) {
22774+ inode = ERR_PTR(-ENOMEM);
22775+ goto out;
22776+ }
22777+ if (!(inode->i_state & I_NEW))
22778+ goto out;
22779+
22780+ err = au_xigen_new(inode);
22781+ if (!err)
22782+ err = au_iinfo_init(inode);
22783+ if (!err)
22784+ inode->i_version++;
22785+ else {
22786+ iget_failed(inode);
22787+ inode = ERR_PTR(err);
22788+ }
22789+
4f0767ce 22790+out:
1facf9fc 22791+ /* never return NULL */
22792+ AuDebugOn(!inode);
22793+ AuTraceErrPtr(inode);
22794+ return inode;
22795+}
22796+
22797+/* lock free root dinfo */
22798+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
22799+{
22800+ int err;
22801+ aufs_bindex_t bindex, bend;
22802+ struct path path;
4a4d8108 22803+ struct au_hdentry *hdp;
1facf9fc 22804+ struct au_branch *br;
1e00d052 22805+ char *perm;
1facf9fc 22806+
22807+ err = 0;
22808+ bend = au_sbend(sb);
4a4d8108 22809+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 22810+ for (bindex = 0; !err && bindex <= bend; bindex++) {
22811+ br = au_sbr(sb, bindex);
22812+ path.mnt = br->br_mnt;
4a4d8108 22813+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 22814+ err = au_seq_path(seq, &path);
1e00d052
AM
22815+ if (err > 0) {
22816+ perm = au_optstr_br_perm(br->br_perm);
22817+ if (perm) {
22818+ err = seq_printf(seq, "=%s", perm);
22819+ kfree(perm);
22820+ if (err == -1)
22821+ err = -E2BIG;
22822+ } else
22823+ err = -ENOMEM;
22824+ }
1facf9fc 22825+ if (!err && bindex != bend)
22826+ err = seq_putc(seq, ':');
22827+ }
22828+
22829+ return err;
22830+}
22831+
22832+static void au_show_wbr_create(struct seq_file *m, int v,
22833+ struct au_sbinfo *sbinfo)
22834+{
22835+ const char *pat;
22836+
dece6358
AM
22837+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22838+
1facf9fc 22839+ seq_printf(m, ",create=");
22840+ pat = au_optstr_wbr_create(v);
22841+ switch (v) {
22842+ case AuWbrCreate_TDP:
22843+ case AuWbrCreate_RR:
22844+ case AuWbrCreate_MFS:
22845+ case AuWbrCreate_PMFS:
22846+ seq_printf(m, pat);
22847+ break;
22848+ case AuWbrCreate_MFSV:
22849+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
22850+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22851+ / MSEC_PER_SEC);
1facf9fc 22852+ break;
22853+ case AuWbrCreate_PMFSV:
22854+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
22855+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22856+ / MSEC_PER_SEC);
1facf9fc 22857+ break;
22858+ case AuWbrCreate_MFSRR:
22859+ seq_printf(m, /*pat*/"mfsrr:%llu",
22860+ sbinfo->si_wbr_mfs.mfsrr_watermark);
22861+ break;
22862+ case AuWbrCreate_MFSRRV:
22863+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
22864+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
22865+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22866+ / MSEC_PER_SEC);
1facf9fc 22867+ break;
22868+ }
22869+}
22870+
22871+static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
22872+{
22873+#ifdef CONFIG_SYSFS
22874+ return 0;
22875+#else
22876+ int err;
22877+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
22878+ aufs_bindex_t bindex, brid;
22879+ struct super_block *sb;
22880+ struct qstr *name;
22881+ struct file *f;
22882+ struct dentry *d, *h_root;
4a4d8108 22883+ struct au_hdentry *hdp;
1facf9fc 22884+
dece6358
AM
22885+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22886+
1facf9fc 22887+ err = 0;
22888+ sb = mnt->mnt_sb;
22889+ f = au_sbi(sb)->si_xib;
22890+ if (!f)
22891+ goto out;
22892+
22893+ /* stop printing the default xino path on the first writable branch */
22894+ h_root = NULL;
22895+ brid = au_xino_brid(sb);
22896+ if (brid >= 0) {
22897+ bindex = au_br_index(sb, brid);
4a4d8108
AM
22898+ hdp = au_di(sb->s_root)->di_hdentry;
22899+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 22900+ }
22901+ d = f->f_dentry;
22902+ name = &d->d_name;
22903+ /* safe ->d_parent because the file is unlinked */
22904+ if (d->d_parent == h_root
22905+ && name->len == len
22906+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
22907+ goto out;
22908+
22909+ seq_puts(seq, ",xino=");
22910+ err = au_xino_path(seq, f);
22911+
4f0767ce 22912+out:
1facf9fc 22913+ return err;
22914+#endif
22915+}
22916+
22917+/* seq_file will re-call me in case of too long string */
22918+static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
22919+{
027c5e7a 22920+ int err;
1facf9fc 22921+ unsigned int mnt_flags, v;
22922+ struct super_block *sb;
22923+ struct au_sbinfo *sbinfo;
22924+
22925+#define AuBool(name, str) do { \
22926+ v = au_opt_test(mnt_flags, name); \
22927+ if (v != au_opt_test(AuOpt_Def, name)) \
22928+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
22929+} while (0)
22930+
22931+#define AuStr(name, str) do { \
22932+ v = mnt_flags & AuOptMask_##name; \
22933+ if (v != (AuOpt_Def & AuOptMask_##name)) \
22934+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
22935+} while (0)
22936+
22937+#define AuUInt(name, str, val) do { \
22938+ if (val != AUFS_##name##_DEF) \
22939+ seq_printf(m, "," #str "=%u", val); \
22940+} while (0)
22941+
22942+ /* lock free root dinfo */
22943+ sb = mnt->mnt_sb;
22944+ si_noflush_read_lock(sb);
22945+ sbinfo = au_sbi(sb);
22946+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
22947+
22948+ mnt_flags = au_mntflags(sb);
22949+ if (au_opt_test(mnt_flags, XINO)) {
22950+ err = au_show_xino(m, mnt);
22951+ if (unlikely(err))
22952+ goto out;
22953+ } else
22954+ seq_puts(m, ",noxino");
22955+
22956+ AuBool(TRUNC_XINO, trunc_xino);
22957+ AuStr(UDBA, udba);
dece6358 22958+ AuBool(SHWH, shwh);
1facf9fc 22959+ AuBool(PLINK, plink);
4a4d8108 22960+ AuBool(DIO, dio);
1facf9fc 22961+ /* AuBool(DIRPERM1, dirperm1); */
22962+ /* AuBool(REFROF, refrof); */
22963+
22964+ v = sbinfo->si_wbr_create;
22965+ if (v != AuWbrCreate_Def)
22966+ au_show_wbr_create(m, v, sbinfo);
22967+
22968+ v = sbinfo->si_wbr_copyup;
22969+ if (v != AuWbrCopyup_Def)
22970+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
22971+
22972+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
22973+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
22974+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
22975+
22976+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
22977+
027c5e7a
AM
22978+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
22979+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 22980+
22981+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
22982+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
22983+
22984+ AuBool(SUM, sum);
22985+ /* AuBool(SUM_W, wsum); */
22986+ AuBool(WARN_PERM, warn_perm);
22987+ AuBool(VERBOSE, verbose);
22988+
4f0767ce 22989+out:
1facf9fc 22990+ /* be sure to print "br:" last */
22991+ if (!sysaufs_brs) {
22992+ seq_puts(m, ",br:");
22993+ au_show_brs(m, sb);
22994+ }
22995+ si_read_unlock(sb);
22996+ return 0;
22997+
1facf9fc 22998+#undef AuBool
22999+#undef AuStr
4a4d8108 23000+#undef AuUInt
1facf9fc 23001+}
23002+
23003+/* ---------------------------------------------------------------------- */
23004+
23005+/* sum mode which returns the summation for statfs(2) */
23006+
/*
 * Saturating addition: returns @a + @b, or ULLONG_MAX when the sum does not
 * fit in a u64.
 * The sum wrapped exactly when it ended up smaller than @a; an unchanged
 * value (@b == 0) is not an overflow.
 */
static u64 au_add_till_max(u64 a, u64 b)
{
	u64 old;

	old = a;
	a += b;
	if (old <= a)
		return a;
	return ULLONG_MAX;
}
23017+
23018+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
23019+{
23020+ int err;
23021+ u64 blocks, bfree, bavail, files, ffree;
23022+ aufs_bindex_t bend, bindex, i;
23023+ unsigned char shared;
7f207e10 23024+ struct path h_path;
1facf9fc 23025+ struct super_block *h_sb;
23026+
23027+ blocks = 0;
23028+ bfree = 0;
23029+ bavail = 0;
23030+ files = 0;
23031+ ffree = 0;
23032+
23033+ err = 0;
23034+ bend = au_sbend(sb);
23035+ for (bindex = bend; bindex >= 0; bindex--) {
7f207e10
AM
23036+ h_path.mnt = au_sbr_mnt(sb, bindex);
23037+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 23038+ shared = 0;
23039+ for (i = bindex + 1; !shared && i <= bend; i++)
23040+ shared = (au_sbr_sb(sb, i) == h_sb);
23041+ if (shared)
23042+ continue;
23043+
23044+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
23045+ h_path.dentry = h_path.mnt->mnt_root;
23046+ err = vfs_statfs(&h_path, buf);
1facf9fc 23047+ if (unlikely(err))
23048+ goto out;
23049+
23050+ blocks = au_add_till_max(blocks, buf->f_blocks);
23051+ bfree = au_add_till_max(bfree, buf->f_bfree);
23052+ bavail = au_add_till_max(bavail, buf->f_bavail);
23053+ files = au_add_till_max(files, buf->f_files);
23054+ ffree = au_add_till_max(ffree, buf->f_ffree);
23055+ }
23056+
23057+ buf->f_blocks = blocks;
23058+ buf->f_bfree = bfree;
23059+ buf->f_bavail = bavail;
23060+ buf->f_files = files;
23061+ buf->f_ffree = ffree;
23062+
4f0767ce 23063+out:
1facf9fc 23064+ return err;
23065+}
23066+
23067+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
23068+{
23069+ int err;
7f207e10 23070+ struct path h_path;
1facf9fc 23071+ struct super_block *sb;
23072+
23073+ /* lock free root dinfo */
23074+ sb = dentry->d_sb;
23075+ si_noflush_read_lock(sb);
7f207e10 23076+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 23077+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
23078+ h_path.mnt = au_sbr_mnt(sb, 0);
23079+ h_path.dentry = h_path.mnt->mnt_root;
23080+ err = vfs_statfs(&h_path, buf);
23081+ } else
1facf9fc 23082+ err = au_statfs_sum(sb, buf);
23083+ si_read_unlock(sb);
23084+
23085+ if (!err) {
23086+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 23087+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 23088+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
23089+ }
23090+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
23091+
23092+ return err;
23093+}
23094+
23095+/* ---------------------------------------------------------------------- */
23096+
1facf9fc 23097+/* final actions when unmounting a file system */
23098+static void aufs_put_super(struct super_block *sb)
23099+{
23100+ struct au_sbinfo *sbinfo;
23101+
23102+ sbinfo = au_sbi(sb);
23103+ if (!sbinfo)
23104+ return;
23105+
1facf9fc 23106+ dbgaufs_si_fin(sbinfo);
23107+ kobject_put(&sbinfo->si_kobj);
23108+}
23109+
23110+/* ---------------------------------------------------------------------- */
23111+
7f207e10
AM
/*
 * Free an array obtained from au_array_alloc(): vfree() for a vmalloc
 * address, kfree() otherwise.  NULL is ignored.
 */
void au_array_free(void *array)
{
	if (!array)
		return;

	if (is_vmalloc_addr(array))
		vfree(array);
	else
		kfree(array);
}
23121+
23122+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
23123+{
23124+ void *array;
23125+ unsigned long long n;
23126+
23127+ array = NULL;
23128+ n = 0;
23129+ if (!*hint)
23130+ goto out;
23131+
23132+ if (*hint > ULLONG_MAX / sizeof(array)) {
23133+ array = ERR_PTR(-EMFILE);
23134+ pr_err("hint %llu\n", *hint);
23135+ goto out;
23136+ }
23137+
23138+ array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
23139+ if (unlikely(!array))
23140+ array = vmalloc(sizeof(array) * *hint);
23141+ if (unlikely(!array)) {
23142+ array = ERR_PTR(-ENOMEM);
23143+ goto out;
23144+ }
23145+
23146+ n = cb(array, *hint, arg);
23147+ AuDebugOn(n > *hint);
23148+
23149+out:
23150+ *hint = n;
23151+ return array;
23152+}
23153+
23154+static unsigned long long au_iarray_cb(void *a,
23155+ unsigned long long max __maybe_unused,
23156+ void *arg)
23157+{
23158+ unsigned long long n;
23159+ struct inode **p, *inode;
23160+ struct list_head *head;
23161+
23162+ n = 0;
23163+ p = a;
23164+ head = arg;
2cbb1c4b 23165+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
23166+ list_for_each_entry(inode, head, i_sb_list) {
23167+ if (!is_bad_inode(inode)
23168+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
23169+ spin_lock(&inode->i_lock);
23170+ if (atomic_read(&inode->i_count)) {
23171+ au_igrab(inode);
23172+ *p++ = inode;
23173+ n++;
23174+ AuDebugOn(n > max);
23175+ }
23176+ spin_unlock(&inode->i_lock);
7f207e10
AM
23177+ }
23178+ }
2cbb1c4b 23179+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
23180+
23181+ return n;
23182+}
23183+
23184+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
23185+{
23186+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
23187+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
23188+}
23189+
/* iput() the first @max inodes of @a, then free the array itself */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	struct inode **p, **end;

	end = a + max;
	for (p = a; p < end; p++)
		iput(*p);
	au_array_free(a);
}
23198+
23199+/* ---------------------------------------------------------------------- */
23200+
1facf9fc 23201+/*
23202+ * refresh dentry and inode at remount time.
23203+ */
027c5e7a
AM
23204+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
23205+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
23206+ struct dentry *parent)
1facf9fc 23207+{
23208+ int err;
1facf9fc 23209+
23210+ di_write_lock_child(dentry);
1facf9fc 23211+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
23212+ err = au_refresh_dentry(dentry, parent);
23213+ if (!err && dir_flags)
23214+ au_hn_reset(dentry->d_inode, dir_flags);
1facf9fc 23215+ di_read_unlock(parent, AuLock_IR);
1facf9fc 23216+ di_write_unlock(dentry);
23217+
23218+ return err;
23219+}
23220+
027c5e7a
AM
23221+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
23222+ struct au_sbinfo *sbinfo,
23223+ const unsigned int dir_flags)
1facf9fc 23224+{
027c5e7a
AM
23225+ int err;
23226+ struct dentry *parent;
23227+ struct inode *inode;
23228+
23229+ err = 0;
23230+ parent = dget_parent(dentry);
23231+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
23232+ inode = dentry->d_inode;
23233+ if (inode) {
23234+ if (!S_ISDIR(inode->i_mode))
23235+ err = au_do_refresh(dentry, /*dir_flags*/0,
23236+ parent);
23237+ else {
23238+ err = au_do_refresh(dentry, dir_flags, parent);
23239+ if (unlikely(err))
23240+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
23241+ }
23242+ } else
23243+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
23244+ AuDbgDentry(dentry);
23245+ }
23246+ dput(parent);
23247+
23248+ AuTraceErr(err);
23249+ return err;
1facf9fc 23250+}
23251+
027c5e7a 23252+static int au_refresh_d(struct super_block *sb)
1facf9fc 23253+{
23254+ int err, i, j, ndentry, e;
027c5e7a 23255+ unsigned int sigen;
1facf9fc 23256+ struct au_dcsub_pages dpages;
23257+ struct au_dpage *dpage;
027c5e7a
AM
23258+ struct dentry **dentries, *d;
23259+ struct au_sbinfo *sbinfo;
23260+ struct dentry *root = sb->s_root;
23261+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
1facf9fc 23262+
027c5e7a
AM
23263+ err = au_dpages_init(&dpages, GFP_NOFS);
23264+ if (unlikely(err))
1facf9fc 23265+ goto out;
027c5e7a
AM
23266+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
23267+ if (unlikely(err))
1facf9fc 23268+ goto out_dpages;
1facf9fc 23269+
027c5e7a
AM
23270+ sigen = au_sigen(sb);
23271+ sbinfo = au_sbi(sb);
23272+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 23273+ dpage = dpages.dpages + i;
23274+ dentries = dpage->dentries;
23275+ ndentry = dpage->ndentry;
027c5e7a 23276+ for (j = 0; j < ndentry; j++) {
1facf9fc 23277+ d = dentries[j];
027c5e7a
AM
23278+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
23279+ if (unlikely(e && !err))
23280+ err = e;
23281+ /* go on even if err */
1facf9fc 23282+ }
23283+ }
23284+
4f0767ce 23285+out_dpages:
1facf9fc 23286+ au_dpages_free(&dpages);
4f0767ce 23287+out:
1facf9fc 23288+ return err;
23289+}
23290+
027c5e7a 23291+static int au_refresh_i(struct super_block *sb)
1facf9fc 23292+{
027c5e7a
AM
23293+ int err, e;
23294+ unsigned int sigen;
23295+ unsigned long long max, ull;
23296+ struct inode *inode, **array;
1facf9fc 23297+
027c5e7a
AM
23298+ array = au_iarray_alloc(sb, &max);
23299+ err = PTR_ERR(array);
23300+ if (IS_ERR(array))
23301+ goto out;
1facf9fc 23302+
23303+ err = 0;
027c5e7a
AM
23304+ sigen = au_sigen(sb);
23305+ for (ull = 0; ull < max; ull++) {
23306+ inode = array[ull];
23307+ if (au_iigen(inode) != sigen) {
1facf9fc 23308+ ii_write_lock_child(inode);
027c5e7a 23309+ e = au_refresh_hinode_self(inode);
1facf9fc 23310+ ii_write_unlock(inode);
23311+ if (unlikely(e)) {
027c5e7a 23312+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 23313+ if (!err)
23314+ err = e;
23315+ /* go on even if err */
23316+ }
23317+ }
1facf9fc 23318+ }
23319+
027c5e7a 23320+ au_iarray_free(array, max);
1facf9fc 23321+
4f0767ce 23322+out:
1facf9fc 23323+ return err;
23324+}
23325+
027c5e7a 23326+static void au_remount_refresh(struct super_block *sb)
1facf9fc 23327+{
027c5e7a
AM
23328+ int err, e;
23329+ unsigned int udba;
23330+ aufs_bindex_t bindex, bend;
1facf9fc 23331+ struct dentry *root;
23332+ struct inode *inode;
027c5e7a 23333+ struct au_branch *br;
1facf9fc 23334+
23335+ au_sigen_inc(sb);
027c5e7a 23336+ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
1facf9fc 23337+
23338+ root = sb->s_root;
23339+ DiMustNoWaiters(root);
23340+ inode = root->d_inode;
23341+ IiMustNoWaiters(inode);
1facf9fc 23342+
027c5e7a
AM
23343+ udba = au_opt_udba(sb);
23344+ bend = au_sbend(sb);
23345+ for (bindex = 0; bindex <= bend; bindex++) {
23346+ br = au_sbr(sb, bindex);
23347+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 23348+ if (unlikely(err))
027c5e7a
AM
23349+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
23350+ bindex, err);
23351+ /* go on even if err */
1facf9fc 23352+ }
027c5e7a 23353+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 23354+
027c5e7a
AM
23355+ di_write_unlock(root);
23356+ err = au_refresh_d(sb);
23357+ e = au_refresh_i(sb);
23358+ if (unlikely(e && !err))
23359+ err = e;
1facf9fc 23360+ /* aufs_write_lock() calls ..._child() */
23361+ di_write_lock_child(root);
027c5e7a
AM
23362+
23363+ au_cpup_attr_all(inode, /*force*/1);
23364+
23365+ if (unlikely(err))
23366+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 23367+}
23368+
23369+/* stop extra interpretation of errno in mount(8), and strange error messages */
23370+static int cvt_err(int err)
23371+{
23372+ AuTraceErr(err);
23373+
23374+ switch (err) {
23375+ case -ENOENT:
23376+ case -ENOTDIR:
23377+ case -EEXIST:
23378+ case -EIO:
23379+ err = -EINVAL;
23380+ }
23381+ return err;
23382+}
23383+
23384+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
23385+{
4a4d8108
AM
23386+ int err, do_dx;
23387+ unsigned int mntflags;
1facf9fc 23388+ struct au_opts opts;
23389+ struct dentry *root;
23390+ struct inode *inode;
23391+ struct au_sbinfo *sbinfo;
23392+
23393+ err = 0;
23394+ root = sb->s_root;
23395+ if (!data || !*data) {
e49829fe
JR
23396+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23397+ if (!err) {
23398+ di_write_lock_child(root);
23399+ err = au_opts_verify(sb, *flags, /*pending*/0);
23400+ aufs_write_unlock(root);
23401+ }
1facf9fc 23402+ goto out;
23403+ }
23404+
23405+ err = -ENOMEM;
23406+ memset(&opts, 0, sizeof(opts));
23407+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23408+ if (unlikely(!opts.opt))
23409+ goto out;
23410+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23411+ opts.flags = AuOpts_REMOUNT;
23412+ opts.sb_flags = *flags;
23413+
23414+ /* parse it before aufs lock */
23415+ err = au_opts_parse(sb, data, &opts);
23416+ if (unlikely(err))
23417+ goto out_opts;
23418+
23419+ sbinfo = au_sbi(sb);
23420+ inode = root->d_inode;
23421+ mutex_lock(&inode->i_mutex);
e49829fe
JR
23422+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23423+ if (unlikely(err))
23424+ goto out_mtx;
23425+ di_write_lock_child(root);
1facf9fc 23426+
23427+ /* au_opts_remount() may return an error */
23428+ err = au_opts_remount(sb, &opts);
23429+ au_opts_free(&opts);
23430+
027c5e7a
AM
23431+ if (au_ftest_opts(opts.flags, REFRESH))
23432+ au_remount_refresh(sb);
1facf9fc 23433+
4a4d8108
AM
23434+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
23435+ mntflags = au_mntflags(sb);
23436+ do_dx = !!au_opt_test(mntflags, DIO);
23437+ au_dy_arefresh(do_dx);
23438+ }
23439+
1facf9fc 23440+ aufs_write_unlock(root);
953406b4 23441+
e49829fe
JR
23442+out_mtx:
23443+ mutex_unlock(&inode->i_mutex);
4f0767ce 23444+out_opts:
1facf9fc 23445+ free_page((unsigned long)opts.opt);
4f0767ce 23446+out:
1facf9fc 23447+ err = cvt_err(err);
23448+ AuTraceErr(err);
23449+ return err;
23450+}
23451+
4a4d8108 23452+static const struct super_operations aufs_sop = {
1facf9fc 23453+ .alloc_inode = aufs_alloc_inode,
23454+ .destroy_inode = aufs_destroy_inode,
b752ccd1 23455+ /* always deleting, no clearing */
1facf9fc 23456+ .drop_inode = generic_delete_inode,
23457+ .show_options = aufs_show_options,
23458+ .statfs = aufs_statfs,
23459+ .put_super = aufs_put_super,
23460+ .remount_fs = aufs_remount_fs
23461+};
23462+
23463+/* ---------------------------------------------------------------------- */
23464+
23465+/* allocate the root inode and dentry, and set sb->s_root on success */
23466+static int alloc_root(struct super_block *sb)
23467+{
23468+ int err;
23469+ struct inode *inode;
23470+ struct dentry *root;
23471+
23472+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
23473+ err = PTR_ERR(inode);
23474+ if (IS_ERR(inode))
23475+ goto out;
23476+
23477+ inode->i_op = &aufs_dir_iop;
23478+ inode->i_fop = &aufs_dir_fop;
23479+ inode->i_mode = S_IFDIR;
23480+ inode->i_nlink = 2;
23481+ unlock_new_inode(inode);
23482+
23483+ err = -ENOMEM; /* was PTR_ERR() of a valid inode, wrong for !root */
23484+ root = d_alloc_root(inode);
23485+ if (unlikely(!root))
23486+ goto out_iput;
23487+ err = PTR_ERR(root);
23488+ if (IS_ERR(root))
23489+ goto out_iput;
23490+
4a4d8108 23491+ err = au_di_init(root);
1facf9fc 23492+ if (!err) {
23493+ sb->s_root = root;
23494+ return 0; /* success */
23495+ }
23496+ dput(root);
23497+ goto out; /* do not iput */
23498+
4f0767ce 23499+out_iput:
1facf9fc 23500+ iget_failed(inode);
4f0767ce 23501+out:
1facf9fc 23502+ return err;
23503+}
23504+
23505+static int aufs_fill_super(struct super_block *sb, void *raw_data,
23506+ int silent __maybe_unused)
23507+{
23508+ int err;
23509+ struct au_opts opts;
23510+ struct dentry *root;
23511+ struct inode *inode;
23512+ char *arg = raw_data;
23513+
23514+ if (unlikely(!arg || !*arg)) {
23515+ err = -EINVAL;
4a4d8108 23516+ pr_err("no arg\n");
1facf9fc 23517+ goto out;
23518+ }
23519+
23520+ err = -ENOMEM;
23521+ memset(&opts, 0, sizeof(opts));
23522+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23523+ if (unlikely(!opts.opt))
23524+ goto out;
23525+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23526+ opts.sb_flags = sb->s_flags;
23527+
23528+ err = au_si_alloc(sb);
23529+ if (unlikely(err))
23530+ goto out_opts;
23531+
23532+ /* all timestamps always follow the ones on the branch */
23533+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23534+ sb->s_op = &aufs_sop;
027c5e7a 23535+ sb->s_d_op = &aufs_dop;
1facf9fc 23536+ sb->s_magic = AUFS_SUPER_MAGIC;
23537+ sb->s_maxbytes = 0;
23538+ au_export_init(sb);
23539+
23540+ err = alloc_root(sb);
23541+ if (unlikely(err)) {
23542+ si_write_unlock(sb);
23543+ goto out_info;
23544+ }
23545+ root = sb->s_root;
23546+ inode = root->d_inode;
23547+
23548+ /*
23549+ * actually we can parse options regardless of the aufs lock here.
23550+ * but at remount time, parsing must be done before aufs lock.
23551+ * so we follow the same rule.
23552+ */
23553+ ii_write_lock_parent(inode);
23554+ aufs_write_unlock(root);
23555+ err = au_opts_parse(sb, arg, &opts);
23556+ if (unlikely(err))
23557+ goto out_root;
23558+
23559+ /* lock vfs_inode first, then aufs. */
23560+ mutex_lock(&inode->i_mutex);
1facf9fc 23561+ aufs_write_lock(root);
23562+ err = au_opts_mount(sb, &opts);
23563+ au_opts_free(&opts);
1facf9fc 23564+ aufs_write_unlock(root);
23565+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
23566+ if (!err)
23567+ goto out_opts; /* success */
1facf9fc 23568+
4f0767ce 23569+out_root:
1facf9fc 23570+ dput(root);
23571+ sb->s_root = NULL;
4f0767ce 23572+out_info:
2cbb1c4b 23573+ dbgaufs_si_fin(au_sbi(sb));
1facf9fc 23574+ kobject_put(&au_sbi(sb)->si_kobj);
23575+ sb->s_fs_info = NULL;
4f0767ce 23576+out_opts:
1facf9fc 23577+ free_page((unsigned long)opts.opt);
4f0767ce 23578+out:
1facf9fc 23579+ AuTraceErr(err);
23580+ err = cvt_err(err);
23581+ AuTraceErr(err);
23582+ return err;
23583+}
23584+
23585+/* ---------------------------------------------------------------------- */
23586+
027c5e7a
AM
23587+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
23588+ const char *dev_name __maybe_unused,
23589+ void *raw_data)
1facf9fc 23590+{
027c5e7a 23591+ struct dentry *root;
1facf9fc 23592+ struct super_block *sb;
23593+
23594+ /* all timestamps always follow the ones on the branch */
23595+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
23596+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
23597+ if (IS_ERR(root))
23598+ goto out;
23599+
23600+ sb = root->d_sb;
23601+ si_write_lock(sb, !AuLock_FLUSH);
23602+ sysaufs_brs_add(sb, 0);
23603+ si_write_unlock(sb);
23604+ au_sbilist_add(sb);
23605+
23606+out:
23607+ return root;
1facf9fc 23608+}
23609+
e49829fe
JR
23610+static void aufs_kill_sb(struct super_block *sb)
23611+{
23612+ struct au_sbinfo *sbinfo;
23613+
23614+ sbinfo = au_sbi(sb);
23615+ if (sbinfo) {
23616+ au_sbilist_del(sb);
23617+ aufs_write_lock(sb->s_root);
23618+ if (sbinfo->si_wbr_create_ops->fin)
23619+ sbinfo->si_wbr_create_ops->fin(sb);
23620+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
23621+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
027c5e7a 23622+ au_remount_refresh(sb);
e49829fe
JR
23623+ }
23624+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
23625+ au_plink_put(sb, /*verbose*/1);
23626+ au_xino_clr(sb);
1e00d052 23627+ sbinfo->si_sb = NULL;
e49829fe 23628+ aufs_write_unlock(sb->s_root);
e49829fe
JR
23629+ au_nwt_flush(&sbinfo->si_nowait);
23630+ }
23631+ generic_shutdown_super(sb);
23632+}
23633+
1facf9fc 23634+struct file_system_type aufs_fs_type = {
23635+ .name = AUFS_FSTYPE,
23636+ .fs_flags =
23637+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */
23638+ | FS_REVAL_DOT, /* for NFS branch and udba */
027c5e7a 23639+ .mount = aufs_mount,
e49829fe 23640+ .kill_sb = aufs_kill_sb,
1facf9fc 23641+ /* no need to __module_get() and module_put(). */
23642+ .owner = THIS_MODULE,
23643+};
7f207e10
AM
23644diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
23645--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
23646+++ linux/fs/aufs/super.h 2011-08-24 13:30:24.734646739 +0200
23647@@ -0,0 +1,547 @@
1facf9fc 23648+/*
027c5e7a 23649+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 23650+ *
23651+ * This program, aufs is free software; you can redistribute it and/or modify
23652+ * it under the terms of the GNU General Public License as published by
23653+ * the Free Software Foundation; either version 2 of the License, or
23654+ * (at your option) any later version.
dece6358
AM
23655+ *
23656+ * This program is distributed in the hope that it will be useful,
23657+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23658+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23659+ * GNU General Public License for more details.
23660+ *
23661+ * You should have received a copy of the GNU General Public License
23662+ * along with this program; if not, write to the Free Software
23663+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 23664+ */
23665+
23666+/*
23667+ * super_block operations
23668+ */
23669+
23670+#ifndef __AUFS_SUPER_H__
23671+#define __AUFS_SUPER_H__
23672+
23673+#ifdef __KERNEL__
23674+
23675+#include <linux/fs.h>
1facf9fc 23676+#include <linux/aufs_type.h>
23677+#include "rwsem.h"
23678+#include "spl.h"
23679+#include "wkq.h"
23680+
23681+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
23682+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
23683+ loff_t *);
23684+
23685+/* policies to select one among multiple writable branches */
23686+struct au_wbr_copyup_operations {
23687+ int (*copyup)(struct dentry *dentry);
23688+};
23689+
23690+struct au_wbr_create_operations {
23691+ int (*create)(struct dentry *dentry, int isdir);
23692+ int (*init)(struct super_block *sb);
23693+ int (*fin)(struct super_block *sb);
23694+};
23695+
23696+struct au_wbr_mfs {
23697+ struct mutex mfs_lock; /* protect this structure */
23698+ unsigned long mfs_jiffy;
23699+ unsigned long mfs_expire;
23700+ aufs_bindex_t mfs_bindex;
23701+
23702+ unsigned long long mfsrr_bytes;
23703+ unsigned long long mfsrr_watermark;
23704+};
23705+
1facf9fc 23706+struct au_branch;
23707+struct au_sbinfo {
23708+ /* nowait tasks in the system-wide workqueue */
23709+ struct au_nowait_tasks si_nowait;
23710+
b752ccd1
AM
23711+ /*
23712+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
23713+ * rwsem for au_sbinfo is necessary.
23714+ */
dece6358 23715+ struct au_rwsem si_rwsem;
1facf9fc 23716+
b752ccd1
AM
23717+ /* prevent recursive locking in deleting inode */
23718+ struct {
23719+ unsigned long *bitmap;
23720+ spinlock_t tree_lock;
23721+ struct radix_tree_root tree;
23722+ } au_si_pid;
23723+
7f207e10
AM
23724+ /*
23725+ * dirty approach to protect sb->s_inodes and ->s_files from remount.
23726+ */
23727+ atomic_long_t si_ninodes, si_nfiles;
23728+
1facf9fc 23729+ /* branch management */
23730+ unsigned int si_generation;
23731+
23732+ /* see above flags */
23733+ unsigned char au_si_status;
23734+
23735+ aufs_bindex_t si_bend;
7f207e10
AM
23736+
23737+ /* dirty trick to keep br_id plus */
23738+ unsigned int si_last_br_id :
23739+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 23740+ struct au_branch **si_branch;
23741+
23742+ /* policy to select a writable branch */
23743+ unsigned char si_wbr_copyup;
23744+ unsigned char si_wbr_create;
23745+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
23746+ struct au_wbr_create_operations *si_wbr_create_ops;
23747+
23748+ /* round robin */
23749+ atomic_t si_wbr_rr_next;
23750+
23751+ /* most free space */
23752+ struct au_wbr_mfs si_wbr_mfs;
23753+
23754+ /* mount flags */
23755+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
23756+ unsigned int si_mntflags;
23757+
23758+ /* external inode number (bitmap and translation table) */
23759+ au_readf_t si_xread;
23760+ au_writef_t si_xwrite;
23761+ struct file *si_xib;
23762+ struct mutex si_xib_mtx; /* protect xib members */
23763+ unsigned long *si_xib_buf;
23764+ unsigned long si_xib_last_pindex;
23765+ int si_xib_next_bit;
23766+ aufs_bindex_t si_xino_brid;
23767+ /* reserved for future use */
23768+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
23769+
23770+#ifdef CONFIG_AUFS_EXPORT
23771+ /* i_generation */
23772+ struct file *si_xigen;
23773+ atomic_t si_xigen_next;
23774+#endif
23775+
23776+ /* vdir parameters */
e49829fe 23777+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 23778+ unsigned int si_rdblk; /* deblk size */
23779+ unsigned int si_rdhash; /* hash size */
23780+
23781+ /*
23782+ * If the number of whiteouts is larger than si_dirwh, leave all of
23783+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
23784+ * future fsck.aufs or kernel thread will remove them later.
23785+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
23786+ */
23787+ unsigned int si_dirwh;
23788+
23789+ /*
23790+ * rename(2) a directory with all children.
23791+ */
23792+ /* reserved for future use */
23793+ /* int si_rendir; */
23794+
23795+ /* pseudo_link list */
23796+ struct au_splhead si_plink;
23797+ wait_queue_head_t si_plink_wq;
4a4d8108 23798+ spinlock_t si_plink_maint_lock;
e49829fe 23799+ pid_t si_plink_maint_pid;
1facf9fc 23800+
23801+ /*
23802+ * sysfs and lifetime management.
23803+ * this is not a small structure and it may be a waste of memory when
23804+ * sysfs is disabled, particularly when many aufs-es are mounted.
23805+ * but using sysfs is the majority case.
23806+ */
23807+ struct kobject si_kobj;
23808+#ifdef CONFIG_DEBUG_FS
23809+ struct dentry *si_dbgaufs, *si_dbgaufs_xib;
23810+#ifdef CONFIG_AUFS_EXPORT
23811+ struct dentry *si_dbgaufs_xigen;
23812+#endif
23813+#endif
23814+
e49829fe
JR
23815+#ifdef CONFIG_AUFS_SBILIST
23816+ struct list_head si_list;
23817+#endif
23818+
1facf9fc 23819+ /* dirty, necessary for unmounting, sysfs and sysrq */
23820+ struct super_block *si_sb;
23821+};
23822+
dece6358
AM
23823+/* sbinfo status flags */
23824+/*
23825+ * set true when refresh_dirs() failed at remount time.
23826+ * then try refreshing dirs at access time again.
23827+ * if it is false, refreshing dirs at access time is unnecesary
23828+ */
027c5e7a 23829+#define AuSi_FAILED_REFRESH_DIR 1
dece6358
AM
23830+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
23831+ unsigned int flag)
23832+{
23833+ AuRwMustAnyLock(&sbi->si_rwsem);
23834+ return sbi->au_si_status & flag;
23835+}
23836+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
23837+#define au_fset_si(sbinfo, name) do { \
23838+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23839+ (sbinfo)->au_si_status |= AuSi_##name; \
23840+} while (0)
23841+#define au_fclr_si(sbinfo, name) do { \
23842+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23843+ (sbinfo)->au_si_status &= ~AuSi_##name; \
23844+} while (0)
23845+
1facf9fc 23846+/* ---------------------------------------------------------------------- */
23847+
23848+/* policy to select one among writable branches */
4a4d8108
AM
23849+#define AuWbrCopyup(sbinfo, ...) \
23850+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
23851+#define AuWbrCreate(sbinfo, ...) \
23852+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 23853+
23854+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
23855+#define AuLock_DW 1 /* write-lock dentry */
23856+#define AuLock_IR (1 << 1) /* read-lock inode */
23857+#define AuLock_IW (1 << 2) /* write-lock inode */
23858+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
23859+#define AuLock_DIR (1 << 4) /* target is a dir */
e49829fe
JR
23860+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
23861+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 23862+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 23863+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
23864+#define au_fset_lock(flags, name) \
23865+ do { (flags) |= AuLock_##name; } while (0)
23866+#define au_fclr_lock(flags, name) \
23867+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 23868+
23869+/* ---------------------------------------------------------------------- */
23870+
23871+/* super.c */
23872+extern struct file_system_type aufs_fs_type;
23873+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
23874+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
23875+ void *arg);
23876+void au_array_free(void *array);
23877+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
23878+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
23879+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 23880+
23881+/* sbinfo.c */
23882+void au_si_free(struct kobject *kobj);
23883+int au_si_alloc(struct super_block *sb);
23884+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
23885+
23886+unsigned int au_sigen_inc(struct super_block *sb);
23887+aufs_bindex_t au_new_br_id(struct super_block *sb);
23888+
e49829fe
JR
23889+int si_read_lock(struct super_block *sb, int flags);
23890+int si_write_lock(struct super_block *sb, int flags);
23891+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 23892+void aufs_read_unlock(struct dentry *dentry, int flags);
23893+void aufs_write_lock(struct dentry *dentry);
23894+void aufs_write_unlock(struct dentry *dentry);
e49829fe 23895+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 23896+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
23897+
b752ccd1
AM
23898+int si_pid_test_slow(struct super_block *sb);
23899+void si_pid_set_slow(struct super_block *sb);
23900+void si_pid_clr_slow(struct super_block *sb);
23901+
1facf9fc 23902+/* wbr_policy.c */
23903+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
23904+extern struct au_wbr_create_operations au_wbr_create_ops[];
23905+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
23906+
23907+/* ---------------------------------------------------------------------- */
23908+
23909+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
23910+{
23911+ return sb->s_fs_info;
23912+}
23913+
23914+/* ---------------------------------------------------------------------- */
23915+
23916+#ifdef CONFIG_AUFS_EXPORT
23917+void au_export_init(struct super_block *sb);
23918+
b752ccd1 23919+static inline int au_test_nfsd(void)
1facf9fc 23920+{
b752ccd1
AM
23921+ struct task_struct *tsk = current;
23922+
23923+ return (tsk->flags & PF_KTHREAD)
23924+ && !strcmp(tsk->comm, "nfsd");
1facf9fc 23925+}
23926+
b752ccd1 23927+void au_xigen_inc(struct inode *inode);
1facf9fc 23928+int au_xigen_new(struct inode *inode);
23929+int au_xigen_set(struct super_block *sb, struct file *base);
23930+void au_xigen_clr(struct super_block *sb);
23931+
23932+static inline int au_busy_or_stale(void)
23933+{
b752ccd1 23934+ if (!au_test_nfsd())
1facf9fc 23935+ return -EBUSY;
23936+ return -ESTALE;
23937+}
23938+#else
4a4d8108 23939+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1
AM
23940+AuStubInt0(au_test_nfsd, void)
23941+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
23942+AuStubInt0(au_xigen_new, struct inode *inode)
23943+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
23944+AuStubVoid(au_xigen_clr, struct super_block *sb)
1facf9fc 23945+static inline int au_busy_or_stale(void)
23946+{
23947+ return -EBUSY;
23948+}
23949+#endif /* CONFIG_AUFS_EXPORT */
23950+
23951+/* ---------------------------------------------------------------------- */
23952+
e49829fe
JR
23953+#ifdef CONFIG_AUFS_SBILIST
23954+/* module.c */
23955+extern struct au_splhead au_sbilist;
23956+
23957+static inline void au_sbilist_init(void)
23958+{
23959+ au_spl_init(&au_sbilist);
23960+}
23961+
23962+static inline void au_sbilist_add(struct super_block *sb)
23963+{
23964+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
23965+}
23966+
23967+static inline void au_sbilist_del(struct super_block *sb)
23968+{
23969+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
23970+}
53392da6
AM
23971+
23972+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
23973+static inline void au_sbilist_lock(void)
23974+{
23975+ spin_lock(&au_sbilist.spin);
23976+}
23977+
23978+static inline void au_sbilist_unlock(void)
23979+{
23980+ spin_unlock(&au_sbilist.spin);
23981+}
23982+#define AuGFP_SBILIST GFP_ATOMIC
23983+#else
23984+AuStubVoid(au_sbilist_lock, void)
23985+AuStubVoid(au_sbilist_unlock, void)
23986+#define AuGFP_SBILIST GFP_NOFS
23987+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
23988+#else
23989+AuStubVoid(au_sbilist_init, void)
23990+AuStubVoid(au_sbilist_add, struct super_block*)
23991+AuStubVoid(au_sbilist_del, struct super_block*)
53392da6
AM
23992+AuStubVoid(au_sbilist_lock, void)
23993+AuStubVoid(au_sbilist_unlock, void)
23994+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
23995+#endif
23996+
23997+/* ---------------------------------------------------------------------- */
23998+
1facf9fc 23999+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
24000+{
dece6358
AM
24001+ /*
24002+ * This function is a dynamic '__init' function actually,
24003+ * so the tiny check for si_rwsem is unnecessary.
24004+ */
24005+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 24006+#ifdef CONFIG_DEBUG_FS
24007+ sbinfo->si_dbgaufs = NULL;
24008+ sbinfo->si_dbgaufs_xib = NULL;
24009+#ifdef CONFIG_AUFS_EXPORT
24010+ sbinfo->si_dbgaufs_xigen = NULL;
24011+#endif
24012+#endif
24013+}
24014+
24015+/* ---------------------------------------------------------------------- */
24016+
b752ccd1
AM
24017+static inline pid_t si_pid_bit(void)
24018+{
24019+ /* the origin of pid is 1, but the bitmap's is 0 */
24020+ return current->pid - 1;
24021+}
24022+
24023+static inline int si_pid_test(struct super_block *sb)
24024+{
24025+ pid_t bit = si_pid_bit();
24026+ if (bit < PID_MAX_DEFAULT)
24027+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24028+ else
24029+ return si_pid_test_slow(sb);
24030+}
24031+
24032+static inline void si_pid_set(struct super_block *sb)
24033+{
24034+ pid_t bit = si_pid_bit();
24035+ if (bit < PID_MAX_DEFAULT) {
24036+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
24037+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24038+ /* smp_mb(); */
24039+ } else
24040+ si_pid_set_slow(sb);
24041+}
24042+
24043+static inline void si_pid_clr(struct super_block *sb)
24044+{
24045+ pid_t bit = si_pid_bit();
24046+ if (bit < PID_MAX_DEFAULT) {
24047+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
24048+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
24049+ /* smp_mb(); */
24050+ } else
24051+ si_pid_clr_slow(sb);
24052+}
24053+
24054+/* ---------------------------------------------------------------------- */
24055+
1facf9fc 24056+/* lock superblock. mainly for entry point functions */
24057+/*
b752ccd1
AM
24058+ * __si_read_lock, __si_write_lock,
24059+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 24060+ */
b752ccd1 24061+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 24062+
dece6358
AM
24063+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
24064+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
24065+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
24066+
b752ccd1
AM
24067+static inline void si_noflush_read_lock(struct super_block *sb)
24068+{
24069+ __si_read_lock(sb);
24070+ si_pid_set(sb);
24071+}
24072+
24073+static inline int si_noflush_read_trylock(struct super_block *sb)
24074+{
24075+ int locked = __si_read_trylock(sb);
24076+ if (locked)
24077+ si_pid_set(sb);
24078+ return locked;
24079+}
24080+
24081+static inline void si_noflush_write_lock(struct super_block *sb)
24082+{
24083+ __si_write_lock(sb);
24084+ si_pid_set(sb);
24085+}
24086+
24087+static inline int si_noflush_write_trylock(struct super_block *sb)
24088+{
24089+ int locked = __si_write_trylock(sb);
24090+ if (locked)
24091+ si_pid_set(sb);
24092+ return locked;
24093+}
24094+
e49829fe 24095+#if 0 /* unused */
1facf9fc 24096+static inline int si_read_trylock(struct super_block *sb, int flags)
24097+{
24098+ if (au_ftest_lock(flags, FLUSH))
24099+ au_nwt_flush(&au_sbi(sb)->si_nowait);
24100+ return si_noflush_read_trylock(sb);
24101+}
e49829fe 24102+#endif
1facf9fc 24103+
b752ccd1
AM
24104+static inline void si_read_unlock(struct super_block *sb)
24105+{
24106+ si_pid_clr(sb);
24107+ __si_read_unlock(sb);
24108+}
24109+
b752ccd1 24110+#if 0 /* unused */
1facf9fc 24111+static inline int si_write_trylock(struct super_block *sb, int flags)
24112+{
24113+ if (au_ftest_lock(flags, FLUSH))
24114+ au_nwt_flush(&au_sbi(sb)->si_nowait);
24115+ return si_noflush_write_trylock(sb);
24116+}
b752ccd1
AM
24117+#endif
24118+
24119+static inline void si_write_unlock(struct super_block *sb)
24120+{
24121+ si_pid_clr(sb);
24122+ __si_write_unlock(sb);
24123+}
24124+
24125+#if 0 /* unused */
24126+static inline void si_downgrade_lock(struct super_block *sb)
24127+{
24128+ __si_downgrade_lock(sb);
24129+}
24130+#endif
1facf9fc 24131+
24132+/* ---------------------------------------------------------------------- */
24133+
24134+static inline aufs_bindex_t au_sbend(struct super_block *sb)
24135+{
dece6358 24136+ SiMustAnyLock(sb);
1facf9fc 24137+ return au_sbi(sb)->si_bend;
24138+}
24139+
24140+static inline unsigned int au_mntflags(struct super_block *sb)
24141+{
dece6358 24142+ SiMustAnyLock(sb);
1facf9fc 24143+ return au_sbi(sb)->si_mntflags;
24144+}
24145+
24146+static inline unsigned int au_sigen(struct super_block *sb)
24147+{
dece6358 24148+ SiMustAnyLock(sb);
1facf9fc 24149+ return au_sbi(sb)->si_generation;
24150+}
24151+
7f207e10
AM
24152+static inline void au_ninodes_inc(struct super_block *sb)
24153+{
24154+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
24155+}
24156+
24157+static inline void au_ninodes_dec(struct super_block *sb)
24158+{
24159+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
24160+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
24161+}
24162+
24163+static inline void au_nfiles_inc(struct super_block *sb)
24164+{
24165+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
24166+}
24167+
24168+static inline void au_nfiles_dec(struct super_block *sb)
24169+{
24170+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
24171+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
24172+}
24173+
1facf9fc 24174+static inline struct au_branch *au_sbr(struct super_block *sb,
24175+ aufs_bindex_t bindex)
24176+{
dece6358 24177+ SiMustAnyLock(sb);
1facf9fc 24178+ return au_sbi(sb)->si_branch[0 + bindex];
24179+}
24180+
24181+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
24182+{
dece6358 24183+ SiMustWriteLock(sb);
1facf9fc 24184+ au_sbi(sb)->si_xino_brid = brid;
24185+}
24186+
24187+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
24188+{
dece6358 24189+ SiMustAnyLock(sb);
1facf9fc 24190+ return au_sbi(sb)->si_xino_brid;
24191+}
24192+
24193+#endif /* __KERNEL__ */
24194+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
24195diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
24196--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24197+++ linux/fs/aufs/sysaufs.c 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24198@@ -0,0 +1,107 @@
1facf9fc 24199+/*
027c5e7a 24200+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24201+ *
24202+ * This program, aufs is free software; you can redistribute it and/or modify
24203+ * it under the terms of the GNU General Public License as published by
24204+ * the Free Software Foundation; either version 2 of the License, or
24205+ * (at your option) any later version.
dece6358
AM
24206+ *
24207+ * This program is distributed in the hope that it will be useful,
24208+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24209+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24210+ * GNU General Public License for more details.
24211+ *
24212+ * You should have received a copy of the GNU General Public License
24213+ * along with this program; if not, write to the Free Software
24214+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24215+ */
24216+
24217+/*
24218+ * sysfs interface and lifetime management
24219+ * they are necessary regardless of whether sysfs is disabled.
24220+ */
24221+
24222+#include <linux/fs.h>
24223+#include <linux/random.h>
24224+#include <linux/sysfs.h>
24225+#include "aufs.h"
24226+
24227+unsigned long sysaufs_si_mask;
e49829fe 24228+struct kset *sysaufs_kset;
1facf9fc 24229+
24230+#define AuSiAttr(_name) { \
24231+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
24232+ .show = sysaufs_si_##_name, \
24233+}
24234+
24235+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
24236+struct attribute *sysaufs_si_attrs[] = {
24237+ &sysaufs_si_attr_xi_path.attr,
24238+ NULL,
24239+};
24240+
4a4d8108 24241+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 24242+ .show = sysaufs_si_show
24243+};
24244+
24245+static struct kobj_type au_sbi_ktype = {
24246+ .release = au_si_free,
24247+ .sysfs_ops = &au_sbi_ops,
24248+ .default_attrs = sysaufs_si_attrs
24249+};
24250+
24251+/* ---------------------------------------------------------------------- */
24252+
24253+int sysaufs_si_init(struct au_sbinfo *sbinfo)
24254+{
24255+ int err;
24256+
e49829fe 24257+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 24258+ /* cf. sysaufs_name() */
24259+ err = kobject_init_and_add
e49829fe 24260+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 24261+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
24262+
24263+ dbgaufs_si_null(sbinfo);
24264+ if (!err) {
24265+ err = dbgaufs_si_init(sbinfo);
24266+ if (unlikely(err))
24267+ kobject_put(&sbinfo->si_kobj);
24268+ }
24269+ return err;
24270+}
24271+
24272+void sysaufs_fin(void)
24273+{
24274+ dbgaufs_fin();
e49829fe
JR
24275+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
24276+ kset_unregister(sysaufs_kset);
1facf9fc 24277+}
24278+
24279+int __init sysaufs_init(void)
24280+{
24281+ int err;
24282+
24283+ do {
24284+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
24285+ } while (!sysaufs_si_mask);
24286+
4a4d8108 24287+ err = -EINVAL;
e49829fe
JR
24288+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
24289+ if (unlikely(!sysaufs_kset))
4a4d8108 24290+ goto out;
e49829fe
JR
24291+ err = PTR_ERR(sysaufs_kset);
24292+ if (IS_ERR(sysaufs_kset))
1facf9fc 24293+ goto out;
e49829fe 24294+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 24295+ if (unlikely(err)) {
e49829fe 24296+ kset_unregister(sysaufs_kset);
1facf9fc 24297+ goto out;
24298+ }
24299+
24300+ err = dbgaufs_init();
24301+ if (unlikely(err))
24302+ sysaufs_fin();
4f0767ce 24303+out:
1facf9fc 24304+ return err;
24305+}
7f207e10
AM
24306diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
24307--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 24308+++ linux/fs/aufs/sysaufs.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24309@@ -0,0 +1,105 @@
1facf9fc 24310+/*
027c5e7a 24311+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24312+ *
24313+ * This program, aufs is free software; you can redistribute it and/or modify
24314+ * it under the terms of the GNU General Public License as published by
24315+ * the Free Software Foundation; either version 2 of the License, or
24316+ * (at your option) any later version.
dece6358
AM
24317+ *
24318+ * This program is distributed in the hope that it will be useful,
24319+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24320+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24321+ * GNU General Public License for more details.
24322+ *
24323+ * You should have received a copy of the GNU General Public License
24324+ * along with this program; if not, write to the Free Software
24325+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24326+ */
24327+
24328+/*
24329+ * sysfs interface and mount lifetime management
24330+ */
24331+
24332+#ifndef __SYSAUFS_H__
24333+#define __SYSAUFS_H__
24334+
24335+#ifdef __KERNEL__
24336+
1facf9fc 24337+#include <linux/sysfs.h>
24338+#include <linux/aufs_type.h>
24339+#include "module.h"
24340+
dece6358
AM
24341+struct super_block;
24342+struct au_sbinfo;
24343+
1facf9fc 24344+struct sysaufs_si_attr {
24345+ struct attribute attr;
24346+ int (*show)(struct seq_file *seq, struct super_block *sb);
24347+};
24348+
24349+/* ---------------------------------------------------------------------- */
24350+
24351+/* sysaufs.c */
24352+extern unsigned long sysaufs_si_mask;
e49829fe 24353+extern struct kset *sysaufs_kset;
1facf9fc 24354+extern struct attribute *sysaufs_si_attrs[];
24355+int sysaufs_si_init(struct au_sbinfo *sbinfo);
24356+int __init sysaufs_init(void);
24357+void sysaufs_fin(void);
24358+
24359+/* ---------------------------------------------------------------------- */
24360+
24361+/* some people don't like to show a pointer in kernel */
24362+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
24363+{
24364+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
24365+}
24366+
24367+#define SysaufsSiNamePrefix "si_"
24368+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
24369+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
24370+{
24371+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
24372+ sysaufs_si_id(sbinfo));
24373+}
24374+
24375+struct au_branch;
24376+#ifdef CONFIG_SYSFS
24377+/* sysfs.c */
24378+extern struct attribute_group *sysaufs_attr_group;
24379+
24380+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
24381+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24382+ char *buf);
24383+
24384+void sysaufs_br_init(struct au_branch *br);
24385+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
24386+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
24387+
24388+#define sysaufs_brs_init() do {} while (0)
24389+
24390+#else
24391+#define sysaufs_attr_group NULL
24392+
4a4d8108 24393+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
1facf9fc 24394+
24395+static inline
24396+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24397+ char *buf)
24398+{
24399+ return 0;
24400+}
24401+
4a4d8108
AM
24402+AuStubVoid(sysaufs_br_init, struct au_branch *br)
24403+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
24404+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 24405+
24406+static inline void sysaufs_brs_init(void)
24407+{
24408+ sysaufs_brs = 0;
24409+}
24410+
24411+#endif /* CONFIG_SYSFS */
24412+
24413+#endif /* __KERNEL__ */
24414+#endif /* __SYSAUFS_H__ */
7f207e10
AM
24415diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
24416--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
24417+++ linux/fs/aufs/sysfs.c 2011-10-24 20:51:51.583800333 +0200
24418@@ -0,0 +1,260 @@
1facf9fc 24419+/*
027c5e7a 24420+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24421+ *
24422+ * This program, aufs is free software; you can redistribute it and/or modify
24423+ * it under the terms of the GNU General Public License as published by
24424+ * the Free Software Foundation; either version 2 of the License, or
24425+ * (at your option) any later version.
dece6358
AM
24426+ *
24427+ * This program is distributed in the hope that it will be useful,
24428+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24429+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24430+ * GNU General Public License for more details.
24431+ *
24432+ * You should have received a copy of the GNU General Public License
24433+ * along with this program; if not, write to the Free Software
24434+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24435+ */
24436+
24437+/*
24438+ * sysfs interface
24439+ */
24440+
24441+#include <linux/fs.h>
dece6358 24442+#include <linux/module.h>
1facf9fc 24443+#include <linux/seq_file.h>
24444+#include <linux/sysfs.h>
24445+#include "aufs.h"
24446+
4a4d8108
AM
24447+#ifdef CONFIG_AUFS_FS_MODULE
24448+/* this entry violates the "one line per file" policy of sysfs */
24449+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
24450+ char *buf)
24451+{
24452+ ssize_t err;
24453+ static char *conf =
24454+/* this file is generated at compile time */
24455+#include "conf.str"
24456+ ;
24457+
24458+ err = snprintf(buf, PAGE_SIZE, conf);
24459+ if (unlikely(err >= PAGE_SIZE))
24460+ err = -EFBIG;
24461+ return err;
24462+}
24463+
24464+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
24465+#endif
24466+
1facf9fc 24467+static struct attribute *au_attr[] = {
4a4d8108
AM
24468+#ifdef CONFIG_AUFS_FS_MODULE
24469+ &au_config_attr.attr,
24470+#endif
1facf9fc 24471+ NULL, /* need to NULL terminate the list of attributes */
24472+};
24473+
24474+static struct attribute_group sysaufs_attr_group_body = {
24475+ .attrs = au_attr
24476+};
24477+
24478+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
24479+
24480+/* ---------------------------------------------------------------------- */
24481+
24482+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
24483+{
24484+ int err;
24485+
dece6358
AM
24486+ SiMustAnyLock(sb);
24487+
1facf9fc 24488+ err = 0;
24489+ if (au_opt_test(au_mntflags(sb), XINO)) {
24490+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
24491+ seq_putc(seq, '\n');
24492+ }
24493+ return err;
24494+}
24495+
24496+/*
24497+ * the lifetime of branch is independent from the entry under sysfs.
24498+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
24499+ * unlinked.
24500+ */
24501+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
24502+ aufs_bindex_t bindex)
24503+{
1e00d052 24504+ int err;
1facf9fc 24505+ struct path path;
24506+ struct dentry *root;
24507+ struct au_branch *br;
1e00d052 24508+ char *perm;
1facf9fc 24509+
24510+ AuDbg("b%d\n", bindex);
24511+
1e00d052 24512+ err = 0;
1facf9fc 24513+ root = sb->s_root;
24514+ di_read_lock_parent(root, !AuLock_IR);
24515+ br = au_sbr(sb, bindex);
24516+ path.mnt = br->br_mnt;
24517+ path.dentry = au_h_dptr(root, bindex);
24518+ au_seq_path(seq, &path);
24519+ di_read_unlock(root, !AuLock_IR);
1e00d052
AM
24520+ perm = au_optstr_br_perm(br->br_perm);
24521+ if (perm) {
24522+ err = seq_printf(seq, "=%s\n", perm);
24523+ kfree(perm);
24524+ if (err == -1)
24525+ err = -E2BIG;
24526+ } else
24527+ err = -ENOMEM;
24528+ return err;
1facf9fc 24529+}
24530+
24531+/* ---------------------------------------------------------------------- */
24532+
24533+static struct seq_file *au_seq(char *p, ssize_t len)
24534+{
24535+ struct seq_file *seq;
24536+
24537+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
24538+ if (seq) {
24539+ /* mutex_init(&seq.lock); */
24540+ seq->buf = p;
24541+ seq->size = len;
24542+ return seq; /* success */
24543+ }
24544+
24545+ seq = ERR_PTR(-ENOMEM);
24546+ return seq;
24547+}
24548+
24549+#define SysaufsBr_PREFIX "br"
24550+
24551+/* todo: file size may exceed PAGE_SIZE */
24552+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 24553+ char *buf)
1facf9fc 24554+{
24555+ ssize_t err;
24556+ long l;
24557+ aufs_bindex_t bend;
24558+ struct au_sbinfo *sbinfo;
24559+ struct super_block *sb;
24560+ struct seq_file *seq;
24561+ char *name;
24562+ struct attribute **cattr;
24563+
24564+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
24565+ sb = sbinfo->si_sb;
1308ab2a 24566+
24567+ /*
24568+ * prevent a race condition between sysfs and aufs.
24569+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
24570+ * prohibits maintaining the sysfs entries.
24571+ * here we acquire read lock after sysfs_get_active_two().
24572+ * on the other hand, the remount process may maintain the sysfs/aufs
24573+ * entries after acquiring write lock.
24574+ * it can cause a deadlock.
24575+ * we simply give up processing the read here.
24576+ */
24577+ err = -EBUSY;
24578+ if (unlikely(!si_noflush_read_trylock(sb)))
24579+ goto out;
1facf9fc 24580+
24581+ seq = au_seq(buf, PAGE_SIZE);
24582+ err = PTR_ERR(seq);
24583+ if (IS_ERR(seq))
1308ab2a 24584+ goto out_unlock;
1facf9fc 24585+
24586+ name = (void *)attr->name;
24587+ cattr = sysaufs_si_attrs;
24588+ while (*cattr) {
24589+ if (!strcmp(name, (*cattr)->name)) {
24590+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
24591+ ->show(seq, sb);
24592+ goto out_seq;
24593+ }
24594+ cattr++;
24595+ }
24596+
24597+ bend = au_sbend(sb);
24598+ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
24599+ name += sizeof(SysaufsBr_PREFIX) - 1;
24600+ err = strict_strtol(name, 10, &l);
24601+ if (!err) {
24602+ if (l <= bend)
24603+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
24604+ else
24605+ err = -ENOENT;
24606+ }
24607+ goto out_seq;
24608+ }
24609+ BUG();
24610+
4f0767ce 24611+out_seq:
1facf9fc 24612+ if (!err) {
24613+ err = seq->count;
24614+ /* sysfs limit */
24615+ if (unlikely(err == PAGE_SIZE))
24616+ err = -EFBIG;
24617+ }
24618+ kfree(seq);
4f0767ce 24619+out_unlock:
1facf9fc 24620+ si_read_unlock(sb);
4f0767ce 24621+out:
1facf9fc 24622+ return err;
24623+}
24624+
24625+/* ---------------------------------------------------------------------- */
24626+
24627+void sysaufs_br_init(struct au_branch *br)
24628+{
4a4d8108
AM
24629+ struct attribute *attr = &br->br_attr;
24630+
24631+ sysfs_attr_init(attr);
24632+ attr->name = br->br_name;
24633+ attr->mode = S_IRUGO;
1facf9fc 24634+}
24635+
24636+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
24637+{
24638+ struct au_branch *br;
24639+ struct kobject *kobj;
24640+ aufs_bindex_t bend;
24641+
24642+ dbgaufs_brs_del(sb, bindex);
24643+
24644+ if (!sysaufs_brs)
24645+ return;
24646+
24647+ kobj = &au_sbi(sb)->si_kobj;
24648+ bend = au_sbend(sb);
24649+ for (; bindex <= bend; bindex++) {
24650+ br = au_sbr(sb, bindex);
24651+ sysfs_remove_file(kobj, &br->br_attr);
24652+ }
24653+}
24654+
24655+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
24656+{
24657+ int err;
24658+ aufs_bindex_t bend;
24659+ struct kobject *kobj;
24660+ struct au_branch *br;
24661+
24662+ dbgaufs_brs_add(sb, bindex);
24663+
24664+ if (!sysaufs_brs)
24665+ return;
24666+
24667+ kobj = &au_sbi(sb)->si_kobj;
24668+ bend = au_sbend(sb);
24669+ for (; bindex <= bend; bindex++) {
24670+ br = au_sbr(sb, bindex);
24671+ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
24672+ "%d", bindex);
24673+ err = sysfs_create_file(kobj, &br->br_attr);
24674+ if (unlikely(err))
4a4d8108
AM
24675+ pr_warning("failed %s under sysfs(%d)\n",
24676+ br->br_name, err);
1facf9fc 24677+ }
24678+}
7f207e10
AM
24679diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
24680--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24681+++ linux/fs/aufs/sysrq.c 2011-08-24 13:30:24.734646739 +0200
2cbb1c4b 24682@@ -0,0 +1,151 @@
1facf9fc 24683+/*
027c5e7a 24684+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24685+ *
24686+ * This program, aufs is free software; you can redistribute it and/or modify
24687+ * it under the terms of the GNU General Public License as published by
24688+ * the Free Software Foundation; either version 2 of the License, or
24689+ * (at your option) any later version.
dece6358
AM
24690+ *
24691+ * This program is distributed in the hope that it will be useful,
24692+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24693+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24694+ * GNU General Public License for more details.
24695+ *
24696+ * You should have received a copy of the GNU General Public License
24697+ * along with this program; if not, write to the Free Software
24698+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24699+ */
24700+
24701+/*
24702+ * magic sysrq handler
24703+ */
24704+
24705+#include <linux/fs.h>
24706+#include <linux/module.h>
24707+#include <linux/moduleparam.h>
24708+/* #include <linux/sysrq.h> */
027c5e7a 24709+#include <linux/writeback.h>
1facf9fc 24710+#include "aufs.h"
24711+
24712+/* ---------------------------------------------------------------------- */
24713+
24714+static void sysrq_sb(struct super_block *sb)
24715+{
24716+ char *plevel;
24717+ struct au_sbinfo *sbinfo;
24718+ struct file *file;
24719+
24720+ plevel = au_plevel;
24721+ au_plevel = KERN_WARNING;
1facf9fc 24722+
24723+ sbinfo = au_sbi(sb);
4a4d8108
AM
24724+ /* since we define pr_fmt, call printk directly */
24725+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
24726+ printk(KERN_WARNING AUFS_NAME ": superblock\n");
1facf9fc 24727+ au_dpri_sb(sb);
027c5e7a
AM
24728+
24729+#if 0
4a4d8108 24730+ printk(KERN_WARNING AUFS_NAME ": root dentry\n");
1facf9fc 24731+ au_dpri_dentry(sb->s_root);
4a4d8108 24732+ printk(KERN_WARNING AUFS_NAME ": root inode\n");
1facf9fc 24733+ au_dpri_inode(sb->s_root->d_inode);
027c5e7a
AM
24734+#endif
24735+
1facf9fc 24736+#if 0
027c5e7a
AM
24737+ do {
24738+ int err, i, j, ndentry;
24739+ struct au_dcsub_pages dpages;
24740+ struct au_dpage *dpage;
24741+
24742+ err = au_dpages_init(&dpages, GFP_ATOMIC);
24743+ if (unlikely(err))
24744+ break;
24745+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
24746+ if (!err)
24747+ for (i = 0; i < dpages.ndpage; i++) {
24748+ dpage = dpages.dpages + i;
24749+ ndentry = dpage->ndentry;
24750+ for (j = 0; j < ndentry; j++)
24751+ au_dpri_dentry(dpage->dentries[j]);
24752+ }
24753+ au_dpages_free(&dpages);
24754+ } while (0);
24755+#endif
24756+
24757+#if 1
24758+ {
24759+ struct inode *i;
24760+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n");
2cbb1c4b
JR
24761+ spin_lock(&inode_sb_list_lock);
24762+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
24763+ spin_lock(&i->i_lock);
027c5e7a
AM
24764+ if (1 || list_empty(&i->i_dentry))
24765+ au_dpri_inode(i);
2cbb1c4b
JR
24766+ spin_unlock(&i->i_lock);
24767+ }
24768+ spin_unlock(&inode_sb_list_lock);
027c5e7a 24769+ }
1facf9fc 24770+#endif
4a4d8108 24771+ printk(KERN_WARNING AUFS_NAME ": files\n");
0c5527e5
AM
24772+ lg_global_lock(files_lglock);
24773+ do_file_list_for_each_entry(sb, file) {
4a4d8108
AM
24774+ umode_t mode;
24775+ mode = file->f_dentry->d_inode->i_mode;
24776+ if (!special_file(mode) || au_special_file(mode))
1facf9fc 24777+ au_dpri_file(file);
0c5527e5
AM
24778+ } while_file_list_for_each_entry;
24779+ lg_global_unlock(files_lglock);
e49829fe 24780+ printk(KERN_WARNING AUFS_NAME ": done\n");
1facf9fc 24781+
24782+ au_plevel = plevel;
1facf9fc 24783+}
24784+
24785+/* ---------------------------------------------------------------------- */
24786+
24787+/* module parameter */
24788+static char *aufs_sysrq_key = "a";
24789+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
24790+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
24791+
0c5527e5 24792+static void au_sysrq(int key __maybe_unused)
1facf9fc 24793+{
1facf9fc 24794+ struct au_sbinfo *sbinfo;
24795+
027c5e7a 24796+ lockdep_off();
53392da6 24797+ au_sbilist_lock();
e49829fe 24798+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 24799+ sysrq_sb(sbinfo->si_sb);
53392da6 24800+ au_sbilist_unlock();
027c5e7a 24801+ lockdep_on();
1facf9fc 24802+}
24803+
24804+static struct sysrq_key_op au_sysrq_op = {
24805+ .handler = au_sysrq,
24806+ .help_msg = "Aufs",
24807+ .action_msg = "Aufs",
24808+ .enable_mask = SYSRQ_ENABLE_DUMP
24809+};
24810+
24811+/* ---------------------------------------------------------------------- */
24812+
24813+int __init au_sysrq_init(void)
24814+{
24815+ int err;
24816+ char key;
24817+
24818+ err = -1;
24819+ key = *aufs_sysrq_key;
24820+ if ('a' <= key && key <= 'z')
24821+ err = register_sysrq_key(key, &au_sysrq_op);
24822+ if (unlikely(err))
4a4d8108 24823+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 24824+ return err;
24825+}
24826+
24827+void au_sysrq_fin(void)
24828+{
24829+ int err;
24830+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
24831+ if (unlikely(err))
4a4d8108 24832+ pr_err("err %d (ignored)\n", err);
1facf9fc 24833+}
7f207e10
AM
24834diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
24835--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24836+++ linux/fs/aufs/vdir.c 2011-08-24 13:30:24.734646739 +0200
7f207e10 24837@@ -0,0 +1,886 @@
1facf9fc 24838+/*
027c5e7a 24839+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24840+ *
24841+ * This program, aufs is free software; you can redistribute it and/or modify
24842+ * it under the terms of the GNU General Public License as published by
24843+ * the Free Software Foundation; either version 2 of the License, or
24844+ * (at your option) any later version.
dece6358
AM
24845+ *
24846+ * This program is distributed in the hope that it will be useful,
24847+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24848+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24849+ * GNU General Public License for more details.
24850+ *
24851+ * You should have received a copy of the GNU General Public License
24852+ * along with this program; if not, write to the Free Software
24853+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24854+ */
24855+
24856+/*
24857+ * virtual or vertical directory
24858+ */
24859+
dece6358 24860+#include <linux/hash.h>
1facf9fc 24861+#include "aufs.h"
24862+
dece6358 24863+static unsigned int calc_size(int nlen)
1facf9fc 24864+{
dece6358 24865+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 24866+}
24867+
24868+static int set_deblk_end(union au_vdir_deblk_p *p,
24869+ union au_vdir_deblk_p *deblk_end)
24870+{
24871+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
24872+ p->de->de_str.len = 0;
24873+ /* smp_mb(); */
24874+ return 0;
24875+ }
24876+ return -1; /* error */
24877+}
24878+
24879+/* returns true or false */
24880+static int is_deblk_end(union au_vdir_deblk_p *p,
24881+ union au_vdir_deblk_p *deblk_end)
24882+{
24883+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
24884+ return !p->de->de_str.len;
24885+ return 1;
24886+}
24887+
24888+static unsigned char *last_deblk(struct au_vdir *vdir)
24889+{
24890+ return vdir->vd_deblk[vdir->vd_nblk - 1];
24891+}
24892+
24893+/* ---------------------------------------------------------------------- */
24894+
1308ab2a 24895+/* estimate the appropriate size for name hash table */
24896+unsigned int au_rdhash_est(loff_t sz)
24897+{
24898+ unsigned int n;
24899+
24900+ n = UINT_MAX;
24901+ sz >>= 10;
24902+ if (sz < n)
24903+ n = sz;
24904+ if (sz < AUFS_RDHASH_DEF)
24905+ n = AUFS_RDHASH_DEF;
4a4d8108 24906+ /* pr_info("n %u\n", n); */
1308ab2a 24907+ return n;
24908+}
24909+
1facf9fc 24910+/*
24911+ * the allocated memory has to be freed by
dece6358 24912+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 24913+ */
dece6358 24914+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 24915+{
1facf9fc 24916+ struct hlist_head *head;
dece6358 24917+ unsigned int u;
1facf9fc 24918+
dece6358
AM
24919+ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp);
24920+ if (head) {
24921+ nhash->nh_num = num_hash;
24922+ nhash->nh_head = head;
24923+ for (u = 0; u < num_hash; u++)
1facf9fc 24924+ INIT_HLIST_HEAD(head++);
dece6358 24925+ return 0; /* success */
1facf9fc 24926+ }
1facf9fc 24927+
dece6358 24928+ return -ENOMEM;
1facf9fc 24929+}
24930+
dece6358
AM
24931+static void nhash_count(struct hlist_head *head)
24932+{
24933+#if 0
24934+ unsigned long n;
24935+ struct hlist_node *pos;
24936+
24937+ n = 0;
24938+ hlist_for_each(pos, head)
24939+ n++;
4a4d8108 24940+ pr_info("%lu\n", n);
dece6358
AM
24941+#endif
24942+}
24943+
24944+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 24945+{
1facf9fc 24946+ struct au_vdir_wh *tpos;
24947+ struct hlist_node *pos, *node;
24948+
dece6358
AM
24949+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
24950+ /* hlist_del(pos); */
24951+ kfree(tpos);
1facf9fc 24952+ }
24953+}
24954+
dece6358 24955+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 24956+{
dece6358
AM
24957+ struct au_vdir_dehstr *tpos;
24958+ struct hlist_node *pos, *node;
1facf9fc 24959+
dece6358
AM
24960+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
24961+ /* hlist_del(pos); */
4a4d8108 24962+ au_cache_free_vdir_dehstr(tpos);
1facf9fc 24963+ }
1facf9fc 24964+}
24965+
dece6358
AM
24966+static void au_nhash_do_free(struct au_nhash *nhash,
24967+ void (*free)(struct hlist_head *head))
1facf9fc 24968+{
1308ab2a 24969+ unsigned int n;
1facf9fc 24970+ struct hlist_head *head;
1facf9fc 24971+
dece6358 24972+ n = nhash->nh_num;
1308ab2a 24973+ if (!n)
24974+ return;
24975+
dece6358 24976+ head = nhash->nh_head;
1308ab2a 24977+ while (n-- > 0) {
dece6358
AM
24978+ nhash_count(head);
24979+ free(head++);
1facf9fc 24980+ }
dece6358 24981+ kfree(nhash->nh_head);
1facf9fc 24982+}
24983+
dece6358 24984+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 24985+{
dece6358
AM
24986+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
24987+}
1facf9fc 24988+
dece6358
AM
24989+static void au_nhash_de_free(struct au_nhash *delist)
24990+{
24991+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 24992+}
24993+
24994+/* ---------------------------------------------------------------------- */
24995+
24996+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
24997+ int limit)
24998+{
24999+ int num;
25000+ unsigned int u, n;
25001+ struct hlist_head *head;
25002+ struct au_vdir_wh *tpos;
25003+ struct hlist_node *pos;
25004+
25005+ num = 0;
25006+ n = whlist->nh_num;
25007+ head = whlist->nh_head;
1308ab2a 25008+ for (u = 0; u < n; u++, head++)
1facf9fc 25009+ hlist_for_each_entry(tpos, pos, head, wh_hash)
25010+ if (tpos->wh_bindex == btgt && ++num > limit)
25011+ return 1;
1facf9fc 25012+ return 0;
25013+}
25014+
25015+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 25016+ unsigned char *name,
1facf9fc 25017+ unsigned int len)
25018+{
dece6358
AM
25019+ unsigned int v;
25020+ /* const unsigned int magic_bit = 12; */
25021+
1308ab2a 25022+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
25023+
dece6358
AM
25024+ v = 0;
25025+ while (len--)
25026+ v += *name++;
25027+ /* v = hash_long(v, magic_bit); */
25028+ v %= nhash->nh_num;
25029+ return nhash->nh_head + v;
25030+}
25031+
25032+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
25033+ int nlen)
25034+{
25035+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 25036+}
25037+
25038+/* returns found or not */
dece6358 25039+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 25040+{
25041+ struct hlist_head *head;
25042+ struct au_vdir_wh *tpos;
25043+ struct hlist_node *pos;
25044+ struct au_vdir_destr *str;
25045+
dece6358 25046+ head = au_name_hash(whlist, name, nlen);
1facf9fc 25047+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
25048+ str = &tpos->wh_str;
25049+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
25050+ if (au_nhash_test_name(str, name, nlen))
25051+ return 1;
25052+ }
25053+ return 0;
25054+}
25055+
25056+/* returns found(true) or not */
25057+static int test_known(struct au_nhash *delist, char *name, int nlen)
25058+{
25059+ struct hlist_head *head;
25060+ struct au_vdir_dehstr *tpos;
25061+ struct hlist_node *pos;
25062+ struct au_vdir_destr *str;
25063+
25064+ head = au_name_hash(delist, name, nlen);
25065+ hlist_for_each_entry(tpos, pos, head, hash) {
25066+ str = tpos->str;
25067+ AuDbg("%.*s\n", str->len, str->name);
25068+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 25069+ return 1;
25070+ }
25071+ return 0;
25072+}
25073+
dece6358
AM
25074+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
25075+ unsigned char d_type)
25076+{
25077+#ifdef CONFIG_AUFS_SHWH
25078+ wh->wh_ino = ino;
25079+ wh->wh_type = d_type;
25080+#endif
25081+}
25082+
25083+/* ---------------------------------------------------------------------- */
25084+
25085+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
25086+ unsigned int d_type, aufs_bindex_t bindex,
25087+ unsigned char shwh)
1facf9fc 25088+{
25089+ int err;
25090+ struct au_vdir_destr *str;
25091+ struct au_vdir_wh *wh;
25092+
dece6358 25093+ AuDbg("%.*s\n", nlen, name);
1308ab2a 25094+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
25095+
1facf9fc 25096+ err = -ENOMEM;
dece6358 25097+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 25098+ if (unlikely(!wh))
25099+ goto out;
25100+
25101+ err = 0;
25102+ wh->wh_bindex = bindex;
dece6358
AM
25103+ if (shwh)
25104+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 25105+ str = &wh->wh_str;
dece6358
AM
25106+ str->len = nlen;
25107+ memcpy(str->name, name, nlen);
25108+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 25109+ /* smp_mb(); */
25110+
4f0767ce 25111+out:
1facf9fc 25112+ return err;
25113+}
25114+
1facf9fc 25115+static int append_deblk(struct au_vdir *vdir)
25116+{
25117+ int err;
dece6358 25118+ unsigned long ul;
1facf9fc 25119+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
25120+ union au_vdir_deblk_p p, deblk_end;
25121+ unsigned char **o;
25122+
25123+ err = -ENOMEM;
dece6358
AM
25124+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
25125+ GFP_NOFS);
1facf9fc 25126+ if (unlikely(!o))
25127+ goto out;
25128+
25129+ vdir->vd_deblk = o;
25130+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
25131+ if (p.deblk) {
25132+ ul = vdir->vd_nblk++;
25133+ vdir->vd_deblk[ul] = p.deblk;
25134+ vdir->vd_last.ul = ul;
25135+ vdir->vd_last.p.deblk = p.deblk;
25136+ deblk_end.deblk = p.deblk + deblk_sz;
25137+ err = set_deblk_end(&p, &deblk_end);
25138+ }
25139+
4f0767ce 25140+out:
1facf9fc 25141+ return err;
25142+}
25143+
dece6358
AM
25144+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
25145+ unsigned int d_type, struct au_nhash *delist)
25146+{
25147+ int err;
25148+ unsigned int sz;
25149+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
25150+ union au_vdir_deblk_p p, *room, deblk_end;
25151+ struct au_vdir_dehstr *dehstr;
25152+
25153+ p.deblk = last_deblk(vdir);
25154+ deblk_end.deblk = p.deblk + deblk_sz;
25155+ room = &vdir->vd_last.p;
25156+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
25157+ || !is_deblk_end(room, &deblk_end));
25158+
25159+ sz = calc_size(nlen);
25160+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
25161+ err = append_deblk(vdir);
25162+ if (unlikely(err))
25163+ goto out;
25164+
25165+ p.deblk = last_deblk(vdir);
25166+ deblk_end.deblk = p.deblk + deblk_sz;
25167+ /* smp_mb(); */
25168+ AuDebugOn(room->deblk != p.deblk);
25169+ }
25170+
25171+ err = -ENOMEM;
4a4d8108 25172+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
25173+ if (unlikely(!dehstr))
25174+ goto out;
25175+
25176+ dehstr->str = &room->de->de_str;
25177+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
25178+ room->de->de_ino = ino;
25179+ room->de->de_type = d_type;
25180+ room->de->de_str.len = nlen;
25181+ memcpy(room->de->de_str.name, name, nlen);
25182+
25183+ err = 0;
25184+ room->deblk += sz;
25185+ if (unlikely(set_deblk_end(room, &deblk_end)))
25186+ err = append_deblk(vdir);
25187+ /* smp_mb(); */
25188+
4f0767ce 25189+out:
dece6358
AM
25190+ return err;
25191+}
25192+
25193+/* ---------------------------------------------------------------------- */
25194+
25195+void au_vdir_free(struct au_vdir *vdir)
25196+{
25197+ unsigned char **deblk;
25198+
25199+ deblk = vdir->vd_deblk;
25200+ while (vdir->vd_nblk--)
25201+ kfree(*deblk++);
25202+ kfree(vdir->vd_deblk);
25203+ au_cache_free_vdir(vdir);
25204+}
25205+
1308ab2a 25206+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 25207+{
25208+ struct au_vdir *vdir;
1308ab2a 25209+ struct super_block *sb;
1facf9fc 25210+ int err;
25211+
1308ab2a 25212+ sb = file->f_dentry->d_sb;
dece6358
AM
25213+ SiMustAnyLock(sb);
25214+
1facf9fc 25215+ err = -ENOMEM;
25216+ vdir = au_cache_alloc_vdir();
25217+ if (unlikely(!vdir))
25218+ goto out;
25219+
25220+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
25221+ if (unlikely(!vdir->vd_deblk))
25222+ goto out_free;
25223+
25224+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 25225+ if (!vdir->vd_deblk_sz) {
25226+ /* estimate the apropriate size for deblk */
25227+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 25228+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 25229+ }
1facf9fc 25230+ vdir->vd_nblk = 0;
25231+ vdir->vd_version = 0;
25232+ vdir->vd_jiffy = 0;
25233+ err = append_deblk(vdir);
25234+ if (!err)
25235+ return vdir; /* success */
25236+
25237+ kfree(vdir->vd_deblk);
25238+
4f0767ce 25239+out_free:
1facf9fc 25240+ au_cache_free_vdir(vdir);
4f0767ce 25241+out:
1facf9fc 25242+ vdir = ERR_PTR(err);
25243+ return vdir;
25244+}
25245+
25246+static int reinit_vdir(struct au_vdir *vdir)
25247+{
25248+ int err;
25249+ union au_vdir_deblk_p p, deblk_end;
25250+
25251+ while (vdir->vd_nblk > 1) {
25252+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
25253+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
25254+ vdir->vd_nblk--;
25255+ }
25256+ p.deblk = vdir->vd_deblk[0];
25257+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
25258+ err = set_deblk_end(&p, &deblk_end);
25259+ /* keep vd_dblk_sz */
25260+ vdir->vd_last.ul = 0;
25261+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25262+ vdir->vd_version = 0;
25263+ vdir->vd_jiffy = 0;
25264+ /* smp_mb(); */
25265+ return err;
25266+}
25267+
25268+/* ---------------------------------------------------------------------- */
25269+
/*
 * Bit flags kept in fillvdir_arg.flags.
 * SHWH collapses to 0 when CONFIG_AUFS_SHWH is not configured, so tests
 * of that flag are always false and set/clear become no-ops.
 */
#define AuFillVdir_CALLED	1
#define AuFillVdir_WHABLE	(1 << 1)
#ifdef CONFIG_AUFS_SHWH
#define AuFillVdir_SHWH		(1 << 2)
#else
#define AuFillVdir_SHWH		0
#endif

/* test / set / clear one named flag */
#define au_ftest_fillvdir(flags, name)	((flags) & AuFillVdir_##name)
#define au_fset_fillvdir(flags, name) \
	do { (flags) |= AuFillVdir_##name; } while (0)
#define au_fclr_fillvdir(flags, name) \
	do { (flags) &= ~AuFillVdir_##name; } while (0)
25283+
1facf9fc 25284+struct fillvdir_arg {
25285+ struct file *file;
25286+ struct au_vdir *vdir;
dece6358
AM
25287+ struct au_nhash delist;
25288+ struct au_nhash whlist;
1facf9fc 25289+ aufs_bindex_t bindex;
25290+ unsigned int flags;
25291+ int err;
25292+};
25293+
dece6358 25294+static int fillvdir(void *__arg, const char *__name, int nlen,
1facf9fc 25295+ loff_t offset __maybe_unused, u64 h_ino,
25296+ unsigned int d_type)
25297+{
25298+ struct fillvdir_arg *arg = __arg;
25299+ char *name = (void *)__name;
25300+ struct super_block *sb;
1facf9fc 25301+ ino_t ino;
dece6358 25302+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 25303+
1facf9fc 25304+ arg->err = 0;
dece6358 25305+ sb = arg->file->f_dentry->d_sb;
1facf9fc 25306+ au_fset_fillvdir(arg->flags, CALLED);
25307+ /* smp_mb(); */
dece6358 25308+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 25309+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
25310+ if (test_known(&arg->delist, name, nlen)
25311+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
25312+ goto out; /* already exists or whiteouted */
1facf9fc 25313+
25314+ sb = arg->file->f_dentry->d_sb;
dece6358 25315+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
25316+ if (!arg->err) {
25317+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25318+ d_type = DT_UNKNOWN;
dece6358
AM
25319+ arg->err = append_de(arg->vdir, name, nlen, ino,
25320+ d_type, &arg->delist);
4a4d8108 25321+ }
1facf9fc 25322+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
25323+ name += AUFS_WH_PFX_LEN;
dece6358
AM
25324+ nlen -= AUFS_WH_PFX_LEN;
25325+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
25326+ goto out; /* already whiteouted */
1facf9fc 25327+
dece6358
AM
25328+ if (shwh)
25329+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
25330+ &ino);
4a4d8108
AM
25331+ if (!arg->err) {
25332+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
25333+ d_type = DT_UNKNOWN;
1facf9fc 25334+ arg->err = au_nhash_append_wh
dece6358
AM
25335+ (&arg->whlist, name, nlen, ino, d_type,
25336+ arg->bindex, shwh);
4a4d8108 25337+ }
1facf9fc 25338+ }
25339+
4f0767ce 25340+out:
1facf9fc 25341+ if (!arg->err)
25342+ arg->vdir->vd_jiffy = jiffies;
25343+ /* smp_mb(); */
25344+ AuTraceErr(arg->err);
25345+ return arg->err;
25346+}
25347+
dece6358
AM
/*
 * With the SHWH option, make the collected whiteouts visible: every entry
 * of @whlist is appended to @vdir under its prefixed (AUFS_WH_PFX) name.
 * Without CONFIG_AUFS_SHWH this is a no-op returning 0.
 * Returns 0, -ENOMEM when the name buffer cannot be allocated, or the
 * first error reported by append_de().
 */
static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
			  struct au_nhash *whlist, struct au_nhash *delist)
{
#ifdef CONFIG_AUFS_SHWH
	int err;
	unsigned int nh, u;
	struct hlist_head *head;
	struct au_vdir_wh *tpos;
	struct hlist_node *pos, *n;
	char *p, *o;
	struct au_vdir_destr *destr;

	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));

	err = -ENOMEM;
	o = p = __getname_gfp(GFP_NOFS);
	if (unlikely(!p))
		goto out;

	err = 0;
	nh = whlist->nh_num;
	/* 'o' keeps the buffer start, 'p' points just past the prefix */
	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	p += AUFS_WH_PFX_LEN;
	/*
	 * Stop walking the buckets as soon as an append fails; otherwise a
	 * later successful append would overwrite the error.
	 */
	for (u = 0; !err && u < nh; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
			destr = &tpos->wh_str;
			memcpy(p, destr->name, destr->len);
			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
					tpos->wh_ino, tpos->wh_type, delist);
			if (unlikely(err))
				break;
		}
	}

	__putname(o);

out:
	AuTraceErr(err);
	return err;
#else
	return 0;
#endif
}
25392+
1facf9fc 25393+static int au_do_read_vdir(struct fillvdir_arg *arg)
25394+{
25395+ int err;
dece6358 25396+ unsigned int rdhash;
1facf9fc 25397+ loff_t offset;
dece6358
AM
25398+ aufs_bindex_t bend, bindex, bstart;
25399+ unsigned char shwh;
1facf9fc 25400+ struct file *hf, *file;
25401+ struct super_block *sb;
25402+
1facf9fc 25403+ file = arg->file;
25404+ sb = file->f_dentry->d_sb;
dece6358
AM
25405+ SiMustAnyLock(sb);
25406+
25407+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 25408+ if (!rdhash)
25409+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
25410+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
25411+ if (unlikely(err))
1facf9fc 25412+ goto out;
dece6358
AM
25413+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
25414+ if (unlikely(err))
1facf9fc 25415+ goto out_delist;
25416+
25417+ err = 0;
25418+ arg->flags = 0;
dece6358
AM
25419+ shwh = 0;
25420+ if (au_opt_test(au_mntflags(sb), SHWH)) {
25421+ shwh = 1;
25422+ au_fset_fillvdir(arg->flags, SHWH);
25423+ }
25424+ bstart = au_fbstart(file);
4a4d8108 25425+ bend = au_fbend_dir(file);
dece6358 25426+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 25427+ hf = au_hf_dir(file, bindex);
1facf9fc 25428+ if (!hf)
25429+ continue;
25430+
25431+ offset = vfsub_llseek(hf, 0, SEEK_SET);
25432+ err = offset;
25433+ if (unlikely(offset))
25434+ break;
25435+
25436+ arg->bindex = bindex;
25437+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
25438+ if (shwh
25439+ || (bindex != bend
25440+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 25441+ au_fset_fillvdir(arg->flags, WHABLE);
25442+ do {
25443+ arg->err = 0;
25444+ au_fclr_fillvdir(arg->flags, CALLED);
25445+ /* smp_mb(); */
25446+ err = vfsub_readdir(hf, fillvdir, arg);
25447+ if (err >= 0)
25448+ err = arg->err;
25449+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
25450+ }
dece6358
AM
25451+
25452+ if (!err && shwh)
25453+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
25454+
25455+ au_nhash_wh_free(&arg->whlist);
1facf9fc 25456+
4f0767ce 25457+out_delist:
dece6358 25458+ au_nhash_de_free(&arg->delist);
4f0767ce 25459+out:
1facf9fc 25460+ return err;
25461+}
25462+
25463+static int read_vdir(struct file *file, int may_read)
25464+{
25465+ int err;
25466+ unsigned long expire;
25467+ unsigned char do_read;
25468+ struct fillvdir_arg arg;
25469+ struct inode *inode;
25470+ struct au_vdir *vdir, *allocated;
25471+
25472+ err = 0;
25473+ inode = file->f_dentry->d_inode;
25474+ IMustLock(inode);
dece6358
AM
25475+ SiMustAnyLock(inode->i_sb);
25476+
1facf9fc 25477+ allocated = NULL;
25478+ do_read = 0;
25479+ expire = au_sbi(inode->i_sb)->si_rdcache;
25480+ vdir = au_ivdir(inode);
25481+ if (!vdir) {
25482+ do_read = 1;
1308ab2a 25483+ vdir = alloc_vdir(file);
1facf9fc 25484+ err = PTR_ERR(vdir);
25485+ if (IS_ERR(vdir))
25486+ goto out;
25487+ err = 0;
25488+ allocated = vdir;
25489+ } else if (may_read
25490+ && (inode->i_version != vdir->vd_version
25491+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
25492+ do_read = 1;
25493+ err = reinit_vdir(vdir);
25494+ if (unlikely(err))
25495+ goto out;
25496+ }
25497+
25498+ if (!do_read)
25499+ return 0; /* success */
25500+
25501+ arg.file = file;
25502+ arg.vdir = vdir;
25503+ err = au_do_read_vdir(&arg);
25504+ if (!err) {
25505+ /* file->f_pos = 0; */
25506+ vdir->vd_version = inode->i_version;
25507+ vdir->vd_last.ul = 0;
25508+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25509+ if (allocated)
25510+ au_set_ivdir(inode, allocated);
25511+ } else if (allocated)
25512+ au_vdir_free(allocated);
25513+
4f0767ce 25514+out:
1facf9fc 25515+ return err;
25516+}
25517+
25518+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
25519+{
25520+ int err, rerr;
25521+ unsigned long ul, n;
25522+ const unsigned int deblk_sz = src->vd_deblk_sz;
25523+
25524+ AuDebugOn(tgt->vd_nblk != 1);
25525+
25526+ err = -ENOMEM;
25527+ if (tgt->vd_nblk < src->vd_nblk) {
25528+ unsigned char **p;
25529+
dece6358
AM
25530+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
25531+ GFP_NOFS);
1facf9fc 25532+ if (unlikely(!p))
25533+ goto out;
25534+ tgt->vd_deblk = p;
25535+ }
25536+
1308ab2a 25537+ if (tgt->vd_deblk_sz != deblk_sz) {
25538+ unsigned char *p;
25539+
25540+ tgt->vd_deblk_sz = deblk_sz;
25541+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
25542+ if (unlikely(!p))
25543+ goto out;
25544+ tgt->vd_deblk[0] = p;
25545+ }
1facf9fc 25546+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 25547+ tgt->vd_version = src->vd_version;
25548+ tgt->vd_jiffy = src->vd_jiffy;
25549+
25550+ n = src->vd_nblk;
25551+ for (ul = 1; ul < n; ul++) {
dece6358
AM
25552+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
25553+ GFP_NOFS);
25554+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 25555+ goto out;
1308ab2a 25556+ tgt->vd_nblk++;
1facf9fc 25557+ }
1308ab2a 25558+ tgt->vd_nblk = n;
25559+ tgt->vd_last.ul = tgt->vd_last.ul;
25560+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
25561+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
25562+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 25563+ /* smp_mb(); */
25564+ return 0; /* success */
25565+
4f0767ce 25566+out:
1facf9fc 25567+ rerr = reinit_vdir(tgt);
25568+ BUG_ON(rerr);
25569+ return err;
25570+}
25571+
25572+int au_vdir_init(struct file *file)
25573+{
25574+ int err;
25575+ struct inode *inode;
25576+ struct au_vdir *vdir_cache, *allocated;
25577+
25578+ err = read_vdir(file, !file->f_pos);
25579+ if (unlikely(err))
25580+ goto out;
25581+
25582+ allocated = NULL;
25583+ vdir_cache = au_fvdir_cache(file);
25584+ if (!vdir_cache) {
1308ab2a 25585+ vdir_cache = alloc_vdir(file);
1facf9fc 25586+ err = PTR_ERR(vdir_cache);
25587+ if (IS_ERR(vdir_cache))
25588+ goto out;
25589+ allocated = vdir_cache;
25590+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
25591+ err = reinit_vdir(vdir_cache);
25592+ if (unlikely(err))
25593+ goto out;
25594+ } else
25595+ return 0; /* success */
25596+
25597+ inode = file->f_dentry->d_inode;
25598+ err = copy_vdir(vdir_cache, au_ivdir(inode));
25599+ if (!err) {
25600+ file->f_version = inode->i_version;
25601+ if (allocated)
25602+ au_set_fvdir_cache(file, allocated);
25603+ } else if (allocated)
25604+ au_vdir_free(allocated);
25605+
4f0767ce 25606+out:
1facf9fc 25607+ return err;
25608+}
25609+
25610+static loff_t calc_offset(struct au_vdir *vdir)
25611+{
25612+ loff_t offset;
25613+ union au_vdir_deblk_p p;
25614+
25615+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
25616+ offset = vdir->vd_last.p.deblk - p.deblk;
25617+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
25618+ return offset;
25619+}
25620+
25621+/* returns true or false */
25622+static int seek_vdir(struct file *file)
25623+{
25624+ int valid;
25625+ unsigned int deblk_sz;
25626+ unsigned long ul, n;
25627+ loff_t offset;
25628+ union au_vdir_deblk_p p, deblk_end;
25629+ struct au_vdir *vdir_cache;
25630+
25631+ valid = 1;
25632+ vdir_cache = au_fvdir_cache(file);
25633+ offset = calc_offset(vdir_cache);
25634+ AuDbg("offset %lld\n", offset);
25635+ if (file->f_pos == offset)
25636+ goto out;
25637+
25638+ vdir_cache->vd_last.ul = 0;
25639+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
25640+ if (!file->f_pos)
25641+ goto out;
25642+
25643+ valid = 0;
25644+ deblk_sz = vdir_cache->vd_deblk_sz;
25645+ ul = div64_u64(file->f_pos, deblk_sz);
25646+ AuDbg("ul %lu\n", ul);
25647+ if (ul >= vdir_cache->vd_nblk)
25648+ goto out;
25649+
25650+ n = vdir_cache->vd_nblk;
25651+ for (; ul < n; ul++) {
25652+ p.deblk = vdir_cache->vd_deblk[ul];
25653+ deblk_end.deblk = p.deblk + deblk_sz;
25654+ offset = ul;
25655+ offset *= deblk_sz;
25656+ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
25657+ unsigned int l;
25658+
25659+ l = calc_size(p.de->de_str.len);
25660+ offset += l;
25661+ p.deblk += l;
25662+ }
25663+ if (!is_deblk_end(&p, &deblk_end)) {
25664+ valid = 1;
25665+ vdir_cache->vd_last.ul = ul;
25666+ vdir_cache->vd_last.p = p;
25667+ break;
25668+ }
25669+ }
25670+
4f0767ce 25671+out:
1facf9fc 25672+ /* smp_mb(); */
25673+ AuTraceErr(!valid);
25674+ return valid;
25675+}
25676+
25677+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
25678+{
25679+ int err;
25680+ unsigned int l, deblk_sz;
25681+ union au_vdir_deblk_p deblk_end;
25682+ struct au_vdir *vdir_cache;
25683+ struct au_vdir_de *de;
25684+
25685+ vdir_cache = au_fvdir_cache(file);
25686+ if (!seek_vdir(file))
25687+ return 0;
25688+
25689+ deblk_sz = vdir_cache->vd_deblk_sz;
25690+ while (1) {
25691+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25692+ deblk_end.deblk += deblk_sz;
25693+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
25694+ de = vdir_cache->vd_last.p.de;
25695+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
25696+ de->de_str.len, de->de_str.name, file->f_pos,
25697+ (unsigned long)de->de_ino, de->de_type);
25698+ err = filldir(dirent, de->de_str.name, de->de_str.len,
25699+ file->f_pos, de->de_ino, de->de_type);
25700+ if (unlikely(err)) {
25701+ AuTraceErr(err);
25702+ /* todo: ignore the error caused by udba? */
25703+ /* return err; */
25704+ return 0;
25705+ }
25706+
25707+ l = calc_size(de->de_str.len);
25708+ vdir_cache->vd_last.p.deblk += l;
25709+ file->f_pos += l;
25710+ }
25711+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
25712+ vdir_cache->vd_last.ul++;
25713+ vdir_cache->vd_last.p.deblk
25714+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25715+ file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
25716+ continue;
25717+ }
25718+ break;
25719+ }
25720+
25721+ /* smp_mb(); */
25722+ return 0;
25723+}
7f207e10
AM
25724diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
25725--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
25726+++ linux/fs/aufs/vfsub.c 2011-08-24 13:30:24.734646739 +0200
25727@@ -0,0 +1,836 @@
1facf9fc 25728+/*
027c5e7a 25729+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 25730+ *
25731+ * This program, aufs is free software; you can redistribute it and/or modify
25732+ * it under the terms of the GNU General Public License as published by
25733+ * the Free Software Foundation; either version 2 of the License, or
25734+ * (at your option) any later version.
dece6358
AM
25735+ *
25736+ * This program is distributed in the hope that it will be useful,
25737+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25738+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25739+ * GNU General Public License for more details.
25740+ *
25741+ * You should have received a copy of the GNU General Public License
25742+ * along with this program; if not, write to the Free Software
25743+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 25744+ */
25745+
25746+/*
25747+ * sub-routines for VFS
25748+ */
25749+
4a4d8108 25750+#include <linux/file.h>
1308ab2a 25751+#include <linux/ima.h>
dece6358
AM
25752+#include <linux/namei.h>
25753+#include <linux/security.h>
25754+#include <linux/splice.h>
1facf9fc 25755+#include <linux/uaccess.h>
25756+#include "aufs.h"
25757+
25758+int vfsub_update_h_iattr(struct path *h_path, int *did)
25759+{
25760+ int err;
25761+ struct kstat st;
25762+ struct super_block *h_sb;
25763+
25764+ /* for remote fs, leave work for its getattr or d_revalidate */
25765+ /* for bad i_attr fs, handle them in aufs_getattr() */
25766+ /* still some fs may acquire i_mutex. we need to skip them */
25767+ err = 0;
25768+ if (!did)
25769+ did = &err;
25770+ h_sb = h_path->dentry->d_sb;
25771+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
25772+ if (*did)
25773+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
25774+
25775+ return err;
25776+}
25777+
25778+/* ---------------------------------------------------------------------- */
25779+
4a4d8108 25780+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 25781+{
25782+ struct file *file;
25783+
4a4d8108 25784+ path_get(path);
0c5527e5 25785+ file = dentry_open(path->dentry, path->mnt,
2cbb1c4b 25786+ flags /* | __FMODE_NONOTIFY */,
7f207e10 25787+ current_cred());
2cbb1c4b
JR
25788+ if (!IS_ERR_OR_NULL(file)
25789+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
25790+ i_readcount_inc(path->dentry->d_inode);
4a4d8108 25791+
1308ab2a 25792+ return file;
25793+}
25794+
1facf9fc 25795+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
25796+{
25797+ struct file *file;
25798+
2cbb1c4b 25799+ lockdep_off();
7f207e10 25800+ file = filp_open(path,
2cbb1c4b 25801+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 25802+ mode);
2cbb1c4b 25803+ lockdep_on();
1facf9fc 25804+ if (IS_ERR(file))
25805+ goto out;
25806+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
25807+
4f0767ce 25808+out:
1facf9fc 25809+ return file;
25810+}
25811+
25812+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
25813+{
25814+ int err;
25815+
1facf9fc 25816+ err = kern_path(name, flags, path);
1facf9fc 25817+ if (!err && path->dentry->d_inode)
25818+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
25819+ return err;
25820+}
25821+
25822+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
25823+ int len)
25824+{
25825+ struct path path = {
25826+ .mnt = NULL
25827+ };
25828+
1308ab2a 25829+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 25830+ IMustLock(parent->d_inode);
25831+
25832+ path.dentry = lookup_one_len(name, parent, len);
25833+ if (IS_ERR(path.dentry))
25834+ goto out;
25835+ if (path.dentry->d_inode)
25836+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25837+
4f0767ce 25838+out:
4a4d8108 25839+ AuTraceErrPtr(path.dentry);
1facf9fc 25840+ return path.dentry;
25841+}
25842+
25843+struct dentry *vfsub_lookup_hash(struct nameidata *nd)
25844+{
25845+ struct path path = {
25846+ .mnt = nd->path.mnt
25847+ };
25848+
25849+ IMustLock(nd->path.dentry->d_inode);
25850+
25851+ path.dentry = lookup_hash(nd);
4a4d8108
AM
25852+ if (IS_ERR(path.dentry))
25853+ goto out;
25854+ if (path.dentry->d_inode)
1facf9fc 25855+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25856+
4f0767ce 25857+out:
4a4d8108 25858+ AuTraceErrPtr(path.dentry);
1facf9fc 25859+ return path.dentry;
25860+}
25861+
2cbb1c4b
JR
25862+/*
25863+ * this is "VFS:__lookup_one_len()" which was removed and merged into
25864+ * VFS:lookup_one_len() by the commit.
25865+ * 6a96ba5 2011-03-14 kill __lookup_one_len()
25866+ * this function should always be equivalent to the corresponding part in
25867+ * VFS:lookup_one_len().
25868+ */
25869+int vfsub_name_hash(const char *name, struct qstr *this, int len)
25870+{
25871+ unsigned long hash;
25872+ unsigned int c;
25873+
25874+ this->name = name;
25875+ this->len = len;
25876+ if (!len)
25877+ return -EACCES;
25878+
25879+ hash = init_name_hash();
25880+ while (len--) {
25881+ c = *(const unsigned char *)name++;
25882+ if (c == '/' || c == '\0')
25883+ return -EACCES;
25884+ hash = partial_name_hash(c, hash);
25885+ }
25886+ this->hash = end_name_hash(hash);
25887+ return 0;
25888+}
25889+
1facf9fc 25890+/* ---------------------------------------------------------------------- */
25891+
/*
 * lock_rename() on the two parents with lockdep switched off, followed by
 * au_hn_suspend() on their hinodes (only once when both are the same).
 * Returns whatever lock_rename() returned.
 */
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
				 struct dentry *d2, struct au_hinode *hdir2)
{
	struct dentry *trap;

	lockdep_off();
	trap = lock_rename(d1, d2);
	lockdep_on();

	au_hn_suspend(hdir1);
	if (hdir2 != hdir1)
		au_hn_suspend(hdir2);

	return trap;
}
25906+
/*
 * Counterpart of vfsub_lock_rename(): au_hn_resume() on the hinodes (only
 * once when both are the same), then unlock_rename() with lockdep
 * switched off.
 */
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
			 struct dentry *d2, struct au_hinode *hdir2)
{
	au_hn_resume(hdir1);
	if (hdir2 != hdir1)
		au_hn_resume(hdir2);

	lockdep_off();
	unlock_rename(d1, d2);
	lockdep_on();
}
25917+
25918+/* ---------------------------------------------------------------------- */
25919+
25920+int vfsub_create(struct inode *dir, struct path *path, int mode)
25921+{
25922+ int err;
25923+ struct dentry *d;
25924+
25925+ IMustLock(dir);
25926+
25927+ d = path->dentry;
25928+ path->dentry = d->d_parent;
b752ccd1 25929+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 25930+ path->dentry = d;
25931+ if (unlikely(err))
25932+ goto out;
25933+
25934+ if (au_test_fs_null_nd(dir->i_sb))
25935+ err = vfs_create(dir, path->dentry, mode, NULL);
25936+ else {
25937+ struct nameidata h_nd;
25938+
25939+ memset(&h_nd, 0, sizeof(h_nd));
25940+ h_nd.flags = LOOKUP_CREATE;
25941+ h_nd.intent.open.flags = O_CREAT
25942+ | vfsub_fmode_to_uint(FMODE_READ);
25943+ h_nd.intent.open.create_mode = mode;
25944+ h_nd.path.dentry = path->dentry->d_parent;
25945+ h_nd.path.mnt = path->mnt;
25946+ path_get(&h_nd.path);
25947+ err = vfs_create(dir, path->dentry, mode, &h_nd);
25948+ path_put(&h_nd.path);
25949+ }
25950+
25951+ if (!err) {
25952+ struct path tmp = *path;
25953+ int did;
25954+
25955+ vfsub_update_h_iattr(&tmp, &did);
25956+ if (did) {
25957+ tmp.dentry = path->dentry->d_parent;
25958+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25959+ }
25960+ /*ignore*/
25961+ }
25962+
4f0767ce 25963+out:
1facf9fc 25964+ return err;
25965+}
25966+
25967+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
25968+{
25969+ int err;
25970+ struct dentry *d;
25971+
25972+ IMustLock(dir);
25973+
25974+ d = path->dentry;
25975+ path->dentry = d->d_parent;
b752ccd1 25976+ err = security_path_symlink(path, d, symname);
1facf9fc 25977+ path->dentry = d;
25978+ if (unlikely(err))
25979+ goto out;
25980+
25981+ err = vfs_symlink(dir, path->dentry, symname);
25982+ if (!err) {
25983+ struct path tmp = *path;
25984+ int did;
25985+
25986+ vfsub_update_h_iattr(&tmp, &did);
25987+ if (did) {
25988+ tmp.dentry = path->dentry->d_parent;
25989+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25990+ }
25991+ /*ignore*/
25992+ }
25993+
4f0767ce 25994+out:
1facf9fc 25995+ return err;
25996+}
25997+
25998+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
25999+{
26000+ int err;
26001+ struct dentry *d;
26002+
26003+ IMustLock(dir);
26004+
26005+ d = path->dentry;
26006+ path->dentry = d->d_parent;
027c5e7a 26007+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 26008+ path->dentry = d;
26009+ if (unlikely(err))
26010+ goto out;
26011+
26012+ err = vfs_mknod(dir, path->dentry, mode, dev);
26013+ if (!err) {
26014+ struct path tmp = *path;
26015+ int did;
26016+
26017+ vfsub_update_h_iattr(&tmp, &did);
26018+ if (did) {
26019+ tmp.dentry = path->dentry->d_parent;
26020+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26021+ }
26022+ /*ignore*/
26023+ }
26024+
4f0767ce 26025+out:
1facf9fc 26026+ return err;
26027+}
26028+
26029+static int au_test_nlink(struct inode *inode)
26030+{
26031+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
26032+
26033+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
26034+ || inode->i_nlink < link_max)
26035+ return 0;
26036+ return -EMLINK;
26037+}
26038+
26039+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
26040+{
26041+ int err;
26042+ struct dentry *d;
26043+
26044+ IMustLock(dir);
26045+
26046+ err = au_test_nlink(src_dentry->d_inode);
26047+ if (unlikely(err))
26048+ return err;
26049+
26050+ d = path->dentry;
26051+ path->dentry = d->d_parent;
b752ccd1 26052+ err = security_path_link(src_dentry, path, d);
1facf9fc 26053+ path->dentry = d;
26054+ if (unlikely(err))
26055+ goto out;
26056+
2cbb1c4b 26057+ lockdep_off();
1facf9fc 26058+ err = vfs_link(src_dentry, dir, path->dentry);
2cbb1c4b 26059+ lockdep_on();
1facf9fc 26060+ if (!err) {
26061+ struct path tmp = *path;
26062+ int did;
26063+
26064+ /* fuse has different memory inode for the same inumber */
26065+ vfsub_update_h_iattr(&tmp, &did);
26066+ if (did) {
26067+ tmp.dentry = path->dentry->d_parent;
26068+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26069+ tmp.dentry = src_dentry;
26070+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26071+ }
26072+ /*ignore*/
26073+ }
26074+
4f0767ce 26075+out:
1facf9fc 26076+ return err;
26077+}
26078+
26079+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
26080+ struct inode *dir, struct path *path)
26081+{
26082+ int err;
26083+ struct path tmp = {
26084+ .mnt = path->mnt
26085+ };
26086+ struct dentry *d;
26087+
26088+ IMustLock(dir);
26089+ IMustLock(src_dir);
26090+
26091+ d = path->dentry;
26092+ path->dentry = d->d_parent;
26093+ tmp.dentry = src_dentry->d_parent;
b752ccd1 26094+ err = security_path_rename(&tmp, src_dentry, path, d);
1facf9fc 26095+ path->dentry = d;
26096+ if (unlikely(err))
26097+ goto out;
26098+
2cbb1c4b 26099+ lockdep_off();
1facf9fc 26100+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
2cbb1c4b 26101+ lockdep_on();
1facf9fc 26102+ if (!err) {
26103+ int did;
26104+
26105+ tmp.dentry = d->d_parent;
26106+ vfsub_update_h_iattr(&tmp, &did);
26107+ if (did) {
26108+ tmp.dentry = src_dentry;
26109+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26110+ tmp.dentry = src_dentry->d_parent;
26111+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26112+ }
26113+ /*ignore*/
26114+ }
26115+
4f0767ce 26116+out:
1facf9fc 26117+ return err;
26118+}
26119+
26120+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
26121+{
26122+ int err;
26123+ struct dentry *d;
26124+
26125+ IMustLock(dir);
26126+
26127+ d = path->dentry;
26128+ path->dentry = d->d_parent;
b752ccd1 26129+ err = security_path_mkdir(path, d, mode);
1facf9fc 26130+ path->dentry = d;
26131+ if (unlikely(err))
26132+ goto out;
26133+
26134+ err = vfs_mkdir(dir, path->dentry, mode);
26135+ if (!err) {
26136+ struct path tmp = *path;
26137+ int did;
26138+
26139+ vfsub_update_h_iattr(&tmp, &did);
26140+ if (did) {
26141+ tmp.dentry = path->dentry->d_parent;
26142+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
26143+ }
26144+ /*ignore*/
26145+ }
26146+
4f0767ce 26147+out:
1facf9fc 26148+ return err;
26149+}
26150+
26151+int vfsub_rmdir(struct inode *dir, struct path *path)
26152+{
26153+ int err;
26154+ struct dentry *d;
26155+
26156+ IMustLock(dir);
26157+
26158+ d = path->dentry;
26159+ path->dentry = d->d_parent;
b752ccd1 26160+ err = security_path_rmdir(path, d);
1facf9fc 26161+ path->dentry = d;
26162+ if (unlikely(err))
26163+ goto out;
26164+
2cbb1c4b 26165+ lockdep_off();
1facf9fc 26166+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 26167+ lockdep_on();
1facf9fc 26168+ if (!err) {
26169+ struct path tmp = {
26170+ .dentry = path->dentry->d_parent,
26171+ .mnt = path->mnt
26172+ };
26173+
26174+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26175+ }
26176+
4f0767ce 26177+out:
1facf9fc 26178+ return err;
26179+}
26180+
26181+/* ---------------------------------------------------------------------- */
26182+
26183+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26184+ loff_t *ppos)
26185+{
26186+ ssize_t err;
26187+
2cbb1c4b 26188+ lockdep_off();
1facf9fc 26189+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 26190+ lockdep_on();
1facf9fc 26191+ if (err >= 0)
26192+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26193+ return err;
26194+}
26195+
26196+/* todo: kernel_read()? */
26197+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26198+ loff_t *ppos)
26199+{
26200+ ssize_t err;
26201+ mm_segment_t oldfs;
b752ccd1
AM
26202+ union {
26203+ void *k;
26204+ char __user *u;
26205+ } buf;
1facf9fc 26206+
b752ccd1 26207+ buf.k = kbuf;
1facf9fc 26208+ oldfs = get_fs();
26209+ set_fs(KERNEL_DS);
b752ccd1 26210+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 26211+ set_fs(oldfs);
26212+ return err;
26213+}
26214+
26215+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26216+ loff_t *ppos)
26217+{
26218+ ssize_t err;
26219+
2cbb1c4b 26220+ lockdep_off();
1facf9fc 26221+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 26222+ lockdep_on();
1facf9fc 26223+ if (err >= 0)
26224+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26225+ return err;
26226+}
26227+
26228+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
26229+{
26230+ ssize_t err;
26231+ mm_segment_t oldfs;
b752ccd1
AM
26232+ union {
26233+ void *k;
26234+ const char __user *u;
26235+ } buf;
1facf9fc 26236+
b752ccd1 26237+ buf.k = kbuf;
1facf9fc 26238+ oldfs = get_fs();
26239+ set_fs(KERNEL_DS);
b752ccd1 26240+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 26241+ set_fs(oldfs);
26242+ return err;
26243+}
26244+
4a4d8108
AM
26245+int vfsub_flush(struct file *file, fl_owner_t id)
26246+{
26247+ int err;
26248+
26249+ err = 0;
26250+ if (file->f_op && file->f_op->flush) {
2cbb1c4b
JR
26251+ if (!au_test_nfs(file->f_dentry->d_sb))
26252+ err = file->f_op->flush(file, id);
26253+ else {
26254+ lockdep_off();
26255+ err = file->f_op->flush(file, id);
26256+ lockdep_on();
26257+ }
4a4d8108
AM
26258+ if (!err)
26259+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
26260+ /*ignore*/
26261+ }
26262+ return err;
26263+}
26264+
1facf9fc 26265+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
26266+{
26267+ int err;
26268+
2cbb1c4b 26269+ lockdep_off();
1facf9fc 26270+ err = vfs_readdir(file, filldir, arg);
2cbb1c4b 26271+ lockdep_on();
1facf9fc 26272+ if (err >= 0)
26273+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26274+ return err;
26275+}
26276+
26277+long vfsub_splice_to(struct file *in, loff_t *ppos,
26278+ struct pipe_inode_info *pipe, size_t len,
26279+ unsigned int flags)
26280+{
26281+ long err;
26282+
2cbb1c4b 26283+ lockdep_off();
0fc653ad 26284+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 26285+ lockdep_on();
4a4d8108 26286+ file_accessed(in);
1facf9fc 26287+ if (err >= 0)
26288+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
26289+ return err;
26290+}
26291+
26292+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26293+ loff_t *ppos, size_t len, unsigned int flags)
26294+{
26295+ long err;
26296+
2cbb1c4b 26297+ lockdep_off();
0fc653ad 26298+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 26299+ lockdep_on();
1facf9fc 26300+ if (err >= 0)
26301+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
26302+ return err;
26303+}
26304+
53392da6
AM
26305+int vfsub_fsync(struct file *file, struct path *path, int datasync)
26306+{
26307+ int err;
26308+
26309+ /* file can be NULL */
26310+ lockdep_off();
26311+ err = vfs_fsync(file, datasync);
26312+ lockdep_on();
26313+ if (!err) {
26314+ if (!path) {
26315+ AuDebugOn(!file);
26316+ path = &file->f_path;
26317+ }
26318+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
26319+ }
26320+ return err;
26321+}
26322+
1facf9fc 26323+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
26324+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26325+ struct file *h_file)
26326+{
26327+ int err;
26328+ struct inode *h_inode;
26329+
26330+ h_inode = h_path->dentry->d_inode;
26331+ if (!h_file) {
26332+ err = mnt_want_write(h_path->mnt);
26333+ if (err)
26334+ goto out;
26335+ err = inode_permission(h_inode, MAY_WRITE);
26336+ if (err)
26337+ goto out_mnt;
26338+ err = get_write_access(h_inode);
26339+ if (err)
26340+ goto out_mnt;
4a4d8108 26341+ err = break_lease(h_inode, O_WRONLY);
1facf9fc 26342+ if (err)
26343+ goto out_inode;
26344+ }
26345+
26346+ err = locks_verify_truncate(h_inode, h_file, length);
26347+ if (!err)
953406b4 26348+ err = security_path_truncate(h_path);
2cbb1c4b
JR
26349+ if (!err) {
26350+ lockdep_off();
1facf9fc 26351+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
26352+ lockdep_on();
26353+ }
1facf9fc 26354+
4f0767ce 26355+out_inode:
1facf9fc 26356+ if (!h_file)
26357+ put_write_access(h_inode);
4f0767ce 26358+out_mnt:
1facf9fc 26359+ if (!h_file)
26360+ mnt_drop_write(h_path->mnt);
4f0767ce 26361+out:
1facf9fc 26362+ return err;
26363+}
26364+
26365+/* ---------------------------------------------------------------------- */
26366+
26367+struct au_vfsub_mkdir_args {
26368+ int *errp;
26369+ struct inode *dir;
26370+ struct path *path;
26371+ int mode;
26372+};
26373+
26374+static void au_call_vfsub_mkdir(void *args)
26375+{
26376+ struct au_vfsub_mkdir_args *a = args;
26377+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
26378+}
26379+
26380+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
26381+{
26382+ int err, do_sio, wkq_err;
26383+
26384+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26385+ if (!do_sio)
26386+ err = vfsub_mkdir(dir, path, mode);
26387+ else {
26388+ struct au_vfsub_mkdir_args args = {
26389+ .errp = &err,
26390+ .dir = dir,
26391+ .path = path,
26392+ .mode = mode
26393+ };
26394+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
26395+ if (unlikely(wkq_err))
26396+ err = wkq_err;
26397+ }
26398+
26399+ return err;
26400+}
26401+
26402+struct au_vfsub_rmdir_args {
26403+ int *errp;
26404+ struct inode *dir;
26405+ struct path *path;
26406+};
26407+
26408+static void au_call_vfsub_rmdir(void *args)
26409+{
26410+ struct au_vfsub_rmdir_args *a = args;
26411+ *a->errp = vfsub_rmdir(a->dir, a->path);
26412+}
26413+
26414+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
26415+{
26416+ int err, do_sio, wkq_err;
26417+
26418+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26419+ if (!do_sio)
26420+ err = vfsub_rmdir(dir, path);
26421+ else {
26422+ struct au_vfsub_rmdir_args args = {
26423+ .errp = &err,
26424+ .dir = dir,
26425+ .path = path
26426+ };
26427+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
26428+ if (unlikely(wkq_err))
26429+ err = wkq_err;
26430+ }
26431+
26432+ return err;
26433+}
26434+
26435+/* ---------------------------------------------------------------------- */
26436+
26437+struct notify_change_args {
26438+ int *errp;
26439+ struct path *path;
26440+ struct iattr *ia;
26441+};
26442+
26443+static void call_notify_change(void *args)
26444+{
26445+ struct notify_change_args *a = args;
26446+ struct inode *h_inode;
26447+
26448+ h_inode = a->path->dentry->d_inode;
26449+ IMustLock(h_inode);
26450+
26451+ *a->errp = -EPERM;
26452+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
1facf9fc 26453+ *a->errp = notify_change(a->path->dentry, a->ia);
1facf9fc 26454+ if (!*a->errp)
26455+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
26456+ }
26457+ AuTraceErr(*a->errp);
26458+}
26459+
26460+int vfsub_notify_change(struct path *path, struct iattr *ia)
26461+{
26462+ int err;
26463+ struct notify_change_args args = {
26464+ .errp = &err,
26465+ .path = path,
26466+ .ia = ia
26467+ };
26468+
26469+ call_notify_change(&args);
26470+
26471+ return err;
26472+}
26473+
26474+int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
26475+{
26476+ int err, wkq_err;
26477+ struct notify_change_args args = {
26478+ .errp = &err,
26479+ .path = path,
26480+ .ia = ia
26481+ };
26482+
26483+ wkq_err = au_wkq_wait(call_notify_change, &args);
26484+ if (unlikely(wkq_err))
26485+ err = wkq_err;
26486+
26487+ return err;
26488+}
26489+
26490+/* ---------------------------------------------------------------------- */
26491+
26492+struct unlink_args {
26493+ int *errp;
26494+ struct inode *dir;
26495+ struct path *path;
26496+};
26497+
26498+static void call_unlink(void *args)
26499+{ /* args is a struct unlink_args; the result is stored in *a->errp */
26500+ struct unlink_args *a = args;
26501+ struct dentry *d = a->path->dentry;
26502+ struct inode *h_inode;
26503+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
027c5e7a 26504+ && d->d_count == 1); /* if set, the extra dget()/dput() pair below is skipped */
1facf9fc 26505+
26506+ IMustLock(a->dir);
26507+
26508+ a->path->dentry = d->d_parent; /* security_path_unlink() is called with the parent as the path */
26509+ *a->errp = security_path_unlink(a->path, d);
26510+ a->path->dentry = d; /* restore the caller's path before any return */
26511+ if (unlikely(*a->errp))
26512+ return;
26513+
26514+ if (!stop_sillyrename)
26515+ dget(d); /* NOTE(review): presumably the missing extra ref keeps NFS from silly-renaming - confirm */
26516+ h_inode = d->d_inode;
26517+ if (h_inode)
027c5e7a 26518+ ihold(h_inode); /* held across vfs_unlink(), dropped by the iput() below */
1facf9fc 26519+
2cbb1c4b 26520+ lockdep_off();
1facf9fc 26521+ *a->errp = vfs_unlink(a->dir, d);
2cbb1c4b 26522+ lockdep_on();
1facf9fc 26523+ if (!*a->errp) {
26524+ struct path tmp = {
26525+ .dentry = d->d_parent, /* on success vfsub_update_h_iattr() is given the parent's path */
26526+ .mnt = a->path->mnt
26527+ };
26528+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26529+ }
26530+
26531+ if (!stop_sillyrename)
26532+ dput(d);
26533+ if (h_inode)
26534+ iput(h_inode);
26535+
26536+ AuTraceErr(*a->errp);
26537+}
26538+
26539+/*
26540+ * @dir: must be locked.
26541+ * @path: its dentry is the unlink target; @force: go through au_wkq_wait().
26542+ */
26543+int vfsub_unlink(struct inode *dir, struct path *path, int force)
26544+{
26545+ int err;
26546+ struct unlink_args args = {
26547+ .errp = &err, /* call_unlink() stores its result here */
26548+ .dir = dir,
26549+ .path = path
26550+ };
26551+
26552+ if (!force)
26553+ call_unlink(&args); /* direct call */
26554+ else {
26555+ int wkq_err;
26556+
26557+ wkq_err = au_wkq_wait(call_unlink, &args); /* call_unlink() is handed to au_wkq_wait() */
26558+ if (unlikely(wkq_err))
26559+ err = wkq_err; /* a failure of au_wkq_wait() itself replaces err */
26560+ }
26561+
26562+ return err;
26563+}
7f207e10
AM
26564diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
26565--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
26566+++ linux/fs/aufs/vfsub.h 2011-08-24 13:30:24.734646739 +0200
26567@@ -0,0 +1,232 @@
1facf9fc 26568+/*
027c5e7a 26569+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26570+ *
26571+ * This program, aufs is free software; you can redistribute it and/or modify
26572+ * it under the terms of the GNU General Public License as published by
26573+ * the Free Software Foundation; either version 2 of the License, or
26574+ * (at your option) any later version.
dece6358
AM
26575+ *
26576+ * This program is distributed in the hope that it will be useful,
26577+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26578+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26579+ * GNU General Public License for more details.
26580+ *
26581+ * You should have received a copy of the GNU General Public License
26582+ * along with this program; if not, write to the Free Software
26583+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26584+ */
26585+
26586+/*
26587+ * sub-routines for VFS
26588+ */
26589+
26590+#ifndef __AUFS_VFSUB_H__
26591+#define __AUFS_VFSUB_H__
26592+
26593+#ifdef __KERNEL__
26594+
26595+#include <linux/fs.h>
0c5527e5 26596+#include <linux/lglock.h>
7f207e10 26597+#include "debug.h"
1facf9fc 26598+
7f207e10 26599+/* copied from linux/fs/internal.h */
2cbb1c4b 26600+/* todo: BAD approach!! */
0c5527e5 26601+DECLARE_BRLOCK(vfsmount_lock);
0c5527e5 26602+extern void file_sb_list_del(struct file *f);
2cbb1c4b 26603+extern spinlock_t inode_sb_list_lock;
0c5527e5 26604+
7f207e10
AM
26605+/* copied from linux/fs/file_table.c */
26606+DECLARE_LGLOCK(files_lglock);
0c5527e5
AM
26607+#ifdef CONFIG_SMP
26608+/*
26609+ * These macros iterate all files on all CPUs for a given superblock.
26610+ * files_lglock must be held globally.
26611+ */
26612+#define do_file_list_for_each_entry(__sb, __file) \
26613+{ \
26614+ int i; \
26615+ for_each_possible_cpu(i) { \
26616+ struct list_head *list; \
26617+ list = per_cpu_ptr((__sb)->s_files, i); \
26618+ list_for_each_entry((__file), list, f_u.fu_list)
26619+
26620+#define while_file_list_for_each_entry \
26621+ } \
26622+}
26623+
26624+#else
26625+
26626+#define do_file_list_for_each_entry(__sb, __file) \
26627+{ /* !CONFIG_SMP variant: (__sb)->s_files is walked as one list */ \
26628+ struct list_head *list; \
26629+ list = &(__sb)->s_files; /* the macro argument, not a caller variable named 'sb' */ \
26630+ list_for_each_entry((__file), list, f_u.fu_list)
26631+
26632+#define while_file_list_for_each_entry \
26633+}
7f207e10
AM
26634+#endif
26635+
26636+/* ---------------------------------------------------------------------- */
1facf9fc 26637+
26638+/* lock subclass for lower inode */
26639+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
26640+/* reduce? gave up. */
26641+enum {
26642+ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
26643+ AuLsc_I_PARENT, /* lower inode, parent first */
26644+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 26645+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 26646+ AuLsc_I_CHILD,
26647+ AuLsc_I_CHILD2,
26648+ AuLsc_I_End
26649+};
26650+
26651+/* to debug easier, do not make them inlined functions */
26652+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
26653+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
26654+
26655+/* ---------------------------------------------------------------------- */
26656+
7f207e10
AM
26657+static inline void vfsub_drop_nlink(struct inode *inode)
26658+{
26659+ AuDebugOn(!inode->i_nlink);
26660+ drop_nlink(inode);
26661+}
26662+
027c5e7a
AM
26663+static inline void vfsub_dead_dir(struct inode *inode)
26664+{
26665+ AuDebugOn(!S_ISDIR(inode->i_mode));
26666+ inode->i_flags |= S_DEAD;
26667+ clear_nlink(inode);
26668+}
26669+
7f207e10
AM
26670+/* ---------------------------------------------------------------------- */
26671+
26672+int vfsub_update_h_iattr(struct path *h_path, int *did);
26673+struct file *vfsub_dentry_open(struct path *path, int flags);
26674+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
1facf9fc 26675+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
26676+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
26677+ int len);
26678+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
2cbb1c4b 26679+int vfsub_name_hash(const char *name, struct qstr *this, int len);
1facf9fc 26680+
26681+/* ---------------------------------------------------------------------- */
26682+
26683+struct au_hinode;
26684+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
26685+ struct dentry *d2, struct au_hinode *hdir2);
26686+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
26687+ struct dentry *d2, struct au_hinode *hdir2);
26688+
26689+int vfsub_create(struct inode *dir, struct path *path, int mode);
26690+int vfsub_symlink(struct inode *dir, struct path *path,
26691+ const char *symname);
26692+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
26693+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
26694+ struct path *path);
26695+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
26696+ struct inode *hdir, struct path *path);
26697+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
26698+int vfsub_rmdir(struct inode *dir, struct path *path);
26699+
26700+/* ---------------------------------------------------------------------- */
26701+
26702+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26703+ loff_t *ppos);
26704+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26705+ loff_t *ppos);
26706+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26707+ loff_t *ppos);
26708+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
26709+ loff_t *ppos);
4a4d8108 26710+int vfsub_flush(struct file *file, fl_owner_t id);
1facf9fc 26711+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
26712+
4a4d8108
AM
26713+static inline unsigned int vfsub_file_flags(struct file *file)
26714+{
26715+ unsigned int flags;
26716+
26717+ spin_lock(&file->f_lock);
26718+ flags = file->f_flags;
26719+ spin_unlock(&file->f_lock);
26720+
26721+ return flags;
26722+}
1308ab2a 26723+
1facf9fc 26724+static inline void vfsub_file_accessed(struct file *h_file)
26725+{
26726+ file_accessed(h_file);
26727+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
26728+}
26729+
26730+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
26731+ struct dentry *h_dentry)
26732+{
26733+ struct path h_path = {
26734+ .dentry = h_dentry,
26735+ .mnt = h_mnt
26736+ };
26737+ touch_atime(h_mnt, h_dentry);
26738+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
26739+}
26740+
4a4d8108
AM
26741+long vfsub_splice_to(struct file *in, loff_t *ppos,
26742+ struct pipe_inode_info *pipe, size_t len,
26743+ unsigned int flags);
26744+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26745+ loff_t *ppos, size_t len, unsigned int flags);
26746+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26747+ struct file *h_file);
53392da6 26748+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 26749+
1facf9fc 26750+/* ---------------------------------------------------------------------- */
26751+
26752+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
26753+{
26754+ loff_t err;
26755+
2cbb1c4b 26756+ lockdep_off();
1facf9fc 26757+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 26758+ lockdep_on();
1facf9fc 26759+ return err;
26760+}
26761+
26762+/* ---------------------------------------------------------------------- */
26763+
26764+/* dirty workaround for strict type of fmode_t */
26765+union vfsub_fmu {
26766+ fmode_t fm;
26767+ unsigned int ui;
26768+};
26769+
26770+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
26771+{
26772+ union vfsub_fmu u = {
26773+ .fm = fm
26774+ };
26775+
26776+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui));
26777+
26778+ return u.ui;
26779+}
26780+
26781+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
26782+{
26783+ union vfsub_fmu u = {
26784+ .ui = ui
26785+ };
26786+
26787+ return u.fm;
26788+}
26789+
4a4d8108
AM
26790+/* ---------------------------------------------------------------------- */
26791+
26792+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
26793+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
26794+int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
26795+int vfsub_notify_change(struct path *path, struct iattr *ia);
26796+int vfsub_unlink(struct inode *dir, struct path *path, int force);
26797+
1facf9fc 26798+#endif /* __KERNEL__ */
26799+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
26800diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
26801--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
53392da6 26802+++ linux/fs/aufs/wbr_policy.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 26803@@ -0,0 +1,700 @@
1facf9fc 26804+/*
027c5e7a 26805+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26806+ *
26807+ * This program, aufs is free software; you can redistribute it and/or modify
26808+ * it under the terms of the GNU General Public License as published by
26809+ * the Free Software Foundation; either version 2 of the License, or
26810+ * (at your option) any later version.
dece6358
AM
26811+ *
26812+ * This program is distributed in the hope that it will be useful,
26813+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26814+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26815+ * GNU General Public License for more details.
26816+ *
26817+ * You should have received a copy of the GNU General Public License
26818+ * along with this program; if not, write to the Free Software
26819+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26820+ */
26821+
26822+/*
26823+ * policies for selecting one among multiple writable branches
26824+ */
26825+
26826+#include <linux/statfs.h>
26827+#include "aufs.h"
26828+
26829+/* subset of cpup_attr() */
26830+static noinline_for_stack
26831+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
26832+{
26833+ int err, sbits;
26834+ struct iattr ia;
26835+ struct inode *h_isrc;
26836+
26837+ h_isrc = h_src->d_inode;
26838+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
26839+ ia.ia_mode = h_isrc->i_mode;
26840+ ia.ia_uid = h_isrc->i_uid;
26841+ ia.ia_gid = h_isrc->i_gid;
26842+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
26843+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
26844+ err = vfsub_sio_notify_change(h_path, &ia);
26845+
26846+ /* is this nfs only? */
26847+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
26848+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
26849+ ia.ia_mode = h_isrc->i_mode;
26850+ err = vfsub_sio_notify_change(h_path, &ia);
26851+ }
26852+
26853+ return err;
26854+}
26855+
26856+#define AuCpdown_PARENT_OPQ 1
26857+#define AuCpdown_WHED (1 << 1)
26858+#define AuCpdown_MADE_DIR (1 << 2)
26859+#define AuCpdown_DIROPQ (1 << 3)
26860+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
26861+#define au_fset_cpdown(flags, name) \
26862+ do { (flags) |= AuCpdown_##name; } while (0)
26863+#define au_fclr_cpdown(flags, name) \
26864+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 26865+
26866+struct au_cpdown_dir_args {
26867+ struct dentry *parent;
26868+ unsigned int flags;
26869+};
26870+
26871+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
26872+ struct au_cpdown_dir_args *a)
26873+{
26874+ int err;
26875+ struct dentry *opq_dentry;
26876+
26877+ opq_dentry = au_diropq_create(dentry, bdst);
26878+ err = PTR_ERR(opq_dentry);
26879+ if (IS_ERR(opq_dentry))
26880+ goto out;
26881+ dput(opq_dentry);
26882+ au_fset_cpdown(a->flags, DIROPQ);
26883+
4f0767ce 26884+out:
1facf9fc 26885+ return err;
26886+}
26887+
26888+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
26889+ struct inode *dir, aufs_bindex_t bdst)
26890+{
26891+ int err;
26892+ struct path h_path;
26893+ struct au_branch *br;
26894+
26895+ br = au_sbr(dentry->d_sb, bdst);
26896+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
26897+ err = PTR_ERR(h_path.dentry);
26898+ if (IS_ERR(h_path.dentry))
26899+ goto out;
26900+
26901+ err = 0;
26902+ if (h_path.dentry->d_inode) {
26903+ h_path.mnt = br->br_mnt;
26904+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
26905+ dentry);
26906+ }
26907+ dput(h_path.dentry);
26908+
4f0767ce 26909+out:
1facf9fc 26910+ return err;
26911+}
26912+
26913+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
26914+ struct dentry *h_parent, void *arg)
26915+{
26916+ int err, rerr;
4a4d8108 26917+ aufs_bindex_t bopq, bstart;
1facf9fc 26918+ struct path h_path;
26919+ struct dentry *parent;
26920+ struct inode *h_dir, *h_inode, *inode, *dir;
26921+ struct au_cpdown_dir_args *args = arg;
26922+
26923+ bstart = au_dbstart(dentry);
26924+ /* dentry is di-locked */
26925+ parent = dget_parent(dentry);
26926+ dir = parent->d_inode;
26927+ h_dir = h_parent->d_inode;
26928+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
26929+ IMustLock(h_dir);
26930+
26931+ err = au_lkup_neg(dentry, bdst);
26932+ if (unlikely(err < 0))
26933+ goto out;
26934+ h_path.dentry = au_h_dptr(dentry, bdst);
26935+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
26936+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
26937+ S_IRWXU | S_IRUGO | S_IXUGO);
26938+ if (unlikely(err))
26939+ goto out_put;
26940+ au_fset_cpdown(args->flags, MADE_DIR);
26941+
1facf9fc 26942+ bopq = au_dbdiropq(dentry);
26943+ au_fclr_cpdown(args->flags, WHED);
26944+ au_fclr_cpdown(args->flags, DIROPQ);
26945+ if (au_dbwh(dentry) == bdst)
26946+ au_fset_cpdown(args->flags, WHED);
26947+ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
26948+ au_fset_cpdown(args->flags, PARENT_OPQ);
1facf9fc 26949+ h_inode = h_path.dentry->d_inode;
26950+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26951+ if (au_ftest_cpdown(args->flags, WHED)) {
26952+ err = au_cpdown_dir_opq(dentry, bdst, args);
26953+ if (unlikely(err)) {
26954+ mutex_unlock(&h_inode->i_mutex);
26955+ goto out_dir;
26956+ }
26957+ }
26958+
26959+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
26960+ mutex_unlock(&h_inode->i_mutex);
26961+ if (unlikely(err))
26962+ goto out_opq;
26963+
26964+ if (au_ftest_cpdown(args->flags, WHED)) {
26965+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
26966+ if (unlikely(err))
26967+ goto out_opq;
26968+ }
26969+
26970+ inode = dentry->d_inode;
26971+ if (au_ibend(inode) < bdst)
26972+ au_set_ibend(inode, bdst);
26973+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
26974+ au_hi_flags(inode, /*isdir*/1));
26975+ goto out; /* success */
26976+
26977+ /* revert */
4f0767ce 26978+out_opq:
1facf9fc 26979+ if (au_ftest_cpdown(args->flags, DIROPQ)) {
26980+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26981+ rerr = au_diropq_remove(dentry, bdst);
26982+ mutex_unlock(&h_inode->i_mutex);
26983+ if (unlikely(rerr)) {
26984+ AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
26985+ AuDLNPair(dentry), bdst, rerr);
26986+ err = -EIO;
26987+ goto out;
26988+ }
26989+ }
4f0767ce 26990+out_dir:
1facf9fc 26991+ if (au_ftest_cpdown(args->flags, MADE_DIR)) {
26992+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
26993+ if (unlikely(rerr)) {
26994+ AuIOErr("failed removing %.*s b%d (%d)\n",
26995+ AuDLNPair(dentry), bdst, rerr);
26996+ err = -EIO;
26997+ }
26998+ }
4f0767ce 26999+out_put:
1facf9fc 27000+ au_set_h_dptr(dentry, bdst, NULL);
27001+ if (au_dbend(dentry) == bdst)
27002+ au_update_dbend(dentry);
4f0767ce 27003+out:
1facf9fc 27004+ dput(parent);
27005+ return err;
27006+}
27007+
27008+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
27009+{
27010+ int err;
27011+ struct au_cpdown_dir_args args = {
27012+ .parent = dget_parent(dentry),
27013+ .flags = 0
27014+ };
27015+
27016+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
27017+ dput(args.parent);
27018+
27019+ return err;
27020+}
27021+
27022+/* ---------------------------------------------------------------------- */
27023+
27024+/* policies for create */
27025+
4a4d8108
AM
27026+static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
27027+{
27028+ int err, i, j, ndentry;
27029+ aufs_bindex_t bopq;
27030+ struct au_dcsub_pages dpages;
27031+ struct au_dpage *dpage;
27032+ struct dentry **dentries, *parent, *d;
27033+
27034+ err = au_dpages_init(&dpages, GFP_NOFS);
27035+ if (unlikely(err))
27036+ goto out;
27037+ parent = dget_parent(dentry);
027c5e7a 27038+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
27039+ if (unlikely(err))
27040+ goto out_free;
27041+
27042+ err = bindex;
27043+ for (i = 0; i < dpages.ndpage; i++) {
27044+ dpage = dpages.dpages + i;
27045+ dentries = dpage->dentries;
27046+ ndentry = dpage->ndentry;
27047+ for (j = 0; j < ndentry; j++) {
27048+ d = dentries[j];
27049+ di_read_lock_parent2(d, !AuLock_IR);
27050+ bopq = au_dbdiropq(d);
27051+ di_read_unlock(d, !AuLock_IR);
27052+ if (bopq >= 0 && bopq < err)
27053+ err = bopq;
27054+ }
27055+ }
27056+
27057+out_free:
27058+ dput(parent);
27059+ au_dpages_free(&dpages);
27060+out:
27061+ return err;
27062+}
27063+
1facf9fc 27064+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
27065+{
27066+ for (; bindex >= 0; bindex--)
27067+ if (!au_br_rdonly(au_sbr(sb, bindex)))
27068+ return bindex;
27069+ return -EROFS;
27070+}
27071+
27072+/* top down parent */
27073+static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
27074+{
27075+ int err;
27076+ aufs_bindex_t bstart, bindex;
27077+ struct super_block *sb;
27078+ struct dentry *parent, *h_parent;
27079+
27080+ sb = dentry->d_sb;
27081+ bstart = au_dbstart(dentry);
27082+ err = bstart;
27083+ if (!au_br_rdonly(au_sbr(sb, bstart)))
27084+ goto out;
27085+
27086+ err = -EROFS;
27087+ parent = dget_parent(dentry);
27088+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
27089+ h_parent = au_h_dptr(parent, bindex);
27090+ if (!h_parent || !h_parent->d_inode)
27091+ continue;
27092+
27093+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27094+ err = bindex;
27095+ break;
27096+ }
27097+ }
27098+ dput(parent);
27099+
27100+ /* bottom up here */
4a4d8108 27101+ if (unlikely(err < 0)) {
1facf9fc 27102+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
27103+ if (err >= 0)
27104+ err = au_wbr_nonopq(dentry, err);
27105+ }
1facf9fc 27106+
4f0767ce 27107+out:
1facf9fc 27108+ AuDbg("b%d\n", err);
27109+ return err;
27110+}
27111+
27112+/* ---------------------------------------------------------------------- */
27113+
27114+/* an exception for the policy other than tdp */
27115+static int au_wbr_create_exp(struct dentry *dentry)
27116+{
27117+ int err;
27118+ aufs_bindex_t bwh, bdiropq;
27119+ struct dentry *parent;
27120+
27121+ err = -1;
27122+ bwh = au_dbwh(dentry);
27123+ parent = dget_parent(dentry);
27124+ bdiropq = au_dbdiropq(parent);
27125+ if (bwh >= 0) {
27126+ if (bdiropq >= 0)
27127+ err = min(bdiropq, bwh);
27128+ else
27129+ err = bwh;
27130+ AuDbg("%d\n", err);
27131+ } else if (bdiropq >= 0) {
27132+ err = bdiropq;
27133+ AuDbg("%d\n", err);
27134+ }
27135+ dput(parent);
27136+
4a4d8108
AM
27137+ if (err >= 0)
27138+ err = au_wbr_nonopq(dentry, err);
27139+
1facf9fc 27140+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
27141+ err = -1;
27142+
27143+ AuDbg("%d\n", err);
27144+ return err;
27145+}
27146+
27147+/* ---------------------------------------------------------------------- */
27148+
27149+/* round robin */
27150+static int au_wbr_create_init_rr(struct super_block *sb)
27151+{
27152+ int err;
27153+
27154+ err = au_wbr_bu(sb, au_sbend(sb));
27155+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 27156+ /* smp_mb(); */
1facf9fc 27157+
27158+ AuDbg("b%d\n", err);
27159+ return err;
27160+}
27161+
27162+static int au_wbr_create_rr(struct dentry *dentry, int isdir)
27163+{
27164+ int err, nbr;
27165+ unsigned int u;
27166+ aufs_bindex_t bindex, bend;
27167+ struct super_block *sb;
27168+ atomic_t *next;
27169+
27170+ err = au_wbr_create_exp(dentry);
27171+ if (err >= 0)
27172+ goto out;
27173+
27174+ sb = dentry->d_sb;
27175+ next = &au_sbi(sb)->si_wbr_rr_next;
27176+ bend = au_sbend(sb);
27177+ nbr = bend + 1;
27178+ for (bindex = 0; bindex <= bend; bindex++) {
27179+ if (!isdir) {
27180+ err = atomic_dec_return(next) + 1;
27181+ /* modulo for 0 is meaningless */
27182+ if (unlikely(!err))
27183+ err = atomic_dec_return(next) + 1;
27184+ } else
27185+ err = atomic_read(next);
27186+ AuDbg("%d\n", err);
27187+ u = err;
27188+ err = u % nbr;
27189+ AuDbg("%d\n", err);
27190+ if (!au_br_rdonly(au_sbr(sb, err)))
27191+ break;
27192+ err = -EROFS;
27193+ }
27194+
4a4d8108
AM
27195+ if (err >= 0)
27196+ err = au_wbr_nonopq(dentry, err);
27197+
4f0767ce 27198+out:
1facf9fc 27199+ AuDbg("%d\n", err);
27200+ return err;
27201+}
27202+
27203+/* ---------------------------------------------------------------------- */
27204+
27205+/* most free space */
27206+static void au_mfs(struct dentry *dentry)
27207+{
27208+ struct super_block *sb;
27209+ struct au_branch *br;
27210+ struct au_wbr_mfs *mfs;
27211+ aufs_bindex_t bindex, bend;
27212+ int err;
27213+ unsigned long long b, bavail;
7f207e10 27214+ struct path h_path;
1facf9fc 27215+ /* reduce the stack usage */
27216+ struct kstatfs *st;
27217+
27218+ st = kmalloc(sizeof(*st), GFP_NOFS);
27219+ if (unlikely(!st)) {
27220+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
27221+ return;
27222+ }
27223+
27224+ bavail = 0;
27225+ sb = dentry->d_sb;
27226+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 27227+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 27228+ mfs->mfs_bindex = -EROFS;
27229+ mfs->mfsrr_bytes = 0;
27230+ bend = au_sbend(sb);
27231+ for (bindex = 0; bindex <= bend; bindex++) {
27232+ br = au_sbr(sb, bindex);
27233+ if (au_br_rdonly(br))
27234+ continue;
27235+
27236+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27237+ h_path.mnt = br->br_mnt;
27238+ h_path.dentry = h_path.mnt->mnt_root;
27239+ err = vfs_statfs(&h_path, st);
1facf9fc 27240+ if (unlikely(err)) {
27241+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
27242+ continue;
27243+ }
27244+
27245+ /* when the available size is equal, select the lower one */
27246+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
27247+ || sizeof(b) < sizeof(st->f_bsize));
27248+ b = st->f_bavail * st->f_bsize;
27249+ br->br_wbr->wbr_bytes = b;
27250+ if (b >= bavail) {
27251+ bavail = b;
27252+ mfs->mfs_bindex = bindex;
27253+ mfs->mfs_jiffy = jiffies;
27254+ }
27255+ }
27256+
27257+ mfs->mfsrr_bytes = bavail;
27258+ AuDbg("b%d\n", mfs->mfs_bindex);
27259+ kfree(st);
27260+}
27261+
27262+static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
27263+{ /* returns a branch index (>= 0) or a negative value such as -EROFS */
27264+ int err;
27265+ struct super_block *sb;
27266+ struct au_wbr_mfs *mfs;
27267+
27268+ err = au_wbr_create_exp(dentry); /* a non-negative result here is returned as is */
27269+ if (err >= 0)
27270+ goto out;
27271+
27272+ sb = dentry->d_sb;
27273+ mfs = &au_sbi(sb)->si_wbr_mfs;
27274+ mutex_lock(&mfs->mfs_lock);
27275+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) /* stored result expired, */
27276+ || mfs->mfs_bindex < 0 /* or there is none, */
27277+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) /* or its branch fails au_br_rdonly() */
27278+ au_mfs(dentry); /* recompute mfs_bindex; requires mfs_lock */
27279+ err = mfs->mfs_bindex; /* read under mfs_lock: au_mfs() resets it to -EROFS while rescanning */
27280+ mutex_unlock(&mfs->mfs_lock);
27281+
4a4d8108
AM
27282+ if (err >= 0)
27283+ err = au_wbr_nonopq(dentry, err);
27284+
4f0767ce 27285+out:
1facf9fc 27286+ AuDbg("b%d\n", err);
27287+ return err;
27288+}
27289+
27290+static int au_wbr_create_init_mfs(struct super_block *sb)
27291+{
27292+ struct au_wbr_mfs *mfs;
27293+
27294+ mfs = &au_sbi(sb)->si_wbr_mfs;
27295+ mutex_init(&mfs->mfs_lock);
27296+ mfs->mfs_jiffy = 0;
27297+ mfs->mfs_bindex = -EROFS;
27298+
27299+ return 0;
27300+}
27301+
27302+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
27303+{
27304+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
27305+ return 0;
27306+}
27307+
27308+/* ---------------------------------------------------------------------- */
27309+
27310+/* most free space and then round robin */
27311+static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
27312+{
27313+ int err;
27314+ struct au_wbr_mfs *mfs;
27315+
27316+ err = au_wbr_create_mfs(dentry, isdir);
27317+ if (err >= 0) {
27318+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 27319+ mutex_lock(&mfs->mfs_lock);
1facf9fc 27320+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
27321+ err = au_wbr_create_rr(dentry, isdir);
dece6358 27322+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 27323+ }
27324+
27325+ AuDbg("b%d\n", err);
27326+ return err;
27327+}
27328+
27329+static int au_wbr_create_init_mfsrr(struct super_block *sb)
27330+{
27331+ int err;
27332+
27333+ au_wbr_create_init_mfs(sb); /* ignore */
27334+ err = au_wbr_create_init_rr(sb);
27335+
27336+ return err;
27337+}
27338+
27339+/* ---------------------------------------------------------------------- */
27340+
27341+/* top down parent and most free space */
27342+static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
27343+{
27344+ int err, e2;
27345+ unsigned long long b;
27346+ aufs_bindex_t bindex, bstart, bend;
27347+ struct super_block *sb;
27348+ struct dentry *parent, *h_parent;
27349+ struct au_branch *br;
27350+
27351+ err = au_wbr_create_tdp(dentry, isdir);
27352+ if (unlikely(err < 0))
27353+ goto out;
27354+ parent = dget_parent(dentry);
27355+ bstart = au_dbstart(parent);
27356+ bend = au_dbtaildir(parent);
27357+ if (bstart == bend)
27358+ goto out_parent; /* success */
27359+
27360+ e2 = au_wbr_create_mfs(dentry, isdir);
27361+ if (e2 < 0)
27362+ goto out_parent; /* success */
27363+
27364+ /* when the available size is equal, select upper one */
27365+ sb = dentry->d_sb;
27366+ br = au_sbr(sb, err);
27367+ b = br->br_wbr->wbr_bytes;
27368+ AuDbg("b%d, %llu\n", err, b);
27369+
27370+ for (bindex = bstart; bindex <= bend; bindex++) {
27371+ h_parent = au_h_dptr(parent, bindex);
27372+ if (!h_parent || !h_parent->d_inode)
27373+ continue;
27374+
27375+ br = au_sbr(sb, bindex);
27376+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
27377+ b = br->br_wbr->wbr_bytes;
27378+ err = bindex;
27379+ AuDbg("b%d, %llu\n", err, b);
27380+ }
27381+ }
27382+
4a4d8108
AM
27383+ if (err >= 0)
27384+ err = au_wbr_nonopq(dentry, err);
27385+
4f0767ce 27386+out_parent:
1facf9fc 27387+ dput(parent);
4f0767ce 27388+out:
1facf9fc 27389+ AuDbg("b%d\n", err);
27390+ return err;
27391+}
27392+
27393+/* ---------------------------------------------------------------------- */
27394+
27395+/* policies for copyup */
27396+
27397+/* top down parent */
27398+static int au_wbr_copyup_tdp(struct dentry *dentry)
27399+{
27400+ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
27401+}
27402+
27403+/* bottom up parent */
27404+static int au_wbr_copyup_bup(struct dentry *dentry)
27405+{
27406+ int err;
27407+ aufs_bindex_t bindex, bstart;
27408+ struct dentry *parent, *h_parent;
27409+ struct super_block *sb;
27410+
27411+ err = -EROFS;
27412+ sb = dentry->d_sb;
27413+ parent = dget_parent(dentry);
27414+ bstart = au_dbstart(parent);
27415+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
27416+ h_parent = au_h_dptr(parent, bindex);
27417+ if (!h_parent || !h_parent->d_inode)
27418+ continue;
27419+
27420+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27421+ err = bindex;
27422+ break;
27423+ }
27424+ }
27425+ dput(parent);
27426+
27427+ /* bottom up here */
27428+ if (unlikely(err < 0))
27429+ err = au_wbr_bu(sb, bstart - 1);
27430+
27431+ AuDbg("b%d\n", err);
27432+ return err;
27433+}
27434+
27435+/* bottom up */
27436+static int au_wbr_copyup_bu(struct dentry *dentry)
27437+{
27438+ int err;
4a4d8108 27439+ aufs_bindex_t bstart;
1facf9fc 27440+
4a4d8108
AM
27441+ bstart = au_dbstart(dentry);
27442+ err = au_wbr_bu(dentry->d_sb, bstart);
27443+ AuDbg("b%d\n", err);
27444+ if (err > bstart)
27445+ err = au_wbr_nonopq(dentry, err);
1facf9fc 27446+
27447+ AuDbg("b%d\n", err);
27448+ return err;
27449+}
27450+
27451+/* ---------------------------------------------------------------------- */
27452+
27453+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
27454+ [AuWbrCopyup_TDP] = {
27455+ .copyup = au_wbr_copyup_tdp
27456+ },
27457+ [AuWbrCopyup_BUP] = {
27458+ .copyup = au_wbr_copyup_bup
27459+ },
27460+ [AuWbrCopyup_BU] = {
27461+ .copyup = au_wbr_copyup_bu
27462+ }
27463+};
27464+
27465+struct au_wbr_create_operations au_wbr_create_ops[] = {
27466+ [AuWbrCreate_TDP] = {
27467+ .create = au_wbr_create_tdp
27468+ },
27469+ [AuWbrCreate_RR] = {
27470+ .create = au_wbr_create_rr,
27471+ .init = au_wbr_create_init_rr
27472+ },
27473+ [AuWbrCreate_MFS] = {
27474+ .create = au_wbr_create_mfs,
27475+ .init = au_wbr_create_init_mfs,
27476+ .fin = au_wbr_create_fin_mfs
27477+ },
27478+ [AuWbrCreate_MFSV] = {
27479+ .create = au_wbr_create_mfs,
27480+ .init = au_wbr_create_init_mfs,
27481+ .fin = au_wbr_create_fin_mfs
27482+ },
27483+ [AuWbrCreate_MFSRR] = {
27484+ .create = au_wbr_create_mfsrr,
27485+ .init = au_wbr_create_init_mfsrr,
27486+ .fin = au_wbr_create_fin_mfs
27487+ },
27488+ [AuWbrCreate_MFSRRV] = {
27489+ .create = au_wbr_create_mfsrr,
27490+ .init = au_wbr_create_init_mfsrr,
27491+ .fin = au_wbr_create_fin_mfs
27492+ },
27493+ [AuWbrCreate_PMFS] = {
27494+ .create = au_wbr_create_pmfs,
27495+ .init = au_wbr_create_init_mfs,
27496+ .fin = au_wbr_create_fin_mfs
27497+ },
27498+ [AuWbrCreate_PMFSV] = {
27499+ .create = au_wbr_create_pmfs,
27500+ .init = au_wbr_create_init_mfs,
27501+ .fin = au_wbr_create_fin_mfs
27502+ }
27503+};
7f207e10
AM
27504diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
27505--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
27506+++ linux/fs/aufs/whout.c 2011-10-24 20:51:51.583800333 +0200
27507@@ -0,0 +1,1050 @@
1facf9fc 27508+/*
027c5e7a 27509+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 27510+ *
27511+ * This program, aufs is free software; you can redistribute it and/or modify
27512+ * it under the terms of the GNU General Public License as published by
27513+ * the Free Software Foundation; either version 2 of the License, or
27514+ * (at your option) any later version.
dece6358
AM
27515+ *
27516+ * This program is distributed in the hope that it will be useful,
27517+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27518+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27519+ * GNU General Public License for more details.
27520+ *
27521+ * You should have received a copy of the GNU General Public License
27522+ * along with this program; if not, write to the Free Software
27523+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 27524+ */
27525+
27526+/*
27527+ * whiteout for logical deletion and opaque directory
27528+ */
27529+
27530+#include <linux/fs.h>
27531+#include "aufs.h"
27532+
27533+#define WH_MASK S_IRUGO
27534+
27535+/*
27536+ * If a directory contains this file, then it is opaque. We start with the
27537+ * .wh. flag so that it is blocked by lookup.
27538+ */
27539+static struct qstr diropq_name = {
27540+ .name = AUFS_WH_DIROPQ,
27541+ .len = sizeof(AUFS_WH_DIROPQ) - 1
27542+};
27543+
27544+/*
27545+ * generate whiteout name, which is NOT NUL-terminated.
27546+ * @name: original d_name.name
27547+ * @len: original d_name.len
27548+ * @wh: whiteout qstr
27549+ * returns zero on success, otherwise an error.
27550+ * on success, wh->name should be freed by kfree().
27551+ */
27552+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
27553+{
27554+ char *p;
27555+
27556+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
27557+ return -ENAMETOOLONG;
27558+
27559+ wh->len = name->len + AUFS_WH_PFX_LEN;
27560+ p = kmalloc(wh->len, GFP_NOFS);
27561+ wh->name = p;
27562+ if (p) {
27563+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
27564+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
27565+ /* smp_mb(); */
27566+ return 0;
27567+ }
27568+ return -ENOMEM;
27569+}
27570+
27571+/* ---------------------------------------------------------------------- */
27572+
27573+/*
27574+ * test if the @wh_name exists under @h_parent.
27575+ * @try_sio specifies whether super-io is necessary.
27576+ */
27577+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
27578+ struct au_branch *br, int try_sio)
27579+{
27580+ int err;
27581+ struct dentry *wh_dentry;
1facf9fc 27582+
1facf9fc 27583+ if (!try_sio)
27584+ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
27585+ else
27586+ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
27587+ err = PTR_ERR(wh_dentry);
27588+ if (IS_ERR(wh_dentry))
27589+ goto out;
27590+
27591+ err = 0;
27592+ if (!wh_dentry->d_inode)
27593+ goto out_wh; /* success */
27594+
27595+ err = 1;
27596+ if (S_ISREG(wh_dentry->d_inode->i_mode))
27597+ goto out_wh; /* success */
27598+
27599+ err = -EIO;
27600+ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
27601+ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
27602+
4f0767ce 27603+out_wh:
1facf9fc 27604+ dput(wh_dentry);
4f0767ce 27605+out:
1facf9fc 27606+ return err;
27607+}
27608+
27609+/*
27610+ * test whether the @h_dentry is marked opaque or not.
27611+ */
27612+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
27613+{
27614+ int err;
27615+ struct inode *h_dir;
27616+
27617+ h_dir = h_dentry->d_inode;
27618+ err = au_wh_test(h_dentry, &diropq_name, br,
27619+ au_test_h_perm_sio(h_dir, MAY_EXEC));
27620+ return err;
27621+}
27622+
27623+/*
27624+ * returns a negative dentry whose name is unique and temporary.
27625+ */
27626+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
27627+ struct qstr *prefix)
27628+{
1facf9fc 27629+ struct dentry *dentry;
27630+ int i;
027c5e7a 27631+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 27632+ *name, *p;
027c5e7a 27633+ /* strict atomic_t is unnecessary here */
1facf9fc 27634+ static unsigned short cnt;
27635+ struct qstr qs;
27636+
4a4d8108
AM
27637+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
27638+
1facf9fc 27639+ name = defname;
027c5e7a
AM
27640+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
27641+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 27642+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 27643+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 27644+ goto out;
27645+ dentry = ERR_PTR(-ENOMEM);
27646+ name = kmalloc(qs.len + 1, GFP_NOFS);
27647+ if (unlikely(!name))
27648+ goto out;
27649+ }
27650+
27651+ /* doubly whiteout-ed */
27652+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
27653+ p = name + AUFS_WH_PFX_LEN * 2;
27654+ memcpy(p, prefix->name, prefix->len);
27655+ p += prefix->len;
27656+ *p++ = '.';
4a4d8108 27657+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 27658+
27659+ qs.name = name;
27660+ for (i = 0; i < 3; i++) {
b752ccd1 27661+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
1facf9fc 27662+ dentry = au_sio_lkup_one(&qs, h_parent, br);
27663+ if (IS_ERR(dentry) || !dentry->d_inode)
27664+ goto out_name;
27665+ dput(dentry);
27666+ }
4a4d8108 27667+ /* pr_warning("could not get random name\n"); */
1facf9fc 27668+ dentry = ERR_PTR(-EEXIST);
27669+ AuDbg("%.*s\n", AuLNPair(&qs));
27670+ BUG();
27671+
4f0767ce 27672+out_name:
1facf9fc 27673+ if (name != defname)
27674+ kfree(name);
4f0767ce 27675+out:
4a4d8108 27676+ AuTraceErrPtr(dentry);
1facf9fc 27677+ return dentry;
1facf9fc 27678+}
27679+
27680+/*
27681+ * rename the @h_dentry on @br to the whiteouted temporary name.
27682+ */
27683+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
27684+{
27685+ int err;
27686+ struct path h_path = {
27687+ .mnt = br->br_mnt
27688+ };
27689+ struct inode *h_dir;
27690+ struct dentry *h_parent;
27691+
27692+ h_parent = h_dentry->d_parent; /* dir inode is locked */
27693+ h_dir = h_parent->d_inode;
27694+ IMustLock(h_dir);
27695+
27696+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
27697+ err = PTR_ERR(h_path.dentry);
27698+ if (IS_ERR(h_path.dentry))
27699+ goto out;
27700+
27701+ /* under the same dir, no need to lock_rename() */
27702+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
27703+ AuTraceErr(err);
27704+ dput(h_path.dentry);
27705+
4f0767ce 27706+out:
4a4d8108 27707+ AuTraceErr(err);
1facf9fc 27708+ return err;
27709+}
27710+
27711+/* ---------------------------------------------------------------------- */
27712+/*
27713+ * functions for removing a whiteout
27714+ */
27715+
27716+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
27717+{
27718+ int force;
27719+
27720+ /*
27721+ * forces superio when the dir has a sticky bit.
27722+ * this may be a violation of unix fs semantics.
27723+ */
27724+ force = (h_dir->i_mode & S_ISVTX)
27725+ && h_path->dentry->d_inode->i_uid != current_fsuid();
27726+ return vfsub_unlink(h_dir, h_path, force);
27727+}
27728+
27729+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
27730+ struct dentry *dentry)
27731+{
27732+ int err;
27733+
27734+ err = do_unlink_wh(h_dir, h_path);
27735+ if (!err && dentry)
27736+ au_set_dbwh(dentry, -1);
27737+
27738+ return err;
27739+}
27740+
27741+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
27742+ struct au_branch *br)
27743+{
27744+ int err;
27745+ struct path h_path = {
27746+ .mnt = br->br_mnt
27747+ };
27748+
27749+ err = 0;
27750+ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
27751+ if (IS_ERR(h_path.dentry))
27752+ err = PTR_ERR(h_path.dentry);
27753+ else {
27754+ if (h_path.dentry->d_inode
27755+ && S_ISREG(h_path.dentry->d_inode->i_mode))
27756+ err = do_unlink_wh(h_parent->d_inode, &h_path);
27757+ dput(h_path.dentry);
27758+ }
27759+
27760+ return err;
27761+}
27762+
27763+/* ---------------------------------------------------------------------- */
27764+/*
27765+ * initialize/clean whiteout for a branch
27766+ */
27767+
27768+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
27769+ const int isdir)
27770+{
27771+ int err;
27772+
27773+ if (!whpath->dentry->d_inode)
27774+ return;
27775+
27776+ err = mnt_want_write(whpath->mnt);
27777+ if (!err) {
27778+ if (isdir)
27779+ err = vfsub_rmdir(h_dir, whpath);
27780+ else
27781+ err = vfsub_unlink(h_dir, whpath, /*force*/0);
27782+ mnt_drop_write(whpath->mnt);
27783+ }
27784+ if (unlikely(err))
4a4d8108
AM
27785+ pr_warning("failed removing %.*s (%d), ignored.\n",
27786+ AuDLNPair(whpath->dentry), err);
1facf9fc 27787+}
27788+
27789+static int test_linkable(struct dentry *h_root)
27790+{
27791+ struct inode *h_dir = h_root->d_inode;
27792+
27793+ if (h_dir->i_op->link)
27794+ return 0;
27795+
4a4d8108
AM
27796+ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
27797+ AuDLNPair(h_root), au_sbtype(h_root->d_sb));
1facf9fc 27798+ return -ENOSYS;
27799+}
27800+
27801+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
27802+static int au_whdir(struct inode *h_dir, struct path *path)
27803+{
27804+ int err;
27805+
27806+ err = -EEXIST;
27807+ if (!path->dentry->d_inode) {
27808+ int mode = S_IRWXU;
27809+
27810+ if (au_test_nfs(path->dentry->d_sb))
27811+ mode |= S_IXUGO;
27812+ err = mnt_want_write(path->mnt);
27813+ if (!err) {
27814+ err = vfsub_mkdir(h_dir, path, mode);
27815+ mnt_drop_write(path->mnt);
27816+ }
27817+ } else if (S_ISDIR(path->dentry->d_inode->i_mode))
27818+ err = 0;
27819+ else
4a4d8108 27820+ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
1facf9fc 27821+
27822+ return err;
27823+}
27824+
27825+struct au_wh_base {
27826+ const struct qstr *name;
27827+ struct dentry *dentry;
27828+};
27829+
27830+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
27831+ struct path *h_path)
27832+{
27833+ h_path->dentry = base[AuBrWh_BASE].dentry;
27834+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27835+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27836+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27837+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27838+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27839+}
27840+
27841+/*
27842+ * returns tri-state,
27843+ * minus: error, caller should print the message
27844+ * zero: success
27845+ * plus: error, caller should NOT print the message
27846+ */
27847+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
27848+ int do_plink, struct au_wh_base base[],
27849+ struct path *h_path)
27850+{
27851+ int err;
27852+ struct inode *h_dir;
27853+
27854+ h_dir = h_root->d_inode;
27855+ h_path->dentry = base[AuBrWh_BASE].dentry;
27856+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27857+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27858+ if (do_plink) {
27859+ err = test_linkable(h_root);
27860+ if (unlikely(err)) {
27861+ err = 1;
27862+ goto out;
27863+ }
27864+
27865+ err = au_whdir(h_dir, h_path);
27866+ if (unlikely(err))
27867+ goto out;
27868+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27869+ } else
27870+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27871+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27872+ err = au_whdir(h_dir, h_path);
27873+ if (unlikely(err))
27874+ goto out;
27875+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27876+
4f0767ce 27877+out:
1facf9fc 27878+ return err;
27879+}
27880+
27881+/*
27882+ * for the moment, aufs supports a branch filesystem which does not support
27883+ * link(2). when tested on FAT, which does not fully support i_op->setattr()
27884+ * either, copyup failed. as a result, such a filesystem will not be used as
27885+ * the writable branch.
27886+ *
27887+ * returns tri-state, see above.
27888+ */
27889+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
27890+ int do_plink, struct au_wh_base base[],
27891+ struct path *h_path)
27892+{
27893+ int err;
27894+ struct inode *h_dir;
27895+
1308ab2a 27896+ WbrWhMustWriteLock(wbr);
27897+
1facf9fc 27898+ err = test_linkable(h_root);
27899+ if (unlikely(err)) {
27900+ err = 1;
27901+ goto out;
27902+ }
27903+
27904+ /*
27905+ * todo: should this create be done in /sbin/mount.aufs helper?
27906+ */
27907+ err = -EEXIST;
27908+ h_dir = h_root->d_inode;
27909+ if (!base[AuBrWh_BASE].dentry->d_inode) {
27910+ err = mnt_want_write(h_path->mnt);
27911+ if (!err) {
27912+ h_path->dentry = base[AuBrWh_BASE].dentry;
27913+ err = vfsub_create(h_dir, h_path, WH_MASK);
27914+ mnt_drop_write(h_path->mnt);
27915+ }
27916+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
27917+ err = 0;
27918+ else
4a4d8108
AM
27919+ pr_err("unknown %.*s/%.*s exists\n",
27920+ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
1facf9fc 27921+ if (unlikely(err))
27922+ goto out;
27923+
27924+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27925+ if (do_plink) {
27926+ err = au_whdir(h_dir, h_path);
27927+ if (unlikely(err))
27928+ goto out;
27929+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27930+ } else
27931+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27932+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
27933+
27934+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27935+ err = au_whdir(h_dir, h_path);
27936+ if (unlikely(err))
27937+ goto out;
27938+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27939+
4f0767ce 27940+out:
1facf9fc 27941+ return err;
27942+}
27943+
27944+/*
27945+ * initialize the whiteout base file/dir for @br.
27946+ */
27947+int au_wh_init(struct dentry *h_root, struct au_branch *br,
27948+ struct super_block *sb)
27949+{
27950+ int err, i;
27951+ const unsigned char do_plink
27952+ = !!au_opt_test(au_mntflags(sb), PLINK);
27953+ struct path path = {
27954+ .mnt = br->br_mnt
27955+ };
27956+ struct inode *h_dir;
27957+ struct au_wbr *wbr = br->br_wbr;
27958+ static const struct qstr base_name[] = {
27959+ [AuBrWh_BASE] = {
27960+ .name = AUFS_BASE_NAME,
27961+ .len = sizeof(AUFS_BASE_NAME) - 1
27962+ },
27963+ [AuBrWh_PLINK] = {
27964+ .name = AUFS_PLINKDIR_NAME,
27965+ .len = sizeof(AUFS_PLINKDIR_NAME) - 1
27966+ },
27967+ [AuBrWh_ORPH] = {
27968+ .name = AUFS_ORPHDIR_NAME,
27969+ .len = sizeof(AUFS_ORPHDIR_NAME) - 1
27970+ }
27971+ };
27972+ struct au_wh_base base[] = {
27973+ [AuBrWh_BASE] = {
27974+ .name = base_name + AuBrWh_BASE,
27975+ .dentry = NULL
27976+ },
27977+ [AuBrWh_PLINK] = {
27978+ .name = base_name + AuBrWh_PLINK,
27979+ .dentry = NULL
27980+ },
27981+ [AuBrWh_ORPH] = {
27982+ .name = base_name + AuBrWh_ORPH,
27983+ .dentry = NULL
27984+ }
27985+ };
27986+
1308ab2a 27987+ if (wbr)
27988+ WbrWhMustWriteLock(wbr);
1facf9fc 27989+
1facf9fc 27990+ for (i = 0; i < AuBrWh_Last; i++) {
27991+ /* doubly whiteouted */
27992+ struct dentry *d;
27993+
27994+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
27995+ err = PTR_ERR(d);
27996+ if (IS_ERR(d))
27997+ goto out;
27998+
27999+ base[i].dentry = d;
28000+ AuDebugOn(wbr
28001+ && wbr->wbr_wh[i]
28002+ && wbr->wbr_wh[i] != base[i].dentry);
28003+ }
28004+
28005+ if (wbr)
28006+ for (i = 0; i < AuBrWh_Last; i++) {
28007+ dput(wbr->wbr_wh[i]);
28008+ wbr->wbr_wh[i] = NULL;
28009+ }
28010+
28011+ err = 0;
1e00d052 28012+ if (!au_br_writable(br->br_perm)) {
4a4d8108 28013+ h_dir = h_root->d_inode;
1facf9fc 28014+ au_wh_init_ro(h_dir, base, &path);
1e00d052 28015+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 28016+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
28017+ if (err > 0)
28018+ goto out;
28019+ else if (err)
28020+ goto out_err;
1e00d052 28021+ } else {
1facf9fc 28022+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
28023+ if (err > 0)
28024+ goto out;
28025+ else if (err)
28026+ goto out_err;
1facf9fc 28027+ }
28028+ goto out; /* success */
28029+
4f0767ce 28030+out_err:
4a4d8108
AM
28031+ pr_err("an error(%d) on the writable branch %.*s(%s)\n",
28032+ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
4f0767ce 28033+out:
1facf9fc 28034+ for (i = 0; i < AuBrWh_Last; i++)
28035+ dput(base[i].dentry);
28036+ return err;
28037+}
28038+
28039+/* ---------------------------------------------------------------------- */
28040+/*
28041+ * whiteouts are usually all hard-linked to the whiteout base.
28042+ * when its link count reaches a ceiling, we create a new whiteout base
28043+ * asynchronously.
28044+ */
28045+
28046+struct reinit_br_wh {
28047+ struct super_block *sb;
28048+ struct au_branch *br;
28049+};
28050+
28051+static void reinit_br_wh(void *arg)
28052+{
28053+ int err;
28054+ aufs_bindex_t bindex;
28055+ struct path h_path;
28056+ struct reinit_br_wh *a = arg;
28057+ struct au_wbr *wbr;
28058+ struct inode *dir;
28059+ struct dentry *h_root;
28060+ struct au_hinode *hdir;
28061+
28062+ err = 0;
28063+ wbr = a->br->br_wbr;
28064+ /* big aufs lock */
28065+ si_noflush_write_lock(a->sb);
28066+ if (!au_br_writable(a->br->br_perm))
28067+ goto out;
28068+ bindex = au_br_index(a->sb, a->br->br_id);
28069+ if (unlikely(bindex < 0))
28070+ goto out;
28071+
1308ab2a 28072+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 28073+ dir = a->sb->s_root->d_inode;
1facf9fc 28074+ hdir = au_hi(dir, bindex);
28075+ h_root = au_h_dptr(a->sb->s_root, bindex);
28076+
4a4d8108 28077+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 28078+ wbr_wh_write_lock(wbr);
28079+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
28080+ h_root, a->br);
28081+ if (!err) {
28082+ err = mnt_want_write(a->br->br_mnt);
28083+ if (!err) {
28084+ h_path.dentry = wbr->wbr_whbase;
28085+ h_path.mnt = a->br->br_mnt;
28086+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
28087+ mnt_drop_write(a->br->br_mnt);
28088+ }
28089+ } else {
4a4d8108
AM
28090+ pr_warning("%.*s is moved, ignored\n",
28091+ AuDLNPair(wbr->wbr_whbase));
1facf9fc 28092+ err = 0;
28093+ }
28094+ dput(wbr->wbr_whbase);
28095+ wbr->wbr_whbase = NULL;
28096+ if (!err)
28097+ err = au_wh_init(h_root, a->br, a->sb);
28098+ wbr_wh_write_unlock(wbr);
4a4d8108 28099+ au_hn_imtx_unlock(hdir);
1308ab2a 28100+ di_read_unlock(a->sb->s_root, AuLock_IR);
1facf9fc 28101+
4f0767ce 28102+out:
1facf9fc 28103+ if (wbr)
28104+ atomic_dec(&wbr->wbr_wh_running);
28105+ atomic_dec(&a->br->br_count);
1facf9fc 28106+ si_write_unlock(a->sb);
027c5e7a 28107+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 28108+ kfree(arg);
28109+ if (unlikely(err))
28110+ AuIOErr("err %d\n", err);
28111+}
28112+
28113+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
28114+{
28115+ int do_dec, wkq_err;
28116+ struct reinit_br_wh *arg;
28117+
28118+ do_dec = 1;
28119+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
28120+ goto out;
28121+
28122+ /* ignore ENOMEM */
28123+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
28124+ if (arg) {
28125+ /*
28126+ * dec(wh_running), kfree(arg) and dec(br_count)
28127+ * in reinit function
28128+ */
28129+ arg->sb = sb;
28130+ arg->br = br;
28131+ atomic_inc(&br->br_count);
53392da6 28132+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 28133+ if (unlikely(wkq_err)) {
28134+ atomic_dec(&br->br_wbr->wbr_wh_running);
28135+ atomic_dec(&br->br_count);
28136+ kfree(arg);
28137+ }
28138+ do_dec = 0;
28139+ }
28140+
4f0767ce 28141+out:
1facf9fc 28142+ if (do_dec)
28143+ atomic_dec(&br->br_wbr->wbr_wh_running);
28144+}
28145+
28146+/* ---------------------------------------------------------------------- */
28147+
28148+/*
28149+ * create the whiteout @wh.
28150+ */
28151+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
28152+ struct dentry *wh)
28153+{
28154+ int err;
28155+ struct path h_path = {
28156+ .dentry = wh
28157+ };
28158+ struct au_branch *br;
28159+ struct au_wbr *wbr;
28160+ struct dentry *h_parent;
28161+ struct inode *h_dir;
28162+
28163+ h_parent = wh->d_parent; /* dir inode is locked */
28164+ h_dir = h_parent->d_inode;
28165+ IMustLock(h_dir);
28166+
28167+ br = au_sbr(sb, bindex);
28168+ h_path.mnt = br->br_mnt;
28169+ wbr = br->br_wbr;
28170+ wbr_wh_read_lock(wbr);
28171+ if (wbr->wbr_whbase) {
28172+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
28173+ if (!err || err != -EMLINK)
28174+ goto out;
28175+
28176+ /* link count full. re-initialize br_whbase. */
28177+ kick_reinit_br_wh(sb, br);
28178+ }
28179+
28180+ /* return this error in this context */
28181+ err = vfsub_create(h_dir, &h_path, WH_MASK);
28182+
4f0767ce 28183+out:
1facf9fc 28184+ wbr_wh_read_unlock(wbr);
28185+ return err;
28186+}
28187+
28188+/* ---------------------------------------------------------------------- */
28189+
28190+/*
28191+ * create or remove the diropq.
28192+ */
28193+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
28194+ unsigned int flags)
28195+{
28196+ struct dentry *opq_dentry, *h_dentry;
28197+ struct super_block *sb;
28198+ struct au_branch *br;
28199+ int err;
28200+
28201+ sb = dentry->d_sb;
28202+ br = au_sbr(sb, bindex);
28203+ h_dentry = au_h_dptr(dentry, bindex);
28204+ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
28205+ if (IS_ERR(opq_dentry))
28206+ goto out;
28207+
28208+ if (au_ftest_diropq(flags, CREATE)) {
28209+ err = link_or_create_wh(sb, bindex, opq_dentry);
28210+ if (!err) {
28211+ au_set_dbdiropq(dentry, bindex);
28212+ goto out; /* success */
28213+ }
28214+ } else {
28215+ struct path tmp = {
28216+ .dentry = opq_dentry,
28217+ .mnt = br->br_mnt
28218+ };
28219+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
28220+ if (!err)
28221+ au_set_dbdiropq(dentry, -1);
28222+ }
28223+ dput(opq_dentry);
28224+ opq_dentry = ERR_PTR(err);
28225+
4f0767ce 28226+out:
1facf9fc 28227+ return opq_dentry;
28228+}
28229+
28230+struct do_diropq_args {
28231+ struct dentry **errp;
28232+ struct dentry *dentry;
28233+ aufs_bindex_t bindex;
28234+ unsigned int flags;
28235+};
28236+
28237+static void call_do_diropq(void *args)
28238+{
28239+ struct do_diropq_args *a = args;
28240+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
28241+}
28242+
28243+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28244+ unsigned int flags)
28245+{
28246+ struct dentry *diropq, *h_dentry;
28247+
28248+ h_dentry = au_h_dptr(dentry, bindex);
28249+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
28250+ diropq = do_diropq(dentry, bindex, flags);
28251+ else {
28252+ int wkq_err;
28253+ struct do_diropq_args args = {
28254+ .errp = &diropq,
28255+ .dentry = dentry,
28256+ .bindex = bindex,
28257+ .flags = flags
28258+ };
28259+
28260+ wkq_err = au_wkq_wait(call_do_diropq, &args);
28261+ if (unlikely(wkq_err))
28262+ diropq = ERR_PTR(wkq_err);
28263+ }
28264+
28265+ return diropq;
28266+}
28267+
28268+/* ---------------------------------------------------------------------- */
28269+
28270+/*
28271+ * lookup whiteout dentry.
28272+ * @h_parent: lower parent dentry which must exist and be locked
28273+ * @base_name: name of dentry which will be whiteouted
28274+ * returns dentry for whiteout.
28275+ */
28276+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28277+ struct au_branch *br)
28278+{
28279+ int err;
28280+ struct qstr wh_name;
28281+ struct dentry *wh_dentry;
28282+
28283+ err = au_wh_name_alloc(&wh_name, base_name);
28284+ wh_dentry = ERR_PTR(err);
28285+ if (!err) {
28286+ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
28287+ kfree(wh_name.name);
28288+ }
28289+ return wh_dentry;
28290+}
28291+
28292+/*
28293+ * link/create a whiteout for @dentry on @bindex.
28294+ */
28295+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28296+ struct dentry *h_parent)
28297+{
28298+ struct dentry *wh_dentry;
28299+ struct super_block *sb;
28300+ int err;
28301+
28302+ sb = dentry->d_sb;
28303+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
28304+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
28305+ err = link_or_create_wh(sb, bindex, wh_dentry);
28306+ if (!err)
28307+ au_set_dbwh(dentry, bindex);
28308+ else {
28309+ dput(wh_dentry);
28310+ wh_dentry = ERR_PTR(err);
28311+ }
28312+ }
28313+
28314+ return wh_dentry;
28315+}
28316+
28317+/* ---------------------------------------------------------------------- */
28318+
28319+/* Delete all whiteouts in this directory on branch bindex. */
28320+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
28321+ aufs_bindex_t bindex, struct au_branch *br)
28322+{
28323+ int err;
28324+ unsigned long ul, n;
28325+ struct qstr wh_name;
28326+ char *p;
28327+ struct hlist_head *head;
28328+ struct au_vdir_wh *tpos;
28329+ struct hlist_node *pos;
28330+ struct au_vdir_destr *str;
28331+
28332+ err = -ENOMEM;
4a4d8108 28333+ p = __getname_gfp(GFP_NOFS);
1facf9fc 28334+ wh_name.name = p;
28335+ if (unlikely(!wh_name.name))
28336+ goto out;
28337+
28338+ err = 0;
28339+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
28340+ p += AUFS_WH_PFX_LEN;
28341+ n = whlist->nh_num;
28342+ head = whlist->nh_head;
28343+ for (ul = 0; !err && ul < n; ul++, head++) {
28344+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
28345+ if (tpos->wh_bindex != bindex)
28346+ continue;
28347+
28348+ str = &tpos->wh_str;
28349+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
28350+ memcpy(p, str->name, str->len);
28351+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
28352+ err = unlink_wh_name(h_dentry, &wh_name, br);
28353+ if (!err)
28354+ continue;
28355+ break;
28356+ }
28357+ AuIOErr("whiteout name too long %.*s\n",
28358+ str->len, str->name);
28359+ err = -EIO;
28360+ break;
28361+ }
28362+ }
28363+ __putname(wh_name.name);
28364+
4f0767ce 28365+out:
1facf9fc 28366+ return err;
28367+}
28368+
28369+struct del_wh_children_args {
28370+ int *errp;
28371+ struct dentry *h_dentry;
1308ab2a 28372+ struct au_nhash *whlist;
1facf9fc 28373+ aufs_bindex_t bindex;
28374+ struct au_branch *br;
28375+};
28376+
28377+static void call_del_wh_children(void *args)
28378+{
28379+ struct del_wh_children_args *a = args;
1308ab2a 28380+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 28381+}
28382+
28383+/* ---------------------------------------------------------------------- */
28384+
28385+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
28386+{
28387+ struct au_whtmp_rmdir *whtmp;
dece6358 28388+ int err;
1308ab2a 28389+ unsigned int rdhash;
dece6358
AM
28390+
28391+ SiMustAnyLock(sb);
1facf9fc 28392+
28393+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
28394+ if (unlikely(!whtmp)) {
28395+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 28396+ goto out;
dece6358 28397+ }
1facf9fc 28398+
28399+ whtmp->dir = NULL;
027c5e7a 28400+ whtmp->br = NULL;
1facf9fc 28401+ whtmp->wh_dentry = NULL;
1308ab2a 28402+ /* no estimation for dir size */
28403+ rdhash = au_sbi(sb)->si_rdhash;
28404+ if (!rdhash)
28405+ rdhash = AUFS_RDHASH_DEF;
28406+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
28407+ if (unlikely(err)) {
28408+ kfree(whtmp);
28409+ whtmp = ERR_PTR(err);
28410+ }
dece6358 28411+
4f0767ce 28412+out:
dece6358 28413+ return whtmp;
1facf9fc 28414+}
28415+
28416+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
28417+{
027c5e7a
AM
28418+ if (whtmp->br)
28419+ atomic_dec(&whtmp->br->br_count);
1facf9fc 28420+ dput(whtmp->wh_dentry);
28421+ iput(whtmp->dir);
dece6358 28422+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 28423+ kfree(whtmp);
28424+}
28425+
28426+/*
28427+ * rmdir the whiteouted temporary named dir @wh_dentry.
28428+ * @whlist: whiteouted children.
28429+ */
28430+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28431+ struct dentry *wh_dentry, struct au_nhash *whlist)
28432+{
28433+ int err;
28434+ struct path h_tmp;
28435+ struct inode *wh_inode, *h_dir;
28436+ struct au_branch *br;
28437+
28438+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
28439+ IMustLock(h_dir);
28440+
28441+ br = au_sbr(dir->i_sb, bindex);
28442+ wh_inode = wh_dentry->d_inode;
28443+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
28444+
28445+ /*
28446+ * someone else might change some whiteouts while we were sleeping.
28447+ * it means this whlist may have an obsoleted entry.
28448+ */
28449+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
28450+ err = del_wh_children(wh_dentry, whlist, bindex, br);
28451+ else {
28452+ int wkq_err;
28453+ struct del_wh_children_args args = {
28454+ .errp = &err,
28455+ .h_dentry = wh_dentry,
1308ab2a 28456+ .whlist = whlist,
1facf9fc 28457+ .bindex = bindex,
28458+ .br = br
28459+ };
28460+
28461+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
28462+ if (unlikely(wkq_err))
28463+ err = wkq_err;
28464+ }
28465+ mutex_unlock(&wh_inode->i_mutex);
28466+
28467+ if (!err) {
28468+ h_tmp.dentry = wh_dentry;
28469+ h_tmp.mnt = br->br_mnt;
28470+ err = vfsub_rmdir(h_dir, &h_tmp);
1facf9fc 28471+ }
28472+
28473+ if (!err) {
28474+ if (au_ibstart(dir) == bindex) {
7f207e10 28475+ /* todo: dir->i_mutex is necessary */
1facf9fc 28476+ au_cpup_attr_timesizes(dir);
7f207e10 28477+ vfsub_drop_nlink(dir);
1facf9fc 28478+ }
28479+ return 0; /* success */
28480+ }
28481+
4a4d8108
AM
28482+ pr_warning("failed removing %.*s(%d), ignored\n",
28483+ AuDLNPair(wh_dentry), err);
1facf9fc 28484+ return err;
28485+}
28486+
28487+static void call_rmdir_whtmp(void *args)
28488+{
28489+ int err;
e49829fe 28490+ aufs_bindex_t bindex;
1facf9fc 28491+ struct au_whtmp_rmdir *a = args;
28492+ struct super_block *sb;
28493+ struct dentry *h_parent;
28494+ struct inode *h_dir;
1facf9fc 28495+ struct au_hinode *hdir;
28496+
28497+ /* rmdir by nfsd may cause deadlock with this i_mutex */
28498+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 28499+ err = -EROFS;
1facf9fc 28500+ sb = a->dir->i_sb;
e49829fe
JR
28501+ si_read_lock(sb, !AuLock_FLUSH);
28502+ if (!au_br_writable(a->br->br_perm))
28503+ goto out;
28504+ bindex = au_br_index(sb, a->br->br_id);
28505+ if (unlikely(bindex < 0))
1facf9fc 28506+ goto out;
28507+
28508+ err = -EIO;
1facf9fc 28509+ ii_write_lock_parent(a->dir);
28510+ h_parent = dget_parent(a->wh_dentry);
28511+ h_dir = h_parent->d_inode;
e49829fe 28512+ hdir = au_hi(a->dir, bindex);
4a4d8108 28513+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
28514+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
28515+ a->br);
1facf9fc 28516+ if (!err) {
e49829fe 28517+ err = mnt_want_write(a->br->br_mnt);
1facf9fc 28518+ if (!err) {
e49829fe 28519+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
dece6358 28520+ &a->whlist);
e49829fe 28521+ mnt_drop_write(a->br->br_mnt);
1facf9fc 28522+ }
28523+ }
4a4d8108 28524+ au_hn_imtx_unlock(hdir);
1facf9fc 28525+ dput(h_parent);
28526+ ii_write_unlock(a->dir);
28527+
4f0767ce 28528+out:
1facf9fc 28529+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 28530+ au_whtmp_rmdir_free(a);
027c5e7a
AM
28531+ si_read_unlock(sb);
28532+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28533+ if (unlikely(err))
28534+ AuIOErr("err %d\n", err);
28535+}
28536+
28537+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28538+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
28539+{
28540+ int wkq_err;
e49829fe 28541+ struct super_block *sb;
1facf9fc 28542+
28543+ IMustLock(dir);
28544+
28545+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 28546+ sb = dir->i_sb;
1facf9fc 28547+ args->dir = au_igrab(dir);
e49829fe
JR
28548+ args->br = au_sbr(sb, bindex);
28549+ atomic_inc(&args->br->br_count);
1facf9fc 28550+ args->wh_dentry = dget(wh_dentry);
53392da6 28551+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 28552+ if (unlikely(wkq_err)) {
4a4d8108
AM
28553+ pr_warning("rmdir error %.*s (%d), ignored\n",
28554+ AuDLNPair(wh_dentry), wkq_err);
1facf9fc 28555+ au_whtmp_rmdir_free(args);
28556+ }
28557+}
7f207e10
AM
28558diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
28559--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
53392da6 28560+++ linux/fs/aufs/whout.h 2011-08-24 13:30:24.734646739 +0200
7f207e10 28561@@ -0,0 +1,89 @@
1facf9fc 28562+/*
027c5e7a 28563+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28564+ *
28565+ * This program, aufs is free software; you can redistribute it and/or modify
28566+ * it under the terms of the GNU General Public License as published by
28567+ * the Free Software Foundation; either version 2 of the License, or
28568+ * (at your option) any later version.
dece6358
AM
28569+ *
28570+ * This program is distributed in the hope that it will be useful,
28571+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28572+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28573+ * GNU General Public License for more details.
28574+ *
28575+ * You should have received a copy of the GNU General Public License
28576+ * along with this program; if not, write to the Free Software
28577+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28578+ */
28579+
28580+/*
28581+ * whiteout for logical deletion and opaque directory
28582+ */
28583+
28584+#ifndef __AUFS_WHOUT_H__
28585+#define __AUFS_WHOUT_H__
28586+
28587+#ifdef __KERNEL__
28588+
1facf9fc 28589+#include <linux/aufs_type.h>
28590+#include "dir.h"
28591+
28592+/* whout.c */
28593+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
28594+struct au_branch;
28595+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
28596+ struct au_branch *br, int try_sio);
28597+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
28598+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
28599+ struct qstr *prefix);
28600+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
28601+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
28602+ struct dentry *dentry);
28603+int au_wh_init(struct dentry *h_parent, struct au_branch *br,
28604+ struct super_block *sb);
28605+
28606+/* diropq flags */
28607+#define AuDiropq_CREATE 1
28608+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
28609+#define au_fset_diropq(flags, name) \
28610+ do { (flags) |= AuDiropq_##name; } while (0)
28611+#define au_fclr_diropq(flags, name) \
28612+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 28613+
28614+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28615+ unsigned int flags);
28616+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28617+ struct au_branch *br);
28618+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28619+ struct dentry *h_parent);
28620+
28621+/* real rmdir for the whiteout-ed dir */
28622+struct au_whtmp_rmdir {
28623+ struct inode *dir;
e49829fe 28624+ struct au_branch *br;
1facf9fc 28625+ struct dentry *wh_dentry;
dece6358 28626+ struct au_nhash whlist;
1facf9fc 28627+};
28628+
28629+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
28630+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
28631+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28632+ struct dentry *wh_dentry, struct au_nhash *whlist);
28633+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28634+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
28635+
28636+/* ---------------------------------------------------------------------- */
28637+
28638+static inline struct dentry *au_diropq_create(struct dentry *dentry,
28639+ aufs_bindex_t bindex)
28640+{
28641+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
28642+}
28643+
28644+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
28645+{
28646+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
28647+}
28648+
28649+#endif /* __KERNEL__ */
28650+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
28651diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
28652--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28653+++ linux/fs/aufs/wkq.c 2011-08-24 13:30:24.734646739 +0200
28654@@ -0,0 +1,244 @@
1facf9fc 28655+/*
027c5e7a 28656+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28657+ *
28658+ * This program, aufs is free software; you can redistribute it and/or modify
28659+ * it under the terms of the GNU General Public License as published by
28660+ * the Free Software Foundation; either version 2 of the License, or
28661+ * (at your option) any later version.
dece6358
AM
28662+ *
28663+ * This program is distributed in the hope that it will be useful,
28664+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28665+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28666+ * GNU General Public License for more details.
28667+ *
28668+ * You should have received a copy of the GNU General Public License
28669+ * along with this program; if not, write to the Free Software
28670+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28671+ */
28672+
28673+/*
28674+ * workqueue for asynchronous/super-io operations
28675+ * todo: try new credential scheme
28676+ */
28677+
dece6358 28678+#include <linux/module.h>
1facf9fc 28679+#include "aufs.h"
28680+
b752ccd1
AM
28681+/* internal workqueue named AUFS_WKQ_NAME and AUFS_WKQ_PRE_NAME */
28682+enum {
28683+ AuWkq_INORMAL,
28684+ AuWkq_IPRE
28685+};
28686+
28687+static struct {
28688+ char *name;
28689+ struct workqueue_struct *wkq;
28690+} au_wkq[] = {
28691+ [AuWkq_INORMAL] = {
28692+ .name = AUFS_WKQ_NAME
28693+ },
28694+ [AuWkq_IPRE] = {
28695+ .name = AUFS_WKQ_PRE_NAME
28696+ }
28697+};
1facf9fc 28698+
28699+struct au_wkinfo {
28700+ struct work_struct wk;
7f207e10 28701+ struct kobject *kobj;
1facf9fc 28702+
28703+ unsigned int flags; /* see wkq.h */
28704+
28705+ au_wkq_func_t func;
28706+ void *args;
28707+
1facf9fc 28708+ struct completion *comp;
28709+};
28710+
28711+/* ---------------------------------------------------------------------- */
28712+
1facf9fc 28713+static void wkq_func(struct work_struct *wk)
28714+{
28715+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
28716+
7f207e10
AM
28717+ AuDebugOn(current_fsuid());
28718+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
28719+
1facf9fc 28720+ wkinfo->func(wkinfo->args);
1facf9fc 28721+ if (au_ftest_wkq(wkinfo->flags, WAIT))
28722+ complete(wkinfo->comp);
28723+ else {
7f207e10 28724+ kobject_put(wkinfo->kobj);
1facf9fc 28725+ module_put(THIS_MODULE);
28726+ kfree(wkinfo);
28727+ }
28728+}
28729+
28730+/*
28731+ * Since struct completion is large, try allocating it dynamically.
28732+ */
28733+#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
28734+#define AuWkqCompDeclare(name) struct completion *comp = NULL
28735+
28736+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28737+{
28738+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
28739+ if (*comp) {
28740+ init_completion(*comp);
28741+ wkinfo->comp = *comp;
28742+ return 0;
28743+ }
28744+ return -ENOMEM;
28745+}
28746+
28747+static void au_wkq_comp_free(struct completion *comp)
28748+{
28749+ kfree(comp);
28750+}
28751+
28752+#else
28753+
28754+/* no braces */
28755+#define AuWkqCompDeclare(name) \
28756+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
28757+ struct completion *comp = &_ ## name
28758+
28759+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28760+{
28761+ wkinfo->comp = *comp;
28762+ return 0;
28763+}
28764+
28765+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
28766+{
28767+ /* empty */
28768+}
28769+#endif /* 4KSTACKS */
28770+
53392da6 28771+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 28772+{
b752ccd1
AM
28773+ struct workqueue_struct *wkq;
28774+
53392da6
AM
28775+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
28776+ if (au_wkq_test()) {
28777+ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n");
28778+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
28779+ }
28780+ } else
28781+ au_dbg_verify_kthread();
28782+
28783+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 28784+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
b752ccd1 28785+ wkq = au_wkq[AuWkq_INORMAL].wkq;
53392da6 28786+ if (au_ftest_wkq(wkinfo->flags, PRE))
b752ccd1
AM
28787+ wkq = au_wkq[AuWkq_IPRE].wkq;
28788+ queue_work(wkq, &wkinfo->wk);
4a4d8108
AM
28789+ } else {
28790+ INIT_WORK(&wkinfo->wk, wkq_func);
28791+ schedule_work(&wkinfo->wk);
28792+ }
1facf9fc 28793+}
28794+
7f207e10
AM
28795+/*
28796+ * Be careful. It is easy to make deadlock happen.
28797+ * processA: lock, wkq and wait
28798+ * processB: wkq and wait, lock in wkq
28799+ * --> deadlock
28800+ */
b752ccd1 28801+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 28802+{
28803+ int err;
28804+ AuWkqCompDeclare(comp);
28805+ struct au_wkinfo wkinfo = {
b752ccd1 28806+ .flags = flags,
1facf9fc 28807+ .func = func,
28808+ .args = args
28809+ };
28810+
28811+ err = au_wkq_comp_alloc(&wkinfo, &comp);
28812+ if (!err) {
53392da6 28813+ au_wkq_run(&wkinfo);
1facf9fc 28814+ /* no timeout, no interrupt */
28815+ wait_for_completion(wkinfo.comp);
28816+ au_wkq_comp_free(comp);
4a4d8108 28817+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 28818+ }
28819+
28820+ return err;
28821+
28822+}
28823+
027c5e7a
AM
28824+/*
28825+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
28826+ * problem in a concurrent umounting.
28827+ */
53392da6
AM
28828+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28829+ unsigned int flags)
1facf9fc 28830+{
28831+ int err;
28832+ struct au_wkinfo *wkinfo;
28833+
28834+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
28835+
28836+ /*
28837+ * wkq_func() must free this wkinfo.
28838+ * it highly depends upon the implementation of workqueue.
28839+ */
28840+ err = 0;
28841+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
28842+ if (wkinfo) {
7f207e10 28843+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 28844+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 28845+ wkinfo->func = func;
28846+ wkinfo->args = args;
28847+ wkinfo->comp = NULL;
7f207e10 28848+ kobject_get(wkinfo->kobj);
1facf9fc 28849+ __module_get(THIS_MODULE);
28850+
53392da6 28851+ au_wkq_run(wkinfo);
1facf9fc 28852+ } else {
28853+ err = -ENOMEM;
e49829fe 28854+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28855+ }
28856+
28857+ return err;
28858+}
28859+
28860+/* ---------------------------------------------------------------------- */
28861+
28862+void au_nwt_init(struct au_nowait_tasks *nwt)
28863+{
28864+ atomic_set(&nwt->nw_len, 0);
4a4d8108 28865+ /* smp_mb(); */ /* atomic_set */
1facf9fc 28866+ init_waitqueue_head(&nwt->nw_wq);
28867+}
28868+
28869+void au_wkq_fin(void)
28870+{
b752ccd1
AM
28871+ int i;
28872+
28873+ for (i = 0; i < ARRAY_SIZE(au_wkq); i++)
28874+ if (au_wkq[i].wkq)
28875+ destroy_workqueue(au_wkq[i].wkq);
1facf9fc 28876+}
28877+
28878+int __init au_wkq_init(void)
28879+{
b752ccd1
AM
28880+ int err, i;
28881+
28882+ err = 0;
28883+ for (i = 0; !err && i < ARRAY_SIZE(au_wkq); i++) {
7f207e10
AM
28884+ BUILD_BUG_ON(!WQ_RESCUER);
28885+ au_wkq[i].wkq = alloc_workqueue(au_wkq[i].name, !WQ_RESCUER,
28886+ WQ_DFL_ACTIVE);
b752ccd1
AM
28887+ if (IS_ERR(au_wkq[i].wkq))
28888+ err = PTR_ERR(au_wkq[i].wkq);
28889+ else if (!au_wkq[i].wkq)
28890+ err = -ENOMEM;
28891+ if (unlikely(err))
28892+ au_wkq[i].wkq = NULL;
28893+ }
7f207e10 28894+ if (unlikely(err))
b752ccd1
AM
28895+ au_wkq_fin();
28896+
28897+ return err;
1facf9fc 28898+}
7f207e10
AM
28899diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
28900--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28901+++ linux/fs/aufs/wkq.h 2011-08-24 13:30:24.737979976 +0200
28902@@ -0,0 +1,101 @@
1facf9fc 28903+/*
027c5e7a 28904+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28905+ *
28906+ * This program, aufs is free software; you can redistribute it and/or modify
28907+ * it under the terms of the GNU General Public License as published by
28908+ * the Free Software Foundation; either version 2 of the License, or
28909+ * (at your option) any later version.
dece6358
AM
28910+ *
28911+ * This program is distributed in the hope that it will be useful,
28912+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28913+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28914+ * GNU General Public License for more details.
28915+ *
28916+ * You should have received a copy of the GNU General Public License
28917+ * along with this program; if not, write to the Free Software
28918+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28919+ */
28920+
28921+/*
28922+ * workqueue for asynchronous/super-io operations
28923+ * todo: try new credentials management scheme
28924+ */
28925+
28926+#ifndef __AUFS_WKQ_H__
28927+#define __AUFS_WKQ_H__
28928+
28929+#ifdef __KERNEL__
28930+
1facf9fc 28931+#include <linux/sched.h>
dece6358 28932+#include <linux/wait.h>
1facf9fc 28933+#include <linux/aufs_type.h>
28934+
dece6358
AM
28935+struct super_block;
28936+
1facf9fc 28937+/* ---------------------------------------------------------------------- */
28938+
28939+/*
28940+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
28941+ */
28942+struct au_nowait_tasks {
28943+ atomic_t nw_len;
28944+ wait_queue_head_t nw_wq;
28945+};
28946+
28947+/* ---------------------------------------------------------------------- */
28948+
28949+typedef void (*au_wkq_func_t)(void *args);
28950+
28951+/* wkq flags */
28952+#define AuWkq_WAIT 1
b752ccd1 28953+#define AuWkq_PRE (1 << 1)
53392da6
AM
28954+#ifdef CONFIG_AUFS_HNOTIFY
28955+#define AuWkq_NEST (1 << 2)
28956+#else
28957+#define AuWkq_NEST 0
28958+#endif
1facf9fc 28959+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
28960+#define au_fset_wkq(flags, name) \
28961+ do { (flags) |= AuWkq_##name; } while (0)
28962+#define au_fclr_wkq(flags, name) \
28963+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 28964+
28965+/* wkq.c */
b752ccd1 28966+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
28967+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28968+ unsigned int flags);
1facf9fc 28969+void au_nwt_init(struct au_nowait_tasks *nwt);
28970+int __init au_wkq_init(void);
28971+void au_wkq_fin(void);
28972+
28973+/* ---------------------------------------------------------------------- */
28974+
53392da6
AM
28975+static inline int au_wkq_test(void)
28976+{
28977+ return current->flags & PF_WQ_WORKER;
28978+}
28979+
b752ccd1
AM
28980+static inline int au_wkq_wait_pre(au_wkq_func_t func, void *args)
28981+{
28982+ return au_wkq_do_wait(AuWkq_WAIT | AuWkq_PRE, func, args);
28983+}
28984+
28985+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 28986+{
b752ccd1 28987+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 28988+}
28989+
28990+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
28991+{
e49829fe 28992+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 28993+ wake_up_all(&nwt->nw_wq);
28994+}
28995+
28996+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
28997+{
28998+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
28999+ return 0;
29000+}
29001+
29002+#endif /* __KERNEL__ */
29003+#endif /* __AUFS_WKQ_H__ */
7f207e10
AM
29004diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
29005--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
53392da6 29006+++ linux/fs/aufs/xino.c 2011-08-24 13:30:24.737979976 +0200
7f207e10 29007@@ -0,0 +1,1265 @@
1facf9fc 29008+/*
027c5e7a 29009+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 29010+ *
29011+ * This program, aufs is free software; you can redistribute it and/or modify
29012+ * it under the terms of the GNU General Public License as published by
29013+ * the Free Software Foundation; either version 2 of the License, or
29014+ * (at your option) any later version.
dece6358
AM
29015+ *
29016+ * This program is distributed in the hope that it will be useful,
29017+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29018+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29019+ * GNU General Public License for more details.
29020+ *
29021+ * You should have received a copy of the GNU General Public License
29022+ * along with this program; if not, write to the Free Software
29023+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 29024+ */
29025+
29026+/*
29027+ * external inode number translation table and bitmap
29028+ */
29029+
dece6358 29030+#include <linux/file.h>
1facf9fc 29031+#include <linux/seq_file.h>
29032+#include <linux/uaccess.h>
29033+#include "aufs.h"
29034+
b752ccd1 29035+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 29036+ loff_t *pos)
29037+{
29038+ ssize_t err;
29039+ mm_segment_t oldfs;
b752ccd1
AM
29040+ union {
29041+ void *k;
29042+ char __user *u;
29043+ } buf;
1facf9fc 29044+
b752ccd1 29045+ buf.k = kbuf;
1facf9fc 29046+ oldfs = get_fs();
29047+ set_fs(KERNEL_DS);
29048+ do {
29049+ /* todo: signal_pending? */
b752ccd1 29050+ err = func(file, buf.u, size, pos);
1facf9fc 29051+ } while (err == -EAGAIN || err == -EINTR);
29052+ set_fs(oldfs);
29053+
29054+#if 0 /* reserved for future use */
29055+ if (err > 0)
29056+ fsnotify_access(file->f_dentry);
29057+#endif
29058+
29059+ return err;
29060+}
29061+
29062+/* ---------------------------------------------------------------------- */
29063+
b752ccd1 29064+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf,
1facf9fc 29065+ size_t size, loff_t *pos)
29066+{
29067+ ssize_t err;
29068+ mm_segment_t oldfs;
b752ccd1
AM
29069+ union {
29070+ void *k;
29071+ const char __user *u;
29072+ } buf;
1facf9fc 29073+
b752ccd1 29074+ buf.k = kbuf;
1facf9fc 29075+ oldfs = get_fs();
29076+ set_fs(KERNEL_DS);
1facf9fc 29077+ do {
29078+ /* todo: signal_pending? */
b752ccd1 29079+ err = func(file, buf.u, size, pos);
1facf9fc 29080+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 29081+ set_fs(oldfs);
29082+
29083+#if 0 /* reserved for future use */
29084+ if (err > 0)
29085+ fsnotify_modify(file->f_dentry);
29086+#endif
29087+
29088+ return err;
29089+}
29090+
29091+struct do_xino_fwrite_args {
29092+ ssize_t *errp;
29093+ au_writef_t func;
29094+ struct file *file;
29095+ void *buf;
29096+ size_t size;
29097+ loff_t *pos;
29098+};
29099+
29100+static void call_do_xino_fwrite(void *args)
29101+{
29102+ struct do_xino_fwrite_args *a = args;
29103+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
29104+}
29105+
29106+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
29107+ loff_t *pos)
29108+{
29109+ ssize_t err;
29110+
29111+ /* todo: signal block and no wkq? */
b752ccd1
AM
29112+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
29113+ lockdep_off();
29114+ err = do_xino_fwrite(func, file, buf, size, pos);
29115+ lockdep_on();
29116+ } else {
29117+ /*
29118+ * it breaks RLIMIT_FSIZE and normal user's limit,
29119+ * users should care about quota and real 'filesystem full.'
29120+ */
1facf9fc 29121+ int wkq_err;
29122+ struct do_xino_fwrite_args args = {
29123+ .errp = &err,
29124+ .func = func,
29125+ .file = file,
29126+ .buf = buf,
29127+ .size = size,
29128+ .pos = pos
29129+ };
29130+
29131+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
29132+ if (unlikely(wkq_err))
29133+ err = wkq_err;
b752ccd1 29134+ }
1facf9fc 29135+
29136+ return err;
29137+}
29138+
29139+/* ---------------------------------------------------------------------- */
29140+
29141+/*
29142+ * create a new xinofile at the same place/path as @base_file.
29143+ */
29144+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
29145+{
29146+ struct file *file;
4a4d8108 29147+ struct dentry *base, *parent;
1facf9fc 29148+ struct inode *dir;
29149+ struct qstr *name;
1308ab2a 29150+ struct path path;
4a4d8108 29151+ int err;
1facf9fc 29152+
29153+ base = base_file->f_dentry;
29154+ parent = base->d_parent; /* dir inode is locked */
29155+ dir = parent->d_inode;
29156+ IMustLock(dir);
29157+
29158+ file = ERR_PTR(-EINVAL);
29159+ name = &base->d_name;
4a4d8108
AM
29160+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
29161+ if (IS_ERR(path.dentry)) {
29162+ file = (void *)path.dentry;
29163+ pr_err("%.*s lookup err %ld\n",
29164+ AuLNPair(name), PTR_ERR(path.dentry));
1facf9fc 29165+ goto out;
29166+ }
29167+
29168+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 29169+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 29170+ if (unlikely(err)) {
29171+ file = ERR_PTR(err);
4a4d8108 29172+ pr_err("%.*s create err %d\n", AuLNPair(name), err);
1facf9fc 29173+ goto out_dput;
29174+ }
29175+
1308ab2a 29176+ path.mnt = base_file->f_vfsmnt;
4a4d8108 29177+ file = vfsub_dentry_open(&path,
7f207e10 29178+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29179+ /* | __FMODE_NONOTIFY */);
1facf9fc 29180+ if (IS_ERR(file)) {
4a4d8108 29181+ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
1facf9fc 29182+ goto out_dput;
29183+ }
29184+
29185+ err = vfsub_unlink(dir, &file->f_path, /*force*/0);
29186+ if (unlikely(err)) {
4a4d8108 29187+ pr_err("%.*s unlink err %d\n", AuLNPair(name), err);
1facf9fc 29188+ goto out_fput;
29189+ }
29190+
29191+ if (copy_src) {
29192+ /* no one can touch copy_src xino */
29193+ err = au_copy_file(file, copy_src,
29194+ i_size_read(copy_src->f_dentry->d_inode));
29195+ if (unlikely(err)) {
4a4d8108 29196+ pr_err("%.*s copy err %d\n", AuLNPair(name), err);
1facf9fc 29197+ goto out_fput;
29198+ }
29199+ }
29200+ goto out_dput; /* success */
29201+
4f0767ce 29202+out_fput:
1facf9fc 29203+ fput(file);
29204+ file = ERR_PTR(err);
4f0767ce 29205+out_dput:
4a4d8108 29206+ dput(path.dentry);
4f0767ce 29207+out:
1facf9fc 29208+ return file;
29209+}
29210+
29211+struct au_xino_lock_dir {
29212+ struct au_hinode *hdir;
29213+ struct dentry *parent;
29214+ struct mutex *mtx;
29215+};
29216+
29217+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
29218+ struct au_xino_lock_dir *ldir)
29219+{
29220+ aufs_bindex_t brid, bindex;
29221+
29222+ ldir->hdir = NULL;
29223+ bindex = -1;
29224+ brid = au_xino_brid(sb);
29225+ if (brid >= 0)
29226+ bindex = au_br_index(sb, brid);
29227+ if (bindex >= 0) {
29228+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 29229+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 29230+ } else {
29231+ ldir->parent = dget_parent(xino->f_dentry);
29232+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
29233+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
29234+ }
29235+}
29236+
29237+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
29238+{
29239+ if (ldir->hdir)
4a4d8108 29240+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 29241+ else {
29242+ mutex_unlock(ldir->mtx);
29243+ dput(ldir->parent);
29244+ }
29245+}
29246+
29247+/* ---------------------------------------------------------------------- */
29248+
29249+/* truncate xino files asynchronously */
29250+
29251+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
29252+{
29253+ int err;
29254+ aufs_bindex_t bi, bend;
29255+ struct au_branch *br;
29256+ struct file *new_xino, *file;
29257+ struct super_block *h_sb;
29258+ struct au_xino_lock_dir ldir;
29259+
29260+ err = -EINVAL;
29261+ bend = au_sbend(sb);
29262+ if (unlikely(bindex < 0 || bend < bindex))
29263+ goto out;
29264+ br = au_sbr(sb, bindex);
29265+ file = br->br_xino.xi_file;
29266+ if (!file)
29267+ goto out;
29268+
29269+ au_xino_lock_dir(sb, file, &ldir);
29270+ /* mnt_want_write() is unnecessary here */
29271+ new_xino = au_xino_create2(file, file);
29272+ au_xino_unlock_dir(&ldir);
29273+ err = PTR_ERR(new_xino);
29274+ if (IS_ERR(new_xino))
29275+ goto out;
29276+ err = 0;
29277+ fput(file);
29278+ br->br_xino.xi_file = new_xino;
29279+
29280+ h_sb = br->br_mnt->mnt_sb;
29281+ for (bi = 0; bi <= bend; bi++) {
29282+ if (unlikely(bi == bindex))
29283+ continue;
29284+ br = au_sbr(sb, bi);
29285+ if (br->br_mnt->mnt_sb != h_sb)
29286+ continue;
29287+
29288+ fput(br->br_xino.xi_file);
29289+ br->br_xino.xi_file = new_xino;
29290+ get_file(new_xino);
29291+ }
29292+
4f0767ce 29293+out:
1facf9fc 29294+ return err;
29295+}
29296+
29297+struct xino_do_trunc_args {
29298+ struct super_block *sb;
29299+ struct au_branch *br;
29300+};
29301+
29302+static void xino_do_trunc(void *_args)
29303+{
29304+ struct xino_do_trunc_args *args = _args;
29305+ struct super_block *sb;
29306+ struct au_branch *br;
29307+ struct inode *dir;
29308+ int err;
29309+ aufs_bindex_t bindex;
29310+
29311+ err = 0;
29312+ sb = args->sb;
29313+ dir = sb->s_root->d_inode;
29314+ br = args->br;
29315+
29316+ si_noflush_write_lock(sb);
29317+ ii_read_lock_parent(dir);
29318+ bindex = au_br_index(sb, br->br_id);
29319+ err = au_xino_trunc(sb, bindex);
dece6358
AM
29320+ if (!err
29321+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
1facf9fc 29322+ >= br->br_xino_upper)
29323+ br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
29324+
1facf9fc 29325+ ii_read_unlock(dir);
29326+ if (unlikely(err))
4a4d8108 29327+ pr_warning("err b%d, (%d)\n", bindex, err);
1facf9fc 29328+ atomic_dec(&br->br_xino_running);
29329+ atomic_dec(&br->br_count);
1facf9fc 29330+ si_write_unlock(sb);
027c5e7a 29331+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 29332+ kfree(args);
29333+}
29334+
29335+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
29336+{
29337+ struct xino_do_trunc_args *args;
29338+ int wkq_err;
29339+
29340+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
29341+ < br->br_xino_upper)
29342+ return;
29343+
29344+ if (atomic_inc_return(&br->br_xino_running) > 1)
29345+ goto out;
29346+
29347+ /* lock and kfree() will be called in xino_do_trunc() */
29348+ args = kmalloc(sizeof(*args), GFP_NOFS);
29349+ if (unlikely(!args)) {
29350+ AuErr1("no memory\n");
29351+ goto out_args;
29352+ }
29353+
e49829fe 29354+ atomic_inc(&br->br_count);
1facf9fc 29355+ args->sb = sb;
29356+ args->br = br;
53392da6 29357+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 29358+ if (!wkq_err)
29359+ return; /* success */
29360+
4a4d8108 29361+ pr_err("wkq %d\n", wkq_err);
e49829fe 29362+ atomic_dec(&br->br_count);
1facf9fc 29363+
4f0767ce 29364+out_args:
1facf9fc 29365+ kfree(args);
4f0767ce 29366+out:
e49829fe 29367+ atomic_dec(&br->br_xino_running);
1facf9fc 29368+}
29369+
29370+/* ---------------------------------------------------------------------- */
29371+
29372+static int au_xino_do_write(au_writef_t write, struct file *file,
29373+ ino_t h_ino, ino_t ino)
29374+{
29375+ loff_t pos;
29376+ ssize_t sz;
29377+
29378+ pos = h_ino;
29379+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
29380+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29381+ return -EFBIG;
29382+ }
29383+ pos *= sizeof(ino);
29384+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
29385+ if (sz == sizeof(ino))
29386+ return 0; /* success */
29387+
29388+ AuIOErr("write failed (%zd)\n", sz);
29389+ return -EIO;
29390+}
29391+
29392+/*
29393+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
29394+ * at the position of @h_ino.
29395+ * even if @ino is zero, it is written to the xinofile and means no entry.
29396+ * if the size of the xino file on a specific filesystem exceeds the watermark,
29397+ * try truncating it.
29398+ */
29399+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29400+ ino_t ino)
29401+{
29402+ int err;
29403+ unsigned int mnt_flags;
29404+ struct au_branch *br;
29405+
29406+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
29407+ || ((loff_t)-1) > 0);
dece6358 29408+ SiMustAnyLock(sb);
1facf9fc 29409+
29410+ mnt_flags = au_mntflags(sb);
29411+ if (!au_opt_test(mnt_flags, XINO))
29412+ return 0;
29413+
29414+ br = au_sbr(sb, bindex);
29415+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29416+ h_ino, ino);
29417+ if (!err) {
29418+ if (au_opt_test(mnt_flags, TRUNC_XINO)
29419+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29420+ xino_try_trunc(sb, br);
29421+ return 0; /* success */
29422+ }
29423+
29424+ AuIOErr("write failed (%d)\n", err);
29425+ return -EIO;
29426+}
29427+
29428+/* ---------------------------------------------------------------------- */
29429+
29430+/* aufs inode number bitmap */
29431+
29432+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
29433+static ino_t xib_calc_ino(unsigned long pindex, int bit)
29434+{
29435+ ino_t ino;
29436+
29437+ AuDebugOn(bit < 0 || page_bits <= bit);
29438+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
29439+ return ino;
29440+}
29441+
29442+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
29443+{
29444+ AuDebugOn(ino < AUFS_FIRST_INO);
29445+ ino -= AUFS_FIRST_INO;
29446+ *pindex = ino / page_bits;
29447+ *bit = ino % page_bits;
29448+}
29449+
29450+static int xib_pindex(struct super_block *sb, unsigned long pindex)
29451+{
29452+ int err;
29453+ loff_t pos;
29454+ ssize_t sz;
29455+ struct au_sbinfo *sbinfo;
29456+ struct file *xib;
29457+ unsigned long *p;
29458+
29459+ sbinfo = au_sbi(sb);
29460+ MtxMustLock(&sbinfo->si_xib_mtx);
29461+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
29462+ || !au_opt_test(sbinfo->si_mntflags, XINO));
29463+
29464+ if (pindex == sbinfo->si_xib_last_pindex)
29465+ return 0;
29466+
29467+ xib = sbinfo->si_xib;
29468+ p = sbinfo->si_xib_buf;
29469+ pos = sbinfo->si_xib_last_pindex;
29470+ pos *= PAGE_SIZE;
29471+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29472+ if (unlikely(sz != PAGE_SIZE))
29473+ goto out;
29474+
29475+ pos = pindex;
29476+ pos *= PAGE_SIZE;
29477+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
29478+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
29479+ else {
29480+ memset(p, 0, PAGE_SIZE);
29481+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29482+ }
29483+ if (sz == PAGE_SIZE) {
29484+ sbinfo->si_xib_last_pindex = pindex;
29485+ return 0; /* success */
29486+ }
29487+
4f0767ce 29488+out:
b752ccd1
AM
29489+ AuIOErr1("write failed (%zd)\n", sz);
29490+ err = sz;
29491+ if (sz >= 0)
29492+ err = -EIO;
29493+ return err;
29494+}
29495+
29496+/* ---------------------------------------------------------------------- */
29497+
29498+static void au_xib_clear_bit(struct inode *inode)
29499+{
29500+ int err, bit;
29501+ unsigned long pindex;
29502+ struct super_block *sb;
29503+ struct au_sbinfo *sbinfo;
29504+
29505+ AuDebugOn(inode->i_nlink);
29506+
29507+ sb = inode->i_sb;
29508+ xib_calc_bit(inode->i_ino, &pindex, &bit);
29509+ AuDebugOn(page_bits <= bit);
29510+ sbinfo = au_sbi(sb);
29511+ mutex_lock(&sbinfo->si_xib_mtx);
29512+ err = xib_pindex(sb, pindex);
29513+ if (!err) {
29514+ clear_bit(bit, sbinfo->si_xib_buf);
29515+ sbinfo->si_xib_next_bit = bit;
29516+ }
29517+ mutex_unlock(&sbinfo->si_xib_mtx);
29518+}
29519+
29520+/* for s_op->delete_inode() */
29521+void au_xino_delete_inode(struct inode *inode, const int unlinked)
29522+{
29523+ int err;
29524+ unsigned int mnt_flags;
29525+ aufs_bindex_t bindex, bend, bi;
29526+ unsigned char try_trunc;
29527+ struct au_iinfo *iinfo;
29528+ struct super_block *sb;
29529+ struct au_hinode *hi;
29530+ struct inode *h_inode;
29531+ struct au_branch *br;
29532+ au_writef_t xwrite;
29533+
29534+ sb = inode->i_sb;
29535+ mnt_flags = au_mntflags(sb);
29536+ if (!au_opt_test(mnt_flags, XINO)
29537+ || inode->i_ino == AUFS_ROOT_INO)
29538+ return;
29539+
29540+ if (unlinked) {
29541+ au_xigen_inc(inode);
29542+ au_xib_clear_bit(inode);
29543+ }
29544+
29545+ iinfo = au_ii(inode);
29546+ if (!iinfo)
29547+ return;
1facf9fc 29548+
b752ccd1
AM
29549+ bindex = iinfo->ii_bstart;
29550+ if (bindex < 0)
29551+ return;
1facf9fc 29552+
b752ccd1
AM
29553+ xwrite = au_sbi(sb)->si_xwrite;
29554+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
29555+ hi = iinfo->ii_hinode + bindex;
29556+ bend = iinfo->ii_bend;
29557+ for (; bindex <= bend; bindex++, hi++) {
29558+ h_inode = hi->hi_inode;
29559+ if (!h_inode
29560+ || (!unlinked && h_inode->i_nlink))
29561+ continue;
1facf9fc 29562+
b752ccd1
AM
29563+ /* inode may not be revalidated */
29564+ bi = au_br_index(sb, hi->hi_id);
29565+ if (bi < 0)
29566+ continue;
1facf9fc 29567+
b752ccd1
AM
29568+ br = au_sbr(sb, bi);
29569+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
29570+ h_inode->i_ino, /*ino*/0);
29571+ if (!err && try_trunc
29572+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29573+ xino_try_trunc(sb, br);
1facf9fc 29574+ }
1facf9fc 29575+}
29576+
29577+/* get an unused inode number from bitmap */
29578+ino_t au_xino_new_ino(struct super_block *sb)
29579+{
29580+ ino_t ino;
29581+ unsigned long *p, pindex, ul, pend;
29582+ struct au_sbinfo *sbinfo;
29583+ struct file *file;
29584+ int free_bit, err;
29585+
29586+ if (!au_opt_test(au_mntflags(sb), XINO))
29587+ return iunique(sb, AUFS_FIRST_INO);
29588+
29589+ sbinfo = au_sbi(sb);
29590+ mutex_lock(&sbinfo->si_xib_mtx);
29591+ p = sbinfo->si_xib_buf;
29592+ free_bit = sbinfo->si_xib_next_bit;
29593+ if (free_bit < page_bits && !test_bit(free_bit, p))
29594+ goto out; /* success */
29595+ free_bit = find_first_zero_bit(p, page_bits);
29596+ if (free_bit < page_bits)
29597+ goto out; /* success */
29598+
29599+ pindex = sbinfo->si_xib_last_pindex;
29600+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
29601+ err = xib_pindex(sb, ul);
29602+ if (unlikely(err))
29603+ goto out_err;
29604+ free_bit = find_first_zero_bit(p, page_bits);
29605+ if (free_bit < page_bits)
29606+ goto out; /* success */
29607+ }
29608+
29609+ file = sbinfo->si_xib;
29610+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
29611+ for (ul = pindex + 1; ul <= pend; ul++) {
29612+ err = xib_pindex(sb, ul);
29613+ if (unlikely(err))
29614+ goto out_err;
29615+ free_bit = find_first_zero_bit(p, page_bits);
29616+ if (free_bit < page_bits)
29617+ goto out; /* success */
29618+ }
29619+ BUG();
29620+
4f0767ce 29621+out:
1facf9fc 29622+ set_bit(free_bit, p);
7f207e10 29623+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 29624+ pindex = sbinfo->si_xib_last_pindex;
29625+ mutex_unlock(&sbinfo->si_xib_mtx);
29626+ ino = xib_calc_ino(pindex, free_bit);
29627+ AuDbg("i%lu\n", (unsigned long)ino);
29628+ return ino;
4f0767ce 29629+out_err:
1facf9fc 29630+ mutex_unlock(&sbinfo->si_xib_mtx);
29631+ AuDbg("i0\n");
29632+ return 0;
29633+}
29634+
29635+/*
29636+ * read @ino from xinofile for the specified branch{@sb, @bindex}
29637+ * at the position of @h_ino.
29638+ * if no entry exists for @h_ino, *@ino is set to zero and 0 is returned.
29639+ */
29640+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29641+ ino_t *ino)
29642+{
29643+ int err;
29644+ ssize_t sz;
29645+ loff_t pos;
29646+ struct file *file;
29647+ struct au_sbinfo *sbinfo;
29648+
29649+ *ino = 0;
29650+ if (!au_opt_test(au_mntflags(sb), XINO))
29651+ return 0; /* no xino */
29652+
29653+ err = 0;
29654+ sbinfo = au_sbi(sb);
29655+ pos = h_ino;
29656+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
29657+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29658+ return -EFBIG;
29659+ }
29660+ pos *= sizeof(*ino);
29661+
29662+ file = au_sbr(sb, bindex)->br_xino.xi_file;
29663+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
29664+ return 0; /* no ino */
29665+
29666+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
29667+ if (sz == sizeof(*ino))
29668+ return 0; /* success */
29669+
29670+ err = sz;
29671+ if (unlikely(sz >= 0)) {
29672+ err = -EIO;
29673+ AuIOErr("xino read error (%zd)\n", sz);
29674+ }
29675+
29676+ return err;
29677+}
29678+
29679+/* ---------------------------------------------------------------------- */
29680+
29681+/* create and set a new xino file */
29682+
29683+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
29684+{
29685+ struct file *file;
29686+ struct dentry *h_parent, *d;
29687+ struct inode *h_dir;
29688+ int err;
29689+
29690+ /*
29691+ * at mount-time, and the xino file is the default path,
4a4d8108 29692+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 29693+ * when a user specified the xino, we cannot get au_hdir to be ignored.
29694+ */
7f207e10 29695+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29696+ /* | __FMODE_NONOTIFY */,
1facf9fc 29697+ S_IRUGO | S_IWUGO);
29698+ if (IS_ERR(file)) {
29699+ if (!silent)
4a4d8108 29700+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 29701+ return file;
29702+ }
29703+
29704+ /* keep file count */
29705+ h_parent = dget_parent(file->f_dentry);
29706+ h_dir = h_parent->d_inode;
29707+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
29708+ /* mnt_want_write() is unnecessary here */
29709+ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
29710+ mutex_unlock(&h_dir->i_mutex);
29711+ dput(h_parent);
29712+ if (unlikely(err)) {
29713+ if (!silent)
4a4d8108 29714+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 29715+ goto out;
29716+ }
29717+
29718+ err = -EINVAL;
29719+ d = file->f_dentry;
29720+ if (unlikely(sb == d->d_sb)) {
29721+ if (!silent)
4a4d8108 29722+ pr_err("%s must be outside\n", fname);
1facf9fc 29723+ goto out;
29724+ }
29725+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
29726+ if (!silent)
4a4d8108
AM
29727+ pr_err("xino doesn't support %s(%s)\n",
29728+ fname, au_sbtype(d->d_sb));
1facf9fc 29729+ goto out;
29730+ }
29731+ return file; /* success */
29732+
4f0767ce 29733+out:
1facf9fc 29734+ fput(file);
29735+ file = ERR_PTR(err);
29736+ return file;
29737+}
29738+
29739+/*
29740+ * find another branch which is on the same filesystem as the specified
29741+ * branch{@btgt}. search until @bend.
29742+ */
29743+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
29744+ aufs_bindex_t bend)
29745+{
29746+ aufs_bindex_t bindex;
29747+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
29748+
29749+ for (bindex = 0; bindex < btgt; bindex++)
29750+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29751+ return bindex;
29752+ for (bindex++; bindex <= bend; bindex++)
29753+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29754+ return bindex;
29755+ return -1;
29756+}
29757+
29758+/* ---------------------------------------------------------------------- */
29759+
29760+/*
29761+ * initialize the xinofile for the specified branch @br
29762+ * at the place/path where @base_file indicates.
29763+ * test whether another branch is on the same filesystem or not,
29764+ * if @do_test is true.
29765+ */
29766+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
29767+ struct file *base_file, int do_test)
29768+{
29769+ int err;
29770+ ino_t ino;
29771+ aufs_bindex_t bend, bindex;
29772+ struct au_branch *shared_br, *b;
29773+ struct file *file;
29774+ struct super_block *tgt_sb;
29775+
29776+ shared_br = NULL;
29777+ bend = au_sbend(sb);
29778+ if (do_test) {
29779+ tgt_sb = br->br_mnt->mnt_sb;
29780+ for (bindex = 0; bindex <= bend; bindex++) {
29781+ b = au_sbr(sb, bindex);
29782+ if (tgt_sb == b->br_mnt->mnt_sb) {
29783+ shared_br = b;
29784+ break;
29785+ }
29786+ }
29787+ }
29788+
29789+ if (!shared_br || !shared_br->br_xino.xi_file) {
29790+ struct au_xino_lock_dir ldir;
29791+
29792+ au_xino_lock_dir(sb, base_file, &ldir);
29793+ /* mnt_want_write() is unnecessary here */
29794+ file = au_xino_create2(base_file, NULL);
29795+ au_xino_unlock_dir(&ldir);
29796+ err = PTR_ERR(file);
29797+ if (IS_ERR(file))
29798+ goto out;
29799+ br->br_xino.xi_file = file;
29800+ } else {
29801+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
29802+ get_file(br->br_xino.xi_file);
29803+ }
29804+
29805+ ino = AUFS_ROOT_INO;
29806+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29807+ h_ino, ino);
b752ccd1
AM
29808+ if (unlikely(err)) {
29809+ fput(br->br_xino.xi_file);
29810+ br->br_xino.xi_file = NULL;
29811+ }
1facf9fc 29812+
4f0767ce 29813+out:
1facf9fc 29814+ return err;
29815+}
29816+
29817+/* ---------------------------------------------------------------------- */
29818+
29819+/* truncate a xino bitmap file */
29820+
29821+/* todo: slow */
29822+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
29823+{
29824+ int err, bit;
29825+ ssize_t sz;
29826+ unsigned long pindex;
29827+ loff_t pos, pend;
29828+ struct au_sbinfo *sbinfo;
29829+ au_readf_t func;
29830+ ino_t *ino;
29831+ unsigned long *p;
29832+
29833+ err = 0;
29834+ sbinfo = au_sbi(sb);
dece6358 29835+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 29836+ p = sbinfo->si_xib_buf;
29837+ func = sbinfo->si_xread;
29838+ pend = i_size_read(file->f_dentry->d_inode);
29839+ pos = 0;
29840+ while (pos < pend) {
29841+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
29842+ err = sz;
29843+ if (unlikely(sz <= 0))
29844+ goto out;
29845+
29846+ err = 0;
29847+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
29848+ if (unlikely(*ino < AUFS_FIRST_INO))
29849+ continue;
29850+
29851+ xib_calc_bit(*ino, &pindex, &bit);
29852+ AuDebugOn(page_bits <= bit);
29853+ err = xib_pindex(sb, pindex);
29854+ if (!err)
29855+ set_bit(bit, p);
29856+ else
29857+ goto out;
29858+ }
29859+ }
29860+
4f0767ce 29861+out:
1facf9fc 29862+ return err;
29863+}
29864+
29865+static int xib_restore(struct super_block *sb)
29866+{
29867+ int err;
29868+ aufs_bindex_t bindex, bend;
29869+ void *page;
29870+
29871+ err = -ENOMEM;
29872+ page = (void *)__get_free_page(GFP_NOFS);
29873+ if (unlikely(!page))
29874+ goto out;
29875+
29876+ err = 0;
29877+ bend = au_sbend(sb);
29878+ for (bindex = 0; !err && bindex <= bend; bindex++)
29879+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
29880+ err = do_xib_restore
29881+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
29882+ else
29883+ AuDbg("b%d\n", bindex);
29884+ free_page((unsigned long)page);
29885+
4f0767ce 29886+out:
1facf9fc 29887+ return err;
29888+}
29889+
29890+int au_xib_trunc(struct super_block *sb)
29891+{
29892+ int err;
29893+ ssize_t sz;
29894+ loff_t pos;
29895+ struct au_xino_lock_dir ldir;
29896+ struct au_sbinfo *sbinfo;
29897+ unsigned long *p;
29898+ struct file *file;
29899+
dece6358
AM
29900+ SiMustWriteLock(sb);
29901+
1facf9fc 29902+ err = 0;
29903+ sbinfo = au_sbi(sb);
29904+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
29905+ goto out;
29906+
29907+ file = sbinfo->si_xib;
29908+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
29909+ goto out;
29910+
29911+ au_xino_lock_dir(sb, file, &ldir);
29912+ /* mnt_want_write() is unnecessary here */
29913+ file = au_xino_create2(sbinfo->si_xib, NULL);
29914+ au_xino_unlock_dir(&ldir);
29915+ err = PTR_ERR(file);
29916+ if (IS_ERR(file))
29917+ goto out;
29918+ fput(sbinfo->si_xib);
29919+ sbinfo->si_xib = file;
29920+
29921+ p = sbinfo->si_xib_buf;
29922+ memset(p, 0, PAGE_SIZE);
29923+ pos = 0;
29924+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
29925+ if (unlikely(sz != PAGE_SIZE)) {
29926+ err = sz;
29927+ AuIOErr("err %d\n", err);
29928+ if (sz >= 0)
29929+ err = -EIO;
29930+ goto out;
29931+ }
29932+
29933+ mutex_lock(&sbinfo->si_xib_mtx);
29934+ /* mnt_want_write() is unnecessary here */
29935+ err = xib_restore(sb);
29936+ mutex_unlock(&sbinfo->si_xib_mtx);
29937+
29938+out:
29939+ return err;
29940+}
29941+
29942+/* ---------------------------------------------------------------------- */
29943+
29944+/*
29945+ * xino mount option handlers
29946+ */
29947+static au_readf_t find_readf(struct file *h_file)
29948+{
29949+ const struct file_operations *fop = h_file->f_op;
29950+
29951+ if (fop) {
29952+ if (fop->read)
29953+ return fop->read;
29954+ if (fop->aio_read)
29955+ return do_sync_read;
29956+ }
29957+ return ERR_PTR(-ENOSYS);
29958+}
29959+
29960+static au_writef_t find_writef(struct file *h_file)
29961+{
29962+ const struct file_operations *fop = h_file->f_op;
29963+
29964+ if (fop) {
29965+ if (fop->write)
29966+ return fop->write;
29967+ if (fop->aio_write)
29968+ return do_sync_write;
29969+ }
29970+ return ERR_PTR(-ENOSYS);
29971+}
29972+
29973+/* xino bitmap */
29974+static void xino_clear_xib(struct super_block *sb)
29975+{
29976+ struct au_sbinfo *sbinfo;
29977+
dece6358
AM
29978+ SiMustWriteLock(sb);
29979+
1facf9fc 29980+ sbinfo = au_sbi(sb);
29981+ sbinfo->si_xread = NULL;
29982+ sbinfo->si_xwrite = NULL;
29983+ if (sbinfo->si_xib)
29984+ fput(sbinfo->si_xib);
29985+ sbinfo->si_xib = NULL;
29986+ free_page((unsigned long)sbinfo->si_xib_buf);
29987+ sbinfo->si_xib_buf = NULL;
29988+}
29989+
29990+static int au_xino_set_xib(struct super_block *sb, struct file *base)
29991+{
29992+ int err;
29993+ loff_t pos;
29994+ struct au_sbinfo *sbinfo;
29995+ struct file *file;
29996+
dece6358
AM
29997+ SiMustWriteLock(sb);
29998+
1facf9fc 29999+ sbinfo = au_sbi(sb);
30000+ file = au_xino_create2(base, sbinfo->si_xib);
30001+ err = PTR_ERR(file);
30002+ if (IS_ERR(file))
30003+ goto out;
30004+ if (sbinfo->si_xib)
30005+ fput(sbinfo->si_xib);
30006+ sbinfo->si_xib = file;
30007+ sbinfo->si_xread = find_readf(file);
30008+ sbinfo->si_xwrite = find_writef(file);
30009+
30010+ err = -ENOMEM;
30011+ if (!sbinfo->si_xib_buf)
30012+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
30013+ if (unlikely(!sbinfo->si_xib_buf))
30014+ goto out_unset;
30015+
30016+ sbinfo->si_xib_last_pindex = 0;
30017+ sbinfo->si_xib_next_bit = 0;
30018+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
30019+ pos = 0;
30020+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
30021+ PAGE_SIZE, &pos);
30022+ if (unlikely(err != PAGE_SIZE))
30023+ goto out_free;
30024+ }
30025+ err = 0;
30026+ goto out; /* success */
30027+
4f0767ce 30028+out_free:
1facf9fc 30029+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
30030+ sbinfo->si_xib_buf = NULL;
30031+ if (err >= 0)
30032+ err = -EIO;
4f0767ce 30033+out_unset:
b752ccd1
AM
30034+ fput(sbinfo->si_xib);
30035+ sbinfo->si_xib = NULL;
30036+ sbinfo->si_xread = NULL;
30037+ sbinfo->si_xwrite = NULL;
4f0767ce 30038+out:
b752ccd1 30039+ return err;
1facf9fc 30040+}
30041+
b752ccd1
AM
30042+/* xino for each branch */
30043+static void xino_clear_br(struct super_block *sb)
30044+{
30045+ aufs_bindex_t bindex, bend;
30046+ struct au_branch *br;
1facf9fc 30047+
b752ccd1
AM
30048+ bend = au_sbend(sb);
30049+ for (bindex = 0; bindex <= bend; bindex++) {
30050+ br = au_sbr(sb, bindex);
30051+ if (!br || !br->br_xino.xi_file)
30052+ continue;
30053+
30054+ fput(br->br_xino.xi_file);
30055+ br->br_xino.xi_file = NULL;
30056+ }
30057+}
30058+
30059+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 30060+{
30061+ int err;
b752ccd1
AM
30062+ ino_t ino;
30063+ aufs_bindex_t bindex, bend, bshared;
30064+ struct {
30065+ struct file *old, *new;
30066+ } *fpair, *p;
30067+ struct au_branch *br;
30068+ struct inode *inode;
30069+ au_writef_t writef;
1facf9fc 30070+
b752ccd1
AM
30071+ SiMustWriteLock(sb);
30072+
30073+ err = -ENOMEM;
30074+ bend = au_sbend(sb);
30075+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
30076+ if (unlikely(!fpair))
1facf9fc 30077+ goto out;
30078+
b752ccd1
AM
30079+ inode = sb->s_root->d_inode;
30080+ ino = AUFS_ROOT_INO;
30081+ writef = au_sbi(sb)->si_xwrite;
30082+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
30083+ br = au_sbr(sb, bindex);
30084+ bshared = is_sb_shared(sb, bindex, bindex - 1);
30085+ if (bshared >= 0) {
30086+ /* shared xino */
30087+ *p = fpair[bshared];
30088+ get_file(p->new);
30089+ }
30090+
30091+ if (!p->new) {
30092+ /* new xino */
30093+ p->old = br->br_xino.xi_file;
30094+ p->new = au_xino_create2(base, br->br_xino.xi_file);
30095+ err = PTR_ERR(p->new);
30096+ if (IS_ERR(p->new)) {
30097+ p->new = NULL;
30098+ goto out_pair;
30099+ }
30100+ }
30101+
30102+ err = au_xino_do_write(writef, p->new,
30103+ au_h_iptr(inode, bindex)->i_ino, ino);
30104+ if (unlikely(err))
30105+ goto out_pair;
30106+ }
30107+
30108+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
30109+ br = au_sbr(sb, bindex);
30110+ if (br->br_xino.xi_file)
30111+ fput(br->br_xino.xi_file);
30112+ get_file(p->new);
30113+ br->br_xino.xi_file = p->new;
30114+ }
1facf9fc 30115+
4f0767ce 30116+out_pair:
b752ccd1
AM
30117+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
30118+ if (p->new)
30119+ fput(p->new);
30120+ else
30121+ break;
30122+ kfree(fpair);
4f0767ce 30123+out:
1facf9fc 30124+ return err;
30125+}
b752ccd1
AM
30126+
30127+void au_xino_clr(struct super_block *sb)
30128+{
30129+ struct au_sbinfo *sbinfo;
30130+
30131+ au_xigen_clr(sb);
30132+ xino_clear_xib(sb);
30133+ xino_clear_br(sb);
30134+ sbinfo = au_sbi(sb);
30135+ /* lvalue, do not call au_mntflags() */
30136+ au_opt_clr(sbinfo->si_mntflags, XINO);
30137+}
30138+
30139+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
30140+{
30141+ int err, skip;
30142+ struct dentry *parent, *cur_parent;
30143+ struct qstr *dname, *cur_name;
30144+ struct file *cur_xino;
30145+ struct inode *dir;
30146+ struct au_sbinfo *sbinfo;
30147+
30148+ SiMustWriteLock(sb);
30149+
30150+ err = 0;
30151+ sbinfo = au_sbi(sb);
30152+ parent = dget_parent(xino->file->f_dentry);
30153+ if (remount) {
30154+ skip = 0;
30155+ dname = &xino->file->f_dentry->d_name;
30156+ cur_xino = sbinfo->si_xib;
30157+ if (cur_xino) {
30158+ cur_parent = dget_parent(cur_xino->f_dentry);
30159+ cur_name = &cur_xino->f_dentry->d_name;
30160+ skip = (cur_parent == parent
30161+ && dname->len == cur_name->len
30162+ && !memcmp(dname->name, cur_name->name,
30163+ dname->len));
30164+ dput(cur_parent);
30165+ }
30166+ if (skip)
30167+ goto out;
30168+ }
30169+
30170+ au_opt_set(sbinfo->si_mntflags, XINO);
30171+ dir = parent->d_inode;
30172+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
30173+ /* mnt_want_write() is unnecessary here */
30174+ err = au_xino_set_xib(sb, xino->file);
30175+ if (!err)
30176+ err = au_xigen_set(sb, xino->file);
30177+ if (!err)
30178+ err = au_xino_set_br(sb, xino->file);
30179+ mutex_unlock(&dir->i_mutex);
30180+ if (!err)
30181+ goto out; /* success */
30182+
30183+ /* reset all */
30184+ AuIOErr("failed creating xino(%d).\n", err);
30185+
4f0767ce 30186+out:
b752ccd1
AM
30187+ dput(parent);
30188+ return err;
30189+}
30190+
30191+/* ---------------------------------------------------------------------- */
30192+
30193+/*
30194+ * create a xinofile at the default place/path.
30195+ */
30196+struct file *au_xino_def(struct super_block *sb)
30197+{
30198+ struct file *file;
30199+ char *page, *p;
30200+ struct au_branch *br;
30201+ struct super_block *h_sb;
30202+ struct path path;
30203+ aufs_bindex_t bend, bindex, bwr;
30204+
30205+ br = NULL;
30206+ bend = au_sbend(sb);
30207+ bwr = -1;
30208+ for (bindex = 0; bindex <= bend; bindex++) {
30209+ br = au_sbr(sb, bindex);
30210+ if (au_br_writable(br->br_perm)
30211+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
30212+ bwr = bindex;
30213+ break;
30214+ }
30215+ }
30216+
7f207e10
AM
30217+ if (bwr >= 0) {
30218+ file = ERR_PTR(-ENOMEM);
30219+ page = __getname_gfp(GFP_NOFS);
30220+ if (unlikely(!page))
30221+ goto out;
30222+ path.mnt = br->br_mnt;
30223+ path.dentry = au_h_dptr(sb->s_root, bwr);
30224+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
30225+ file = (void *)p;
30226+ if (!IS_ERR(p)) {
30227+ strcat(p, "/" AUFS_XINO_FNAME);
30228+ AuDbg("%s\n", p);
30229+ file = au_xino_create(sb, p, /*silent*/0);
30230+ if (!IS_ERR(file))
30231+ au_xino_brid_set(sb, br->br_id);
30232+ }
30233+ __putname(page);
30234+ } else {
30235+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
30236+ if (IS_ERR(file))
30237+ goto out;
30238+ h_sb = file->f_dentry->d_sb;
30239+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
30240+ pr_err("xino doesn't support %s(%s)\n",
30241+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
30242+ fput(file);
30243+ file = ERR_PTR(-EINVAL);
30244+ }
30245+ if (!IS_ERR(file))
30246+ au_xino_brid_set(sb, -1);
30247+ }
0c5527e5 30248+
7f207e10
AM
30249+out:
30250+ return file;
30251+}
30252+
30253+/* ---------------------------------------------------------------------- */
30254+
30255+int au_xino_path(struct seq_file *seq, struct file *file)
30256+{
30257+ int err;
30258+
30259+ err = au_seq_path(seq, &file->f_path);
30260+ if (unlikely(err < 0))
30261+ goto out;
30262+
30263+ err = 0;
30264+#define Deleted "\\040(deleted)"
30265+ seq->count -= sizeof(Deleted) - 1;
30266+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
30267+ sizeof(Deleted) - 1));
30268+#undef Deleted
30269+
30270+out:
30271+ return err;
30272+}
30273diff -urN /usr/share/empty/include/linux/aufs_type.h linux/include/linux/aufs_type.h
30274--- /usr/share/empty/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
1e00d052
AM
30275+++ linux/include/linux/aufs_type.h 2011-10-24 20:52:23.677857076 +0200
30276@@ -0,0 +1,220 @@
7f207e10 30277+/*
027c5e7a 30278+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
30279+ *
30280+ * This program, aufs is free software; you can redistribute it and/or modify
30281+ * it under the terms of the GNU General Public License as published by
30282+ * the Free Software Foundation; either version 2 of the License, or
30283+ * (at your option) any later version.
30284+ *
30285+ * This program is distributed in the hope that it will be useful,
30286+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30287+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30288+ * GNU General Public License for more details.
30289+ *
30290+ * You should have received a copy of the GNU General Public License
30291+ * along with this program; if not, write to the Free Software
30292+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30293+ */
30294+
30295+#ifndef __AUFS_TYPE_H__
30296+#define __AUFS_TYPE_H__
30297+
30298+#include <linux/ioctl.h>
30299+#include <linux/kernel.h>
30300+#include <linux/limits.h>
30301+#include <linux/types.h>
30302+
1e00d052 30303+#define AUFS_VERSION "3.x-rcN-20111024"
7f207e10
AM
30304+
30305+/* todo? move this to linux-2.6.19/include/magic.h */
30306+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
30307+
30308+/* ---------------------------------------------------------------------- */
30309+
30310+#ifdef CONFIG_AUFS_BRANCH_MAX_127
30311+typedef __s8 aufs_bindex_t;
30312+#define AUFS_BRANCH_MAX 127
30313+#else
30314+typedef __s16 aufs_bindex_t;
30315+#ifdef CONFIG_AUFS_BRANCH_MAX_511
30316+#define AUFS_BRANCH_MAX 511
30317+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
30318+#define AUFS_BRANCH_MAX 1023
30319+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
30320+#define AUFS_BRANCH_MAX 32767
30321+#endif
30322+#endif
30323+
30324+#ifdef __KERNEL__
30325+#ifndef AUFS_BRANCH_MAX
30326+#error unknown CONFIG_AUFS_BRANCH_MAX value
30327+#endif
30328+#endif /* __KERNEL__ */
30329+
30330+/* ---------------------------------------------------------------------- */
30331+
30332+#define AUFS_NAME "aufs"
30333+#define AUFS_FSTYPE AUFS_NAME
30334+
30335+#define AUFS_ROOT_INO 2
30336+#define AUFS_FIRST_INO 11
30337+
30338+#define AUFS_WH_PFX ".wh."
30339+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
30340+#define AUFS_WH_TMP_LEN 4
30341+/* a limit for rmdir/rename a dir */
30342+#define AUFS_MAX_NAMELEN (NAME_MAX \
30343+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
30344+ - 1 /* dot */\
30345+ - AUFS_WH_TMP_LEN) /* hex */
30346+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
30347+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
30348+#define AUFS_XINO_TRUNC_INIT 64 /* blocks */
30349+#define AUFS_XINO_TRUNC_STEP 4 /* blocks */
30350+#define AUFS_DIRWH_DEF 3
30351+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 30352+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
30353+#define AUFS_RDBLK_DEF 512 /* bytes */
30354+#define AUFS_RDHASH_DEF 32
30355+#define AUFS_WKQ_NAME AUFS_NAME "d"
30356+#define AUFS_WKQ_PRE_NAME AUFS_WKQ_NAME "_pre"
027c5e7a
AM
30357+#define AUFS_MFS_DEF_SEC 30 /* seconds */
30358+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
7f207e10
AM
30359+#define AUFS_PLINK_WARN 100 /* number of plinks */
30360+
30361+/* pseudo-link maintenance under /proc */
30362+#define AUFS_PLINK_MAINT_NAME "plink_maint"
30363+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
30364+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
30365+
30366+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
30367+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
30368+
30369+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
30370+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
30371+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
30372+
30373+/* doubly whiteouted */
30374+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
30375+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
30376+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
30377+
1e00d052 30378+/* branch permissions and attributes */
7f207e10
AM
30379+#define AUFS_BRPERM_RW "rw"
30380+#define AUFS_BRPERM_RO "ro"
30381+#define AUFS_BRPERM_RR "rr"
1e00d052
AM
30382+#define AUFS_BRRATTR_WH "wh"
30383+#define AUFS_BRWATTR_NLWH "nolwh"
7f207e10
AM
30384+
30385+/* ---------------------------------------------------------------------- */
30386+
30387+/* ioctl */
30388+enum {
30389+ /* readdir in userspace */
30390+ AuCtl_RDU,
30391+ AuCtl_RDU_INO,
30392+
30393+ /* pathconf wrapper */
027c5e7a
AM
30394+ AuCtl_WBR_FD,
30395+
30396+ /* busy inode */
30397+ AuCtl_IBUSY
7f207e10
AM
30398+};
30399+
30400+/* borrowed from linux/include/linux/kernel.h */
30401+#ifndef ALIGN
30402+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
30403+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
30404+#endif
30405+
30406+/* borrowed from linux/include/linux/compiler-gcc3.h */
30407+#ifndef __aligned
30408+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
30409+#endif
30410+
30411+#ifdef __KERNEL__
30412+#ifndef __packed
7f207e10
AM
30413+#define __packed __attribute__((packed))
30414+#endif
53392da6 30415+#endif
7f207e10
AM
30416+
30417+struct au_rdu_cookie {
30418+ __u64 h_pos;
30419+ __s16 bindex;
30420+ __u8 flags;
30421+ __u8 pad;
30422+ __u32 generation;
30423+} __aligned(8);
30424+
30425+struct au_rdu_ent {
30426+ __u64 ino;
30427+ __s16 bindex;
30428+ __u8 type;
30429+ __u8 nlen;
30430+ __u8 wh;
30431+ char name[0];
30432+} __aligned(8);
30433+
30434+static inline int au_rdu_len(int nlen)
30435+{
30436+ /* include the terminating NUL */
30437+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
30438+ sizeof(__u64));
30439+}
30440+
30441+union au_rdu_ent_ul {
30442+ struct au_rdu_ent __user *e;
30443+ __u64 ul;
30444+};
30445+
30446+enum {
30447+ AufsCtlRduV_SZ,
30448+ AufsCtlRduV_End
30449+};
30450+
30451+struct aufs_rdu {
30452+ /* input */
30453+ union {
30454+ __u64 sz; /* AuCtl_RDU */
30455+ __u64 nent; /* AuCtl_RDU_INO */
30456+ };
30457+ union au_rdu_ent_ul ent;
30458+ __u16 verify[AufsCtlRduV_End];
30459+
30460+ /* input/output */
30461+ __u32 blk;
30462+
30463+ /* output */
30464+ union au_rdu_ent_ul tail;
30465+ /* number of entries which were added in a single call */
30466+ __u64 rent;
30467+ __u8 full;
30468+ __u8 shwh;
30469+
30470+ struct au_rdu_cookie cookie;
30471+} __aligned(8);
30472+
1e00d052
AM
30473+/* ---------------------------------------------------------------------- */
30474+
30475+struct aufs_wbr_fd {
30476+ __u32 oflags;
30477+ __s16 brid;
30478+} __aligned(8);
30479+
30480+/* ---------------------------------------------------------------------- */
30481+
027c5e7a 30482+struct aufs_ibusy {
1e00d052
AM
30483+ __u64 ino, h_ino;
30484+ __s16 bindex;
027c5e7a
AM
30485+} __aligned(8);
30486+
1e00d052
AM
30487+/* ---------------------------------------------------------------------- */
30488+
7f207e10
AM
30489+#define AuCtlType 'A'
30490+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
30491+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
30492+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
30493+ struct aufs_wbr_fd)
027c5e7a 30494+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
7f207e10
AM
30495+
30496+#endif /* __AUFS_TYPE_H__ */
This page took 4.699885 seconds and 4 git commands to generate.