]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs3.patch
- updated for 3.1
[packages/kernel.git] / kernel-aufs3.patch
CommitLineData
53392da6 1aufs3.0 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
2cbb1c4b 4index 19891aa..b660b64 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
2cbb1c4b 7@@ -208,6 +208,7 @@ source "fs/pstore/Kconfig"
7f207e10
AM
8 source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
2cbb1c4b 16index fb68c2b..c031a85 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
2cbb1c4b 19@@ -124,3 +124,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
7f207e10
AM
20 obj-$(CONFIG_EXOFS_FS) += exofs/
21 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 22 obj-$(CONFIG_PSTORE) += pstore/
2cbb1c4b 23+obj-$(CONFIG_AUFS_FS) += aufs/
7f207e10 24diff --git a/include/linux/Kbuild b/include/linux/Kbuild
2cbb1c4b 25index 01f6362..8b3b9f1 100644
7f207e10
AM
26--- a/include/linux/Kbuild
27+++ b/include/linux/Kbuild
2cbb1c4b 28@@ -65,6 +65,7 @@ header-y += atmppp.h
7f207e10
AM
29 header-y += atmsap.h
30 header-y += atmsvc.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs.h
34 header-y += auto_fs4.h
35 header-y += auxvec.h
53392da6 36aufs3.0 base patch
7f207e10
AM
37
38diff --git a/fs/namei.c b/fs/namei.c
53392da6 39index 14ab8d3..eb4aef1 100644
7f207e10
AM
40--- a/fs/namei.c
41+++ b/fs/namei.c
53392da6 42@@ -1697,7 +1697,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
7f207e10
AM
43 * needs parent already locked. Doesn't follow mounts.
44 * SMP-safe.
45 */
46-static struct dentry *lookup_hash(struct nameidata *nd)
47+struct dentry *lookup_hash(struct nameidata *nd)
48 {
7f207e10
AM
49 return __lookup_hash(&nd->last, nd->path.dentry, nd);
50 }
7f207e10 51diff --git a/fs/splice.c b/fs/splice.c
2cbb1c4b 52index aa866d3..19afec6 100644
7f207e10
AM
53--- a/fs/splice.c
54+++ b/fs/splice.c
2cbb1c4b 55@@ -1085,8 +1085,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
56 /*
57 * Attempt to initiate a splice from pipe to file.
58 */
59-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
60- loff_t *ppos, size_t len, unsigned int flags)
61+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
62+ loff_t *ppos, size_t len, unsigned int flags)
63 {
64 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
65 loff_t *, size_t, unsigned int);
2cbb1c4b 66@@ -1113,9 +1113,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
67 /*
68 * Attempt to initiate a splice from a file to a pipe.
69 */
70-static long do_splice_to(struct file *in, loff_t *ppos,
71- struct pipe_inode_info *pipe, size_t len,
72- unsigned int flags)
73+long do_splice_to(struct file *in, loff_t *ppos,
74+ struct pipe_inode_info *pipe, size_t len,
75+ unsigned int flags)
76 {
77 ssize_t (*splice_read)(struct file *, loff_t *,
78 struct pipe_inode_info *, size_t, unsigned int);
79diff --git a/include/linux/namei.h b/include/linux/namei.h
2cbb1c4b 80index eba45ea..21ed6c9 100644
7f207e10
AM
81--- a/include/linux/namei.h
82+++ b/include/linux/namei.h
2cbb1c4b 83@@ -82,6 +82,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
7f207e10
AM
84 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
85 int (*open)(struct inode *, struct file *));
86
87+extern struct dentry *lookup_hash(struct nameidata *nd);
7f207e10
AM
88 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
89
027c5e7a 90 extern int follow_down_one(struct path *);
4b3da204
AM
91--- linux-3.1/include/linux/splice.h~ 2011-10-24 09:10:05.000000000 +0200
92+++ linux-3.1/include/linux/splice.h 2011-10-24 16:01:13.962765332 +0200
93@@ -88,6 +88,11 @@
94 extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
7f207e10
AM
95 extern void splice_shrink_spd(struct pipe_inode_info *,
96 struct splice_pipe_desc *);
7f207e10 97+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
4b3da204 98+ loff_t *ppos, size_t len, unsigned int flags);
7f207e10 99+extern long do_splice_to(struct file *in, loff_t *ppos,
4b3da204
AM
100+ struct pipe_inode_info *pipe, size_t len,
101+ unsigned int flags);
102 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
103
104 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
53392da6 105aufs3.0 standalone patch
7f207e10
AM
106
107diff --git a/fs/file_table.c b/fs/file_table.c
2cbb1c4b 108index 01e4c1e..0e800e2 100644
7f207e10
AM
109--- a/fs/file_table.c
110+++ b/fs/file_table.c
2cbb1c4b 111@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file)
7f207e10
AM
112 }
113 }
114
115+EXPORT_SYMBOL(file_sb_list_del);
1facf9fc 116+
7f207e10
AM
117 #ifdef CONFIG_SMP
118
119 /*
4b3da204
AM
120--- linux-3.1/fs/inode.c~ 2011-10-24 09:10:05.000000000 +0200
121+++ linux-3.1/fs/inode.c 2011-10-24 15:59:01.446189509 +0200
122@@ -65,6 +65,7 @@
123 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
2cbb1c4b
JR
124
125 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
2cbb1c4b 126+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
127
128 /*
4b3da204 129 * Empty aops. Can be used for the cases where the user does not
7f207e10 130diff --git a/fs/namei.c b/fs/namei.c
53392da6 131index eb4aef1..66d04c6 100644
7f207e10
AM
132--- a/fs/namei.c
133+++ b/fs/namei.c
2cbb1c4b 134@@ -365,6 +365,7 @@ int deny_write_access(struct file * file)
7f207e10
AM
135
136 return 0;
137 }
138+EXPORT_SYMBOL(deny_write_access);
139
140 /**
141 * path_get - get a reference to a path
53392da6 142@@ -1701,6 +1702,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
027c5e7a 143 {
7f207e10
AM
144 return __lookup_hash(&nd->last, nd->path.dentry, nd);
145 }
146+EXPORT_SYMBOL(lookup_hash);
147
7f207e10
AM
148 /**
149 * lookup_one_len - filesystem helper to lookup single pathname component
150diff --git a/fs/namespace.c b/fs/namespace.c
2cbb1c4b 151index fe59bd1..7d3843f 100644
7f207e10
AM
152--- a/fs/namespace.c
153+++ b/fs/namespace.c
2cbb1c4b 154@@ -1508,6 +1508,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
155 }
156 return 0;
157 }
158+EXPORT_SYMBOL(iterate_mounts);
159
160 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
161 {
162diff --git a/fs/notify/group.c b/fs/notify/group.c
163index d309f38..f0e9568 100644
164--- a/fs/notify/group.c
165+++ b/fs/notify/group.c
166@@ -22,6 +22,7 @@
167 #include <linux/srcu.h>
168 #include <linux/rculist.h>
169 #include <linux/wait.h>
170+#include <linux/module.h>
171
172 #include <linux/fsnotify_backend.h>
173 #include "fsnotify.h"
174@@ -70,6 +71,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
175 if (atomic_dec_and_test(&group->refcnt))
176 fsnotify_destroy_group(group);
177 }
178+EXPORT_SYMBOL(fsnotify_put_group);
179
180 /*
181 * Create a new fsnotify_group and hold a reference for the group returned.
182@@ -102,3 +104,4 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
183
184 return group;
185 }
186+EXPORT_SYMBOL(fsnotify_alloc_group);
187diff --git a/fs/notify/mark.c b/fs/notify/mark.c
2cbb1c4b 188index 252ab1f..2199b9b 100644
7f207e10
AM
189--- a/fs/notify/mark.c
190+++ b/fs/notify/mark.c
2cbb1c4b 191@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10
AM
192 if (atomic_dec_and_test(&mark->refcnt))
193 mark->free_mark(mark);
194 }
195+EXPORT_SYMBOL(fsnotify_put_mark);
196
197 /*
198 * Any time a mark is getting freed we end up here.
2cbb1c4b 199@@ -189,6 +190,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
7f207e10
AM
200 if (unlikely(atomic_dec_and_test(&group->num_marks)))
201 fsnotify_final_destroy_group(group);
202 }
203+EXPORT_SYMBOL(fsnotify_destroy_mark);
204
205 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
206 {
2cbb1c4b 207@@ -276,6 +278,7 @@ err:
7f207e10
AM
208
209 return ret;
210 }
211+EXPORT_SYMBOL(fsnotify_add_mark);
212
213 /*
214 * clear any marks in a group in which mark->flags & flags is true
2cbb1c4b 215@@ -331,6 +334,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
216 atomic_set(&mark->refcnt, 1);
217 mark->free_mark = free_mark;
218 }
219+EXPORT_SYMBOL(fsnotify_init_mark);
220
221 static int fsnotify_mark_destroy(void *ignored)
222 {
223diff --git a/fs/open.c b/fs/open.c
2cbb1c4b 224index b52cf01..c1b341c 100644
7f207e10
AM
225--- a/fs/open.c
226+++ b/fs/open.c
227@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
228 mutex_unlock(&dentry->d_inode->i_mutex);
229 return ret;
230 }
231+EXPORT_SYMBOL(do_truncate);
232
233 static long do_sys_truncate(const char __user *pathname, loff_t length)
234 {
235diff --git a/fs/splice.c b/fs/splice.c
2cbb1c4b 236index 19afec6..11f07f8 100644
7f207e10
AM
237--- a/fs/splice.c
238+++ b/fs/splice.c
2cbb1c4b 239@@ -1109,6 +1109,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
240
241 return splice_write(pipe, out, ppos, len, flags);
242 }
243+EXPORT_SYMBOL(do_splice_from);
244
245 /*
246 * Attempt to initiate a splice from a file to a pipe.
2cbb1c4b 247@@ -1135,6 +1136,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
248
249 return splice_read(in, ppos, pipe, len, flags);
250 }
251+EXPORT_SYMBOL(do_splice_to);
252
253 /**
254 * splice_direct_to_actor - splices data directly between two non-pipes
255diff --git a/security/commoncap.c b/security/commoncap.c
2cbb1c4b 256index a93b3b7..024282c 100644
7f207e10
AM
257--- a/security/commoncap.c
258+++ b/security/commoncap.c
53fd41f5 259@@ -978,3 +978,4 @@ int cap_file_mmap(struct file *file, uns
94337f0d 260 }
7f207e10
AM
261 return ret;
262 }
263+EXPORT_SYMBOL(cap_file_mmap);
264diff --git a/security/device_cgroup.c b/security/device_cgroup.c
2cbb1c4b 265index 1be6826..215278c 100644
7f207e10
AM
266--- a/security/device_cgroup.c
267+++ b/security/device_cgroup.c
2cbb1c4b 268@@ -508,6 +508,7 @@ found:
7f207e10
AM
269
270 return -EPERM;
271 }
2cbb1c4b 272+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
273
274 int devcgroup_inode_mknod(int mode, dev_t dev)
275 {
276diff --git a/security/security.c b/security/security.c
2cbb1c4b 277index 4ba6d4c..9f64bb8 100644
7f207e10
AM
278--- a/security/security.c
279+++ b/security/security.c
2cbb1c4b 280@@ -373,6 +373,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
281 return 0;
282 return security_ops->path_rmdir(dir, dentry);
283 }
284+EXPORT_SYMBOL(security_path_rmdir);
285
286 int security_path_unlink(struct path *dir, struct dentry *dentry)
287 {
2cbb1c4b 288@@ -389,6 +390,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
289 return 0;
290 return security_ops->path_symlink(dir, dentry, old_name);
291 }
292+EXPORT_SYMBOL(security_path_symlink);
293
294 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
295 struct dentry *new_dentry)
2cbb1c4b 296@@ -397,6 +399,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
297 return 0;
298 return security_ops->path_link(old_dentry, new_dir, new_dentry);
299 }
300+EXPORT_SYMBOL(security_path_link);
301
302 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
303 struct path *new_dir, struct dentry *new_dentry)
2cbb1c4b 304@@ -415,6 +418,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
305 return 0;
306 return security_ops->path_truncate(path);
307 }
308+EXPORT_SYMBOL(security_path_truncate);
309
310 int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
311 mode_t mode)
2cbb1c4b 312@@ -423,6 +427,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
7f207e10
AM
313 return 0;
314 return security_ops->path_chmod(dentry, mnt, mode);
315 }
316+EXPORT_SYMBOL(security_path_chmod);
317
318 int security_path_chown(struct path *path, uid_t uid, gid_t gid)
319 {
2cbb1c4b 320@@ -430,6 +435,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
7f207e10
AM
321 return 0;
322 return security_ops->path_chown(path, uid, gid);
323 }
324+EXPORT_SYMBOL(security_path_chown);
325
326 int security_path_chroot(struct path *path)
327 {
2cbb1c4b 328@@ -506,6 +512,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
329 return 0;
330 return security_ops->inode_readlink(dentry);
331 }
332+EXPORT_SYMBOL(security_inode_readlink);
333
334 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
335 {
2cbb1c4b 336@@ -520,6 +527,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 337 return 0;
2cbb1c4b 338 return security_ops->inode_permission(inode, mask, 0);
7f207e10
AM
339 }
340+EXPORT_SYMBOL(security_inode_permission);
341
027c5e7a 342 int security_inode_exec_permission(struct inode *inode, unsigned int flags)
7f207e10 343 {
2cbb1c4b 344@@ -626,6 +634,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
345
346 return fsnotify_perm(file, mask);
347 }
348+EXPORT_SYMBOL(security_file_permission);
349
350 int security_file_alloc(struct file *file)
351 {
2cbb1c4b 352@@ -653,6 +662,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
7f207e10
AM
353 return ret;
354 return ima_file_mmap(file, prot);
355 }
356+EXPORT_SYMBOL(security_file_mmap);
357
358 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
359 unsigned long prot)
360diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
361--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 362+++ linux/Documentation/ABI/testing/debugfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
363@@ -0,0 +1,37 @@
364+What: /debug/aufs/si_<id>/
365+Date: March 2009
366+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
367+Description:
368+ Under /debug/aufs, a directory named si_<id> is created
369+ per aufs mount, where <id> is a unique id generated
370+ internally.
1facf9fc 371+
7f207e10
AM
372+What: /debug/aufs/si_<id>/xib
373+Date: March 2009
374+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
375+Description:
376+ It shows the consumed blocks by xib (External Inode Number
377+ Bitmap), its block size and file size.
378+ When the aufs mount option 'noxino' is specified, it
379+ will be empty. About XINO files, see the aufs manual.
380+
381+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
382+Date: March 2009
383+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
384+Description:
385+ It shows the consumed blocks by xino (External Inode Number
386+ Translation Table), its link count, block size and file
387+ size.
388+ When the aufs mount option 'noxino' is specified, it
389+ will be empty. About XINO files, see the aufs manual.
390+
391+What: /debug/aufs/si_<id>/xigen
392+Date: March 2009
393+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
394+Description:
395+ It shows the consumed blocks by xigen (External Inode
396+ Generation Table), its block size and file size.
397+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
398+ be created.
399+ When the aufs mount option 'noxino' is specified, it
400+ will be empty. About XINO files, see the aufs manual.
401diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
402--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
53392da6 403+++ linux/Documentation/ABI/testing/sysfs-aufs 2011-08-24 13:30:24.727980364 +0200
7f207e10
AM
404@@ -0,0 +1,24 @@
405+What: /sys/fs/aufs/si_<id>/
406+Date: March 2009
407+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
408+Description:
409+ Under /sys/fs/aufs, a directory named si_<id> is created
410+ per aufs mount, where <id> is a unique id generated
411+ internally.
412+
413+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
414+Date: March 2009
415+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
416+Description:
417+ It shows the absolute path of a member directory (which
418+ is called branch) in aufs, and its permission.
419+
420+What: /sys/fs/aufs/si_<id>/xi_path
421+Date: March 2009
422+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
423+Description:
424+ It shows the absolute path of XINO (External Inode Number
425+ Bitmap, Translation Table and Generation Table) file
426+ even if it is the default path.
427+ When the aufs mount option 'noxino' is specified, it
428+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
429diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
430--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
431+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2011-08-24 13:30:24.727980364 +0200
432@@ -0,0 +1,162 @@
433+
434+# Copyright (C) 2005-2011 Junjiro R. Okajima
435+#
436+# This program is free software; you can redistribute it and/or modify
437+# it under the terms of the GNU General Public License as published by
438+# the Free Software Foundation; either version 2 of the License, or
439+# (at your option) any later version.
440+#
441+# This program is distributed in the hope that it will be useful,
442+# but WITHOUT ANY WARRANTY; without even the implied warranty of
443+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
444+# GNU General Public License for more details.
445+#
446+# You should have received a copy of the GNU General Public License
447+# along with this program; if not, write to the Free Software
448+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
449+
450+Introduction
451+----------------------------------------
452+
453+aufs [ei ju: ef es] | [a u f s]
454+1. abbrev. for "advanced multi-layered unification filesystem".
455+2. abbrev. for "another unionfs".
456+3. abbrev. for "auf das" in German which means "on the" in English.
457+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
458+ But "Filesystem aufs Filesystem" is hard to understand.
459+
460+AUFS is a filesystem with features:
461+- multi layered stackable unification filesystem, the member directory
462+ is called as a branch.
463+- branch permission and attribute, 'readonly', 'real-readonly',
464+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
465+ combination.
466+- internal "file copy-on-write".
467+- logical deletion, whiteout.
468+- dynamic branch manipulation, adding, deleting and changing permission.
469+- allow bypassing aufs, user's direct branch access.
470+- external inode number translation table and bitmap which maintains the
471+ persistent aufs inode number.
472+- seekable directory, including NFS readdir.
473+- file mapping, mmap and sharing pages.
474+- pseudo-link, hardlink over branches.
475+- loopback mounted filesystem as a branch.
476+- several policies to select one among multiple writable branches.
477+- revert a single systemcall when an error occurs in aufs.
478+- and more...
479+
480+
481+Multi Layered Stackable Unification Filesystem
482+----------------------------------------------------------------------
483+Most people already knows what it is.
484+It is a filesystem which unifies several directories and provides a
485+merged single directory. When users access a file, the access will be
486+passed/re-directed/converted (sorry, I am not sure which English word is
487+correct) to the real file on the member filesystem. The member
488+filesystem is called 'lower filesystem' or 'branch' and has a mode
489+'readonly' and 'readwrite.' And the deletion for a file on the lower
490+readonly branch is handled by creating 'whiteout' on the upper writable
491+branch.
492+
493+On LKML, there have been discussions about UnionMount (Jan Blunck,
494+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
495+different approaches to implement the merged-view.
496+The former tries putting it into VFS, and the latter implements as a
497+separate filesystem.
498+(If I misunderstand about these implementations, please let me know and
499+I shall correct it. Because it is a long time ago when I read their
500+source files last time).
501+
502+UnionMount's approach will be able to be small, but it may be hard to share
503+branches between several UnionMounts since the whiteout in it is
504+implemented in the inode on the branch filesystem and is always
505+shared. According to Bharata's post, readdir does not seem to be
506+finished yet.
507+There are several known missing features in this implementation, such as
508+- for users, the inode number may change silently. eg. copy-up.
509+- link(2) may break by copy-up.
510+- read(2) may get an obsoleted filedata (fstat(2) too).
511+- fcntl(F_SETLK) may be broken by copy-up.
512+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
513+ open(O_RDWR).
514+
515+Unionfs has a longer history. When I started implementing a stacking filesystem
516+(Aug 2005), it already existed. It has virtual super_block, inode,
517+dentry and file objects and they have an array pointing lower same kind
518+objects. After contributing many patches for Unionfs, I re-started my
519+project AUFS (Jun 2006).
520+
521+In AUFS, the structure of the filesystem resembles Unionfs, but I
522+implemented my own ideas, approaches and enhancements and it became
523+a totally different one.
524+
525+Comparing DM snapshot and fs based implementation
526+- the number of bytes to be copied between devices is much smaller.
527+- the type of filesystem must be one and only.
528+- the fs must be writable, no readonly fs, even for the lower original
529+ device. so the compression fs will not be usable. but if we use
530+ loopback mount, we may address this issue.
531+ for instance,
532+ mount /cdrom/squashfs.img /sq
533+ losetup /sq/ext2.img
534+ losetup /somewhere/cow
535+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
536+- it will be difficult (or needs more operations) to extract the
537+ difference between the original device and COW.
538+- DM snapshot-merge may help a lot when users try merging. in the
539+ fs-layer union, users will use rsync(1).
540+
541+
542+Several characters/aspects of aufs
543+----------------------------------------------------------------------
544+
545+Aufs has several characters or aspects.
546+1. a filesystem, callee of VFS helper
547+2. sub-VFS, caller of VFS helper for branches
548+3. a virtual filesystem which maintains persistent inode number
549+4. reader/writer of files on branches such like an application
550+
551+1. Callee of VFS Helper
552+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
553+unlink(2) from an application reaches sys_unlink() kernel function and
554+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
555+calls filesystem specific unlink operation. Actually aufs implements the
556+unlink operation but it behaves like a redirector.
557+
558+2. Caller of VFS Helper for Branches
559+aufs_unlink() passes the unlink request to the branch filesystem as if
560+it were called from VFS. So the called unlink operation of the branch
561+filesystem acts as usual. As a caller of VFS helper, aufs should handle
562+every necessary pre/post operation for the branch filesystem.
563+- acquire the lock for the parent dir on a branch
564+- lookup in a branch
565+- revalidate dentry on a branch
566+- mnt_want_write() for a branch
567+- vfs_unlink() for a branch
568+- mnt_drop_write() for a branch
569+- release the lock on a branch
570+
571+3. Persistent Inode Number
572+One of the most important issues for a filesystem is to maintain inode
573+numbers. This is particularly important to support exporting a
574+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
575+backend block device for its own. But some storage is necessary to
576+maintain inode number. It may be a large space and may not suit to keep
577+in memory. Aufs rents some space from its first writable branch
578+filesystem (by default) and creates file(s) on it. These files are
579+created by aufs internally and removed soon (currently) keeping opened.
580+Note: Because these files are removed, they are totally gone after
581+ unmounting aufs. It means the inode numbers are not persistent
582+ across unmount or reboot. I have a plan to make them really
583+ persistent which will be important for aufs on NFS server.
584+
585+4. Read/Write Files Internally (copy-on-write)
586+Because a branch can be readonly, when you write a file on it, aufs will
587+"copy-up" it to the upper writable branch internally. And then write the
588+originally requested thing to the file. Generally kernel doesn't
589+open/read/write file actively. In aufs, even a single write may cause an
590+internal "file copy". This behaviour is very similar to the cp(1) command.
591+
592+Some people may think it is better to pass such work to user space
593+helper, instead of doing in kernel space. Actually I am still thinking
594+about it. But currently I have implemented it in kernel space.
595diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
596--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
597+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2011-08-24 13:30:24.727980364 +0200
598@@ -0,0 +1,226 @@
599+
600+# Copyright (C) 2005-2011 Junjiro R. Okajima
601+#
602+# This program is free software; you can redistribute it and/or modify
603+# it under the terms of the GNU General Public License as published by
604+# the Free Software Foundation; either version 2 of the License, or
605+# (at your option) any later version.
606+#
607+# This program is distributed in the hope that it will be useful,
608+# but WITHOUT ANY WARRANTY; without even the implied warranty of
609+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
610+# GNU General Public License for more details.
611+#
612+# You should have received a copy of the GNU General Public License
613+# along with this program; if not, write to the Free Software
614+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
615+
616+Basic Aufs Internal Structure
617+
618+Superblock/Inode/Dentry/File Objects
619+----------------------------------------------------------------------
620+As like an ordinary filesystem, aufs has its own
621+superblock/inode/dentry/file objects. All these objects have a
622+dynamically allocated array and store the same kind of pointers to the
623+lower filesystem, branch.
624+For example, when you build a union with one readwrite branch and one
625+readonly, mounted /au, /rw and /ro respectively.
626+- /au = /rw + /ro
627+- /ro/fileA exists but /rw/fileA does not
628+
629+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
630+pointers are stored in an aufs dentry. The array in aufs dentry will be,
631+- [0] = NULL
632+- [1] = /ro/fileA
633+
634+This style of an array is essentially same to the aufs
635+superblock/inode/dentry/file objects.
636+
637+Because aufs supports manipulating branches, i.e. add/delete/change
638+dynamically, each of these objects has its own generation. When branches
639+are changed, the generation in the aufs superblock is incremented, and
640+the generation in another object is compared when that object is
641+accessed. When the generation in an object is obsolete, aufs refreshes
642+the internal array.
643+
644+
645+Superblock
646+----------------------------------------------------------------------
647+Additionally aufs superblock has some data for policies to select one
648+among multiple writable branches, XIB files, pseudo-links and kobject.
649+See below in detail.
650+About the policies which support copy-down a directory, see policy.txt
651+too.
652+
653+
654+Branch and XINO(External Inode Number Translation Table)
655+----------------------------------------------------------------------
656+Every branch has its own xino (external inode number translation table)
657+file. The xino file is created and unlinked by aufs internally. When two
658+members of a union exist on the same filesystem, they share the single
659+xino file.
660+The struct of a xino file is simple, just a sequence of aufs inode
661+numbers which is indexed by the lower inode number.
662+In the above sample, assume the inode number of /ro/fileA is i111 and
663+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
664+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
665+
666+When the inode numbers are not contiguous, the xino file will be sparse
667+which has a hole in it and doesn't consume as much disk space as it
668+might appear. If your branch filesystem consumes disk space for such
669+holes, then you should specify 'xino=' option at mounting aufs.
670+
671+Also a writable branch has three kinds of "whiteout bases". All of these
672+exist once the branch is joined to aufs, and their names are
673+whiteout-ed doubly, so that users will never see their names in the aufs
674+hierarchy.
675+1. a regular file which will be linked to all whiteouts.
676+2. a directory to store a pseudo-link.
677+3. a directory to store an "orphan-ed" file temporarily.
678+
679+1. Whiteout Base
680+ When you remove a file on a readonly branch, aufs handles it as a
681+ logical deletion and creates a whiteout on the upper writable branch
682+ as a hardlink of this file in order not to consume inode on the
683+ writable branch.
684+2. Pseudo-link Dir
685+ See below, Pseudo-link.
686+3. Step-Parent Dir
687+ When "fileC" exists on the lower readonly branch only and it is
688+ opened and removed with its parent dir, and then user writes
689+ something into it, then aufs copies-up fileC to this
690+ directory. Because there is no other dir to store fileC. After
691+ creating a file under this dir, the file is unlinked.
692+
693+Because aufs supports manipulating branches, ie. add/delete/change
694+dynamically, a branch has its own id. When the branch order changes, aufs
695+finds the new index by searching the branch id.
696+
697+
698+Pseudo-link
699+----------------------------------------------------------------------
700+Assume "fileA" exists on the lower readonly branch only and it is
701+hardlinked to "fileB" on the branch. When you write something to fileA,
702+aufs copies-up it to the upper writable branch. Additionally aufs
703+creates a hardlink under the Pseudo-link Directory of the writable
704+branch. The inode of a pseudo-link is kept in aufs super_block as a
705+simple list. If fileB is read after unlinking fileA, aufs returns
706+filedata from the pseudo-link instead of the lower readonly
707+branch. Because the pseudo-link is based upon the inode, to keep the
708+inode number by xino (see above) is important.
709+
710+All the hardlinks under the Pseudo-link Directory of the writable branch
711+should be restored in a proper location later. Aufs provides a utility
712+to do this. The userspace helpers are executed at remounting and unmounting
713+aufs by default.
714+While this utility is running, it puts aufs into the pseudo-link
715+maintenance mode. In this mode, only the process which began the
716+maintenance mode (and its child processes) is allowed to operate in
717+aufs. Some other processes which are not related to the pseudo-link will
718+be allowed to run too, but the rest have to return an error or wait
719+until the maintenance mode ends. If a process already acquires an inode
720+mutex (in VFS), it has to return an error.
721+
722+
723+XIB(external inode number bitmap)
724+----------------------------------------------------------------------
725+In addition to the xino file per branch, aufs has an external inode number
726+bitmap in a superblock object. It is also a file, like a xino file.
727+It is a simple bitmap to mark whether the aufs inode number is in-use or
728+not.
729+To reduce the file I/O, aufs prepares a single memory page to cache xib.
730+
731+Aufs implements a feature to truncate/refresh both of xino and xib to
732+reduce the number of consumed disk blocks for these files.
733+
734+
735+Virtual or Vertical Dir, and Readdir in Userspace
736+----------------------------------------------------------------------
737+In order to support multiple layers (branches), aufs readdir operation
738+constructs a virtual dir block on memory. For readdir, aufs calls
739+vfs_readdir() internally for each dir on branches, merges their entries
740+with eliminating the whiteout-ed ones, and sets it to file (dir)
741+object. So the file object has its entry list until it is closed. The
742+entry list will be updated when the file position is zero and becomes
743+old. This decision is made in aufs automatically.
744+
745+The dynamically allocated memory block for the name of entries has a
746+unit of 512 bytes (by default) and stores the names contiguously (no
747+padding). Another block for each entry is handled by kmem_cache too.
748+While building dir blocks, aufs creates a hash list and judges whether
749+the entry is whiteouted by its upper branch or already listed.
750+The merged result is cached in the corresponding inode object and
751+maintained by a customizable life-time option.
752+
753+Some people may call it can be a security hole or invite DoS attack
754+since the opened and once readdir-ed dir (file object) holds its entry
755+list and becomes a pressure for system memory. But I'd say it is similar
756+to files under /proc or /sys. The virtual files in them also holds a
757+memory page (generally) while they are opened. When an idea to reduce
758+memory for them is introduced, it will be applied to aufs too.
759+For those who really hate this situation, I've developed readdir(3)
760+library which operates this merging in userspace. You just need to set
761+LD_PRELOAD environment variable, and aufs will not consume any memory in
762+kernel space for readdir(3).
763+
764+
765+Workqueue
766+----------------------------------------------------------------------
767+Aufs sometimes requires privilege access to a branch. For instance,
768+in copy-up/down operation. When a user process is going to make changes
769+to a file which exists in the lower readonly branch only, and the mode
770+of one of ancestor directories may not be writable by a user
771+process. Here aufs copy-up the file with its ancestors and they may
772+require privilege to set its owner/group/mode/etc.
773+This is a typical case of an application character of aufs (see
774+Introduction).
775+
776+Aufs uses workqueue synchronously for this case. It creates its own
777+workqueue. The workqueue is a kernel thread and has privilege. Aufs
778+passes the request to call mkdir or write (for example), and waits for
779+its completion. This approach solves a problem of a signal handler
780+simply.
781+If aufs didn't adopt the workqueue and changed the privilege of the
782+process, and if the mkdir/write call raises SIGXFSZ or other signal,
783+then the user process might gain a privilege or the generated core file
784+was owned by a superuser.
785+
786+Also aufs uses the system global workqueue ("events" kernel thread) too
787+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
788+whiteout base and etc. This is unrelated to a privilege.
789+Most of aufs operation tries acquiring a rw_semaphore for aufs
790+superblock at the beginning, at the same time waits for the completion
791+of all queued asynchronous tasks.
792+
793+
794+Whiteout
795+----------------------------------------------------------------------
796+The whiteout in aufs is very similar to Unionfs's. That is represented
797+by its filename. UnionMount takes an approach of a file mode, but I am
798+afraid several utilities (find(1) or something) will have to support it.
799+
800+Basically the whiteout represents "logical deletion" which stops aufs from
801+looking up further, but it also represents "dir is opaque" which also stops
802+lookup.
803+
804+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
805+In order to make several functions in a single systemcall to be
806+revertible, aufs adopts an approach to rename a directory to a temporary
807+unique whiteouted name.
808+For example, in rename(2) dir where the target dir already existed, aufs
809+renames the target dir to a temporary unique whiteouted name before the
810+actual rename on a branch and then handles other actions (make it opaque,
811+update the attributes, etc). If an error happens in these actions, aufs
812+simply renames the whiteouted name back and returns an error. If all are
813+succeeded, aufs registers a function to remove the whiteouted unique
814+temporary name completely and asynchronously to the system global
815+workqueue.
816+
817+
818+Copy-up
819+----------------------------------------------------------------------
820+It is a well-known feature or concept.
821+When a user modifies a file on a readonly branch, aufs operates "copy-up"
822+internally and makes change to the new file on the upper writable branch.
823+When the trigger systemcall does not update the timestamps of the parent
824+dir, aufs reverts it after copy-up.
825diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
826--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
827+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2011-08-24 13:30:24.727980364 +0200
828@@ -0,0 +1,106 @@
829+
830+# Copyright (C) 2005-2011 Junjiro R. Okajima
831+#
832+# This program is free software; you can redistribute it and/or modify
833+# it under the terms of the GNU General Public License as published by
834+# the Free Software Foundation; either version 2 of the License, or
835+# (at your option) any later version.
836+#
837+# This program is distributed in the hope that it will be useful,
838+# but WITHOUT ANY WARRANTY; without even the implied warranty of
839+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
840+# GNU General Public License for more details.
841+#
842+# You should have received a copy of the GNU General Public License
843+# along with this program; if not, write to the Free Software
844+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
845+
846+Lookup in a Branch
847+----------------------------------------------------------------------
848+Since aufs has a character of sub-VFS (see Introduction), it operates
849+lookup for branches as VFS does. It may be a heavy work. Generally
850+speaking struct nameidata is a bigger structure and includes many
851+information. But almost all lookup operation in aufs is the simplest
852+case, ie. lookup only an entry directly connected to its parent. Digging
853+down the directory hierarchy is unnecessary.
854+
855+VFS has a function lookup_one_len() for that use, but it is not usable
856+for a branch filesystem which requires struct nameidata. So aufs
857+implements a simple lookup wrapper function. When a branch filesystem
858+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
859+a simplest nameidata and calls lookup_hash().
860+Here aufs applies "a principle in NFSD", ie. if the filesystem supports
861+NFS-export, then it has to support NULL as a nameidata parameter for
862+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
863+aufs tests if ->s_export_op in the branch is NULL or not.
864+
865+When a branch is a remote filesystem, aufs basically trusts its
866+->d_revalidate(), also aufs forces the hardest revalidate tests for
867+them.
868+For d_revalidate, aufs implements three levels of revalidate tests. See
869+"Revalidate Dentry and UDBA" in detail.
870+
871+
872+Loopback Mount
873+----------------------------------------------------------------------
874+Basically aufs supports any type of filesystem and block device for a
875+branch (actually there are some exceptions). But it is prohibited to add
876+a loopback mounted one whose backend file exists in a filesystem which is
877+already added to aufs. The reason is to protect aufs from a recursive
878+lookup. If it was allowed, the aufs lookup operation might re-enter a
879+lookup for the loopback mounted branch in the same context, and will
880+cause a deadlock.
881+
882+
883+Revalidate Dentry and UDBA (User's Direct Branch Access)
884+----------------------------------------------------------------------
885+Generally VFS helpers re-validate a dentry as a part of lookup.
886+0. digging down the directory hierarchy.
887+1. lock the parent dir by its i_mutex.
888+2. lookup the final (child) entry.
889+3. revalidate it.
890+4. call the actual operation (create, unlink, etc.)
891+5. unlock the parent dir
892+
893+If the filesystem implements its ->d_revalidate() (step 3), then it is
894+called. Actually aufs implements it and checks the dentry on a branch is
895+still valid.
896+But it is not enough. Because aufs has to release the lock for the
897+parent dir on a branch at the end of ->lookup() (step 2) and
898+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
899+held by VFS.
900+If the file on a branch is changed directly, eg. bypassing aufs, after
901+aufs released the lock, then the subsequent operation may cause
902+some unpleasant result.
903+
904+This situation is a result of VFS architecture, ->lookup() and
905+->d_revalidate() are separated. But I never say it is wrong. It is a good
906+design from VFS's point of view. It is just not suitable for sub-VFS
907+character in aufs.
908+
909+Aufs supports such case by three levels of revalidation which are
910+selectable by user.
911+1. Simple Revalidate
912+ In addition to the native flow in VFS, confirm the child-parent
913+ relationship on the branch just after locking the parent dir on the
914+ branch in the "actual operation" (step 4). When this validation
915+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
916+ checks the validation of the dentry on branches.
917+2. Monitor Changes Internally by Inotify/Fsnotify
918+ In addition to the above, in the "actual operation" (step 4) aufs looks up
919+ the dentry on the branch again, and returns EBUSY if it finds a different
920+ dentry.
921+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
922+ while it is in cache. When the event is notified, aufs registers a
923+ function to kernel 'events' thread by schedule_work(). And the
924+ function sets some special status to the cached aufs dentry and inode
925+ private data. If they are not cached, then aufs has nothing to
926+ do. When the same file is accessed through aufs (step 0-3) later,
927+ aufs will detect the status and refresh all necessary data.
928+ In this mode, aufs has to ignore the event which is fired by aufs
929+ itself.
930+3. No Extra Validation
931+ This is the simplest test and doesn't add any additional revalidation
932+ test, and skips the revalidation in step 4. It is useful and improves
933+ aufs performance when the system surely hides the aufs branches from the user,
934+ by over-mounting something (or another method).
935diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
936--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
937+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2011-08-24 13:30:24.727980364 +0200
938@@ -0,0 +1,76 @@
939+
940+# Copyright (C) 2005-2011 Junjiro R. Okajima
941+#
942+# This program is free software; you can redistribute it and/or modify
943+# it under the terms of the GNU General Public License as published by
944+# the Free Software Foundation; either version 2 of the License, or
945+# (at your option) any later version.
946+#
947+# This program is distributed in the hope that it will be useful,
948+# but WITHOUT ANY WARRANTY; without even the implied warranty of
949+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
950+# GNU General Public License for more details.
951+#
952+# You should have received a copy of the GNU General Public License
953+# along with this program; if not, write to the Free Software
954+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
955+
956+Branch Manipulation
957+
958+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
959+and changing its permission/attribute, there is a lot of work to do.
960+
961+
962+Add a Branch
963+----------------------------------------------------------------------
964+o Confirm the adding dir exists outside of aufs, including loopback
965+ mount.
966+- and other various attributes...
967+o Initialize the xino file and whiteout bases if necessary.
968+ See struct.txt.
969+
970+o Check the owner/group/mode of the directory
971+ When the owner/group/mode of the adding directory differs from the
972+ existing branch, aufs issues a warning because it may impose a
973+ security risk.
974+ For example, when a upper writable branch has a world writable empty
975+ top directory, a malicious user can create any files on the writable
976+ branch directly, like copy-up and modify manually. If something like
977+ /etc/{passwd,shadow} exists on the lower readonly branch but not on the upper
978+ writable branch, and the writable branch is world-writable, then a
979+ malicious guy may create /etc/passwd on the writable branch directly
980+ and the infected file will be valid in aufs.
981+ I am afraid it can be a security issue, but nothing to do except
982+ producing a warning.
983+
984+
985+Delete a Branch
986+----------------------------------------------------------------------
987+o Confirm the deleting branch is not busy
988+ To be general, there is one merit to adopt "remount" interface to
989+ manipulate branches. It is to discard caches. At deleting a branch,
990+ aufs checks the still cached (and connected) dentries and inodes. If
991+ there are any, then they are all in-use. An inode without its
992+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
993+
994+ For the cached one, aufs checks whether the same named entry exists on
995+ other branches.
996+ If the cached one is a directory, because aufs provides a merged view
997+ to users, as long as one dir is left on any branch aufs can show the
998+ dir to users. In this case, the branch can be removed from aufs.
999+ Otherwise aufs rejects deleting the branch.
1000+
1001+ If any file on the deleting branch is opened by aufs, then aufs
1002+ rejects deleting.
1003+
1004+
1005+Modify the Permission of a Branch
1006+----------------------------------------------------------------------
1007+o Re-initialize or remove the xino file and whiteout bases if necessary.
1008+ See struct.txt.
1009+
1010+o rw --> ro: Confirm the modifying branch is not busy
1011+ Aufs rejects the request if any of these conditions are true.
1012+ - a file on the branch is mmap-ed.
1013+ - a regular file on the branch is opened for write and there is no
1014+ same named entry on the upper branch.
1015diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1016--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1017+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2011-08-24 13:30:24.727980364 +0200
1018@@ -0,0 +1,65 @@
1019+
1020+# Copyright (C) 2005-2011 Junjiro R. Okajima
1021+#
1022+# This program is free software; you can redistribute it and/or modify
1023+# it under the terms of the GNU General Public License as published by
1024+# the Free Software Foundation; either version 2 of the License, or
1025+# (at your option) any later version.
1026+#
1027+# This program is distributed in the hope that it will be useful,
1028+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1029+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1030+# GNU General Public License for more details.
1031+#
1032+# You should have received a copy of the GNU General Public License
1033+# along with this program; if not, write to the Free Software
1034+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1035+
1036+Policies to Select One among Multiple Writable Branches
1037+----------------------------------------------------------------------
1038+When the number of writable branches is more than one, aufs has to decide
1039+the target branch for file creation or copy-up. By default, the highest
1040+writable branch which has the parent (or ancestor) dir of the target
1041+file is chosen (top-down-parent policy).
1042+By user's request, aufs implements some other policies to select the
1043+writable branch, for file creation two policies, round-robin and
1044+most-free-space policies. For copy-up three policies, top-down-parent,
1045+bottom-up-parent and bottom-up policies.
1046+
1047+As expected, the round-robin policy selects the branch in a circular manner. When
1048+you have two writable branches and create 10 new files, 5 files will be
1049+created for each branch. mkdir(2) systemcall is an exception. When you
1050+create 10 new directories, all will be created on the same branch.
1051+And the most-free-space policy selects the one which has most free
1052+space among the writable branches. The amount of free space will be
1053+checked by aufs internally, and users can specify its time interval.
1054+
1055+The policies for copy-up are simpler,
1056+top-down-parent is equivalent to the same named one in create policy,
1057+bottom-up-parent selects the writable branch where the parent dir
1058+exists and the nearest upper one from the copyup-source,
1059+bottom-up selects the nearest upper writable branch from the
1060+copyup-source, regardless of the existence of the parent dir.
1061+
1062+There are some rules or exceptions to apply these policies.
1063+- If there is a readonly branch above the policy-selected branch and
1064+ the parent dir is marked as opaque (a variation of whiteout), or the
1065+ target (creating) file is whiteout-ed on the upper readonly branch,
1066+ then the result of the policy is ignored and the target file will be
1067+ created on the nearest upper writable branch than the readonly branch.
1068+- If there is a writable branch above the policy-selected branch and
1069+ the parent dir is marked as opaque or the target file is whiteouted
1070+ on the branch, then the result of the policy is ignored and the target
1071+ file will be created on the highest one among the upper writable
1072+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1073+ it as usual.
1074+- link(2) and rename(2) systemcalls are exceptions in every policy.
1075+ They try selecting the branch where the source exists as possible
1076+ since copyup a large file will take long time. If it can't be,
1077+ ie. the branch where the source exists is readonly, then they will
1078+ follow the copyup policy.
1079+- There is an exception for rename(2) when the target exists.
1080+ If the rename target exists, aufs compares the index of the branches
1081+ where the source and the target exists and selects the higher
1082+ one. If the selected branch is readonly, then aufs follows the
1083+ copyup policy.
1084diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1085--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
1086+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2011-08-24 13:30:24.727980364 +0200
1087@@ -0,0 +1,47 @@
1088+
1089+# Copyright (C) 2005-2011 Junjiro R. Okajima
1090+#
1091+# This program is free software; you can redistribute it and/or modify
1092+# it under the terms of the GNU General Public License as published by
1093+# the Free Software Foundation; either version 2 of the License, or
1094+# (at your option) any later version.
1095+#
1096+# This program is distributed in the hope that it will be useful,
1097+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1098+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1099+# GNU General Public License for more details.
1100+#
1101+# You should have received a copy of the GNU General Public License
1102+# along with this program; if not, write to the Free Software
1103+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1104+
1105+mmap(2) -- File Memory Mapping
1106+----------------------------------------------------------------------
1107+In aufs, the file-mapped pages are handled by a branch fs directly, no
1108+interaction with aufs. It means aufs_mmap() calls the branch fs's
1109+->mmap().
1110+This approach is simple and good, but there is one problem.
1111+Under /proc, several entries show the mmap-ped files by its path (with
1112+device and inode number), and the printed path will be the path on the
1113+branch fs's instead of virtual aufs's.
1114+This is not a problem in most cases, but some utilities such as lsof(1) (and its
1115+user) may expect the path on aufs.
1116+
1117+To address this issue, aufs adds a new member called vm_prfile in struct
1118+vm_area_struct (and struct vm_region). The original vm_file points to
1119+the file on the branch fs in order to handle everything correctly as
1120+usual. The new vm_prfile points to a virtual file in aufs, and the
1121+show-functions in procfs refers to vm_prfile if it is set.
1122+Also we need to maintain several other places where touching vm_file
1123+such like
1124+- fork()/clone() copies vma and the reference count of vm_file is
1125+ incremented.
1126+- merging vma maintains the ref count too.
1127+
1128+This is not a good approach. It is just faking the printed path. But it
1129+leaves all behaviour around f_mapping unchanged. This is surely an
1130+advantage.
1131+Actually aufs had adopted another complicated approach which calls
1132+generic_file_mmap() and handles struct vm_operations_struct. In this
1133+approach, aufs met a hard problem and I could not solve it without
1134+switching the approach.
1135diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
1136--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
1137+++ linux/Documentation/filesystems/aufs/design/07export.txt 2011-08-24 13:30:24.727980364 +0200
1138@@ -0,0 +1,59 @@
1139+
1140+# Copyright (C) 2005-2011 Junjiro R. Okajima
1141+#
1142+# This program is free software; you can redistribute it and/or modify
1143+# it under the terms of the GNU General Public License as published by
1144+# the Free Software Foundation; either version 2 of the License, or
1145+# (at your option) any later version.
1146+#
1147+# This program is distributed in the hope that it will be useful,
1148+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1149+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1150+# GNU General Public License for more details.
1151+#
1152+# You should have received a copy of the GNU General Public License
1153+# along with this program; if not, write to the Free Software
1154+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1155+
1156+Export Aufs via NFS
1157+----------------------------------------------------------------------
1158+Here is an approach.
1159+- like xino/xib, add a new file 'xigen' which stores aufs inode
1160+ generation.
1161+- iget_locked(): initialize aufs inode generation for a new inode, and
1162+ store it in xigen file.
1163+- destroy_inode(): increment aufs inode generation and store it in xigen
1164+ file. it is necessary even if it is not unlinked, because any data of
1165+ inode may be changed by UDBA.
1166+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1167+ build file handle by
1168+ + branch id (4 bytes)
1169+ + superblock generation (4 bytes)
1170+ + inode number (4 or 8 bytes)
1171+ + parent dir inode number (4 or 8 bytes)
1172+ + inode generation (4 bytes)
1173+ + return value of exportfs_encode_fh() for the parent on a branch (4
1174+ bytes)
1175+ + file handle for a branch (by exportfs_encode_fh())
1176+- fh_to_dentry():
1177+ + find the index of a branch from its id in handle, and check it
1178+ still exists in aufs.
1179+ + 1st level: get the inode number from handle and search it in cache.
1180+ + 2nd level: if not found, get the parent inode number from handle and
1181+ search it in cache. and then open the parent dir, find the matching
1182+ inode number by vfs_readdir() and get its name, and call
1183+ lookup_one_len() for the target dentry.
1184+ + 3rd level: if the parent dir is not cached, call
1185+ exportfs_decode_fh() for a branch and get the parent on a branch,
1186+ build a pathname of it, convert it to a pathname in aufs, call
1187+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1188+ the 2nd level.
1189+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1190+ for every branch, but not itself. to get this, (currently) aufs
1191+ searches in current->nsproxy->mnt_ns list. it may not be a good
1192+ idea, but I didn't get other approach.
1193+ + test the generation of the gotten inode.
1194+- every inode operation: they may get EBUSY due to UDBA. in this case,
1195+ convert it into ESTALE for NFSD.
1196+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1197+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
1198diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
1199--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
1200+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2011-08-24 13:30:24.727980364 +0200
1201@@ -0,0 +1,53 @@
1202+
1203+# Copyright (C) 2005-2011 Junjiro R. Okajima
1204+#
1205+# This program is free software; you can redistribute it and/or modify
1206+# it under the terms of the GNU General Public License as published by
1207+# the Free Software Foundation; either version 2 of the License, or
1208+# (at your option) any later version.
1209+#
1210+# This program is distributed in the hope that it will be useful,
1211+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1212+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1213+# GNU General Public License for more details.
1214+#
1215+# You should have received a copy of the GNU General Public License
1216+# along with this program; if not, write to the Free Software
1217+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1218+
1219+Show Whiteout Mode (shwh)
1220+----------------------------------------------------------------------
1221+Generally aufs hides the name of whiteouts. But in some cases, to show
1222+them is very useful for users. For instance, creating a new middle layer
1223+(branch) by merging existing layers.
1224+
1225+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1226+When you have three branches,
1227+- Bottom: 'system', squashfs (underlying base system), read-only
1228+- Middle: 'mods', squashfs, read-only
1229+- Top: 'overlay', ram (tmpfs), read-write
1230+
1231+The top layer is loaded at boot time and saved at shutdown, to preserve
1232+the changes made to the system during the session.
1233+When larger changes have been made, or smaller changes have accumulated,
1234+the size of the saved top layer data grows. At this point, it would be
1235+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1236+and rewrite the 'mods' squashfs, clearing the top layer and thus
1237+restoring save and load speed.
1238+
1239+This merging is simplified by the use of another aufs mount, of just the
1240+two overlay branches using the 'shwh' option.
1241+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1242+ aufs /livesys/merge_union
1243+
1244+A merged view of these two branches is then available at
1245+/livesys/merge_union, and the new feature is that the whiteouts are
1246+visible!
1247+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1248+writing to all branches. Also the default mode for all branches is 'ro'.
1249+It is now possible to save the combined contents of the two overlay
1250+branches to a new squashfs, e.g.:
1251+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1252+
1253+This new squashfs archive can be stored on the boot device and the
1254+initramfs will use it to replace the old one at the next boot.
1255diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
1256--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
1257+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2011-08-24 13:30:24.727980364 +0200
1258@@ -0,0 +1,47 @@
1259+
1260+# Copyright (C) 2010-2011 Junjiro R. Okajima
1261+#
1262+# This program is free software; you can redistribute it and/or modify
1263+# it under the terms of the GNU General Public License as published by
1264+# the Free Software Foundation; either version 2 of the License, or
1265+# (at your option) any later version.
1266+#
1267+# This program is distributed in the hope that it will be useful,
1268+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1269+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1270+# GNU General Public License for more details.
1271+#
1272+# You should have received a copy of the GNU General Public License
1273+# along with this program; if not, write to the Free Software
1274+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1275+
1276+Dynamically customizable FS operations
1277+----------------------------------------------------------------------
1278+Generally FS operations (struct inode_operations, struct
1279+address_space_operations, struct file_operations, etc.) are defined as
1280+"static const", but it never means that FS have only one set of
1281+operation. Some FS have multiple sets of them. For instance, ext2 has
1282+three sets, one for XIP, for NOBH, and for normal.
1283+Since aufs overrides and redirects these operations, sometimes aufs has
1284+to change its behaviour according to the branch FS type. More importantly
1285+VFS acts differently if a function (member in the struct) is set or
1286+not. It means aufs should have several sets of operations and select one
1287+among them according to the branch FS definition.
1288+
1289+In order to solve this problem and not to affect the behaviour of VFS,
1290+aufs defines these operations dynamically. For instance, aufs defines
1291+aio_read function for struct file_operations, but it may not be set to
1292+the file_operations. When the branch FS doesn't have it, aufs doesn't
1293+set it to its file_operations while the function definition itself is
1294+still alive. So the behaviour of io_submit(2) will not change, and it
1295+will return an error when aio_read is not defined.
1296+
1297+The lifetime of these dynamically generated operation object is
1298+maintained by aufs branch object. When the branch is removed from aufs,
1299+the reference counter of the object is decremented. When it reaches
1300+zero, the dynamically generated operation object will be freed.
1301+
1302+This approach is designed to support AIO (io_submit), Direct I/O and
1303+XIP mainly.
1304+Currently this approach is applied to file_operations and
1305+vm_operations_struct for regular files only.
1306diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt linux/Documentation/filesystems/aufs/design/99plan.txt
1307--- /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
1308+++ linux/Documentation/filesystems/aufs/design/99plan.txt 2011-08-24 13:30:24.727980364 +0200
1309@@ -0,0 +1,96 @@
1310+
1311+# Copyright (C) 2005-2011 Junjiro R. Okajima
1312+#
1313+# This program is free software; you can redistribute it and/or modify
1314+# it under the terms of the GNU General Public License as published by
1315+# the Free Software Foundation; either version 2 of the License, or
1316+# (at your option) any later version.
1317+#
1318+# This program is distributed in the hope that it will be useful,
1319+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1320+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1321+# GNU General Public License for more details.
1322+#
1323+# You should have received a copy of the GNU General Public License
1324+# along with this program; if not, write to the Free Software
1325+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1326+
1327+Plan
1328+
1329+Restoring some features which were implemented in aufs1.
1330+They were dropped in aufs2 in order to make source files simpler and
1331+easier to be reviewed.
1332+
1333+
1334+Test Only the Highest One for the Directory Permission (dirperm1 option)
1335+----------------------------------------------------------------------
1336+Let's try case study.
1337+- aufs has two branches, upper readwrite and lower readonly.
1338+ /au = /rw + /ro
1339+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1340+- user invoked "chmod a+rx /au/dirA"
1341+- then "dirA" becomes world readable?
1342+
1343+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1344+or it may be a natively readonly filesystem. If aufs respects the lower
1345+branch, it should not respond readdir request from other users. But user
1346+allowed it by chmod. Should aufs really reject showing the entries
1347+under /ro/dirA?
1348+
1349+To be honest, I don't have a best solution for this case. So I
1350+implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to
1351+users.
1352+When dirperm1 is specified, aufs checks only the highest one for the
1353+directory permission, and shows the entries. Otherwise, as usual, checks
1354+every dir existing on all branches and rejects the request.
1355+
1356+As a side effect, dirperm1 option improves the performance of aufs
1357+because the number of permission check is reduced.
1358+
1359+
1360+Being Another Aufs's Readonly Branch (robr)
1361+----------------------------------------------------------------------
1362+Aufs1 allows aufs to be another aufs's readonly branch.
1363+This feature was developed by a user's request. But it may not be used
1364+currently.
1365+
1366+
1367+Copy-up on Open (coo=)
1368+----------------------------------------------------------------------
1369+By default the internal copy-up is executed when it is really necessary.
1370+It is not done when a file is opened for writing, but when write(2) is
1371+done. Users who have many (over 100) branches want to know and analyse
1372+when and what file is copied-up. To insert a new upper branch which
1373+contains such files only may improve the performance of aufs.
1374+
1375+Aufs1 implemented "coo=none | leaf | all" option.
1376+
1377+
1378+Refresh the Opened File (refrof)
1379+----------------------------------------------------------------------
1380+This option is implemented in aufs1 but incomplete.
1381+
1382+When user reads from a file, he expects to get its latest filedata
1383+generally. If the file is removed and a new same named file is created,
1384+the content he gets is unchanged, ie. the unlinked filedata.
1385+
1386+Let's try case study again.
1387+- aufs has two branches.
1388+ /au = /rw + /ro
1389+- "fileA" exists under /ro, but not under /rw.
1390+- user opened "/au/fileA".
1391+- he or someone else inserts a branch (/new) between /rw and /ro.
1392+ /au = /rw + /new + /ro
1393+- the new branch has "fileA".
1394+- user reads from the opened "fileA"
1395+- which filedata should aufs return, from /ro or /new?
1396+
1397+Some people say it has to be "from /ro" and it is a semantics of Unix.
1398+The others say it should be "from /new" because the file is not removed
1399+and it is equivalent to the case of someone else modifies the file.
1400+
1401+Here again I don't have a best and final answer. I got an idea to
1402+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
1403+Opened File) is specified (by default), aufs returns the filedata from
1404+/new.
1405+Otherwise from /ro.
1406diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
1407--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
1408+++ linux/Documentation/filesystems/aufs/README 2011-08-24 13:30:24.727980364 +0200
1409@@ -0,0 +1,290 @@
1410+
1411+Aufs3 -- advanced multi layered unification filesystem version 3.x
1412+http://aufs.sf.net
1413+Junjiro R. Okajima
1414+
1415+
1416+0. Introduction
1417+----------------------------------------
1418+In the early days, aufs was entirely re-designed and re-implemented
1419+Unionfs Version 1.x series. After many original ideas, approaches,
1420+improvements and implementations, it becomes totally different from
1421+Unionfs while keeping the basic features.
1422+Recently, Unionfs Version 2.x series begin taking some of the same
1423+approaches to aufs1's.
1424+Unionfs is being developed by Professor Erez Zadok at Stony Brook
1425+University and his team.
1426+
1427+Aufs3 supports linux-3.0 and later.
1428+If you want older kernel version support, try aufs2-2.6.git or
1429+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
1430+
1431+Note: it becomes clear that "Aufs was rejected. Let's give it up."
1432+According to Christoph Hellwig, linux rejects all union-type filesystems
1433+but UnionMount.
1434+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
1435+
1436+
1437+1. Features
1438+----------------------------------------
1439+- unite several directories into a single virtual filesystem. The member
1440+ directory is called as a branch.
1441+- you can specify the permission flags to the branch, which are 'readonly',
1442+ 'readwrite' and 'whiteout-able.'
1443+- by upper writable branch, internal copyup and whiteout, files/dirs on
1444+ readonly branch are modifiable logically.
1445+- dynamic branch manipulation, add, del.
1446+- etc...
1447+
1448+Also there are many enhancements in aufs1, such as:
1449+- readdir(3) in userspace.
1450+- keep inode number by external inode number table
1451+- keep the timestamps of file/dir in internal copyup operation
1452+- seekable directory, supporting NFS readdir.
1453+- whiteout is hardlinked in order to reduce the consumption of inodes
1454+ on branch
1455+- do not copyup, nor create a whiteout when it is unnecessary
1456+- revert a single systemcall when an error occurs in aufs
1457+- remount interface instead of ioctl
1458+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
1459+- loopback mounted filesystem as a branch
1460+- kernel thread for removing the dir who has a plenty of whiteouts
1461+- support copyup sparse file (a file which has a 'hole' in it)
1462+- default permission flags for branches
1463+- selectable permission flags for ro branch, whether whiteout can
1464+ exist or not
1465+- export via NFS.
1466+- support <sysfs>/fs/aufs and <debugfs>/aufs.
1467+- support multiple writable branches, some policies to select one
1468+ among multiple writable branches.
1469+- a new semantics for link(2) and rename(2) to support multiple
1470+ writable branches.
1471+- no glibc changes are required.
1472+- pseudo hardlink (hardlink over branches)
1473+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
1474+ including NFS or remote filesystem branch.
1475+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
1476+- and more...
1477+
1478+Currently these features are dropped temporarily from aufs3.
1479+See design/99plan.txt in detail.
1480+- test only the highest one for the directory permission (dirperm1)
1481+- copyup on open (coo=)
1482+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
1483+ (robr)
1484+- statistics of aufs thread (/sys/fs/aufs/stat)
1485+- delegation mode (dlgt)
1486+ a delegation of the internal branch access to support task I/O
1487+ accounting, which also supports Linux Security Modules (LSM) mainly
1488+ for Suse AppArmor.
1489+- intent.open/create (file open in a single lookup)
1490+
1491+Features or just an idea in the future (see also design/*.txt),
1492+- reorder the branch index without del/re-add.
1493+- permanent xino files for NFSD
1494+- an option for refreshing the opened files after add/del branches
1495+- 'move' policy for copy-up between two writable branches, after
1496+ checking free space.
1497+- light version, without branch manipulation. (unnecessary?)
1498+- copyup in userspace
1499+- inotify in userspace
1500+- readv/writev
1501+- xattr, acl
1502+
1503+
1504+2. Download
1505+----------------------------------------
1506+There were three GIT trees for aufs2, but for aufs3 two GIT trees,
1507+aufs3-standalone and aufs-util. Note that there is no "3" in "aufs-util."
1508+The aufs3-standalone tree has only aufs source files
1509+and necessary patches, and you can select CONFIG_AUFS_FS=m.
1510+
1511+You will find GIT branches whose name is in form of "aufs3.x" where "x"
1512+represents the linux kernel version, "linux-3.x". For instance,
1513+"aufs3.0" is for linux-3.0.
1514+
1515+o aufs3-standalone tree
1516+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs3-standalone.git \
1517+ aufs3-standalone.git
1518+$ cd aufs3-standalone.git
1519+$ git checkout origin/aufs3.0
1520+
1521+o aufs-util tree
1522+$ git clone git://aufs.git.sourceforge.net/gitroot/aufs/aufs-util.git \
1523+ aufs-util.git
1524+$ cd aufs-util.git
1525+$ git checkout origin/aufs3.0
1526+
1527+You may not be able to find the GIT branch in aufs-util for your
1528+version. In this case, you should git-checkout the branch for the
1529+nearest lower number.
1530+If you are using linux-3.10 and aufs3.10 (which are not released yet),
1531+but the "aufs3.10" branch doesn't exist in this repository, then
1532+"aufs3.9", "aufs3.8", ... or something is the branch for you.
1533+Also you can view all branches by
1534+ $ git branch -a
1535+
1536+
1537+3. Configuration and Compilation
1538+----------------------------------------
1539+Make sure you have git-checkout'ed the correct branch.
1540+
1541+For aufs3-standalone tree,
1542+There are several ways to build.
1543+
1544+1.
1545+- apply ./aufs3-kbuild.patch to your kernel source files.
1546+- apply ./aufs3-base.patch too.
1547+- apply ./aufs3-proc_map.patch too, if you want to make /proc/PID/maps (and
1548+ others including lsof(1)) show the file path on aufs instead of the
1549+ path on the branch fs.
1550+- apply ./aufs3-standalone.patch too, if you have a plan to set
1551+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch.
1552+- copy ./{Documentation,fs,include/linux/aufs_type.h} files to your
1553+ kernel source tree. Never copy ./include/linux/Kbuild.
1554+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
1555+ =m or =y.
1556+- and build your kernel as usual.
1557+- install the built kernel.
1558+- install the header files too by "make headers_install".
1559+- and reboot your system.
1560+
1561+2.
1562+- module only (CONFIG_AUFS_FS=m).
1563+- apply ./aufs3-base.patch to your kernel source files.
1564+- apply ./aufs3-proc_map.patch too to your kernel source files,
1565+ if you want to make /proc/PID/maps (and others including lsof(1)) show
1566+ the file path on aufs instead of the path on the branch fs.
1567+- apply ./aufs3-standalone.patch too.
1568+- build your kernel, don't forget "make headers_install", and reboot.
1569+- edit ./config.mk and set other aufs configurations if necessary.
1570+ Note: You should read ./fs/aufs/Kconfig carefully which describes
1571+ every aufs configuration.
1572+- build the module by simple "make".
1573+- you can specify ${KDIR} make variable which points to your kernel
1574+ source tree.
1575+- install the files
1576+ + run "make install" to install the aufs module, or copy the built
1577+ ./aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
1578+ + run "make headers_install" to install the aufs header file (you can
1579+ specify DESTDIR), or copy ./usr/include/linux/aufs_type.h to
1580+ /usr/include/linux or wherever you like.
1581+- no need to apply aufs3-kbuild.patch, nor copying source files to your
1582+ kernel source tree.
1583+
1584+Note: The header file aufs_type.h is necessary to build aufs-util
1585+ as well as "make headers_install" in the kernel source tree.
1586+ headers_install is subject to be forgotten, but it is essentially
1587+ necessary, not only for building aufs-util.
1588+ You may not meet problems without headers_install in some older
1589+ version though.
1590+
1591+And then,
1592+- read README in aufs-util, build and install it
1593+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
1594+ then run "make install_ulib" too. And refer to the aufs manual in
1595+ detail.
1596+
1597+
1598+4. Usage
1599+----------------------------------------
1600+At first, make sure aufs-util is installed, and please read the aufs
1601+manual, aufs.5 in aufs-util.git tree.
1602+$ man -l aufs.5
1603+
1604+And then,
1605+$ mkdir /tmp/rw /tmp/aufs
1606+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
1607+
1608+Here is another example. The result is equivalent.
1609+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
1610+ Or
1611+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
1612+# mount -o remount,append:${HOME} /tmp/aufs
1613+
1614+Then, you can see whole tree of your home dir through /tmp/aufs. If
1615+you modify a file under /tmp/aufs, the one on your home directory is
1616+not affected, instead the same named file will be newly created under
1617+/tmp/rw. And all of your modification to a file will be applied to
1618+the one under /tmp/rw. This is called the file based Copy on Write
1619+(COW) method.
1620+Aufs mount options are described in aufs.5.
1621+If you run chroot or something and make your aufs as a root directory,
1622+then you need to customize the shutdown script. See the aufs manual in
1623+detail.
1624+
1625+Additionally, there are some sample usages of aufs which are a
1626+diskless system with network booting, and LiveCD over NFS.
1627+See sample dir in CVS tree on SourceForge.
1628+
1629+
1630+5. Contact
1631+----------------------------------------
1632+When you have any problems or strange behaviour in aufs, please let me
1633+know with:
1634+- /proc/mounts (instead of the output of mount(8))
1635+- /sys/module/aufs/*
1636+- /sys/fs/aufs/* (if you have them)
1637+- /debug/aufs/* (if you have them)
1638+- linux kernel version
1639+ if your kernel is not plain, for example modified by distributor,
1640+ the url where i can download its source is necessary too.
1641+- aufs version which was printed at loading the module or booting the
1642+ system, instead of the date you downloaded.
1643+- configuration (define/undefine CONFIG_AUFS_xxx)
1644+- kernel configuration or /proc/config.gz (if you have it)
1645+- behaviour which you think to be incorrect
1646+- actual operation, reproducible one is better
1647+- mailto: aufs-users at lists.sourceforge.net
1648+
1649+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
1650+and Feature Requests) on SourceForge. Please join and write to
1651+aufs-users ML.
1652+
1653+
1654+6. Acknowledgements
1655+----------------------------------------
1656+Thanks to everyone who have tried and are using aufs, whoever
1657+have reported a bug or any feedback.
1658+
1659+Especially donators:
1660+Tomas Matejicek(slax.org) made a donation (much more than once).
1661+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
1662+ scripts) is making "doubling" donations.
1663+ Unfortunately I cannot list all of the donators, but I really
1664+ appreciate.
1665+ It ends Aug 2010, but the ordinary donation URL is still available.
1666+ <http://sourceforge.net/donate/index.php?group_id=167503>
1667+Dai Itasaka made a donation (2007/8).
1668+Chuck Smith made a donation (2008/4, 10 and 12).
1669+Henk Schoneveld made a donation (2008/9).
1670+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
1671+Francois Dupoux made a donation (2008/11).
1672+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
1673+ aufs2 GIT tree (2009/2).
1674+William Grant made a donation (2009/3).
1675+Patrick Lane made a donation (2009/4).
1676+The Mail Archive (mail-archive.com) made donations (2009/5).
1677+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
1678+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
1679+Pavel Pronskiy made a donation (2011/2).
1680+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
1681+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
1682+Max Lekomcev (DOM-TV project) made a donation (2011/7).
1683+
1684+Thank you very much.
1685+Donations are always, including future donations, very important and
1686+helpful for me to keep on developing aufs.
1687+
1688+
1689+7.
1690+----------------------------------------
1691+If you are an experienced user, no explanation is needed. Aufs is
1692+just a linux filesystem.
1693+
1694+
1695+Enjoy!
1696+
1697+# Local variables: ;
1698+# mode: text;
1699+# End: ;
7f207e10
AM
1700diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
1701--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 1702+++ linux/fs/aufs/aufs.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 1703@@ -0,0 +1,60 @@
7f207e10 1704+/*
027c5e7a 1705+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1706+ *
1707+ * This program, aufs is free software; you can redistribute it and/or modify
1708+ * it under the terms of the GNU General Public License as published by
1709+ * the Free Software Foundation; either version 2 of the License, or
1710+ * (at your option) any later version.
1711+ *
1712+ * This program is distributed in the hope that it will be useful,
1713+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1714+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1715+ * GNU General Public License for more details.
1716+ *
1717+ * You should have received a copy of the GNU General Public License
1718+ * along with this program; if not, write to the Free Software
1719+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1720+ */
1721+
1722+/*
1723+ * all header files
1724+ */
1725+
1726+#ifndef __AUFS_H__
1727+#define __AUFS_H__
1728+
1729+#ifdef __KERNEL__
1730+
1731+#define AuStub(type, name, body, ...) \
1732+ static inline type name(__VA_ARGS__) { body; }
1733+
1734+#define AuStubVoid(name, ...) \
1735+ AuStub(void, name, , __VA_ARGS__)
1736+#define AuStubInt0(name, ...) \
1737+ AuStub(int, name, return 0, __VA_ARGS__)
1738+
1739+#include "debug.h"
1740+
1741+#include "branch.h"
1742+#include "cpup.h"
1743+#include "dcsub.h"
1744+#include "dbgaufs.h"
1745+#include "dentry.h"
1746+#include "dir.h"
1747+#include "dynop.h"
1748+#include "file.h"
1749+#include "fstype.h"
1750+#include "inode.h"
1751+#include "loop.h"
1752+#include "module.h"
7f207e10
AM
1753+#include "opts.h"
1754+#include "rwsem.h"
1755+#include "spl.h"
1756+#include "super.h"
1757+#include "sysaufs.h"
1758+#include "vfsub.h"
1759+#include "whout.h"
1760+#include "wkq.h"
1761+
1762+#endif /* __KERNEL__ */
1763+#endif /* __AUFS_H__ */
1764diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
1765--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
1766+++ linux/fs/aufs/branch.c 2011-08-24 13:30:24.731313534 +0200
1767@@ -0,0 +1,1170 @@
7f207e10 1768+/*
027c5e7a 1769+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
1770+ *
1771+ * This program, aufs is free software; you can redistribute it and/or modify
1772+ * it under the terms of the GNU General Public License as published by
1773+ * the Free Software Foundation; either version 2 of the License, or
1774+ * (at your option) any later version.
1775+ *
1776+ * This program is distributed in the hope that it will be useful,
1777+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1778+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1779+ * GNU General Public License for more details.
1780+ *
1781+ * You should have received a copy of the GNU General Public License
1782+ * along with this program; if not, write to the Free Software
1783+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1784+ */
1785+
1786+/*
1787+ * branch management
1788+ */
1789+
027c5e7a 1790+#include <linux/compat.h>
7f207e10
AM
1791+#include <linux/file.h>
1792+#include <linux/statfs.h>
1793+#include "aufs.h"
1794+
1795+/*
1796+ * free a single branch
1facf9fc 1797+ */
1798+static void au_br_do_free(struct au_branch *br)
1799+{
1800+ int i;
1801+ struct au_wbr *wbr;
4a4d8108 1802+ struct au_dykey **key;
1facf9fc 1803+
027c5e7a
AM
1804+ au_hnotify_fin_br(br);
1805+
1facf9fc 1806+ if (br->br_xino.xi_file)
1807+ fput(br->br_xino.xi_file);
1808+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
1809+
1810+ AuDebugOn(atomic_read(&br->br_count));
1811+
1812+ wbr = br->br_wbr;
1813+ if (wbr) {
1814+ for (i = 0; i < AuBrWh_Last; i++)
1815+ dput(wbr->wbr_wh[i]);
1816+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 1817+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 1818+ }
1819+
4a4d8108
AM
1820+ key = br->br_dykey;
1821+ for (i = 0; i < AuBrDynOp; i++, key++)
1822+ if (*key)
1823+ au_dy_put(*key);
1824+ else
1825+ break;
1826+
1facf9fc 1827+ mntput(br->br_mnt);
1facf9fc 1828+ kfree(wbr);
1829+ kfree(br);
1830+}
1831+
1832+/*
1833+ * frees all branches
1834+ */
1835+void au_br_free(struct au_sbinfo *sbinfo)
1836+{
1837+ aufs_bindex_t bmax;
1838+ struct au_branch **br;
1839+
dece6358
AM
1840+ AuRwMustWriteLock(&sbinfo->si_rwsem);
1841+
1facf9fc 1842+ bmax = sbinfo->si_bend + 1;
1843+ br = sbinfo->si_branch;
1844+ while (bmax--)
1845+ au_br_do_free(*br++);
1846+}
1847+
1848+/*
1849+ * find the index of a branch which is specified by @br_id.
1850+ */
1851+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1852+{
1853+ aufs_bindex_t bindex, bend;
1854+
1855+ bend = au_sbend(sb);
1856+ for (bindex = 0; bindex <= bend; bindex++)
1857+ if (au_sbr_id(sb, bindex) == br_id)
1858+ return bindex;
1859+ return -1;
1860+}
1861+
1862+/* ---------------------------------------------------------------------- */
1863+
1864+/*
1865+ * add a branch
1866+ */
1867+
b752ccd1
AM
1868+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
1869+ struct dentry *h_root)
1facf9fc 1870+{
b752ccd1
AM
1871+ if (unlikely(h_adding == h_root
1872+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 1873+ return 1;
b752ccd1
AM
1874+ if (h_adding->d_sb != h_root->d_sb)
1875+ return 0;
1876+ return au_test_subdir(h_adding, h_root)
1877+ || au_test_subdir(h_root, h_adding);
1facf9fc 1878+}
1879+
1880+/*
1881+ * returns a newly allocated branch. @new_nbranch is a number of branches
1882+ * after adding a branch.
1883+ */
1884+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1885+ int perm)
1886+{
1887+ struct au_branch *add_branch;
1888+ struct dentry *root;
4a4d8108 1889+ int err;
1facf9fc 1890+
4a4d8108 1891+ err = -ENOMEM;
1facf9fc 1892+ root = sb->s_root;
1893+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1894+ if (unlikely(!add_branch))
1895+ goto out;
1896+
027c5e7a
AM
1897+ err = au_hnotify_init_br(add_branch, perm);
1898+ if (unlikely(err))
1899+ goto out_br;
1900+
1facf9fc 1901+ add_branch->br_wbr = NULL;
1902+ if (au_br_writable(perm)) {
1903+ /* may be freed separately at changing the branch permission */
1904+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1905+ GFP_NOFS);
1906+ if (unlikely(!add_branch->br_wbr))
027c5e7a 1907+ goto out_hnotify;
1facf9fc 1908+ }
1909+
4a4d8108
AM
1910+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
1911+ if (!err)
1912+ err = au_di_realloc(au_di(root), new_nbranch);
1913+ if (!err)
1914+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch);
1915+ if (!err)
1916+ return add_branch; /* success */
1facf9fc 1917+
1facf9fc 1918+ kfree(add_branch->br_wbr);
4a4d8108 1919+
027c5e7a
AM
1920+out_hnotify:
1921+ au_hnotify_fin_br(add_branch);
4f0767ce 1922+out_br:
1facf9fc 1923+ kfree(add_branch);
4f0767ce 1924+out:
4a4d8108 1925+ return ERR_PTR(err);
1facf9fc 1926+}
1927+
1928+/*
1929+ * test if the branch permission is legal or not.
1930+ */
1931+static int test_br(struct inode *inode, int brperm, char *path)
1932+{
1933+ int err;
1934+
4a4d8108
AM
1935+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
1936+ if (!err)
1937+ goto out;
1facf9fc 1938+
4a4d8108
AM
1939+ err = -EINVAL;
1940+ pr_err("write permission for readonly mount or inode, %s\n", path);
1941+
4f0767ce 1942+out:
1facf9fc 1943+ return err;
1944+}
1945+
1946+/*
1947+ * returns:
1948+ * 0: success, the caller will add it
1949+ * plus: success, it is already unified, the caller should ignore it
1950+ * minus: error
1951+ */
1952+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1953+{
1954+ int err;
1955+ aufs_bindex_t bend, bindex;
1956+ struct dentry *root;
1957+ struct inode *inode, *h_inode;
1958+
1959+ root = sb->s_root;
1960+ bend = au_sbend(sb);
1961+ if (unlikely(bend >= 0
1962+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
1963+ err = 1;
1964+ if (!remount) {
1965+ err = -EINVAL;
4a4d8108 1966+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 1967+ }
1968+ goto out;
1969+ }
1970+
1971+ err = -ENOSPC; /* -E2BIG; */
1972+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
1973+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 1974+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 1975+ goto out;
1976+ }
1977+
1978+ err = -EDOM;
1979+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 1980+ pr_err("bad index %d\n", add->bindex);
1facf9fc 1981+ goto out;
1982+ }
1983+
1984+ inode = add->path.dentry->d_inode;
1985+ err = -ENOENT;
1986+ if (unlikely(!inode->i_nlink)) {
4a4d8108 1987+ pr_err("no existence %s\n", add->pathname);
1facf9fc 1988+ goto out;
1989+ }
1990+
1991+ err = -EINVAL;
1992+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 1993+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 1994+ goto out;
1995+ }
1996+
1997+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
1998+ pr_err("unsupported filesystem, %s (%s)\n",
1999+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2000+ goto out;
2001+ }
2002+
2003+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
2004+ if (unlikely(err))
2005+ goto out;
2006+
2007+ if (bend < 0)
2008+ return 0; /* success */
2009+
2010+ err = -EINVAL;
2011+ for (bindex = 0; bindex <= bend; bindex++)
2012+ if (unlikely(test_overlap(sb, add->path.dentry,
2013+ au_h_dptr(root, bindex)))) {
4a4d8108 2014+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 2015+ goto out;
2016+ }
2017+
2018+ err = 0;
2019+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
2020+ h_inode = au_h_dptr(root, 0)->d_inode;
2021+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
2022+ || h_inode->i_uid != inode->i_uid
2023+ || h_inode->i_gid != inode->i_gid)
4a4d8108
AM
2024+ pr_warning("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
2025+ add->pathname,
2026+ inode->i_uid, inode->i_gid,
2027+ (inode->i_mode & S_IALLUGO),
2028+ h_inode->i_uid, h_inode->i_gid,
2029+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 2030+ }
2031+
4f0767ce 2032+out:
1facf9fc 2033+ return err;
2034+}
2035+
2036+/*
2037+ * initialize or clean the whiteouts for an adding branch
2038+ */
2039+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
2040+ int new_perm, struct dentry *h_root)
2041+{
2042+ int err, old_perm;
2043+ aufs_bindex_t bindex;
2044+ struct mutex *h_mtx;
2045+ struct au_wbr *wbr;
2046+ struct au_hinode *hdir;
2047+
2048+ wbr = br->br_wbr;
2049+ old_perm = br->br_perm;
2050+ br->br_perm = new_perm;
2051+ hdir = NULL;
2052+ h_mtx = NULL;
2053+ bindex = au_br_index(sb, br->br_id);
2054+ if (0 <= bindex) {
2055+ hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 2056+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 2057+ } else {
2058+ h_mtx = &h_root->d_inode->i_mutex;
2059+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
2060+ }
2061+ if (!wbr)
2062+ err = au_wh_init(h_root, br, sb);
2063+ else {
2064+ wbr_wh_write_lock(wbr);
2065+ err = au_wh_init(h_root, br, sb);
2066+ wbr_wh_write_unlock(wbr);
2067+ }
2068+ if (hdir)
4a4d8108 2069+ au_hn_imtx_unlock(hdir);
1facf9fc 2070+ else
2071+ mutex_unlock(h_mtx);
2072+ br->br_perm = old_perm;
2073+
2074+ if (!err && wbr && !au_br_writable(new_perm)) {
2075+ kfree(wbr);
2076+ br->br_wbr = NULL;
2077+ }
2078+
2079+ return err;
2080+}
2081+
2082+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
2083+ int perm, struct path *path)
2084+{
2085+ int err;
4a4d8108 2086+ struct kstatfs kst;
1facf9fc 2087+ struct au_wbr *wbr;
4a4d8108 2088+ struct dentry *h_dentry;
1facf9fc 2089+
2090+ wbr = br->br_wbr;
dece6358 2091+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 2092+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
2093+ atomic_set(&wbr->wbr_wh_running, 0);
2094+ wbr->wbr_bytes = 0;
2095+
4a4d8108
AM
2096+ /*
2097+ * a limit for rmdir/rename a dir
2098+ * cf. AUFS_MAX_NAMELEN in include/linux/aufs_type.h
2099+ */
7f207e10 2100+ err = vfs_statfs(path, &kst);
4a4d8108
AM
2101+ if (unlikely(err))
2102+ goto out;
2103+ err = -EINVAL;
7f207e10 2104+ h_dentry = path->dentry;
4a4d8108
AM
2105+ if (kst.f_namelen >= NAME_MAX)
2106+ err = au_br_init_wh(sb, br, perm, h_dentry);
2107+ else
2108+ pr_err("%.*s(%s), unsupported namelen %ld\n",
2109+ AuDLNPair(h_dentry), au_sbtype(h_dentry->d_sb),
2110+ kst.f_namelen);
1facf9fc 2111+
4f0767ce 2112+out:
1facf9fc 2113+ return err;
2114+}
2115+
2116+/* initialize a new branch */
2117+static int au_br_init(struct au_branch *br, struct super_block *sb,
2118+ struct au_opt_add *add)
2119+{
2120+ int err;
2121+
2122+ err = 0;
2123+ memset(&br->br_xino, 0, sizeof(br->br_xino));
2124+ mutex_init(&br->br_xino.xi_nondir_mtx);
2125+ br->br_perm = add->perm;
2126+ br->br_mnt = add->path.mnt; /* set first, mntget() later */
4a4d8108
AM
2127+ spin_lock_init(&br->br_dykey_lock);
2128+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 2129+ atomic_set(&br->br_count, 0);
2130+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
2131+ atomic_set(&br->br_xino_running, 0);
2132+ br->br_id = au_new_br_id(sb);
7f207e10 2133+ AuDebugOn(br->br_id < 0);
1facf9fc 2134+
2135+ if (au_br_writable(add->perm)) {
2136+ err = au_wbr_init(br, sb, add->perm, &add->path);
2137+ if (unlikely(err))
b752ccd1 2138+ goto out_err;
1facf9fc 2139+ }
2140+
2141+ if (au_opt_test(au_mntflags(sb), XINO)) {
2142+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
2143+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
2144+ if (unlikely(err)) {
2145+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 2146+ goto out_err;
1facf9fc 2147+ }
2148+ }
2149+
2150+ sysaufs_br_init(br);
2151+ mntget(add->path.mnt);
b752ccd1 2152+ goto out; /* success */
1facf9fc 2153+
4f0767ce 2154+out_err:
b752ccd1 2155+ br->br_mnt = NULL;
4f0767ce 2156+out:
1facf9fc 2157+ return err;
2158+}
2159+
2160+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
2161+ struct au_branch *br, aufs_bindex_t bend,
2162+ aufs_bindex_t amount)
2163+{
2164+ struct au_branch **brp;
2165+
dece6358
AM
2166+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2167+
1facf9fc 2168+ brp = sbinfo->si_branch + bindex;
2169+ memmove(brp + 1, brp, sizeof(*brp) * amount);
2170+ *brp = br;
2171+ sbinfo->si_bend++;
2172+ if (unlikely(bend < 0))
2173+ sbinfo->si_bend = 0;
2174+}
2175+
2176+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
2177+ aufs_bindex_t bend, aufs_bindex_t amount)
2178+{
2179+ struct au_hdentry *hdp;
2180+
1308ab2a 2181+ AuRwMustWriteLock(&dinfo->di_rwsem);
2182+
1facf9fc 2183+ hdp = dinfo->di_hdentry + bindex;
2184+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
2185+ au_h_dentry_init(hdp);
2186+ dinfo->di_bend++;
2187+ if (unlikely(bend < 0))
2188+ dinfo->di_bstart = 0;
2189+}
2190+
2191+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
2192+ aufs_bindex_t bend, aufs_bindex_t amount)
2193+{
2194+ struct au_hinode *hip;
2195+
1308ab2a 2196+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2197+
1facf9fc 2198+ hip = iinfo->ii_hinode + bindex;
2199+ memmove(hip + 1, hip, sizeof(*hip) * amount);
2200+ hip->hi_inode = NULL;
4a4d8108 2201+ au_hn_init(hip);
1facf9fc 2202+ iinfo->ii_bend++;
2203+ if (unlikely(bend < 0))
2204+ iinfo->ii_bstart = 0;
2205+}
2206+
2207+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2208+ struct au_branch *br, aufs_bindex_t bindex)
2209+{
2210+ struct dentry *root;
2211+ struct inode *root_inode;
2212+ aufs_bindex_t bend, amount;
2213+
2214+ root = sb->s_root;
2215+ root_inode = root->d_inode;
1facf9fc 2216+ bend = au_sbend(sb);
2217+ amount = bend + 1 - bindex;
53392da6 2218+ au_sbilist_lock();
1facf9fc 2219+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2220+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2221+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2222+ au_set_h_dptr(root, bindex, dget(h_dentry));
2223+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2224+ /*flags*/0);
53392da6 2225+ au_sbilist_unlock();
1facf9fc 2226+}
2227+
2228+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2229+{
2230+ int err;
1facf9fc 2231+ aufs_bindex_t bend, add_bindex;
2232+ struct dentry *root, *h_dentry;
2233+ struct inode *root_inode;
2234+ struct au_branch *add_branch;
2235+
2236+ root = sb->s_root;
2237+ root_inode = root->d_inode;
2238+ IMustLock(root_inode);
2239+ err = test_add(sb, add, remount);
2240+ if (unlikely(err < 0))
2241+ goto out;
2242+ if (err) {
2243+ err = 0;
2244+ goto out; /* success */
2245+ }
2246+
2247+ bend = au_sbend(sb);
2248+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
2249+ err = PTR_ERR(add_branch);
2250+ if (IS_ERR(add_branch))
2251+ goto out;
2252+
2253+ err = au_br_init(add_branch, sb, add);
2254+ if (unlikely(err)) {
2255+ au_br_do_free(add_branch);
2256+ goto out;
2257+ }
2258+
2259+ add_bindex = add->bindex;
2260+ h_dentry = add->path.dentry;
2261+ if (!remount)
2262+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2263+ else {
2264+ sysaufs_brs_del(sb, add_bindex);
2265+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2266+ sysaufs_brs_add(sb, add_bindex);
2267+ }
2268+
1308ab2a 2269+ if (!add_bindex) {
1facf9fc 2270+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2271+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2272+ } else
1facf9fc 2273+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 2274+
2275+ /*
4a4d8108 2276+ * this test/set prevents aufs from handling unnecessary notify events
027c5e7a 2277+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 2278+ * once detached from aufs.
2279+ */
2280+ if (au_xino_brid(sb) < 0
2281+ && au_br_writable(add_branch->br_perm)
2282+ && !au_test_fs_bad_xino(h_dentry->d_sb)
2283+ && add_branch->br_xino.xi_file
2284+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2285+ au_xino_brid_set(sb, add_branch->br_id);
2286+
4f0767ce 2287+out:
1facf9fc 2288+ return err;
2289+}
2290+
2291+/* ---------------------------------------------------------------------- */
2292+
2293+/*
2294+ * delete a branch
2295+ */
2296+
2297+/* to show the line number, do not make it an inline function */
4a4d8108 2298+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 2299+ if (do_info) \
4a4d8108 2300+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 2301+} while (0)
2302+
027c5e7a
AM
2303+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
2304+ aufs_bindex_t bend)
2305+{
2306+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
2307+}
2308+
2309+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
2310+ aufs_bindex_t bend)
2311+{
2312+ return au_test_ibusy(dentry->d_inode, bstart, bend);
2313+}
2314+
1facf9fc 2315+/*
2316+ * test if the branch is deletable or not.
2317+ */
2318+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 2319+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2320+{
2321+ int err, i, j, ndentry;
2322+ aufs_bindex_t bstart, bend;
1facf9fc 2323+ struct au_dcsub_pages dpages;
2324+ struct au_dpage *dpage;
2325+ struct dentry *d;
1facf9fc 2326+
2327+ err = au_dpages_init(&dpages, GFP_NOFS);
2328+ if (unlikely(err))
2329+ goto out;
2330+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
2331+ if (unlikely(err))
2332+ goto out_dpages;
2333+
1facf9fc 2334+ for (i = 0; !err && i < dpages.ndpage; i++) {
2335+ dpage = dpages.dpages + i;
2336+ ndentry = dpage->ndentry;
2337+ for (j = 0; !err && j < ndentry; j++) {
2338+ d = dpage->dentries[j];
027c5e7a
AM
2339+ AuDebugOn(!d->d_count);
2340+ if (!au_digen_test(d, sigen)) {
1facf9fc 2341+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
2342+ if (unlikely(au_dbrange_test(d))) {
2343+ di_read_unlock(d, AuLock_IR);
2344+ continue;
2345+ }
2346+ } else {
1facf9fc 2347+ di_write_lock_child(d);
027c5e7a
AM
2348+ if (unlikely(au_dbrange_test(d))) {
2349+ di_write_unlock(d);
2350+ continue;
2351+ }
1facf9fc 2352+ err = au_reval_dpath(d, sigen);
2353+ if (!err)
2354+ di_downgrade_lock(d, AuLock_IR);
2355+ else {
2356+ di_write_unlock(d);
2357+ break;
2358+ }
2359+ }
2360+
027c5e7a 2361+ /* AuDbgDentry(d); */
1facf9fc 2362+ bstart = au_dbstart(d);
2363+ bend = au_dbend(d);
2364+ if (bstart <= bindex
2365+ && bindex <= bend
2366+ && au_h_dptr(d, bindex)
027c5e7a 2367+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 2368+ err = -EBUSY;
2369+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
027c5e7a 2370+ AuDbgDentry(d);
1facf9fc 2371+ }
2372+ di_read_unlock(d, AuLock_IR);
2373+ }
2374+ }
2375+
4f0767ce 2376+out_dpages:
1facf9fc 2377+ au_dpages_free(&dpages);
4f0767ce 2378+out:
1facf9fc 2379+ return err;
2380+}
2381+
2382+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 2383+ unsigned int sigen, const unsigned int verbose)
1facf9fc 2384+{
2385+ int err;
7f207e10
AM
2386+ unsigned long long max, ull;
2387+ struct inode *i, **array;
1facf9fc 2388+ aufs_bindex_t bstart, bend;
1facf9fc 2389+
7f207e10
AM
2390+ array = au_iarray_alloc(sb, &max);
2391+ err = PTR_ERR(array);
2392+ if (IS_ERR(array))
2393+ goto out;
2394+
1facf9fc 2395+ err = 0;
7f207e10
AM
2396+ AuDbg("b%d\n", bindex);
2397+ for (ull = 0; !err && ull < max; ull++) {
2398+ i = array[ull];
2399+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 2400+ continue;
2401+
7f207e10 2402+ /* AuDbgInode(i); */
1facf9fc 2403+ if (au_iigen(i) == sigen)
2404+ ii_read_lock_child(i);
2405+ else {
2406+ ii_write_lock_child(i);
027c5e7a
AM
2407+ err = au_refresh_hinode_self(i);
2408+ au_iigen_dec(i);
1facf9fc 2409+ if (!err)
2410+ ii_downgrade_lock(i);
2411+ else {
2412+ ii_write_unlock(i);
2413+ break;
2414+ }
2415+ }
2416+
2417+ bstart = au_ibstart(i);
2418+ bend = au_ibend(i);
2419+ if (bstart <= bindex
2420+ && bindex <= bend
2421+ && au_h_iptr(i, bindex)
027c5e7a 2422+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 2423+ err = -EBUSY;
2424+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 2425+ AuDbgInode(i);
1facf9fc 2426+ }
2427+ ii_read_unlock(i);
2428+ }
7f207e10 2429+ au_iarray_free(array, max);
1facf9fc 2430+
7f207e10 2431+out:
1facf9fc 2432+ return err;
2433+}
2434+
b752ccd1
AM
2435+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
2436+ const unsigned int verbose)
1facf9fc 2437+{
2438+ int err;
2439+ unsigned int sigen;
2440+
2441+ sigen = au_sigen(root->d_sb);
2442+ DiMustNoWaiters(root);
2443+ IiMustNoWaiters(root->d_inode);
2444+ di_write_unlock(root);
b752ccd1 2445+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 2446+ if (!err)
b752ccd1 2447+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 2448+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2449+
2450+ return err;
2451+}
2452+
2453+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2454+ const aufs_bindex_t bindex,
2455+ const aufs_bindex_t bend)
2456+{
2457+ struct au_branch **brp, **p;
2458+
dece6358
AM
2459+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2460+
1facf9fc 2461+ brp = sbinfo->si_branch + bindex;
2462+ if (bindex < bend)
2463+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2464+ sbinfo->si_branch[0 + bend] = NULL;
2465+ sbinfo->si_bend--;
2466+
53392da6 2467+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2468+ if (p)
2469+ sbinfo->si_branch = p;
4a4d8108 2470+ /* harmless error */
1facf9fc 2471+}
2472+
2473+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2474+ const aufs_bindex_t bend)
2475+{
2476+ struct au_hdentry *hdp, *p;
2477+
1308ab2a 2478+ AuRwMustWriteLock(&dinfo->di_rwsem);
2479+
4a4d8108 2480+ hdp = dinfo->di_hdentry;
1facf9fc 2481+ if (bindex < bend)
4a4d8108
AM
2482+ memmove(hdp + bindex, hdp + bindex + 1,
2483+ sizeof(*hdp) * (bend - bindex));
2484+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 2485+ dinfo->di_bend--;
2486+
53392da6 2487+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2488+ if (p)
2489+ dinfo->di_hdentry = p;
4a4d8108 2490+ /* harmless error */
1facf9fc 2491+}
2492+
2493+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2494+ const aufs_bindex_t bend)
2495+{
2496+ struct au_hinode *hip, *p;
2497+
1308ab2a 2498+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2499+
1facf9fc 2500+ hip = iinfo->ii_hinode + bindex;
2501+ if (bindex < bend)
2502+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2503+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 2504+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 2505+ iinfo->ii_bend--;
2506+
53392da6 2507+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 2508+ if (p)
2509+ iinfo->ii_hinode = p;
4a4d8108 2510+ /* harmless error */
1facf9fc 2511+}
2512+
2513+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2514+ struct au_branch *br)
2515+{
2516+ aufs_bindex_t bend;
2517+ struct au_sbinfo *sbinfo;
53392da6
AM
2518+ struct dentry *root, *h_root;
2519+ struct inode *inode, *h_inode;
2520+ struct au_hinode *hinode;
1facf9fc 2521+
dece6358
AM
2522+ SiMustWriteLock(sb);
2523+
1facf9fc 2524+ root = sb->s_root;
2525+ inode = root->d_inode;
1facf9fc 2526+ sbinfo = au_sbi(sb);
2527+ bend = sbinfo->si_bend;
2528+
53392da6
AM
2529+ h_root = au_h_dptr(root, bindex);
2530+ hinode = au_hi(inode, bindex);
2531+ h_inode = au_igrab(hinode->hi_inode);
2532+ au_hiput(hinode);
1facf9fc 2533+
53392da6 2534+ au_sbilist_lock();
1facf9fc 2535+ au_br_do_del_brp(sbinfo, bindex, bend);
2536+ au_br_do_del_hdp(au_di(root), bindex, bend);
2537+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
2538+ au_sbilist_unlock();
2539+
2540+ dput(h_root);
2541+ iput(h_inode);
2542+ au_br_do_free(br);
1facf9fc 2543+}
2544+
2545+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2546+{
2547+ int err, rerr, i;
2548+ unsigned int mnt_flags;
2549+ aufs_bindex_t bindex, bend, br_id;
2550+ unsigned char do_wh, verbose;
2551+ struct au_branch *br;
2552+ struct au_wbr *wbr;
2553+
2554+ err = 0;
2555+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2556+ if (bindex < 0) {
2557+ if (remount)
2558+ goto out; /* success */
2559+ err = -ENOENT;
4a4d8108 2560+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 2561+ goto out;
2562+ }
2563+ AuDbg("bindex b%d\n", bindex);
2564+
2565+ err = -EBUSY;
2566+ mnt_flags = au_mntflags(sb);
2567+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2568+ bend = au_sbend(sb);
2569+ if (unlikely(!bend)) {
2570+ AuVerbose(verbose, "no more branches left\n");
2571+ goto out;
2572+ }
2573+ br = au_sbr(sb, bindex);
2574+ i = atomic_read(&br->br_count);
2575+ if (unlikely(i)) {
2576+ AuVerbose(verbose, "%d file(s) opened\n", i);
e49829fe 2577+ goto out;
1facf9fc 2578+ }
2579+
2580+ wbr = br->br_wbr;
2581+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2582+ if (do_wh) {
1308ab2a 2583+ /* instead of WbrWhMustWriteLock(wbr) */
2584+ SiMustWriteLock(sb);
1facf9fc 2585+ for (i = 0; i < AuBrWh_Last; i++) {
2586+ dput(wbr->wbr_wh[i]);
2587+ wbr->wbr_wh[i] = NULL;
2588+ }
2589+ }
2590+
b752ccd1 2591+ err = test_children_busy(sb->s_root, bindex, verbose);
1facf9fc 2592+ if (unlikely(err)) {
2593+ if (do_wh)
2594+ goto out_wh;
2595+ goto out;
2596+ }
2597+
2598+ err = 0;
2599+ br_id = br->br_id;
2600+ if (!remount)
2601+ au_br_do_del(sb, bindex, br);
2602+ else {
2603+ sysaufs_brs_del(sb, bindex);
2604+ au_br_do_del(sb, bindex, br);
2605+ sysaufs_brs_add(sb, bindex);
2606+ }
2607+
1308ab2a 2608+ if (!bindex) {
1facf9fc 2609+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
1308ab2a 2610+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2611+ } else
1facf9fc 2612+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2613+ if (au_opt_test(mnt_flags, PLINK))
2614+ au_plink_half_refresh(sb, br_id);
2615+
b752ccd1 2616+ if (au_xino_brid(sb) == br_id)
1facf9fc 2617+ au_xino_brid_set(sb, -1);
2618+ goto out; /* success */
2619+
4f0767ce 2620+out_wh:
1facf9fc 2621+ /* revert */
2622+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2623+ if (rerr)
4a4d8108
AM
2624+ pr_warning("failed re-creating base whiteout, %s. (%d)\n",
2625+ del->pathname, rerr);
4f0767ce 2626+out:
1facf9fc 2627+ return err;
2628+}
2629+
2630+/* ---------------------------------------------------------------------- */
2631+
027c5e7a
AM
2632+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
2633+{
2634+ int err;
2635+ aufs_bindex_t bstart, bend;
2636+ struct aufs_ibusy ibusy;
2637+ struct inode *inode, *h_inode;
2638+
2639+ err = -EPERM;
2640+ if (unlikely(!capable(CAP_SYS_ADMIN)))
2641+ goto out;
2642+
2643+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
2644+ if (!err)
2645+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
2646+ if (unlikely(err)) {
2647+ err = -EFAULT;
2648+ AuTraceErr(err);
2649+ goto out;
2650+ }
2651+
2652+ err = -EINVAL;
2653+ si_read_lock(sb, AuLock_FLUSH);
2654+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
2655+ goto out_unlock;
2656+
2657+ err = 0;
2658+ ibusy.h_ino = 0; /* invalid */
2659+ inode = ilookup(sb, ibusy.ino);
2660+ if (!inode
2661+ || inode->i_ino == AUFS_ROOT_INO
2662+ || is_bad_inode(inode))
2663+ goto out_unlock;
2664+
2665+ ii_read_lock_child(inode);
2666+ bstart = au_ibstart(inode);
2667+ bend = au_ibend(inode);
2668+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
2669+ h_inode = au_h_iptr(inode, ibusy.bindex);
2670+ if (h_inode && au_test_ibusy(inode, bstart, bend))
2671+ ibusy.h_ino = h_inode->i_ino;
2672+ }
2673+ ii_read_unlock(inode);
2674+ iput(inode);
2675+
2676+out_unlock:
2677+ si_read_unlock(sb);
2678+ if (!err) {
2679+ err = __put_user(ibusy.h_ino, &arg->h_ino);
2680+ if (unlikely(err)) {
2681+ err = -EFAULT;
2682+ AuTraceErr(err);
2683+ }
2684+ }
2685+out:
2686+ return err;
2687+}
2688+
2689+long au_ibusy_ioctl(struct file *file, unsigned long arg)
2690+{
2691+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg);
2692+}
2693+
2694+#ifdef CONFIG_COMPAT
2695+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
2696+{
2697+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg));
2698+}
2699+#endif
2700+
2701+/* ---------------------------------------------------------------------- */
2702+
1facf9fc 2703+/*
2704+ * change a branch permission
2705+ */
2706+
dece6358
AM
2707+static void au_warn_ima(void)
2708+{
2709+#ifdef CONFIG_IMA
1308ab2a 2710+ /* since it doesn't support mark_files_ro() */
027c5e7a 2711+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
2712+#endif
2713+}
2714+
1facf9fc 2715+static int do_need_sigen_inc(int a, int b)
2716+{
2717+ return au_br_whable(a) && !au_br_whable(b);
2718+}
2719+
2720+static int need_sigen_inc(int old, int new)
2721+{
2722+ return do_need_sigen_inc(old, new)
2723+ || do_need_sigen_inc(new, old);
2724+}
2725+
7f207e10
AM
2726+static unsigned long long au_farray_cb(void *a,
2727+ unsigned long long max __maybe_unused,
2728+ void *arg)
2729+{
2730+ unsigned long long n;
2731+ struct file **p, *f;
2732+ struct super_block *sb = arg;
2733+
2734+ n = 0;
2735+ p = a;
2736+ lg_global_lock(files_lglock);
2737+ do_file_list_for_each_entry(sb, f) {
2738+ if (au_fi(f)
027c5e7a 2739+ && file_count(f)
7f207e10
AM
2740+ && !special_file(f->f_dentry->d_inode->i_mode)) {
2741+ get_file(f);
2742+ *p++ = f;
2743+ n++;
2744+ AuDebugOn(n > max);
2745+ }
2746+ } while_file_list_for_each_entry;
2747+ lg_global_unlock(files_lglock);
2748+
2749+ return n;
2750+}
2751+
2752+static struct file **au_farray_alloc(struct super_block *sb,
2753+ unsigned long long *max)
2754+{
2755+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
2756+ return au_array_alloc(max, au_farray_cb, sb);
2757+}
2758+
2759+static void au_farray_free(struct file **a, unsigned long long max)
2760+{
2761+ unsigned long long ull;
2762+
2763+ for (ull = 0; ull < max; ull++)
2764+ if (a[ull])
2765+ fput(a[ull]);
2766+ au_array_free(a);
2767+}
2768+
1facf9fc 2769+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2770+{
7f207e10 2771+ int err, do_warn;
027c5e7a 2772+ unsigned int mnt_flags;
7f207e10 2773+ unsigned long long ull, max;
e49829fe 2774+ aufs_bindex_t br_id;
027c5e7a 2775+ unsigned char verbose;
7f207e10 2776+ struct file *file, *hf, **array;
e49829fe
JR
2777+ struct inode *inode;
2778+ struct au_hfile *hfile;
1facf9fc 2779+
027c5e7a
AM
2780+ mnt_flags = au_mntflags(sb);
2781+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2782+
7f207e10
AM
2783+ array = au_farray_alloc(sb, &max);
2784+ err = PTR_ERR(array);
2785+ if (IS_ERR(array))
1facf9fc 2786+ goto out;
2787+
7f207e10 2788+ do_warn = 0;
e49829fe 2789+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
2790+ for (ull = 0; ull < max; ull++) {
2791+ file = array[ull];
1facf9fc 2792+
7f207e10 2793+ /* AuDbg("%.*s\n", AuDLNPair(file->f_dentry)); */
1facf9fc 2794+ fi_read_lock(file);
2795+ if (unlikely(au_test_mmapped(file))) {
2796+ err = -EBUSY;
027c5e7a
AM
2797+ AuVerbose(verbose, "mmapped %.*s\n",
2798+ AuDLNPair(file->f_dentry));
7f207e10 2799+ AuDbgFile(file);
1facf9fc 2800+ FiMustNoWaiters(file);
2801+ fi_read_unlock(file);
7f207e10 2802+ goto out_array;
1facf9fc 2803+ }
2804+
027c5e7a 2805+ inode = file->f_dentry->d_inode;
e49829fe
JR
2806+ hfile = &au_fi(file)->fi_htop;
2807+ hf = hfile->hf_file;
2808+ if (!S_ISREG(inode->i_mode)
1facf9fc 2809+ || !(file->f_mode & FMODE_WRITE)
e49829fe 2810+ || hfile->hf_br->br_id != br_id
7f207e10
AM
2811+ || !(hf->f_mode & FMODE_WRITE))
2812+ array[ull] = NULL;
2813+ else {
2814+ do_warn = 1;
2815+ get_file(file);
1facf9fc 2816+ }
2817+
1facf9fc 2818+ FiMustNoWaiters(file);
2819+ fi_read_unlock(file);
7f207e10
AM
2820+ fput(file);
2821+ }
1facf9fc 2822+
2823+ err = 0;
7f207e10 2824+ if (do_warn)
dece6358 2825+ au_warn_ima();
7f207e10
AM
2826+
2827+ for (ull = 0; ull < max; ull++) {
2828+ file = array[ull];
2829+ if (!file)
2830+ continue;
2831+
1facf9fc 2832+ /* todo: already flushed? */
2833+ /* cf. fs/super.c:mark_files_ro() */
7f207e10
AM
2834+ /* fi_read_lock(file); */
2835+ hfile = &au_fi(file)->fi_htop;
2836+ hf = hfile->hf_file;
2837+ /* fi_read_unlock(file); */
027c5e7a 2838+ spin_lock(&hf->f_lock);
1facf9fc 2839+ hf->f_mode &= ~FMODE_WRITE;
027c5e7a 2840+ spin_unlock(&hf->f_lock);
1facf9fc 2841+ if (!file_check_writeable(hf)) {
2842+ file_release_write(hf);
2843+ mnt_drop_write(hf->f_vfsmnt);
2844+ }
2845+ }
2846+
7f207e10
AM
2847+out_array:
2848+ au_farray_free(array, max);
4f0767ce 2849+out:
7f207e10 2850+ AuTraceErr(err);
1facf9fc 2851+ return err;
2852+}
2853+
2854+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 2855+ int *do_refresh)
1facf9fc 2856+{
2857+ int err, rerr;
2858+ aufs_bindex_t bindex;
1308ab2a 2859+ struct path path;
1facf9fc 2860+ struct dentry *root;
2861+ struct au_branch *br;
2862+
2863+ root = sb->s_root;
1facf9fc 2864+ bindex = au_find_dbindex(root, mod->h_root);
2865+ if (bindex < 0) {
2866+ if (remount)
2867+ return 0; /* success */
2868+ err = -ENOENT;
4a4d8108 2869+ pr_err("%s no such branch\n", mod->path);
1facf9fc 2870+ goto out;
2871+ }
2872+ AuDbg("bindex b%d\n", bindex);
2873+
2874+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2875+ if (unlikely(err))
2876+ goto out;
2877+
2878+ br = au_sbr(sb, bindex);
2879+ if (br->br_perm == mod->perm)
2880+ return 0; /* success */
2881+
2882+ if (au_br_writable(br->br_perm)) {
2883+ /* remove whiteout base */
2884+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2885+ if (unlikely(err))
2886+ goto out;
2887+
2888+ if (!au_br_writable(mod->perm)) {
2889+ /* rw --> ro, file might be mmapped */
2890+ DiMustNoWaiters(root);
2891+ IiMustNoWaiters(root->d_inode);
2892+ di_write_unlock(root);
2893+ err = au_br_mod_files_ro(sb, bindex);
2894+ /* aufs_write_lock() calls ..._child() */
2895+ di_write_lock_child(root);
2896+
2897+ if (unlikely(err)) {
2898+ rerr = -ENOMEM;
2899+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2900+ GFP_NOFS);
1308ab2a 2901+ if (br->br_wbr) {
2902+ path.mnt = br->br_mnt;
2903+ path.dentry = mod->h_root;
2904+ rerr = au_wbr_init(br, sb, br->br_perm,
2905+ &path);
2906+ }
1facf9fc 2907+ if (unlikely(rerr)) {
2908+ AuIOErr("nested error %d (%d)\n",
2909+ rerr, err);
2910+ br->br_perm = mod->perm;
2911+ }
2912+ }
2913+ }
2914+ } else if (au_br_writable(mod->perm)) {
2915+ /* ro --> rw */
2916+ err = -ENOMEM;
2917+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2918+ if (br->br_wbr) {
1308ab2a 2919+ path.mnt = br->br_mnt;
2920+ path.dentry = mod->h_root;
1facf9fc 2921+ err = au_wbr_init(br, sb, mod->perm, &path);
2922+ if (unlikely(err)) {
2923+ kfree(br->br_wbr);
2924+ br->br_wbr = NULL;
2925+ }
2926+ }
2927+ }
2928+
2929+ if (!err) {
7f207e10 2930+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
1facf9fc 2931+ br->br_perm = mod->perm;
2932+ }
2933+
4f0767ce 2934+out:
7f207e10 2935+ AuTraceErr(err);
1facf9fc 2936+ return err;
2937+}
7f207e10
AM
2938diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
2939--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
53392da6 2940+++ linux/fs/aufs/branch.h 2011-08-24 13:30:24.731313534 +0200
027c5e7a 2941@@ -0,0 +1,233 @@
1facf9fc 2942+/*
027c5e7a 2943+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 2944+ *
2945+ * This program, aufs is free software; you can redistribute it and/or modify
2946+ * it under the terms of the GNU General Public License as published by
2947+ * the Free Software Foundation; either version 2 of the License, or
2948+ * (at your option) any later version.
dece6358
AM
2949+ *
2950+ * This program is distributed in the hope that it will be useful,
2951+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2952+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2953+ * GNU General Public License for more details.
2954+ *
2955+ * You should have received a copy of the GNU General Public License
2956+ * along with this program; if not, write to the Free Software
2957+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2958+ */
2959+
2960+/*
2961+ * branch filesystems and xino for them
2962+ */
2963+
2964+#ifndef __AUFS_BRANCH_H__
2965+#define __AUFS_BRANCH_H__
2966+
2967+#ifdef __KERNEL__
2968+
2969+#include <linux/fs.h>
2970+#include <linux/mount.h>
1facf9fc 2971+#include <linux/aufs_type.h>
4a4d8108 2972+#include "dynop.h"
1facf9fc 2973+#include "rwsem.h"
2974+#include "super.h"
2975+
2976+/* ---------------------------------------------------------------------- */
2977+
2978+/* a xino file */
2979+struct au_xino_file {
2980+ struct file *xi_file;
2981+ struct mutex xi_nondir_mtx;
2982+
2983+ /* todo: make xino files an array to support huge inode number */
2984+
2985+#ifdef CONFIG_DEBUG_FS
2986+ struct dentry *xi_dbgaufs;
2987+#endif
2988+};
2989+
2990+/* members for writable branch only */
2991+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
2992+struct au_wbr {
dece6358 2993+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 2994+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 2995+ atomic_t wbr_wh_running;
1facf9fc 2996+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
2997+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
2998+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
2999+
3000+ /* mfs mode */
3001+ unsigned long long wbr_bytes;
3002+};
3003+
4a4d8108
AM
3004+/* ext2 has 3 types of operations at least, ext3 has 4 */
3005+#define AuBrDynOp (AuDyLast * 4)
3006+
1facf9fc 3007+/* protected by superblock rwsem */
3008+struct au_branch {
3009+ struct au_xino_file br_xino;
3010+
3011+ aufs_bindex_t br_id;
3012+
3013+ int br_perm;
3014+ struct vfsmount *br_mnt;
4a4d8108
AM
3015+ spinlock_t br_dykey_lock;
3016+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 3017+ atomic_t br_count;
3018+
3019+ struct au_wbr *br_wbr;
3020+
3021+ /* xino truncation */
3022+ blkcnt_t br_xino_upper; /* watermark in blocks */
3023+ atomic_t br_xino_running;
3024+
027c5e7a
AM
3025+#ifdef CONFIG_AUFS_HFSNOTIFY
3026+ struct fsnotify_group *br_hfsn_group;
3027+ struct fsnotify_ops br_hfsn_ops;
3028+#endif
3029+
1facf9fc 3030+#ifdef CONFIG_SYSFS
3031+ /* an entry under sysfs per mount-point */
3032+ char br_name[8];
3033+ struct attribute br_attr;
3034+#endif
3035+};
3036+
3037+/* ---------------------------------------------------------------------- */
3038+
3039+/* branch permission and attribute */
3040+enum {
3041+ AuBrPerm_RW, /* writable, linkable wh */
3042+ AuBrPerm_RO, /* readonly, no wh */
3043+ AuBrPerm_RR, /* natively readonly, no wh */
3044+
3045+ AuBrPerm_RWNoLinkWH, /* un-linkable whiteouts */
3046+
3047+ AuBrPerm_ROWH, /* whiteout-able */
3048+ AuBrPerm_RRWH, /* whiteout-able */
3049+
3050+ AuBrPerm_Last
3051+};
3052+
3053+static inline int au_br_writable(int brperm)
3054+{
3055+ return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
3056+}
3057+
3058+static inline int au_br_whable(int brperm)
3059+{
3060+ return brperm == AuBrPerm_RW
3061+ || brperm == AuBrPerm_ROWH
3062+ || brperm == AuBrPerm_RRWH;
3063+}
3064+
3065+static inline int au_br_rdonly(struct au_branch *br)
3066+{
3067+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
3068+ || !au_br_writable(br->br_perm))
3069+ ? -EROFS : 0;
3070+}
3071+
4a4d8108 3072+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 3073+{
4a4d8108 3074+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 3075+ return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH;
3076+#else
3077+ return 0;
3078+#endif
3079+}
3080+
3081+/* ---------------------------------------------------------------------- */
3082+
3083+/* branch.c */
3084+struct au_sbinfo;
3085+void au_br_free(struct au_sbinfo *sinfo);
3086+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
3087+struct au_opt_add;
3088+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
3089+struct au_opt_del;
3090+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
3091+long au_ibusy_ioctl(struct file *file, unsigned long arg);
3092+#ifdef CONFIG_COMPAT
3093+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
3094+#endif
1facf9fc 3095+struct au_opt_mod;
3096+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 3097+ int *do_refresh);
1facf9fc 3098+
3099+/* xino.c */
3100+static const loff_t au_loff_max = LLONG_MAX;
3101+
3102+int au_xib_trunc(struct super_block *sb);
3103+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
3104+ loff_t *pos);
3105+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
3106+ loff_t *pos);
3107+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
3108+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
3109+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 3110+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 3111+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3112+ ino_t ino);
3113+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
3114+ ino_t *ino);
3115+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
3116+ struct file *base_file, int do_test);
3117+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
3118+
3119+struct au_opt_xino;
3120+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
3121+void au_xino_clr(struct super_block *sb);
3122+struct file *au_xino_def(struct super_block *sb);
3123+int au_xino_path(struct seq_file *seq, struct file *file);
3124+
3125+/* ---------------------------------------------------------------------- */
3126+
3127+/* Superblock to branch */
3128+static inline
3129+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
3130+{
3131+ return au_sbr(sb, bindex)->br_id;
3132+}
3133+
3134+static inline
3135+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
3136+{
3137+ return au_sbr(sb, bindex)->br_mnt;
3138+}
3139+
3140+static inline
3141+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
3142+{
3143+ return au_sbr_mnt(sb, bindex)->mnt_sb;
3144+}
3145+
3146+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
3147+{
e49829fe 3148+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 3149+}
3150+
3151+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
3152+{
3153+ return au_sbr(sb, bindex)->br_perm;
3154+}
3155+
3156+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
3157+{
3158+ return au_br_whable(au_sbr_perm(sb, bindex));
3159+}
3160+
3161+/* ---------------------------------------------------------------------- */
3162+
3163+/*
3164+ * wbr_wh_read_lock, wbr_wh_write_lock
3165+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
3166+ */
3167+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
3168+
dece6358
AM
3169+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
3170+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
3171+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
3172+
1facf9fc 3173+#endif /* __KERNEL__ */
3174+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
3175diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
3176--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 3177+++ linux/fs/aufs/conf.mk 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 3178@@ -0,0 +1,38 @@
4a4d8108
AM
3179+
3180+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
3181+
3182+define AuConf
3183+ifdef ${1}
3184+AuConfStr += ${1}=${${1}}
3185+endif
3186+endef
3187+
b752ccd1 3188+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 3189+ SBILIST \
7f207e10 3190+ HNOTIFY HFSNOTIFY \
4a4d8108
AM
3191+ EXPORT INO_T_64 \
3192+ RDU \
2cbb1c4b 3193+ PROC_MAP \
4a4d8108
AM
3194+ SP_IATTR \
3195+ SHWH \
3196+ BR_RAMFS \
3197+ BR_FUSE POLL \
3198+ BR_HFSPLUS \
3199+ BDEV_LOOP \
b752ccd1
AM
3200+ DEBUG MAGIC_SYSRQ
3201+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
3202+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
3203+
3204+AuConfName = ${obj}/conf.str
3205+${AuConfName}.tmp: FORCE
3206+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
3207+${AuConfName}: ${AuConfName}.tmp
3208+ @diff -q $< $@ > /dev/null 2>&1 || { \
3209+ echo ' GEN ' $@; \
3210+ cp -p $< $@; \
3211+ }
3212+FORCE:
3213+clean-files += ${AuConfName} ${AuConfName}.tmp
3214+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
3215+
3216+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
3217diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
3218--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
3219+++ linux/fs/aufs/cpup.c 2011-08-24 13:30:24.731313534 +0200
3220@@ -0,0 +1,1080 @@
1facf9fc 3221+/*
027c5e7a 3222+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 3223+ *
3224+ * This program, aufs is free software; you can redistribute it and/or modify
3225+ * it under the terms of the GNU General Public License as published by
3226+ * the Free Software Foundation; either version 2 of the License, or
3227+ * (at your option) any later version.
dece6358
AM
3228+ *
3229+ * This program is distributed in the hope that it will be useful,
3230+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3231+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3232+ * GNU General Public License for more details.
3233+ *
3234+ * You should have received a copy of the GNU General Public License
3235+ * along with this program; if not, write to the Free Software
3236+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3237+ */
3238+
3239+/*
3240+ * copy-up functions, see wbr_policy.c for copy-down
3241+ */
3242+
dece6358 3243+#include <linux/file.h>
1facf9fc 3244+#include <linux/fs_stack.h>
dece6358 3245+#include <linux/mm.h>
1facf9fc 3246+#include <linux/uaccess.h>
3247+#include "aufs.h"
3248+
3249+void au_cpup_attr_flags(struct inode *dst, struct inode *src)
3250+{
3251+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
3252+ | S_NOATIME | S_NOCMTIME;
3253+
3254+ dst->i_flags |= src->i_flags & ~mask;
3255+ if (au_test_fs_notime(dst->i_sb))
3256+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
3257+}
3258+
3259+void au_cpup_attr_timesizes(struct inode *inode)
3260+{
3261+ struct inode *h_inode;
3262+
3263+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3264+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 3265+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 3266+}
3267+
3268+void au_cpup_attr_nlink(struct inode *inode, int force)
3269+{
3270+ struct inode *h_inode;
3271+ struct super_block *sb;
3272+ aufs_bindex_t bindex, bend;
3273+
3274+ sb = inode->i_sb;
3275+ bindex = au_ibstart(inode);
3276+ h_inode = au_h_iptr(inode, bindex);
3277+ if (!force
3278+ && !S_ISDIR(h_inode->i_mode)
3279+ && au_opt_test(au_mntflags(sb), PLINK)
3280+ && au_plink_test(inode))
3281+ return;
3282+
3283+ inode->i_nlink = h_inode->i_nlink;
3284+
3285+ /*
3286+ * a smaller nlink makes find(1) noisy, but a larger nlink doesn't.
3287+ * it may include the whplink directory.
3288+ */
3289+ if (S_ISDIR(h_inode->i_mode)) {
3290+ bend = au_ibend(inode);
3291+ for (bindex++; bindex <= bend; bindex++) {
3292+ h_inode = au_h_iptr(inode, bindex);
3293+ if (h_inode)
3294+ au_add_nlink(inode, h_inode);
3295+ }
3296+ }
3297+}
3298+
3299+void au_cpup_attr_changeable(struct inode *inode)
3300+{
3301+ struct inode *h_inode;
3302+
3303+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3304+ inode->i_mode = h_inode->i_mode;
3305+ inode->i_uid = h_inode->i_uid;
3306+ inode->i_gid = h_inode->i_gid;
3307+ au_cpup_attr_timesizes(inode);
3308+ au_cpup_attr_flags(inode, h_inode);
3309+}
3310+
3311+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
3312+{
3313+ struct au_iinfo *iinfo = au_ii(inode);
3314+
1308ab2a 3315+ IiMustWriteLock(inode);
3316+
1facf9fc 3317+ iinfo->ii_higen = h_inode->i_generation;
3318+ iinfo->ii_hsb1 = h_inode->i_sb;
3319+}
3320+
3321+void au_cpup_attr_all(struct inode *inode, int force)
3322+{
3323+ struct inode *h_inode;
3324+
3325+ h_inode = au_h_iptr(inode, au_ibstart(inode));
3326+ au_cpup_attr_changeable(inode);
3327+ if (inode->i_nlink > 0)
3328+ au_cpup_attr_nlink(inode, force);
3329+ inode->i_rdev = h_inode->i_rdev;
3330+ inode->i_blkbits = h_inode->i_blkbits;
3331+ au_cpup_igen(inode, h_inode);
3332+}
3333+
3334+/* ---------------------------------------------------------------------- */
3335+
3336+/* Note: dt_dentry and dt_h_path are not dget/dput-ed */
3337+
3338+/* keep the timestamps of the parent dir when cpup */
3339+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3340+ struct path *h_path)
3341+{
3342+ struct inode *h_inode;
3343+
3344+ dt->dt_dentry = dentry;
3345+ dt->dt_h_path = *h_path;
3346+ h_inode = h_path->dentry->d_inode;
3347+ dt->dt_atime = h_inode->i_atime;
3348+ dt->dt_mtime = h_inode->i_mtime;
3349+ /* smp_mb(); */
3350+}
3351+
3352+void au_dtime_revert(struct au_dtime *dt)
3353+{
3354+ struct iattr attr;
3355+ int err;
3356+
3357+ attr.ia_atime = dt->dt_atime;
3358+ attr.ia_mtime = dt->dt_mtime;
3359+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
3360+ | ATTR_ATIME | ATTR_ATIME_SET;
3361+
3362+ err = vfsub_notify_change(&dt->dt_h_path, &attr);
3363+ if (unlikely(err))
4a4d8108 3364+ pr_warning("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 3365+}
3366+
3367+/* ---------------------------------------------------------------------- */
3368+
3369+static noinline_for_stack
3370+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
3371+{
3372+ int err, sbits;
3373+ struct iattr ia;
3374+ struct path h_path;
1308ab2a 3375+ struct inode *h_isrc, *h_idst;
1facf9fc 3376+
3377+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 3378+ h_idst = h_path.dentry->d_inode;
1facf9fc 3379+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
3380+ h_isrc = h_src->d_inode;
1308ab2a 3381+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 3382+ | ATTR_ATIME | ATTR_MTIME
3383+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
1facf9fc 3384+ ia.ia_uid = h_isrc->i_uid;
3385+ ia.ia_gid = h_isrc->i_gid;
3386+ ia.ia_atime = h_isrc->i_atime;
3387+ ia.ia_mtime = h_isrc->i_mtime;
1308ab2a 3388+ if (h_idst->i_mode != h_isrc->i_mode
3389+ && !S_ISLNK(h_idst->i_mode)) {
3390+ ia.ia_valid |= ATTR_MODE;
3391+ ia.ia_mode = h_isrc->i_mode;
3392+ }
3393+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
3394+ au_cpup_attr_flags(h_idst, h_isrc);
1facf9fc 3395+ err = vfsub_notify_change(&h_path, &ia);
3396+
3397+ /* is this nfs only? */
3398+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
3399+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
3400+ ia.ia_mode = h_isrc->i_mode;
3401+ err = vfsub_notify_change(&h_path, &ia);
3402+ }
3403+
3404+ return err;
3405+}
3406+
3407+/* ---------------------------------------------------------------------- */
3408+
3409+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
3410+ char *buf, unsigned long blksize)
3411+{
3412+ int err;
3413+ size_t sz, rbytes, wbytes;
3414+ unsigned char all_zero;
3415+ char *p, *zp;
3416+ struct mutex *h_mtx;
3417+ /* reduce stack usage: @buf is reused as this iattr for the final size fixup */
3418+ struct iattr *ia;
3419+
3420+ zp = page_address(ZERO_PAGE(0));
3421+ if (unlikely(!zp))
3422+ return -ENOMEM; /* possible? */
3423+
3424+ err = 0;
3425+ all_zero = 0;
3426+ while (len) {
3427+ AuDbg("len %lld\n", len);
3428+ sz = blksize;
3429+ if (len < blksize)
3430+ sz = len;
3431+
3432+ rbytes = 0;
3433+ /* todo: signal_pending? */
3434+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
3435+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3436+ err = rbytes;
3437+ }
3438+ if (unlikely(err < 0))
3439+ break;
3440+
3441+ all_zero = 0;
3442+ if (len >= rbytes && rbytes == blksize)
3443+ all_zero = !memcmp(buf, zp, rbytes);
3444+ if (!all_zero) {
3445+ wbytes = rbytes;
3446+ p = buf;
3447+ while (wbytes) {
3448+ size_t b;
3449+
3450+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3451+ err = b;
3452+ /* todo: signal_pending? */
3453+ if (unlikely(err == -EAGAIN || err == -EINTR))
3454+ continue;
3455+ if (unlikely(err < 0))
3456+ return err; /* leaving only this loop would let 'err = 0' below hide the failed write */
3457+ wbytes -= b;
3458+ p += b;
3459+ }
3460+ } else {
3461+ loff_t res;
3462+
3463+ AuLabel(hole);
3464+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3465+ err = res;
3466+ if (unlikely(res < 0))
3467+ break;
3468+ }
3469+ len -= rbytes;
3470+ err = 0;
3471+ }
3472+
3473+ /* the last block may be a hole: it was only seeked over, so set the size of @dst explicitly */
3474+ if (!err && all_zero) {
3475+ AuLabel(last hole);
3476+
3477+ err = 1;
3478+ if (au_test_nfs(dst->f_dentry->d_sb)) {
3479+ /* nfs requires this step to make last hole */
3480+ /* is this only nfs? */
3481+ do {
3482+ /* todo: signal_pending? */
3483+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3484+ } while (err == -EAGAIN || err == -EINTR);
3485+ if (err == 1)
3486+ dst->f_pos--;
3487+ }
3488+
3489+ if (err == 1) {
3490+ ia = (void *)buf;
3491+ ia->ia_size = dst->f_pos;
3492+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3493+ ia->ia_file = dst;
3494+ h_mtx = &dst->f_dentry->d_inode->i_mutex;
3495+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3496+ err = vfsub_notify_change(&dst->f_path, ia);
3497+ mutex_unlock(h_mtx);
3498+ }
3499+ }
3500+
3501+ return err;
3502+}
3503+
3504+int au_copy_file(struct file *dst, struct file *src, loff_t len)
3505+{
3506+ int err;
3507+ unsigned long blksize;
3508+ unsigned char do_kfree;
3509+ char *buf;
3510+
3511+ err = -ENOMEM;
3512+ blksize = dst->f_dentry->d_sb->s_blocksize;
3513+ if (!blksize || PAGE_SIZE < blksize)
3514+ blksize = PAGE_SIZE;
3515+ AuDbg("blksize %lu\n", blksize);
3516+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr)); /* buf doubles as a struct iattr in au_do_copy_file(); otherwise use a whole page */
3517+ if (do_kfree)
3518+ buf = kmalloc(blksize, GFP_NOFS);
3519+ else
3520+ buf = (void *)__get_free_page(GFP_NOFS);
3521+ if (unlikely(!buf))
3522+ goto out;
3523+
3524+ if (len > (1 << 22))
3525+ AuDbg("copying a large file %lld\n", (long long)len);
3526+
3527+ src->f_pos = 0;
3528+ dst->f_pos = 0;
3529+ err = au_do_copy_file(dst, src, len, buf, blksize);
3530+ if (do_kfree)
3531+ kfree(buf);
3532+ else
3533+ free_page((unsigned long)buf);
3534+
4f0767ce 3535+out:
1facf9fc 3536+ return err;
3537+}
3538+
3539+/*
3540+ * to support a sparse file which is opened with O_APPEND,
3541+ * we need to close the file.
3542+ */
3543+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
4a4d8108 3544+ aufs_bindex_t bsrc, loff_t len)
1facf9fc 3545+{
3546+ int err, i;
3547+ enum { SRC, DST };
3548+ struct {
3549+ aufs_bindex_t bindex;
3550+ unsigned int flags;
3551+ struct dentry *dentry;
3552+ struct file *file;
3553+ void *label, *label_file;
3554+ } *f, file[] = {
3555+ {
3556+ .bindex = bsrc,
3557+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3558+ .file = NULL,
3559+ .label = &&out,
3560+ .label_file = &&out_src
3561+ },
3562+ {
3563+ .bindex = bdst,
3564+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3565+ .file = NULL,
3566+ .label = &&out_src,
3567+ .label_file = &&out_dst
3568+ }
3569+ };
3570+ struct super_block *sb;
3571+
3572+ /* bsrc branch can be ro/rw. */
3573+ sb = dentry->d_sb;
3574+ f = file;
3575+ for (i = 0; i < 2; i++, f++) {
3576+ f->dentry = au_h_dptr(dentry, f->bindex);
3577+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3578+ err = PTR_ERR(f->file);
3579+ if (IS_ERR(f->file))
3580+ goto *f->label;
3581+ err = -EINVAL;
3582+ if (unlikely(!f->file->f_op))
3583+ goto *f->label_file;
3584+ }
3585+
3586+ /* try stopping to update while we copyup */
3587+ IMustLock(file[SRC].dentry->d_inode);
3588+ err = au_copy_file(file[DST].file, file[SRC].file, len);
3589+
4f0767ce 3590+out_dst:
1facf9fc 3591+ fput(file[DST].file);
3592+ au_sbr_put(sb, file[DST].bindex);
4f0767ce 3593+out_src:
1facf9fc 3594+ fput(file[SRC].file);
3595+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 3596+out:
1facf9fc 3597+ return err;
3598+}
3599+
3600+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3601+ aufs_bindex_t bsrc, loff_t len,
3602+ struct inode *h_dir, struct path *h_path)
3603+{
3604+ int err, rerr;
3605+ loff_t l;
3606+
3607+ err = 0;
3608+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3609+ if (len == -1 || l < len)
3610+ len = l;
3611+ if (len)
3612+ err = au_cp_regular(dentry, bdst, bsrc, len);
3613+ if (!err)
3614+ goto out; /* success */
3615+
3616+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3617+ if (rerr) {
3618+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3619+ AuDLNPair(h_path->dentry), err, rerr);
3620+ err = -EIO;
3621+ }
3622+
4f0767ce 3623+out:
1facf9fc 3624+ return err;
3625+}
3626+
3627+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3628+ struct inode *h_dir)
3629+{
3630+ int err, symlen;
3631+ mm_segment_t old_fs;
b752ccd1
AM
3632+ union {
3633+ char *k;
3634+ char __user *u;
3635+ } sym;
1facf9fc 3636+
3637+ err = -ENOSYS;
3638+ if (unlikely(!h_src->d_inode->i_op->readlink))
3639+ goto out;
3640+
3641+ err = -ENOMEM;
b752ccd1
AM
3642+ sym.k = __getname_gfp(GFP_NOFS);
3643+ if (unlikely(!sym.k))
1facf9fc 3644+ goto out;
3645+
3646+ old_fs = get_fs();
3647+ set_fs(KERNEL_DS);
b752ccd1 3648+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 3649+ err = symlen;
3650+ set_fs(old_fs);
3651+
3652+ if (symlen > 0) {
b752ccd1
AM
3653+ sym.k[symlen] = 0;
3654+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 3655+ }
b752ccd1 3656+ __putname(sym.k);
1facf9fc 3657+
4f0767ce 3658+out:
1facf9fc 3659+ return err;
3660+}
3661+
3662+/* return with the lower dst inode is locked */
3663+static noinline_for_stack
3664+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3665+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3666+ struct dentry *dst_parent)
3667+{
3668+ int err;
3669+ umode_t mode;
3670+ unsigned int mnt_flags;
3671+ unsigned char isdir;
3672+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3673+ struct au_dtime dt;
3674+ struct path h_path;
3675+ struct dentry *h_src, *h_dst, *h_parent;
3676+ struct inode *h_inode, *h_dir;
3677+ struct super_block *sb;
3678+
3679+ /* bsrc branch can be ro/rw. */
3680+ h_src = au_h_dptr(dentry, bsrc);
3681+ h_inode = h_src->d_inode;
3682+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3683+
3684+ /* try stopping to be referenced while we are creating */
3685+ h_dst = au_h_dptr(dentry, bdst);
3686+ h_parent = h_dst->d_parent; /* dir inode is locked */
3687+ h_dir = h_parent->d_inode;
3688+ IMustLock(h_dir);
3689+ AuDebugOn(h_parent != h_dst->d_parent);
3690+
3691+ sb = dentry->d_sb;
3692+ h_path.mnt = au_sbr_mnt(sb, bdst);
3693+ if (do_dt) {
3694+ h_path.dentry = h_parent;
3695+ au_dtime_store(&dt, dst_parent, &h_path);
3696+ }
3697+ h_path.dentry = h_dst;
3698+
3699+ isdir = 0;
3700+ mode = h_inode->i_mode;
3701+ switch (mode & S_IFMT) {
3702+ case S_IFREG:
3703+ /* try stopping to update while we are referencing */
3704+ IMustLock(h_inode);
3705+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3706+ if (!err)
3707+ err = au_do_cpup_regular
3708+ (dentry, bdst, bsrc, len,
3709+ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3710+ break;
3711+ case S_IFDIR:
3712+ isdir = 1;
3713+ err = vfsub_mkdir(h_dir, &h_path, mode);
3714+ if (!err) {
3715+ /*
3716+ * strange behaviour from the user's view,
3717+ * particularly in the setattr case
3718+ */
3719+ if (au_ibstart(dst_parent->d_inode) == bdst)
3720+ au_cpup_attr_nlink(dst_parent->d_inode,
3721+ /*force*/1);
3722+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3723+ }
3724+ break;
3725+ case S_IFLNK:
3726+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3727+ break;
3728+ case S_IFCHR:
3729+ case S_IFBLK:
3730+ AuDebugOn(!capable(CAP_MKNOD));
3731+ /*FALLTHROUGH*/
3732+ case S_IFIFO:
3733+ case S_IFSOCK:
3734+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3735+ break;
3736+ default:
3737+ AuIOErr("Unknown inode type 0%o\n", mode);
3738+ err = -EIO;
3739+ }
3740+
3741+ mnt_flags = au_mntflags(sb);
3742+ if (!au_opt_test(mnt_flags, UDBA_NONE)
3743+ && !isdir
3744+ && au_opt_test(mnt_flags, XINO)
3745+ && h_inode->i_nlink == 1
3746+ /* todo: unnecessary? */
3747+ /* && dentry->d_inode->i_nlink == 1 */
3748+ && bdst < bsrc
3749+ && !au_ftest_cpup(flags, KEEPLINO))
1308ab2a 3750+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 3751+ /* ignore this error */
3752+
3753+ if (do_dt)
3754+ au_dtime_revert(&dt);
3755+ return err;
3756+}
3757+
3758+/*
3759+ * copyup the @dentry from @bsrc to @bdst.
3760+ * the caller must set both of the lower dentries.
3761+ * @len is for truncating; when it is -1, copyup the entire file.
3762+ * in link/rename cases, @dst_parent may be different from the real one.
3763+ */
3764+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3765+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3766+ struct dentry *dst_parent)
3767+{
3768+ int err, rerr;
3769+ aufs_bindex_t old_ibstart;
3770+ unsigned char isdir, plink;
3771+ struct au_dtime dt;
3772+ struct path h_path;
3773+ struct dentry *h_src, *h_dst, *h_parent;
3774+ struct inode *dst_inode, *h_dir, *inode;
3775+ struct super_block *sb;
3776+
3777+ AuDebugOn(bsrc <= bdst);
3778+
3779+ sb = dentry->d_sb;
3780+ h_path.mnt = au_sbr_mnt(sb, bdst);
3781+ h_dst = au_h_dptr(dentry, bdst);
3782+ h_parent = h_dst->d_parent; /* dir inode is locked */
3783+ h_dir = h_parent->d_inode;
3784+ IMustLock(h_dir);
3785+
3786+ h_src = au_h_dptr(dentry, bsrc);
3787+ inode = dentry->d_inode;
3788+
3789+ if (!dst_parent)
3790+ dst_parent = dget_parent(dentry);
3791+ else
3792+ dget(dst_parent);
3793+
3794+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
3795+ dst_inode = au_h_iptr(inode, bdst);
3796+ if (dst_inode) {
3797+ if (unlikely(!plink)) {
3798+ err = -EIO;
027c5e7a
AM
3799+ AuIOErr("hi%lu(i%lu) exists on b%d "
3800+ "but plink is disabled\n",
3801+ dst_inode->i_ino, inode->i_ino, bdst);
1facf9fc 3802+ goto out;
3803+ }
3804+
3805+ if (dst_inode->i_nlink) {
3806+ const int do_dt = au_ftest_cpup(flags, DTIME);
3807+
3808+ h_src = au_plink_lkup(inode, bdst);
3809+ err = PTR_ERR(h_src);
3810+ if (IS_ERR(h_src))
3811+ goto out;
3812+ if (unlikely(!h_src->d_inode)) {
3813+ err = -EIO;
3814+ AuIOErr("i%lu exists on a upper branch "
027c5e7a
AM
3815+ "but not pseudo-linked\n",
3816+ inode->i_ino);
1facf9fc 3817+ dput(h_src);
3818+ goto out;
3819+ }
3820+
3821+ if (do_dt) {
3822+ h_path.dentry = h_parent;
3823+ au_dtime_store(&dt, dst_parent, &h_path);
3824+ }
3825+ h_path.dentry = h_dst;
3826+ err = vfsub_link(h_src, h_dir, &h_path);
3827+ if (do_dt)
3828+ au_dtime_revert(&dt);
3829+ dput(h_src);
3830+ goto out;
3831+ } else
3832+ /* todo: cpup_wh_file? */
3833+ /* udba work */
4a4d8108 3834+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 3835+ }
3836+
3837+ old_ibstart = au_ibstart(inode);
3838+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3839+ if (unlikely(err))
3840+ goto out;
3841+ dst_inode = h_dst->d_inode;
3842+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3843+
3844+ err = cpup_iattr(dentry, bdst, h_src);
3845+ isdir = S_ISDIR(dst_inode->i_mode);
3846+ if (!err) {
4a4d8108
AM
3847+ if (bdst < old_ibstart) {
3848+ if (S_ISREG(inode->i_mode)) {
3849+ err = au_dy_iaop(inode, bdst, dst_inode);
3850+ if (unlikely(err))
3851+ goto out_rev;
3852+ }
1facf9fc 3853+ au_set_ibstart(inode, bdst);
4a4d8108 3854+ }
1facf9fc 3855+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3856+ au_hi_flags(inode, isdir));
3857+ mutex_unlock(&dst_inode->i_mutex);
3858+ if (!isdir
3859+ && h_src->d_inode->i_nlink > 1
3860+ && plink)
3861+ au_plink_append(inode, bdst, h_dst);
3862+ goto out; /* success */
3863+ }
3864+
3865+ /* revert */
4a4d8108 3866+out_rev:
1facf9fc 3867+ h_path.dentry = h_parent;
3868+ mutex_unlock(&dst_inode->i_mutex);
3869+ au_dtime_store(&dt, dst_parent, &h_path);
3870+ h_path.dentry = h_dst;
3871+ if (!isdir)
3872+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3873+ else
3874+ rerr = vfsub_rmdir(h_dir, &h_path);
3875+ au_dtime_revert(&dt);
3876+ if (rerr) {
3877+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3878+ err = -EIO;
3879+ }
3880+
4f0767ce 3881+out:
1facf9fc 3882+ dput(dst_parent);
3883+ return err;
3884+}
3885+
3886+struct au_cpup_single_args {
3887+ int *errp;
3888+ struct dentry *dentry;
3889+ aufs_bindex_t bdst, bsrc;
3890+ loff_t len;
3891+ unsigned int flags;
3892+ struct dentry *dst_parent;
3893+};
3894+
3895+static void au_call_cpup_single(void *args)
3896+{
3897+ struct au_cpup_single_args *a = args;
3898+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3899+ a->flags, a->dst_parent);
3900+}
3901+
53392da6
AM
3902+/*
3903+ * prevent SIGXFSZ in copy-up.
3904+ * testing CAP_MKNOD is for generic fs,
3905+ * but CAP_FSETID is for xfs only, currently.
3906+ */
3907+static int au_cpup_sio_test(struct super_block *sb, umode_t mode)
3908+{
3909+ int do_sio;
3910+
3911+ do_sio = 0;
3912+ if (!au_wkq_test()
3913+ && (!au_sbi(sb)->si_plink_maint_pid
3914+ || au_plink_maint(sb, AuLock_NOPLM))) {
3915+ switch (mode & S_IFMT) {
3916+ case S_IFREG:
3917+ /* no condition about RLIMIT_FSIZE and the file size */
3918+ do_sio = 1;
3919+ break;
3920+ case S_IFCHR:
3921+ case S_IFBLK:
3922+ do_sio = !capable(CAP_MKNOD);
3923+ break;
3924+ }
3925+ if (!do_sio)
3926+ do_sio = ((mode & (S_ISUID | S_ISGID))
3927+ && !capable(CAP_FSETID));
3928+ }
3929+
3930+ return do_sio;
3931+}
3932+
1facf9fc 3933+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3934+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3935+ struct dentry *dst_parent)
3936+{
3937+ int err, wkq_err;
1facf9fc 3938+ struct dentry *h_dentry;
3939+
3940+ h_dentry = au_h_dptr(dentry, bsrc);
53392da6 3941+ if (!au_cpup_sio_test(dentry->d_sb, h_dentry->d_inode->i_mode))
1facf9fc 3942+ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3943+ dst_parent);
3944+ else {
3945+ struct au_cpup_single_args args = {
3946+ .errp = &err,
3947+ .dentry = dentry,
3948+ .bdst = bdst,
3949+ .bsrc = bsrc,
3950+ .len = len,
3951+ .flags = flags,
3952+ .dst_parent = dst_parent
3953+ };
3954+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3955+ if (unlikely(wkq_err))
3956+ err = wkq_err;
3957+ }
3958+
3959+ return err;
3960+}
3961+
3962+/*
3963+ * copyup the @dentry from the first active lower branch to @bdst,
3964+ * using au_cpup_single().
3965+ */
3966+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3967+ unsigned int flags)
3968+{
3969+ int err;
3970+ aufs_bindex_t bsrc, bend;
3971+
3972+ bend = au_dbend(dentry);
3973+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
3974+ if (au_h_dptr(dentry, bsrc))
3975+ break;
3976+
3977+ err = au_lkup_neg(dentry, bdst);
3978+ if (!err) {
3979+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3980+ if (!err)
3981+ return 0; /* success */
3982+
3983+ /* revert */
3984+ au_set_h_dptr(dentry, bdst, NULL);
3985+ au_set_dbstart(dentry, bsrc);
3986+ }
3987+
3988+ return err;
3989+}
3990+
3991+struct au_cpup_simple_args {
3992+ int *errp;
3993+ struct dentry *dentry;
3994+ aufs_bindex_t bdst;
3995+ loff_t len;
3996+ unsigned int flags;
3997+};
3998+
3999+static void au_call_cpup_simple(void *args)
4000+{
4001+ struct au_cpup_simple_args *a = args;
4002+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
4003+}
4004+
4005+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4006+ unsigned int flags)
4007+{
4008+ int err, wkq_err;
1facf9fc 4009+ struct dentry *parent;
4010+ struct inode *h_dir;
4011+
4012+ parent = dget_parent(dentry);
4013+ h_dir = au_h_iptr(parent->d_inode, bdst);
53392da6
AM
4014+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
4015+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4016+ err = au_cpup_simple(dentry, bdst, len, flags);
4017+ else {
4018+ struct au_cpup_simple_args args = {
4019+ .errp = &err,
4020+ .dentry = dentry,
4021+ .bdst = bdst,
4022+ .len = len,
4023+ .flags = flags
4024+ };
4025+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
4026+ if (unlikely(wkq_err))
4027+ err = wkq_err;
4028+ }
4029+
4030+ dput(parent);
4031+ return err;
4032+}
4033+
4034+/* ---------------------------------------------------------------------- */
4035+
4036+/*
4037+ * copyup the deleted file for writing: temporarily make @wh_dentry the lower dentry on @bdst (and, when @file is given, its top lower dentry the one on the old bstart), copy up, then restore dinfo.
4038+ */
4039+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
4040+ struct dentry *wh_dentry, struct file *file,
4041+ loff_t len)
4042+{
4043+ int err;
4044+ aufs_bindex_t bstart;
4045+ struct au_dinfo *dinfo;
4046+ struct dentry *h_d_dst, *h_d_start;
4a4d8108 4047+ struct au_hdentry *hdp;
1facf9fc 4048+
4049+ dinfo = au_di(dentry);
1308ab2a 4050+ AuRwMustWriteLock(&dinfo->di_rwsem);
4051+
1facf9fc 4052+ bstart = dinfo->di_bstart;
4a4d8108
AM
4053+ hdp = dinfo->di_hdentry;
4054+ h_d_dst = hdp[0 + bdst].hd_dentry;
1facf9fc 4055+ dinfo->di_bstart = bdst;
4a4d8108 4056+ hdp[0 + bdst].hd_dentry = wh_dentry;
027c5e7a
AM
4057+ if (file) {
4058+ h_d_start = hdp[0 + bstart].hd_dentry;
4a4d8108 4059+ hdp[0 + bstart].hd_dentry = au_hf_top(file)->f_dentry;
027c5e7a 4060+ }
1facf9fc 4061+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
4062+ /*dst_parent*/NULL);
027c5e7a
AM
4063+ if (file) {
4064+ if (!err)
4065+ err = au_reopen_nondir(file);
4a4d8108 4066+ hdp[0 + bstart].hd_dentry = h_d_start;
1facf9fc 4067+ }
4a4d8108 4068+ hdp[0 + bdst].hd_dentry = h_d_dst;
1facf9fc 4069+ dinfo->di_bstart = bstart;
4070+
4071+ return err;
4072+}
4073+
4074+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4075+ struct file *file)
4076+{
4077+ int err;
4078+ struct au_dtime dt;
4079+ struct dentry *parent, *h_parent, *wh_dentry;
4080+ struct au_branch *br;
4081+ struct path h_path;
4082+
4083+ br = au_sbr(dentry->d_sb, bdst);
4084+ parent = dget_parent(dentry);
4085+ h_parent = au_h_dptr(parent, bdst);
4086+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
4087+ err = PTR_ERR(wh_dentry);
4088+ if (IS_ERR(wh_dentry))
4089+ goto out;
4090+
4091+ h_path.dentry = h_parent;
4092+ h_path.mnt = br->br_mnt;
4093+ au_dtime_store(&dt, parent, &h_path);
4094+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
4095+ if (unlikely(err))
4096+ goto out_wh;
4097+
4098+ dget(wh_dentry);
4099+ h_path.dentry = wh_dentry;
4a4d8108
AM
4100+ if (!S_ISDIR(wh_dentry->d_inode->i_mode))
4101+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
4102+ else
4103+ err = vfsub_rmdir(h_parent->d_inode, &h_path);
1facf9fc 4104+ if (unlikely(err)) {
4105+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
4106+ AuDLNPair(wh_dentry), err);
4107+ err = -EIO;
4108+ }
4109+ au_dtime_revert(&dt);
4110+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
4111+
4f0767ce 4112+out_wh:
1facf9fc 4113+ dput(wh_dentry);
4f0767ce 4114+out:
1facf9fc 4115+ dput(parent);
4116+ return err;
4117+}
4118+
4119+struct au_cpup_wh_args {
4120+ int *errp;
4121+ struct dentry *dentry;
4122+ aufs_bindex_t bdst;
4123+ loff_t len;
4124+ struct file *file;
4125+};
4126+
4127+static void au_call_cpup_wh(void *args)
4128+{
4129+ struct au_cpup_wh_args *a = args;
4130+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
4131+}
4132+
4133+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4134+ struct file *file)
4135+{
4136+ int err, wkq_err;
4137+ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
4138+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
4139+ struct au_wbr *wbr;
4140+
4141+ parent = dget_parent(dentry);
4142+ dir = parent->d_inode;
4143+ h_orph = NULL;
4144+ h_parent = NULL;
4145+ h_dir = au_igrab(au_h_iptr(dir, bdst));
4146+ h_tmpdir = h_dir;
4147+ if (!h_dir->i_nlink) {
4148+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
4149+ h_orph = wbr->wbr_orph;
4150+
4151+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 4152+ au_set_h_dptr(parent, bdst, dget(h_orph));
4153+ h_tmpdir = h_orph->d_inode;
1facf9fc 4154+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
4155+
4156+ /* this temporary unlock is safe */
4157+ if (file)
4a4d8108 4158+ h_dentry = au_hf_top(file)->f_dentry;
1facf9fc 4159+ else
4160+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
4161+ h_inode = h_dentry->d_inode;
4162+ IMustLock(h_inode);
4163+ mutex_unlock(&h_inode->i_mutex);
dece6358 4164+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
1facf9fc 4165+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108 4166+ /* todo: au_h_open_pre()? */
1facf9fc 4167+ }
4168+
53392da6
AM
4169+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
4170+ && !au_cpup_sio_test(dentry->d_sb, dentry->d_inode->i_mode))
1facf9fc 4171+ err = au_cpup_wh(dentry, bdst, len, file);
4172+ else {
4173+ struct au_cpup_wh_args args = {
4174+ .errp = &err,
4175+ .dentry = dentry,
4176+ .bdst = bdst,
4177+ .len = len,
4178+ .file = file
4179+ };
4180+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
4181+ if (unlikely(wkq_err))
4182+ err = wkq_err;
4183+ }
4184+
4185+ if (h_orph) {
4186+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 4187+ /* todo: au_h_open_post()? */
1facf9fc 4188+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 4189+ au_set_h_dptr(parent, bdst, h_parent);
4190+ }
4191+ iput(h_dir);
4192+ dput(parent);
4193+
4194+ return err;
4195+}
4196+
4197+/* ---------------------------------------------------------------------- */
4198+
4199+/*
4200+ * generic routine for both of copy-up and copy-down.
4201+ */
4202+/* cf. revalidate function in file.c */
4203+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4204+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4205+ struct dentry *h_parent, void *arg),
4206+ void *arg)
4207+{
4208+ int err;
4209+ struct au_pin pin;
4210+ struct dentry *d, *parent, *h_parent, *real_parent;
4211+
4212+ err = 0;
4213+ parent = dget_parent(dentry);
4214+ if (IS_ROOT(parent))
4215+ goto out;
4216+
4217+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
4218+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
4219+
4220+ /* do not use au_dpage */
4221+ real_parent = parent;
4222+ while (1) {
4223+ dput(parent);
4224+ parent = dget_parent(dentry);
4225+ h_parent = au_h_dptr(parent, bdst);
4226+ if (h_parent)
4227+ goto out; /* success */
4228+
4229+ /* find top dir which is necessary to cpup */
4230+ do {
4231+ d = parent;
4232+ dput(parent);
4233+ parent = dget_parent(d);
4234+ di_read_lock_parent3(parent, !AuLock_IR);
4235+ h_parent = au_h_dptr(parent, bdst);
4236+ di_read_unlock(parent, !AuLock_IR);
4237+ } while (!h_parent);
4238+
4239+ if (d != real_parent)
4240+ di_write_lock_child3(d);
4241+
4242+ /* somebody else might create while we were sleeping */
4243+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
4244+ if (au_h_dptr(d, bdst))
4245+ au_update_dbstart(d);
4246+
4247+ au_pin_set_dentry(&pin, d);
4248+ err = au_do_pin(&pin);
4249+ if (!err) {
4250+ err = cp(d, bdst, h_parent, arg);
4251+ au_unpin(&pin);
4252+ }
4253+ }
4254+
4255+ if (d != real_parent)
4256+ di_write_unlock(d);
4257+ if (unlikely(err))
4258+ break;
4259+ }
4260+
4f0767ce 4261+out:
1facf9fc 4262+ dput(parent);
4263+ return err;
4264+}
4265+
4266+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
4267+ struct dentry *h_parent __maybe_unused ,
4268+ void *arg __maybe_unused)
4269+{
4270+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
4271+}
4272+
4273+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4274+{
4275+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
4276+}
4277+
4278+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
4279+{
4280+ int err;
4281+ struct dentry *parent;
4282+ struct inode *dir;
4283+
4284+ parent = dget_parent(dentry);
4285+ dir = parent->d_inode;
4286+ err = 0;
4287+ if (au_h_iptr(dir, bdst))
4288+ goto out;
4289+
4290+ di_read_unlock(parent, AuLock_IR);
4291+ di_write_lock_parent(parent);
4292+ /* someone else might change our inode while we were sleeping */
4293+ if (!au_h_iptr(dir, bdst))
4294+ err = au_cpup_dirs(dentry, bdst);
4295+ di_downgrade_lock(parent, AuLock_IR);
4296+
4f0767ce 4297+out:
1facf9fc 4298+ dput(parent);
4299+ return err;
4300+}
7f207e10
AM
4301diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
4302--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4303+++ linux/fs/aufs/cpup.h 2011-08-24 13:30:24.731313534 +0200
7f207e10 4304@@ -0,0 +1,83 @@
1facf9fc 4305+/*
027c5e7a 4306+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4307+ *
4308+ * This program, aufs is free software; you can redistribute it and/or modify
4309+ * it under the terms of the GNU General Public License as published by
4310+ * the Free Software Foundation; either version 2 of the License, or
4311+ * (at your option) any later version.
dece6358
AM
4312+ *
4313+ * This program is distributed in the hope that it will be useful,
4314+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4315+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4316+ * GNU General Public License for more details.
4317+ *
4318+ * You should have received a copy of the GNU General Public License
4319+ * along with this program; if not, write to the Free Software
4320+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4321+ */
4322+
4323+/*
4324+ * copy-up/down functions
4325+ */
4326+
4327+#ifndef __AUFS_CPUP_H__
4328+#define __AUFS_CPUP_H__
4329+
4330+#ifdef __KERNEL__
4331+
dece6358
AM
4332+#include <linux/path.h>
4333+#include <linux/time.h>
1facf9fc 4334+#include <linux/aufs_type.h>
4335+
dece6358
AM
4336+struct inode;
4337+struct file;
4338+
1facf9fc 4339+void au_cpup_attr_flags(struct inode *dst, struct inode *src);
4340+void au_cpup_attr_timesizes(struct inode *inode);
4341+void au_cpup_attr_nlink(struct inode *inode, int force);
4342+void au_cpup_attr_changeable(struct inode *inode);
4343+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
4344+void au_cpup_attr_all(struct inode *inode, int force);
4345+
4346+/* ---------------------------------------------------------------------- */
4347+
4348+/* cpup flags */
4349+#define AuCpup_DTIME 1 /* do dtime_store/revert */
4350+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
4351+ for link(2) */
4352+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
7f207e10
AM
4353+#define au_fset_cpup(flags, name) \
4354+ do { (flags) |= AuCpup_##name; } while (0)
4355+#define au_fclr_cpup(flags, name) \
4356+ do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 4357+
4358+int au_copy_file(struct file *dst, struct file *src, loff_t len);
4359+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
4360+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
4361+ struct dentry *dst_parent);
4362+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4363+ unsigned int flags);
4364+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
4365+ struct file *file);
4366+
4367+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
4368+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
4369+ struct dentry *h_parent, void *arg),
4370+ void *arg);
4371+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4372+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
4373+
4374+/* ---------------------------------------------------------------------- */
4375+
4376+/* keep timestamps when copyup */
4377+struct au_dtime {
4378+ struct dentry *dt_dentry;
4379+ struct path dt_h_path;
4380+ struct timespec dt_atime, dt_mtime;
4381+};
4382+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4383+ struct path *h_path);
4384+void au_dtime_revert(struct au_dtime *dt);
4385+
4386+#endif /* __KERNEL__ */
4387+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
4388diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
4389--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4390+++ linux/fs/aufs/dbgaufs.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4391@@ -0,0 +1,334 @@
1facf9fc 4392+/*
027c5e7a 4393+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4394+ *
4395+ * This program, aufs is free software; you can redistribute it and/or modify
4396+ * it under the terms of the GNU General Public License as published by
4397+ * the Free Software Foundation; either version 2 of the License, or
4398+ * (at your option) any later version.
dece6358
AM
4399+ *
4400+ * This program is distributed in the hope that it will be useful,
4401+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4402+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4403+ * GNU General Public License for more details.
4404+ *
4405+ * You should have received a copy of the GNU General Public License
4406+ * along with this program; if not, write to the Free Software
4407+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4408+ */
4409+
4410+/*
4411+ * debugfs interface
4412+ */
4413+
4414+#include <linux/debugfs.h>
4415+#include "aufs.h"
4416+
4417+#ifndef CONFIG_SYSFS
4418+#error DEBUG_FS depends upon SYSFS
4419+#endif
4420+
4421+static struct dentry *dbgaufs;
4422+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
4423+
4424+/* 20 is max digits length of ulong 64 */
4425+struct dbgaufs_arg {
4426+ int n;
4427+ char a[20 * 4];
4428+};
4429+
4430+/*
4431+ * common function for all XINO files
4432+ */
4433+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
4434+ struct file *file)
4435+{
4436+ kfree(file->private_data);
4437+ return 0;
4438+}
4439+
4440+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
4441+{
4442+ int err;
4443+ struct kstat st;
4444+ struct dbgaufs_arg *p;
4445+
4446+ err = -ENOMEM;
4447+ p = kmalloc(sizeof(*p), GFP_NOFS);
4448+ if (unlikely(!p))
4449+ goto out;
4450+
4451+ err = 0;
4452+ p->n = 0;
4453+ file->private_data = p;
4454+ if (!xf)
4455+ goto out;
4456+
4457+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4458+ if (!err) {
4459+ if (do_fcnt)
4460+ p->n = snprintf
4461+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4462+ (long)file_count(xf), st.blocks, st.blksize,
4463+ (long long)st.size);
4464+ else
4465+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4466+ st.blocks, st.blksize,
4467+ (long long)st.size);
4468+ AuDebugOn(p->n >= sizeof(p->a));
4469+ } else {
4470+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4471+ err = 0;
4472+ }
4473+
4f0767ce 4474+out:
1facf9fc 4475+ return err;
4476+
4477+}
4478+
4479+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4480+ size_t count, loff_t *ppos)
4481+{
4482+ struct dbgaufs_arg *p;
4483+
4484+ p = file->private_data;
4485+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4486+}
4487+
4488+/* ---------------------------------------------------------------------- */
4489+
4490+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4491+{
4492+ int err;
4493+ struct au_sbinfo *sbinfo;
4494+ struct super_block *sb;
4495+
4496+ sbinfo = inode->i_private;
4497+ sb = sbinfo->si_sb;
4498+ si_noflush_read_lock(sb);
4499+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4500+ si_read_unlock(sb);
4501+ return err;
4502+}
4503+
4504+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 4505+ .owner = THIS_MODULE,
1facf9fc 4506+ .open = dbgaufs_xib_open,
4507+ .release = dbgaufs_xi_release,
4508+ .read = dbgaufs_xi_read
4509+};
4510+
4511+/* ---------------------------------------------------------------------- */
4512+
4513+#define DbgaufsXi_PREFIX "xi"
4514+
4515+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4516+{
4517+ int err;
4518+ long l;
4519+ struct au_sbinfo *sbinfo;
4520+ struct super_block *sb;
4521+ struct file *xf;
4522+ struct qstr *name;
4523+
4524+ err = -ENOENT;
4525+ xf = NULL;
4526+ name = &file->f_dentry->d_name;
4527+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4528+ || memcmp(name->name, DbgaufsXi_PREFIX,
4529+ sizeof(DbgaufsXi_PREFIX) - 1)))
4530+ goto out;
4531+ err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4532+ if (unlikely(err))
4533+ goto out;
4534+
4535+ sbinfo = inode->i_private;
4536+ sb = sbinfo->si_sb;
4537+ si_noflush_read_lock(sb);
4538+ if (l <= au_sbend(sb)) {
4539+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4540+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4541+ } else
4542+ err = -ENOENT;
4543+ si_read_unlock(sb);
4544+
4f0767ce 4545+out:
1facf9fc 4546+ return err;
4547+}
4548+
4549+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 4550+ .owner = THIS_MODULE,
1facf9fc 4551+ .open = dbgaufs_xino_open,
4552+ .release = dbgaufs_xi_release,
4553+ .read = dbgaufs_xi_read
4554+};
4555+
4556+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4557+{
4558+ aufs_bindex_t bend;
4559+ struct au_branch *br;
4560+ struct au_xino_file *xi;
4561+
4562+ if (!au_sbi(sb)->si_dbgaufs)
4563+ return;
4564+
4565+ bend = au_sbend(sb);
4566+ for (; bindex <= bend; bindex++) {
4567+ br = au_sbr(sb, bindex);
4568+ xi = &br->br_xino;
4569+ if (xi->xi_dbgaufs) {
4570+ debugfs_remove(xi->xi_dbgaufs);
4571+ xi->xi_dbgaufs = NULL;
4572+ }
4573+ }
4574+}
4575+
4576+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4577+{
4578+ struct au_sbinfo *sbinfo;
4579+ struct dentry *parent;
4580+ struct au_branch *br;
4581+ struct au_xino_file *xi;
4582+ aufs_bindex_t bend;
4583+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
4584+
4585+ sbinfo = au_sbi(sb);
4586+ parent = sbinfo->si_dbgaufs;
4587+ if (!parent)
4588+ return;
4589+
4590+ bend = au_sbend(sb);
4591+ for (; bindex <= bend; bindex++) {
4592+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4593+ br = au_sbr(sb, bindex);
4594+ xi = &br->br_xino;
4595+ AuDebugOn(xi->xi_dbgaufs);
4596+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4597+ sbinfo, &dbgaufs_xino_fop);
4598+ /* ignore an error */
4599+ if (unlikely(!xi->xi_dbgaufs))
4600+ AuWarn1("failed %s under debugfs\n", name);
4601+ }
4602+}
4603+
4604+/* ---------------------------------------------------------------------- */
4605+
4606+#ifdef CONFIG_AUFS_EXPORT
4607+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4608+{
4609+ int err;
4610+ struct au_sbinfo *sbinfo;
4611+ struct super_block *sb;
4612+
4613+ sbinfo = inode->i_private;
4614+ sb = sbinfo->si_sb;
4615+ si_noflush_read_lock(sb);
4616+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4617+ si_read_unlock(sb);
4618+ return err;
4619+}
4620+
4621+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 4622+ .owner = THIS_MODULE,
1facf9fc 4623+ .open = dbgaufs_xigen_open,
4624+ .release = dbgaufs_xi_release,
4625+ .read = dbgaufs_xi_read
4626+};
4627+
4628+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4629+{
4630+ int err;
4631+
dece6358
AM
4632+ /*
4633+ * This function is a dynamic '__init' function actually,
4634+ * so the tiny check for si_rwsem is unnecessary.
4635+ */
4636+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4637+
1facf9fc 4638+ err = -EIO;
4639+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
4640+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4641+ &dbgaufs_xigen_fop);
4642+ if (sbinfo->si_dbgaufs_xigen)
4643+ err = 0;
4644+
4645+ return err;
4646+}
4647+#else
4648+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4649+{
4650+ return 0;
4651+}
4652+#endif /* CONFIG_AUFS_EXPORT */
4653+
4654+/* ---------------------------------------------------------------------- */
4655+
4656+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4657+{
dece6358
AM
4658+ /*
4659+ * This function is a dynamic '__init' function actually,
4660+ * so the tiny check for si_rwsem is unnecessary.
4661+ */
4662+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4663+
1facf9fc 4664+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
4665+ sbinfo->si_dbgaufs = NULL;
4666+ kobject_put(&sbinfo->si_kobj);
4667+}
4668+
4669+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4670+{
4671+ int err;
4672+ char name[SysaufsSiNameLen];
4673+
dece6358
AM
4674+ /*
4675+ * This function is a dynamic '__init' function actually,
4676+ * so the tiny check for si_rwsem is unnecessary.
4677+ */
4678+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4679+
1facf9fc 4680+ err = -ENOENT;
4681+ if (!dbgaufs) {
4682+ AuErr1("/debug/aufs is uninitialized\n");
4683+ goto out;
4684+ }
4685+
4686+ err = -EIO;
4687+ sysaufs_name(sbinfo, name);
4688+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4689+ if (unlikely(!sbinfo->si_dbgaufs))
4690+ goto out;
4691+ kobject_get(&sbinfo->si_kobj);
4692+
4693+ sbinfo->si_dbgaufs_xib = debugfs_create_file
4694+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4695+ &dbgaufs_xib_fop);
4696+ if (unlikely(!sbinfo->si_dbgaufs_xib))
4697+ goto out_dir;
4698+
4699+ err = dbgaufs_xigen_init(sbinfo);
4700+ if (!err)
4701+ goto out; /* success */
4702+
4f0767ce 4703+out_dir:
1facf9fc 4704+ dbgaufs_si_fin(sbinfo);
4f0767ce 4705+out:
1facf9fc 4706+ return err;
4707+}
4708+
4709+/* ---------------------------------------------------------------------- */
4710+
4711+void dbgaufs_fin(void)
4712+{
4713+ debugfs_remove(dbgaufs);
4714+}
4715+
4716+int __init dbgaufs_init(void)
4717+{
4718+ int err;
4719+
4720+ err = -EIO;
4721+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4722+ if (dbgaufs)
4723+ err = 0;
4724+ return err;
4725+}
7f207e10
AM
4726diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
4727--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 4728+++ linux/fs/aufs/dbgaufs.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108 4729@@ -0,0 +1,52 @@
1facf9fc 4730+/*
027c5e7a 4731+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4732+ *
4733+ * This program, aufs is free software; you can redistribute it and/or modify
4734+ * it under the terms of the GNU General Public License as published by
4735+ * the Free Software Foundation; either version 2 of the License, or
4736+ * (at your option) any later version.
dece6358
AM
4737+ *
4738+ * This program is distributed in the hope that it will be useful,
4739+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4740+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4741+ * GNU General Public License for more details.
4742+ *
4743+ * You should have received a copy of the GNU General Public License
4744+ * along with this program; if not, write to the Free Software
4745+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4746+ */
4747+
4748+/*
4749+ * debugfs interface
4750+ */
4751+
4752+#ifndef __DBGAUFS_H__
4753+#define __DBGAUFS_H__
4754+
4755+#ifdef __KERNEL__
4756+
dece6358 4757+#include <linux/init.h>
1facf9fc 4758+#include <linux/aufs_type.h>
4759+
dece6358 4760+struct super_block;
1facf9fc 4761+struct au_sbinfo;
dece6358 4762+
1facf9fc 4763+#ifdef CONFIG_DEBUG_FS
4764+/* dbgaufs.c */
4765+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4766+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4767+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4768+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4769+void dbgaufs_fin(void);
4770+int __init dbgaufs_init(void);
1facf9fc 4771+#else
4a4d8108
AM
4772+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
4773+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
4774+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
4775+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
4776+AuStubVoid(dbgaufs_fin, void)
4777+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 4778+#endif /* CONFIG_DEBUG_FS */
4779+
4780+#endif /* __KERNEL__ */
4781+#endif /* __DBGAUFS_H__ */
7f207e10
AM
4782diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
4783--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6 4784+++ linux/fs/aufs/dcsub.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 4785@@ -0,0 +1,243 @@
1facf9fc 4786+/*
027c5e7a 4787+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 4788+ *
4789+ * This program, aufs is free software; you can redistribute it and/or modify
4790+ * it under the terms of the GNU General Public License as published by
4791+ * the Free Software Foundation; either version 2 of the License, or
4792+ * (at your option) any later version.
dece6358
AM
4793+ *
4794+ * This program is distributed in the hope that it will be useful,
4795+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4796+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4797+ * GNU General Public License for more details.
4798+ *
4799+ * You should have received a copy of the GNU General Public License
4800+ * along with this program; if not, write to the Free Software
4801+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4802+ */
4803+
4804+/*
4805+ * sub-routines for dentry cache
4806+ */
4807+
4808+#include "aufs.h"
4809+
4810+static void au_dpage_free(struct au_dpage *dpage)
4811+{
4812+ int i;
4813+ struct dentry **p;
4814+
4815+ p = dpage->dentries;
4816+ for (i = 0; i < dpage->ndentry; i++)
4817+ dput(*p++);
4818+ free_page((unsigned long)dpage->dentries);
4819+}
4820+
4821+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4822+{
4823+ int err;
4824+ void *p;
4825+
4826+ err = -ENOMEM;
4827+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4828+ if (unlikely(!dpages->dpages))
4829+ goto out;
4830+
4831+ p = (void *)__get_free_page(gfp);
4832+ if (unlikely(!p))
4833+ goto out_dpages;
4834+
4835+ dpages->dpages[0].ndentry = 0;
4836+ dpages->dpages[0].dentries = p;
4837+ dpages->ndpage = 1;
4838+ return 0; /* success */
4839+
4f0767ce 4840+out_dpages:
1facf9fc 4841+ kfree(dpages->dpages);
4f0767ce 4842+out:
1facf9fc 4843+ return err;
4844+}
4845+
4846+void au_dpages_free(struct au_dcsub_pages *dpages)
4847+{
4848+ int i;
4849+ struct au_dpage *p;
4850+
4851+ p = dpages->dpages;
4852+ for (i = 0; i < dpages->ndpage; i++)
4853+ au_dpage_free(p++);
4854+ kfree(dpages->dpages);
4855+}
4856+
4857+static int au_dpages_append(struct au_dcsub_pages *dpages,
4858+ struct dentry *dentry, gfp_t gfp)
4859+{
4860+ int err, sz;
4861+ struct au_dpage *dpage;
4862+ void *p;
4863+
4864+ dpage = dpages->dpages + dpages->ndpage - 1;
4865+ sz = PAGE_SIZE / sizeof(dentry);
4866+ if (unlikely(dpage->ndentry >= sz)) {
4867+ AuLabel(new dpage);
4868+ err = -ENOMEM;
4869+ sz = dpages->ndpage * sizeof(*dpages->dpages);
4870+ p = au_kzrealloc(dpages->dpages, sz,
4871+ sz + sizeof(*dpages->dpages), gfp);
4872+ if (unlikely(!p))
4873+ goto out;
4874+
4875+ dpages->dpages = p;
4876+ dpage = dpages->dpages + dpages->ndpage;
4877+ p = (void *)__get_free_page(gfp);
4878+ if (unlikely(!p))
4879+ goto out;
4880+
4881+ dpage->ndentry = 0;
4882+ dpage->dentries = p;
4883+ dpages->ndpage++;
4884+ }
4885+
027c5e7a
AM
4886+ AuDebugOn(!dentry->d_count);
4887+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 4888+ return 0; /* success */
4889+
4f0767ce 4890+out:
1facf9fc 4891+ return err;
4892+}
4893+
4894+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4895+ au_dpages_test test, void *arg)
4896+{
4897+ int err;
027c5e7a 4898+ struct dentry *this_parent;
1facf9fc 4899+ struct list_head *next;
4900+ struct super_block *sb = root->d_sb;
4901+
4902+ err = 0;
027c5e7a
AM
4903+ write_seqlock(&rename_lock);
4904+ this_parent = root;
4905+ spin_lock(&this_parent->d_lock);
4f0767ce 4906+repeat:
1facf9fc 4907+ next = this_parent->d_subdirs.next;
4f0767ce 4908+resume:
1facf9fc 4909+ if (this_parent->d_sb == sb
4910+ && !IS_ROOT(this_parent)
027c5e7a
AM
4911+ && au_di(this_parent)
4912+ && this_parent->d_count
1facf9fc 4913+ && (!test || test(this_parent, arg))) {
4914+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4915+ if (unlikely(err))
4916+ goto out;
4917+ }
4918+
4919+ while (next != &this_parent->d_subdirs) {
4920+ struct list_head *tmp = next;
4921+ struct dentry *dentry = list_entry(tmp, struct dentry,
4922+ d_u.d_child);
027c5e7a 4923+
1facf9fc 4924+ next = tmp->next;
027c5e7a
AM
4925+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
4926+ if (dentry->d_count) {
4927+ if (!list_empty(&dentry->d_subdirs)) {
4928+ spin_unlock(&this_parent->d_lock);
4929+ spin_release(&dentry->d_lock.dep_map, 1,
4930+ _RET_IP_);
4931+ this_parent = dentry;
4932+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1,
4933+ _RET_IP_);
4934+ goto repeat;
4935+ }
4936+ if (dentry->d_sb == sb
4937+ && au_di(dentry)
4938+ && (!test || test(dentry, arg)))
4939+ err = au_dpages_append(dpages, dentry,
4940+ GFP_ATOMIC);
1facf9fc 4941+ }
027c5e7a
AM
4942+ spin_unlock(&dentry->d_lock);
4943+ if (unlikely(err))
4944+ goto out;
1facf9fc 4945+ }
4946+
4947+ if (this_parent != root) {
027c5e7a
AM
4948+ struct dentry *tmp;
4949+ struct dentry *child;
4950+
4951+ tmp = this_parent->d_parent;
4952+ rcu_read_lock();
4953+ spin_unlock(&this_parent->d_lock);
4954+ child = this_parent;
4955+ this_parent = tmp;
4956+ spin_lock(&this_parent->d_lock);
4957+ rcu_read_unlock();
4958+ next = child->d_u.d_child.next;
1facf9fc 4959+ goto resume;
4960+ }
027c5e7a 4961+
4f0767ce 4962+out:
027c5e7a
AM
4963+ spin_unlock(&this_parent->d_lock);
4964+ write_sequnlock(&rename_lock);
1facf9fc 4965+ return err;
4966+}
4967+
4968+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4969+ int do_include, au_dpages_test test, void *arg)
4970+{
4971+ int err;
4972+
4973+ err = 0;
027c5e7a
AM
4974+ write_seqlock(&rename_lock);
4975+ spin_lock(&dentry->d_lock);
4976+ if (do_include
4977+ && dentry->d_count
4978+ && (!test || test(dentry, arg)))
1facf9fc 4979+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
4980+ spin_unlock(&dentry->d_lock);
4981+ if (unlikely(err))
4982+ goto out;
4983+
4984+ /*
4985+ * vfsmount_lock is unnecessary since this is a traverse in a single
4986+ * mount
4987+ */
1facf9fc 4988+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
4989+ dentry = dentry->d_parent; /* rename_lock is locked */
4990+ spin_lock(&dentry->d_lock);
4991+ if (dentry->d_count
4992+ && (!test || test(dentry, arg)))
1facf9fc 4993+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
4994+ spin_unlock(&dentry->d_lock);
4995+ if (unlikely(err))
4996+ break;
1facf9fc 4997+ }
4998+
4f0767ce 4999+out:
027c5e7a 5000+ write_sequnlock(&rename_lock);
1facf9fc 5001+ return err;
5002+}
5003+
027c5e7a
AM
5004+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
5005+{
5006+ return au_di(dentry) && dentry->d_sb == arg;
5007+}
5008+
5009+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5010+ struct dentry *dentry, int do_include)
5011+{
5012+ return au_dcsub_pages_rev(dpages, dentry, do_include,
5013+ au_dcsub_dpages_aufs, dentry->d_sb);
5014+}
5015+
4a4d8108 5016+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 5017+{
4a4d8108
AM
5018+ struct path path[2] = {
5019+ {
5020+ .dentry = d1
5021+ },
5022+ {
5023+ .dentry = d2
5024+ }
5025+ };
1facf9fc 5026+
4a4d8108 5027+ return path_is_under(path + 0, path + 1);
1facf9fc 5028+}
7f207e10
AM
5029diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
5030--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5031+++ linux/fs/aufs/dcsub.h 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5032@@ -0,0 +1,95 @@
1facf9fc 5033+/*
027c5e7a 5034+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5035+ *
5036+ * This program, aufs is free software; you can redistribute it and/or modify
5037+ * it under the terms of the GNU General Public License as published by
5038+ * the Free Software Foundation; either version 2 of the License, or
5039+ * (at your option) any later version.
dece6358
AM
5040+ *
5041+ * This program is distributed in the hope that it will be useful,
5042+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5043+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5044+ * GNU General Public License for more details.
5045+ *
5046+ * You should have received a copy of the GNU General Public License
5047+ * along with this program; if not, write to the Free Software
5048+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5049+ */
5050+
5051+/*
5052+ * sub-routines for dentry cache
5053+ */
5054+
5055+#ifndef __AUFS_DCSUB_H__
5056+#define __AUFS_DCSUB_H__
5057+
5058+#ifdef __KERNEL__
5059+
7f207e10 5060+#include <linux/dcache.h>
027c5e7a 5061+#include <linux/fs.h>
dece6358
AM
5062+#include <linux/types.h>
5063+
5064+struct dentry;
1facf9fc 5065+
5066+struct au_dpage {
5067+ int ndentry;
5068+ struct dentry **dentries;
5069+};
5070+
5071+struct au_dcsub_pages {
5072+ int ndpage;
5073+ struct au_dpage *dpages;
5074+};
5075+
5076+/* ---------------------------------------------------------------------- */
5077+
7f207e10 5078+/* dcsub.c */
1facf9fc 5079+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
5080+void au_dpages_free(struct au_dcsub_pages *dpages);
5081+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
5082+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
5083+ au_dpages_test test, void *arg);
5084+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
5085+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
5086+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
5087+ struct dentry *dentry, int do_include);
4a4d8108 5088+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 5089+
7f207e10
AM
5090+/* ---------------------------------------------------------------------- */
5091+
027c5e7a
AM
5092+static inline int au_d_hashed_positive(struct dentry *d)
5093+{
5094+ int err;
5095+ struct inode *inode = d->d_inode;
5096+ err = 0;
5097+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink))
5098+ err = -ENOENT;
5099+ return err;
5100+}
5101+
5102+static inline int au_d_alive(struct dentry *d)
5103+{
5104+ int err;
5105+ struct inode *inode;
5106+ err = 0;
5107+ if (!IS_ROOT(d))
5108+ err = au_d_hashed_positive(d);
5109+ else {
5110+ inode = d->d_inode;
5111+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink))
5112+ err = -ENOENT;
5113+ }
5114+ return err;
5115+}
5116+
5117+static inline int au_alive_dir(struct dentry *d)
7f207e10 5118+{
027c5e7a
AM
5119+ int err;
5120+ err = au_d_alive(d);
5121+ if (unlikely(err || IS_DEADDIR(d->d_inode)))
5122+ err = -ENOENT;
5123+ return err;
7f207e10
AM
5124+}
5125+
1facf9fc 5126+#endif /* __KERNEL__ */
5127+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
5128diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
5129--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
5130+++ linux/fs/aufs/debug.c 2011-08-24 13:30:24.731313534 +0200
5131@@ -0,0 +1,486 @@
1facf9fc 5132+/*
027c5e7a 5133+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5134+ *
5135+ * This program, aufs is free software; you can redistribute it and/or modify
5136+ * it under the terms of the GNU General Public License as published by
5137+ * the Free Software Foundation; either version 2 of the License, or
5138+ * (at your option) any later version.
dece6358
AM
5139+ *
5140+ * This program is distributed in the hope that it will be useful,
5141+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5142+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5143+ * GNU General Public License for more details.
5144+ *
5145+ * You should have received a copy of the GNU General Public License
5146+ * along with this program; if not, write to the Free Software
5147+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5148+ */
5149+
5150+/*
5151+ * debug print functions
5152+ */
5153+
dece6358 5154+#include <linux/module.h>
7f207e10 5155+#include <linux/vt_kern.h>
1facf9fc 5156+#include "aufs.h"
5157+
5158+int aufs_debug;
5159+MODULE_PARM_DESC(debug, "debug print");
5160+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
5161+
5162+char *au_plevel = KERN_DEBUG;
e49829fe
JR
5163+#define dpri(fmt, ...) do { \
5164+ if ((au_plevel \
5165+ && strcmp(au_plevel, KERN_DEBUG)) \
5166+ || au_debug_test()) \
5167+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 5168+} while (0)
5169+
5170+/* ---------------------------------------------------------------------- */
5171+
5172+void au_dpri_whlist(struct au_nhash *whlist)
5173+{
5174+ unsigned long ul, n;
5175+ struct hlist_head *head;
5176+ struct au_vdir_wh *tpos;
5177+ struct hlist_node *pos;
5178+
5179+ n = whlist->nh_num;
5180+ head = whlist->nh_head;
5181+ for (ul = 0; ul < n; ul++) {
5182+ hlist_for_each_entry(tpos, pos, head, wh_hash)
5183+ dpri("b%d, %.*s, %d\n",
5184+ tpos->wh_bindex,
5185+ tpos->wh_str.len, tpos->wh_str.name,
5186+ tpos->wh_str.len);
5187+ head++;
5188+ }
5189+}
5190+
5191+void au_dpri_vdir(struct au_vdir *vdir)
5192+{
5193+ unsigned long ul;
5194+ union au_vdir_deblk_p p;
5195+ unsigned char *o;
5196+
5197+ if (!vdir || IS_ERR(vdir)) {
5198+ dpri("err %ld\n", PTR_ERR(vdir));
5199+ return;
5200+ }
5201+
5202+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
5203+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
5204+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
5205+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
5206+ p.deblk = vdir->vd_deblk[ul];
5207+ o = p.deblk;
5208+ dpri("[%lu]: %p\n", ul, o);
5209+ }
5210+}
5211+
53392da6 5212+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 5213+ struct dentry *wh)
5214+{
5215+ char *n = NULL;
5216+ int l = 0;
5217+
5218+ if (!inode || IS_ERR(inode)) {
5219+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
5220+ return -1;
5221+ }
5222+
5223+ /* the type of i_blocks depends upon CONFIG_LBDAF */
5224+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
5225+ && sizeof(inode->i_blocks) != sizeof(u64));
5226+ if (wh) {
5227+ n = (void *)wh->d_name.name;
5228+ l = wh->d_name.len;
5229+ }
5230+
53392da6
AM
5231+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
5232+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
5233+ bindex, inode,
1facf9fc 5234+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
5235+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
5236+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 5237+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 5238+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
5239+ inode->i_state, inode->i_flags, inode->i_version,
5240+ inode->i_generation,
1facf9fc 5241+ l ? ", wh " : "", l, n);
5242+ return 0;
5243+}
5244+
5245+void au_dpri_inode(struct inode *inode)
5246+{
5247+ struct au_iinfo *iinfo;
5248+ aufs_bindex_t bindex;
53392da6 5249+ int err, hn;
1facf9fc 5250+
53392da6 5251+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 5252+ if (err || !au_test_aufs(inode->i_sb))
5253+ return;
5254+
5255+ iinfo = au_ii(inode);
5256+ if (!iinfo)
5257+ return;
5258+ dpri("i-1: bstart %d, bend %d, gen %d\n",
5259+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
5260+ if (iinfo->ii_bstart < 0)
5261+ return;
53392da6
AM
5262+ hn = 0;
5263+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
5264+ hn = !!au_hn(iinfo->ii_hinode + bindex);
5265+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 5266+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 5267+ }
1facf9fc 5268+}
5269+
2cbb1c4b
JR
5270+void au_dpri_dalias(struct inode *inode)
5271+{
5272+ struct dentry *d;
5273+
5274+ spin_lock(&inode->i_lock);
5275+ list_for_each_entry(d, &inode->i_dentry, d_alias)
5276+ au_dpri_dentry(d);
5277+ spin_unlock(&inode->i_lock);
5278+}
5279+
1facf9fc 5280+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
5281+{
5282+ struct dentry *wh = NULL;
53392da6 5283+ int hn;
1facf9fc 5284+
5285+ if (!dentry || IS_ERR(dentry)) {
5286+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
5287+ return -1;
5288+ }
5289+ /* do not call dget_parent() here */
027c5e7a 5290+ /* note: access d_xxx without d_lock */
1facf9fc 5291+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
5292+ bindex,
5293+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
5294+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
027c5e7a 5295+ dentry->d_count, dentry->d_flags);
53392da6 5296+ hn = -1;
1facf9fc 5297+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
5298+ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
53392da6
AM
5299+ if (iinfo) {
5300+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 5301+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 5302+ }
1facf9fc 5303+ }
53392da6 5304+ do_pri_inode(bindex, dentry->d_inode, hn, wh);
1facf9fc 5305+ return 0;
5306+}
5307+
5308+void au_dpri_dentry(struct dentry *dentry)
5309+{
5310+ struct au_dinfo *dinfo;
5311+ aufs_bindex_t bindex;
5312+ int err;
4a4d8108 5313+ struct au_hdentry *hdp;
1facf9fc 5314+
5315+ err = do_pri_dentry(-1, dentry);
5316+ if (err || !au_test_aufs(dentry->d_sb))
5317+ return;
5318+
5319+ dinfo = au_di(dentry);
5320+ if (!dinfo)
5321+ return;
5322+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
5323+ dinfo->di_bstart, dinfo->di_bend,
5324+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
5325+ if (dinfo->di_bstart < 0)
5326+ return;
4a4d8108 5327+ hdp = dinfo->di_hdentry;
1facf9fc 5328+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 5329+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 5330+}
5331+
5332+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
5333+{
5334+ char a[32];
5335+
5336+ if (!file || IS_ERR(file)) {
5337+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
5338+ return -1;
5339+ }
5340+ a[0] = 0;
5341+ if (bindex < 0
5342+ && file->f_dentry
5343+ && au_test_aufs(file->f_dentry->d_sb)
5344+ && au_fi(file))
e49829fe 5345+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 5346+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 5347+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 5348+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 5349+ file->f_version, file->f_pos, a);
1facf9fc 5350+ if (file->f_dentry)
5351+ do_pri_dentry(bindex, file->f_dentry);
5352+ return 0;
5353+}
5354+
5355+void au_dpri_file(struct file *file)
5356+{
5357+ struct au_finfo *finfo;
4a4d8108
AM
5358+ struct au_fidir *fidir;
5359+ struct au_hfile *hfile;
1facf9fc 5360+ aufs_bindex_t bindex;
5361+ int err;
5362+
5363+ err = do_pri_file(-1, file);
5364+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
5365+ return;
5366+
5367+ finfo = au_fi(file);
5368+ if (!finfo)
5369+ return;
4a4d8108 5370+ if (finfo->fi_btop < 0)
1facf9fc 5371+ return;
4a4d8108
AM
5372+ fidir = finfo->fi_hdir;
5373+ if (!fidir)
5374+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
5375+ else
e49829fe
JR
5376+ for (bindex = finfo->fi_btop;
5377+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
5378+ bindex++) {
5379+ hfile = fidir->fd_hfile + bindex;
5380+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
5381+ }
1facf9fc 5382+}
5383+
5384+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
5385+{
5386+ struct vfsmount *mnt;
5387+ struct super_block *sb;
5388+
5389+ if (!br || IS_ERR(br))
5390+ goto out;
5391+ mnt = br->br_mnt;
5392+ if (!mnt || IS_ERR(mnt))
5393+ goto out;
5394+ sb = mnt->mnt_sb;
5395+ if (!sb || IS_ERR(sb))
5396+ goto out;
5397+
5398+ dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
b752ccd1 5399+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 5400+ "xino %d\n",
5401+ bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
5402+ au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 5403+ sb->s_flags, sb->s_count,
1facf9fc 5404+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
5405+ return 0;
5406+
4f0767ce 5407+out:
1facf9fc 5408+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
5409+ return -1;
5410+}
5411+
5412+void au_dpri_sb(struct super_block *sb)
5413+{
5414+ struct au_sbinfo *sbinfo;
5415+ aufs_bindex_t bindex;
5416+ int err;
5417+ /* to reduce stack size */
5418+ struct {
5419+ struct vfsmount mnt;
5420+ struct au_branch fake;
5421+ } *a;
5422+
5423+ /* this function can be called from magic sysrq */
5424+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
5425+ if (unlikely(!a)) {
5426+ dpri("no memory\n");
5427+ return;
5428+ }
5429+
5430+ a->mnt.mnt_sb = sb;
5431+ a->fake.br_perm = 0;
5432+ a->fake.br_mnt = &a->mnt;
5433+ a->fake.br_xino.xi_file = NULL;
5434+ atomic_set(&a->fake.br_count, 0);
5435+ smp_mb(); /* atomic_set */
5436+ err = do_pri_br(-1, &a->fake);
5437+ kfree(a);
5438+ dpri("dev 0x%x\n", sb->s_dev);
5439+ if (err || !au_test_aufs(sb))
5440+ return;
5441+
5442+ sbinfo = au_sbi(sb);
5443+ if (!sbinfo)
5444+ return;
5445+ dpri("nw %d, gen %u, kobj %d\n",
5446+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
5447+ atomic_read(&sbinfo->si_kobj.kref.refcount));
5448+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
5449+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
5450+}
5451+
5452+/* ---------------------------------------------------------------------- */
5453+
5454+void au_dbg_sleep_jiffy(int jiffy)
5455+{
5456+ while (jiffy)
5457+ jiffy = schedule_timeout_uninterruptible(jiffy);
5458+}
5459+
5460+void au_dbg_iattr(struct iattr *ia)
5461+{
5462+#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \
5463+ dpri(#name "\n")
5464+ AuBit(MODE);
5465+ AuBit(UID);
5466+ AuBit(GID);
5467+ AuBit(SIZE);
5468+ AuBit(ATIME);
5469+ AuBit(MTIME);
5470+ AuBit(CTIME);
5471+ AuBit(ATIME_SET);
5472+ AuBit(MTIME_SET);
5473+ AuBit(FORCE);
5474+ AuBit(ATTR_FLAG);
5475+ AuBit(KILL_SUID);
5476+ AuBit(KILL_SGID);
5477+ AuBit(FILE);
5478+ AuBit(KILL_PRIV);
5479+ AuBit(OPEN);
5480+ AuBit(TIMES_SET);
5481+#undef AuBit
5482+ dpri("ia_file %p\n", ia->ia_file);
5483+}
5484+
5485+/* ---------------------------------------------------------------------- */
5486+
027c5e7a
AM
5487+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
5488+{
5489+ struct inode *h_inode, *inode = dentry->d_inode;
5490+ struct dentry *h_dentry;
5491+ aufs_bindex_t bindex, bend, bi;
5492+
5493+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
5494+ return;
5495+
5496+ bend = au_dbend(dentry);
5497+ bi = au_ibend(inode);
5498+ if (bi < bend)
5499+ bend = bi;
5500+ bindex = au_dbstart(dentry);
5501+ bi = au_ibstart(inode);
5502+ if (bi > bindex)
5503+ bindex = bi;
5504+
5505+ for (; bindex <= bend; bindex++) {
5506+ h_dentry = au_h_dptr(dentry, bindex);
5507+ if (!h_dentry)
5508+ continue;
5509+ h_inode = au_h_iptr(inode, bindex);
5510+ if (unlikely(h_inode != h_dentry->d_inode)) {
5511+ int old = au_debug_test();
5512+ if (!old)
5513+ au_debug(1);
5514+ AuDbg("b%d, %s:%d\n", bindex, func, line);
5515+ AuDbgDentry(dentry);
5516+ AuDbgInode(inode);
5517+ if (!old)
5518+ au_debug(0);
5519+ BUG();
5520+ }
5521+ }
5522+}
5523+
1facf9fc 5524+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
5525+{
5526+ struct dentry *parent;
5527+
5528+ parent = dget_parent(dentry);
027c5e7a
AM
5529+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
5530+ AuDebugOn(IS_ROOT(dentry));
5531+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5532+ dput(parent);
5533+}
5534+
5535+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
5536+{
5537+ struct dentry *parent;
027c5e7a 5538+ struct inode *inode;
1facf9fc 5539+
5540+ parent = dget_parent(dentry);
027c5e7a
AM
5541+ inode = dentry->d_inode;
5542+ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode));
5543+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 5544+ dput(parent);
5545+}
5546+
5547+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5548+{
5549+ int err, i, j;
5550+ struct au_dcsub_pages dpages;
5551+ struct au_dpage *dpage;
5552+ struct dentry **dentries;
5553+
5554+ err = au_dpages_init(&dpages, GFP_NOFS);
5555+ AuDebugOn(err);
027c5e7a 5556+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 5557+ AuDebugOn(err);
5558+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
5559+ dpage = dpages.dpages + i;
5560+ dentries = dpage->dentries;
5561+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 5562+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 5563+ }
5564+ au_dpages_free(&dpages);
5565+}
5566+
1facf9fc 5567+void au_dbg_verify_kthread(void)
5568+{
53392da6 5569+ if (au_wkq_test()) {
1facf9fc 5570+ au_dbg_blocked();
7f207e10 5571+ WARN_ON(1);
1facf9fc 5572+ }
5573+}
5574+
5575+/* ---------------------------------------------------------------------- */
5576+
5577+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
5578+{
5579+#ifdef AuForceNoPlink
5580+ au_opt_clr(sbinfo->si_mntflags, PLINK);
5581+#endif
5582+#ifdef AuForceNoXino
5583+ au_opt_clr(sbinfo->si_mntflags, XINO);
5584+#endif
5585+#ifdef AuForceNoRefrof
5586+ au_opt_clr(sbinfo->si_mntflags, REFROF);
5587+#endif
4a4d8108
AM
5588+#ifdef AuForceHnotify
5589+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY);
1facf9fc 5590+#endif
1308ab2a 5591+#ifdef AuForceRd0
5592+ sbinfo->si_rdblk = 0;
5593+ sbinfo->si_rdhash = 0;
5594+#endif
1facf9fc 5595+}
5596+
5597+int __init au_debug_init(void)
5598+{
5599+ aufs_bindex_t bindex;
5600+ struct au_vdir_destr destr;
5601+
5602+ bindex = -1;
5603+ AuDebugOn(bindex >= 0);
5604+
5605+ destr.len = -1;
5606+ AuDebugOn(destr.len < NAME_MAX);
5607+
5608+#ifdef CONFIG_4KSTACKS
4a4d8108 5609+ pr_warning("CONFIG_4KSTACKS is defined.\n");
1facf9fc 5610+#endif
5611+
5612+#ifdef AuForceNoBrs
5613+ sysaufs_brs = 0;
5614+#endif
5615+
5616+ return 0;
5617+}
7f207e10
AM
5618diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
5619--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
53392da6 5620+++ linux/fs/aufs/debug.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 5621@@ -0,0 +1,252 @@
1facf9fc 5622+/*
027c5e7a 5623+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5624+ *
5625+ * This program, aufs is free software; you can redistribute it and/or modify
5626+ * it under the terms of the GNU General Public License as published by
5627+ * the Free Software Foundation; either version 2 of the License, or
5628+ * (at your option) any later version.
dece6358
AM
5629+ *
5630+ * This program is distributed in the hope that it will be useful,
5631+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5632+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5633+ * GNU General Public License for more details.
5634+ *
5635+ * You should have received a copy of the GNU General Public License
5636+ * along with this program; if not, write to the Free Software
5637+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5638+ */
5639+
5640+/*
5641+ * debug print functions
5642+ */
5643+
5644+#ifndef __AUFS_DEBUG_H__
5645+#define __AUFS_DEBUG_H__
5646+
5647+#ifdef __KERNEL__
5648+
1308ab2a 5649+#include <asm/system.h>
dece6358 5650+#include <linux/bug.h>
7f207e10 5651+/* #include <linux/err.h> */
1308ab2a 5652+#include <linux/init.h>
4a4d8108
AM
5653+#include <linux/module.h>
5654+#include <linux/kallsyms.h>
7f207e10 5655+/* #include <linux/kernel.h> */
1facf9fc 5656+#include <linux/delay.h>
7f207e10 5657+/* #include <linux/kd.h> */
1facf9fc 5658+#include <linux/sysrq.h>
5659+#include <linux/aufs_type.h>
5660+
4a4d8108
AM
5661+#include <asm/system.h>
5662+
1facf9fc 5663+#ifdef CONFIG_AUFS_DEBUG
5664+#define AuDebugOn(a) BUG_ON(a)
5665+
5666+/* module parameter */
5667+extern int aufs_debug;
5668+static inline void au_debug(int n)
5669+{
5670+ aufs_debug = n;
5671+ smp_mb();
5672+}
5673+
5674+static inline int au_debug_test(void)
5675+{
5676+ return aufs_debug;
5677+}
5678+#else
5679+#define AuDebugOn(a) do {} while (0)
4a4d8108
AM
5680+AuStubVoid(au_debug, int n)
5681+AuStubInt0(au_debug_test, void)
1facf9fc 5682+#endif /* CONFIG_AUFS_DEBUG */
5683+
5684+/* ---------------------------------------------------------------------- */
5685+
5686+/* debug print */
5687+
4a4d8108 5688+#define AuDbg(fmt, ...) do { \
1facf9fc 5689+ if (au_debug_test()) \
4a4d8108 5690+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 5691+} while (0)
4a4d8108
AM
5692+#define AuLabel(l) AuDbg(#l "\n")
5693+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
5694+#define AuWarn1(fmt, ...) do { \
1facf9fc 5695+ static unsigned char _c; \
5696+ if (!_c++) \
4a4d8108 5697+ pr_warning(fmt, ##__VA_ARGS__); \
1facf9fc 5698+} while (0)
5699+
4a4d8108 5700+#define AuErr1(fmt, ...) do { \
1facf9fc 5701+ static unsigned char _c; \
5702+ if (!_c++) \
4a4d8108 5703+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 5704+} while (0)
5705+
4a4d8108 5706+#define AuIOErr1(fmt, ...) do { \
1facf9fc 5707+ static unsigned char _c; \
5708+ if (!_c++) \
4a4d8108 5709+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 5710+} while (0)
5711+
5712+#define AuUnsupportMsg "This operation is not supported." \
5713+ " Please report this application to aufs-users ML."
4a4d8108
AM
5714+#define AuUnsupport(fmt, ...) do { \
5715+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 5716+ dump_stack(); \
5717+} while (0)
5718+
5719+#define AuTraceErr(e) do { \
5720+ if (unlikely((e) < 0)) \
5721+ AuDbg("err %d\n", (int)(e)); \
5722+} while (0)
5723+
5724+#define AuTraceErrPtr(p) do { \
5725+ if (IS_ERR(p)) \
5726+ AuDbg("err %ld\n", PTR_ERR(p)); \
5727+} while (0)
5728+
5729+/* dirty macros for debug print, use with "%.*s" and caution */
5730+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
5731+#define AuDLNPair(d) AuLNPair(&(d)->d_name)
5732+
5733+/* ---------------------------------------------------------------------- */
5734+
5735+struct au_sbinfo;
5736+struct au_finfo;
dece6358 5737+struct dentry;
1facf9fc 5738+#ifdef CONFIG_AUFS_DEBUG
5739+extern char *au_plevel;
5740+struct au_nhash;
5741+void au_dpri_whlist(struct au_nhash *whlist);
5742+struct au_vdir;
5743+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 5744+struct inode;
1facf9fc 5745+void au_dpri_inode(struct inode *inode);
2cbb1c4b 5746+void au_dpri_dalias(struct inode *inode);
1facf9fc 5747+void au_dpri_dentry(struct dentry *dentry);
dece6358 5748+struct file;
1facf9fc 5749+void au_dpri_file(struct file *filp);
dece6358 5750+struct super_block;
1facf9fc 5751+void au_dpri_sb(struct super_block *sb);
5752+
5753+void au_dbg_sleep_jiffy(int jiffy);
dece6358 5754+struct iattr;
1facf9fc 5755+void au_dbg_iattr(struct iattr *ia);
5756+
027c5e7a
AM
5757+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
5758+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 5759+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5760+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5761+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 5762+void au_dbg_verify_kthread(void);
5763+
5764+int __init au_debug_init(void);
5765+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5766+#define AuDbgWhlist(w) do { \
5767+ AuDbg(#w "\n"); \
5768+ au_dpri_whlist(w); \
5769+} while (0)
5770+
5771+#define AuDbgVdir(v) do { \
5772+ AuDbg(#v "\n"); \
5773+ au_dpri_vdir(v); \
5774+} while (0)
5775+
5776+#define AuDbgInode(i) do { \
5777+ AuDbg(#i "\n"); \
5778+ au_dpri_inode(i); \
5779+} while (0)
5780+
2cbb1c4b
JR
5781+#define AuDbgDAlias(i) do { \
5782+ AuDbg(#i "\n"); \
5783+ au_dpri_dalias(i); \
5784+} while (0)
5785+
1facf9fc 5786+#define AuDbgDentry(d) do { \
5787+ AuDbg(#d "\n"); \
5788+ au_dpri_dentry(d); \
5789+} while (0)
5790+
5791+#define AuDbgFile(f) do { \
5792+ AuDbg(#f "\n"); \
5793+ au_dpri_file(f); \
5794+} while (0)
5795+
5796+#define AuDbgSb(sb) do { \
5797+ AuDbg(#sb "\n"); \
5798+ au_dpri_sb(sb); \
5799+} while (0)
5800+
5801+#define AuDbgSleep(sec) do { \
5802+ AuDbg("sleep %d sec\n", sec); \
5803+ ssleep(sec); \
5804+} while (0)
5805+
5806+#define AuDbgSleepJiffy(jiffy) do { \
5807+ AuDbg("sleep %d jiffies\n", jiffy); \
5808+ au_dbg_sleep_jiffy(jiffy); \
5809+} while (0)
5810+
5811+#define AuDbgIAttr(ia) do { \
5812+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5813+ au_dbg_iattr(ia); \
5814+} while (0)
4a4d8108
AM
5815+
5816+#define AuDbgSym(addr) do { \
5817+ char sym[KSYM_SYMBOL_LEN]; \
5818+ sprint_symbol(sym, (unsigned long)addr); \
5819+ AuDbg("%s\n", sym); \
5820+} while (0)
5821+
5822+#define AuInfoSym(addr) do { \
5823+ char sym[KSYM_SYMBOL_LEN]; \
5824+ sprint_symbol(sym, (unsigned long)addr); \
5825+ AuInfo("%s\n", sym); \
5826+} while (0)
1facf9fc 5827+#else
027c5e7a 5828+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
5829+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
5830+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
5831+ unsigned int sigen)
5832+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
5833+AuStubVoid(au_dbg_verify_kthread, void)
5834+AuStubInt0(__init au_debug_init, void)
5835+AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo)
1facf9fc 5836+
1facf9fc 5837+#define AuDbgWhlist(w) do {} while (0)
5838+#define AuDbgVdir(v) do {} while (0)
5839+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 5840+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 5841+#define AuDbgDentry(d) do {} while (0)
5842+#define AuDbgFile(f) do {} while (0)
5843+#define AuDbgSb(sb) do {} while (0)
5844+#define AuDbgSleep(sec) do {} while (0)
5845+#define AuDbgSleepJiffy(jiffy) do {} while (0)
5846+#define AuDbgIAttr(ia) do {} while (0)
4a4d8108
AM
5847+#define AuDbgSym(addr) do {} while (0)
5848+#define AuInfoSym(addr) do {} while (0)
1facf9fc 5849+#endif /* CONFIG_AUFS_DEBUG */
5850+
5851+/* ---------------------------------------------------------------------- */
5852+
5853+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5854+int __init au_sysrq_init(void);
5855+void au_sysrq_fin(void);
5856+
5857+#ifdef CONFIG_HW_CONSOLE
5858+#define au_dbg_blocked() do { \
5859+ WARN_ON(1); \
0c5527e5 5860+ handle_sysrq('w'); \
1facf9fc 5861+} while (0)
5862+#else
4a4d8108 5863+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5864+#endif
5865+
5866+#else
4a4d8108
AM
5867+AuStubInt0(__init au_sysrq_init, void)
5868+AuStubVoid(au_sysrq_fin, void)
5869+AuStubVoid(au_dbg_blocked, void)
1facf9fc 5870+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5871+
5872+#endif /* __KERNEL__ */
5873+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
5874diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
5875--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
53392da6 5876+++ linux/fs/aufs/dentry.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 5877@@ -0,0 +1,1140 @@
1facf9fc 5878+/*
027c5e7a 5879+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 5880+ *
5881+ * This program, aufs is free software; you can redistribute it and/or modify
5882+ * it under the terms of the GNU General Public License as published by
5883+ * the Free Software Foundation; either version 2 of the License, or
5884+ * (at your option) any later version.
dece6358
AM
5885+ *
5886+ * This program is distributed in the hope that it will be useful,
5887+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5888+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5889+ * GNU General Public License for more details.
5890+ *
5891+ * You should have received a copy of the GNU General Public License
5892+ * along with this program; if not, write to the Free Software
5893+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5894+ */
5895+
5896+/*
5897+ * lookup and dentry operations
5898+ */
5899+
dece6358 5900+#include <linux/namei.h>
1facf9fc 5901+#include "aufs.h"
5902+
5903+static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5904+{
5905+ if (nd) {
5906+ *h_nd = *nd;
5907+
5908+ /*
5909+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5910+ * due to whiteout and branch permission.
5911+ */
5912+ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
b752ccd1 5913+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
1facf9fc 5914+ /* unnecessary? */
5915+ h_nd->intent.open.file = NULL;
5916+ } else
5917+ memset(h_nd, 0, sizeof(*h_nd));
5918+}
5919+
5920+struct au_lkup_one_args {
5921+ struct dentry **errp;
5922+ struct qstr *name;
5923+ struct dentry *h_parent;
5924+ struct au_branch *br;
5925+ struct nameidata *nd;
5926+};
5927+
5928+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5929+ struct au_branch *br, struct nameidata *nd)
5930+{
5931+ struct dentry *h_dentry;
5932+ int err;
5933+ struct nameidata h_nd;
5934+
5935+ if (au_test_fs_null_nd(h_parent->d_sb))
5936+ return vfsub_lookup_one_len(name->name, h_parent, name->len);
5937+
5938+ au_h_nd(&h_nd, nd);
5939+ h_nd.path.dentry = h_parent;
5940+ h_nd.path.mnt = br->br_mnt;
5941+
2cbb1c4b 5942+ err = vfsub_name_hash(name->name, &h_nd.last, name->len);
1facf9fc 5943+ h_dentry = ERR_PTR(err);
5944+ if (!err) {
5945+ path_get(&h_nd.path);
5946+ h_dentry = vfsub_lookup_hash(&h_nd);
5947+ path_put(&h_nd.path);
5948+ }
5949+
4a4d8108 5950+ AuTraceErrPtr(h_dentry);
1facf9fc 5951+ return h_dentry;
5952+}
5953+
5954+static void au_call_lkup_one(void *args)
5955+{
5956+ struct au_lkup_one_args *a = args;
5957+ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5958+}
5959+
5960+#define AuLkup_ALLOW_NEG 1
5961+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
5962+#define au_fset_lkup(flags, name) \
5963+ do { (flags) |= AuLkup_##name; } while (0)
5964+#define au_fclr_lkup(flags, name) \
5965+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 5966+
5967+struct au_do_lookup_args {
5968+ unsigned int flags;
5969+ mode_t type;
5970+ struct nameidata *nd;
5971+};
5972+
5973+/*
5974+ * returns positive/negative dentry, NULL or an error.
5975+ * NULL means whiteout-ed or not-found.
5976+ */
5977+static struct dentry*
5978+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
5979+ aufs_bindex_t bindex, struct qstr *wh_name,
5980+ struct au_do_lookup_args *args)
5981+{
5982+ struct dentry *h_dentry;
5983+ struct inode *h_inode, *inode;
1facf9fc 5984+ struct au_branch *br;
5985+ int wh_found, opq;
5986+ unsigned char wh_able;
5987+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
5988+
1facf9fc 5989+ wh_found = 0;
5990+ br = au_sbr(dentry->d_sb, bindex);
5991+ wh_able = !!au_br_whable(br->br_perm);
5992+ if (wh_able)
5993+ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
5994+ h_dentry = ERR_PTR(wh_found);
5995+ if (!wh_found)
5996+ goto real_lookup;
5997+ if (unlikely(wh_found < 0))
5998+ goto out;
5999+
6000+ /* We found a whiteout */
6001+ /* au_set_dbend(dentry, bindex); */
6002+ au_set_dbwh(dentry, bindex);
6003+ if (!allow_neg)
6004+ return NULL; /* success */
6005+
4f0767ce 6006+real_lookup:
4a4d8108 6007+ h_dentry = au_lkup_one(&dentry->d_name, h_parent, br, args->nd);
1facf9fc 6008+ if (IS_ERR(h_dentry))
6009+ goto out;
6010+
6011+ h_inode = h_dentry->d_inode;
6012+ if (!h_inode) {
6013+ if (!allow_neg)
6014+ goto out_neg;
6015+ } else if (wh_found
6016+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
6017+ goto out_neg;
6018+
6019+ if (au_dbend(dentry) <= bindex)
6020+ au_set_dbend(dentry, bindex);
6021+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
6022+ au_set_dbstart(dentry, bindex);
6023+ au_set_h_dptr(dentry, bindex, h_dentry);
6024+
6025+ inode = dentry->d_inode;
6026+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
6027+ || (inode && !S_ISDIR(inode->i_mode)))
6028+ goto out; /* success */
6029+
6030+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
6031+ opq = au_diropq_test(h_dentry, br);
6032+ mutex_unlock(&h_inode->i_mutex);
6033+ if (opq > 0)
6034+ au_set_dbdiropq(dentry, bindex);
6035+ else if (unlikely(opq < 0)) {
6036+ au_set_h_dptr(dentry, bindex, NULL);
6037+ h_dentry = ERR_PTR(opq);
6038+ }
6039+ goto out;
6040+
4f0767ce 6041+out_neg:
1facf9fc 6042+ dput(h_dentry);
6043+ h_dentry = NULL;
4f0767ce 6044+out:
1facf9fc 6045+ return h_dentry;
6046+}
6047+
dece6358
AM
6048+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
6049+{
6050+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
6051+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
6052+ return -EPERM;
6053+ return 0;
6054+}
6055+
1facf9fc 6056+/*
6057+ * returns the number of lower positive dentries,
6058+ * otherwise an error.
6059+ * can be called at unlinking with @type set to zero.
6060+ */
6061+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6062+ struct nameidata *nd)
6063+{
6064+ int npositive, err;
6065+ aufs_bindex_t bindex, btail, bdiropq;
6066+ unsigned char isdir;
6067+ struct qstr whname;
6068+ struct au_do_lookup_args args = {
6069+ .flags = 0,
6070+ .type = type,
6071+ .nd = nd
6072+ };
6073+ const struct qstr *name = &dentry->d_name;
6074+ struct dentry *parent;
6075+ struct inode *inode;
6076+
dece6358
AM
6077+ err = au_test_shwh(dentry->d_sb, name);
6078+ if (unlikely(err))
1facf9fc 6079+ goto out;
6080+
6081+ err = au_wh_name_alloc(&whname, name);
6082+ if (unlikely(err))
6083+ goto out;
6084+
6085+ inode = dentry->d_inode;
6086+ isdir = !!(inode && S_ISDIR(inode->i_mode));
6087+ if (!type)
6088+ au_fset_lkup(args.flags, ALLOW_NEG);
6089+
6090+ npositive = 0;
4a4d8108 6091+ parent = dget_parent(dentry);
1facf9fc 6092+ btail = au_dbtaildir(parent);
6093+ for (bindex = bstart; bindex <= btail; bindex++) {
6094+ struct dentry *h_parent, *h_dentry;
6095+ struct inode *h_inode, *h_dir;
6096+
6097+ h_dentry = au_h_dptr(dentry, bindex);
6098+ if (h_dentry) {
6099+ if (h_dentry->d_inode)
6100+ npositive++;
6101+ if (type != S_IFDIR)
6102+ break;
6103+ continue;
6104+ }
6105+ h_parent = au_h_dptr(parent, bindex);
6106+ if (!h_parent)
6107+ continue;
6108+ h_dir = h_parent->d_inode;
6109+ if (!h_dir || !S_ISDIR(h_dir->i_mode))
6110+ continue;
6111+
6112+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
6113+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
6114+ &args);
6115+ mutex_unlock(&h_dir->i_mutex);
6116+ err = PTR_ERR(h_dentry);
6117+ if (IS_ERR(h_dentry))
4a4d8108 6118+ goto out_parent;
1facf9fc 6119+ au_fclr_lkup(args.flags, ALLOW_NEG);
6120+
6121+ if (au_dbwh(dentry) >= 0)
6122+ break;
6123+ if (!h_dentry)
6124+ continue;
6125+ h_inode = h_dentry->d_inode;
6126+ if (!h_inode)
6127+ continue;
6128+ npositive++;
6129+ if (!args.type)
6130+ args.type = h_inode->i_mode & S_IFMT;
6131+ if (args.type != S_IFDIR)
6132+ break;
6133+ else if (isdir) {
6134+ /* the type of lower may be different */
6135+ bdiropq = au_dbdiropq(dentry);
6136+ if (bdiropq >= 0 && bdiropq <= bindex)
6137+ break;
6138+ }
6139+ }
6140+
6141+ if (npositive) {
6142+ AuLabel(positive);
6143+ au_update_dbstart(dentry);
6144+ }
6145+ err = npositive;
6146+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
027c5e7a 6147+ && au_dbstart(dentry) < 0)) {
1facf9fc 6148+ err = -EIO;
027c5e7a
AM
6149+ AuIOErr("both of real entry and whiteout found, %.*s, err %d\n",
6150+ AuDLNPair(dentry), err);
6151+ }
1facf9fc 6152+
4f0767ce 6153+out_parent:
4a4d8108 6154+ dput(parent);
1facf9fc 6155+ kfree(whname.name);
4f0767ce 6156+out:
1facf9fc 6157+ return err;
6158+}
6159+
6160+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6161+ struct au_branch *br)
6162+{
6163+ struct dentry *dentry;
6164+ int wkq_err;
6165+
6166+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
6167+ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
6168+ else {
6169+ struct au_lkup_one_args args = {
6170+ .errp = &dentry,
6171+ .name = name,
6172+ .h_parent = parent,
6173+ .br = br,
6174+ .nd = NULL
6175+ };
6176+
6177+ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
6178+ if (unlikely(wkq_err))
6179+ dentry = ERR_PTR(wkq_err);
6180+ }
6181+
6182+ return dentry;
6183+}
6184+
6185+/*
6186+ * lookup @dentry on @bindex which should be negative.
6187+ */
6188+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
6189+{
6190+ int err;
6191+ struct dentry *parent, *h_parent, *h_dentry;
1facf9fc 6192+
1facf9fc 6193+ parent = dget_parent(dentry);
6194+ h_parent = au_h_dptr(parent, bindex);
4a4d8108 6195+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent,
1facf9fc 6196+ au_sbr(dentry->d_sb, bindex));
6197+ err = PTR_ERR(h_dentry);
6198+ if (IS_ERR(h_dentry))
6199+ goto out;
6200+ if (unlikely(h_dentry->d_inode)) {
6201+ err = -EIO;
027c5e7a
AM
6202+ AuIOErr("%.*s should be negative on b%d.\n",
6203+ AuDLNPair(h_dentry), bindex);
1facf9fc 6204+ dput(h_dentry);
6205+ goto out;
6206+ }
6207+
4a4d8108 6208+ err = 0;
1facf9fc 6209+ if (bindex < au_dbstart(dentry))
6210+ au_set_dbstart(dentry, bindex);
6211+ if (au_dbend(dentry) < bindex)
6212+ au_set_dbend(dentry, bindex);
6213+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 6214+
4f0767ce 6215+out:
1facf9fc 6216+ dput(parent);
6217+ return err;
6218+}
6219+
6220+/* ---------------------------------------------------------------------- */
6221+
6222+/* subset of struct inode */
6223+struct au_iattr {
6224+ unsigned long i_ino;
6225+ /* unsigned int i_nlink; */
6226+ uid_t i_uid;
6227+ gid_t i_gid;
6228+ u64 i_version;
6229+/*
6230+ loff_t i_size;
6231+ blkcnt_t i_blocks;
6232+*/
6233+ umode_t i_mode;
6234+};
6235+
6236+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
6237+{
6238+ ia->i_ino = h_inode->i_ino;
6239+ /* ia->i_nlink = h_inode->i_nlink; */
6240+ ia->i_uid = h_inode->i_uid;
6241+ ia->i_gid = h_inode->i_gid;
6242+ ia->i_version = h_inode->i_version;
6243+/*
6244+ ia->i_size = h_inode->i_size;
6245+ ia->i_blocks = h_inode->i_blocks;
6246+*/
6247+ ia->i_mode = (h_inode->i_mode & S_IFMT);
6248+}
6249+
6250+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
6251+{
6252+ return ia->i_ino != h_inode->i_ino
6253+ /* || ia->i_nlink != h_inode->i_nlink */
6254+ || ia->i_uid != h_inode->i_uid
6255+ || ia->i_gid != h_inode->i_gid
6256+ || ia->i_version != h_inode->i_version
6257+/*
6258+ || ia->i_size != h_inode->i_size
6259+ || ia->i_blocks != h_inode->i_blocks
6260+*/
6261+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
6262+}
6263+
6264+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
6265+ struct au_branch *br)
6266+{
6267+ int err;
6268+ struct au_iattr ia;
6269+ struct inode *h_inode;
6270+ struct dentry *h_d;
6271+ struct super_block *h_sb;
6272+
6273+ err = 0;
6274+ memset(&ia, -1, sizeof(ia));
6275+ h_sb = h_dentry->d_sb;
6276+ h_inode = h_dentry->d_inode;
6277+ if (h_inode)
6278+ au_iattr_save(&ia, h_inode);
6279+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
6280+ /* nfs d_revalidate may return 0 for negative dentry */
6281+ /* fuse d_revalidate always returns 0 for negative dentry */
6282+ goto out;
6283+
6284+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
6285+ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
6286+ err = PTR_ERR(h_d);
6287+ if (IS_ERR(h_d))
6288+ goto out;
6289+
6290+ err = 0;
6291+ if (unlikely(h_d != h_dentry
6292+ || h_d->d_inode != h_inode
6293+ || (h_inode && au_iattr_test(&ia, h_inode))))
6294+ err = au_busy_or_stale();
6295+ dput(h_d);
6296+
4f0767ce 6297+out:
1facf9fc 6298+ AuTraceErr(err);
6299+ return err;
6300+}
6301+
6302+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6303+ struct dentry *h_parent, struct au_branch *br)
6304+{
6305+ int err;
6306+
6307+ err = 0;
027c5e7a
AM
6308+ if (udba == AuOpt_UDBA_REVAL
6309+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 6310+ IMustLock(h_dir);
6311+ err = (h_dentry->d_parent->d_inode != h_dir);
027c5e7a 6312+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 6313+ err = au_h_verify_dentry(h_dentry, h_parent, br);
6314+
6315+ return err;
6316+}
6317+
6318+/* ---------------------------------------------------------------------- */
6319+
027c5e7a 6320+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 6321+{
027c5e7a 6322+ int err;
1facf9fc 6323+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
6324+ struct au_hdentry tmp, *p, *q;
6325+ struct au_dinfo *dinfo;
6326+ struct super_block *sb;
1facf9fc 6327+
027c5e7a 6328+ DiMustWriteLock(dentry);
1308ab2a 6329+
027c5e7a
AM
6330+ sb = dentry->d_sb;
6331+ dinfo = au_di(dentry);
1facf9fc 6332+ bend = dinfo->di_bend;
6333+ bwh = dinfo->di_bwh;
6334+ bdiropq = dinfo->di_bdiropq;
027c5e7a 6335+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 6336+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 6337+ if (!p->hd_dentry)
1facf9fc 6338+ continue;
6339+
027c5e7a
AM
6340+ new_bindex = au_br_index(sb, p->hd_id);
6341+ if (new_bindex == bindex)
1facf9fc 6342+ continue;
1facf9fc 6343+
1facf9fc 6344+ if (dinfo->di_bwh == bindex)
6345+ bwh = new_bindex;
6346+ if (dinfo->di_bdiropq == bindex)
6347+ bdiropq = new_bindex;
6348+ if (new_bindex < 0) {
6349+ au_hdput(p);
6350+ p->hd_dentry = NULL;
6351+ continue;
6352+ }
6353+
6354+ /* swap two lower dentries, and loop again */
6355+ q = dinfo->di_hdentry + new_bindex;
6356+ tmp = *q;
6357+ *q = *p;
6358+ *p = tmp;
6359+ if (tmp.hd_dentry) {
6360+ bindex--;
6361+ p--;
6362+ }
6363+ }
6364+
1facf9fc 6365+ dinfo->di_bwh = -1;
6366+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
6367+ dinfo->di_bwh = bwh;
6368+
6369+ dinfo->di_bdiropq = -1;
6370+ if (bdiropq >= 0
6371+ && bdiropq <= au_sbend(sb)
6372+ && au_sbr_whable(sb, bdiropq))
6373+ dinfo->di_bdiropq = bdiropq;
6374+
027c5e7a
AM
6375+ err = -EIO;
6376+ dinfo->di_bstart = -1;
6377+ dinfo->di_bend = -1;
1facf9fc 6378+ bend = au_dbend(parent);
6379+ p = dinfo->di_hdentry;
6380+ for (bindex = 0; bindex <= bend; bindex++, p++)
6381+ if (p->hd_dentry) {
6382+ dinfo->di_bstart = bindex;
6383+ break;
6384+ }
6385+
027c5e7a
AM
6386+ if (dinfo->di_bstart >= 0) {
6387+ p = dinfo->di_hdentry + bend;
6388+ for (bindex = bend; bindex >= 0; bindex--, p--)
6389+ if (p->hd_dentry) {
6390+ dinfo->di_bend = bindex;
6391+ err = 0;
6392+ break;
6393+ }
6394+ }
6395+
6396+ return err;
1facf9fc 6397+}
6398+
027c5e7a 6399+static void au_do_hide(struct dentry *dentry)
1facf9fc 6400+{
027c5e7a 6401+ struct inode *inode;
1facf9fc 6402+
027c5e7a
AM
6403+ inode = dentry->d_inode;
6404+ if (inode) {
6405+ if (!S_ISDIR(inode->i_mode)) {
6406+ if (inode->i_nlink && !d_unhashed(dentry))
6407+ drop_nlink(inode);
6408+ } else {
6409+ clear_nlink(inode);
6410+ /* stop next lookup */
6411+ inode->i_flags |= S_DEAD;
6412+ }
6413+ smp_mb(); /* necessary? */
6414+ }
6415+ d_drop(dentry);
6416+}
1308ab2a 6417+
027c5e7a
AM
6418+static int au_hide_children(struct dentry *parent)
6419+{
6420+ int err, i, j, ndentry;
6421+ struct au_dcsub_pages dpages;
6422+ struct au_dpage *dpage;
6423+ struct dentry *dentry;
1facf9fc 6424+
027c5e7a 6425+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 6426+ if (unlikely(err))
6427+ goto out;
027c5e7a
AM
6428+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
6429+ if (unlikely(err))
6430+ goto out_dpages;
1facf9fc 6431+
027c5e7a
AM
6432+ /* in reverse order */
6433+ for (i = dpages.ndpage - 1; i >= 0; i--) {
6434+ dpage = dpages.dpages + i;
6435+ ndentry = dpage->ndentry;
6436+ for (j = ndentry - 1; j >= 0; j--) {
6437+ dentry = dpage->dentries[j];
6438+ if (dentry != parent)
6439+ au_do_hide(dentry);
6440+ }
6441+ }
1facf9fc 6442+
027c5e7a
AM
6443+out_dpages:
6444+ au_dpages_free(&dpages);
4f0767ce 6445+out:
027c5e7a 6446+ return err;
1facf9fc 6447+}
6448+
027c5e7a 6449+static void au_hide(struct dentry *dentry)
1facf9fc 6450+{
027c5e7a
AM
6451+ int err;
6452+ struct inode *inode;
1facf9fc 6453+
027c5e7a
AM
6454+ AuDbgDentry(dentry);
6455+ inode = dentry->d_inode;
6456+ if (inode && S_ISDIR(inode->i_mode)) {
6457+ /* shrink_dcache_parent(dentry); */
6458+ err = au_hide_children(dentry);
6459+ if (unlikely(err))
6460+ AuIOErr("%.*s, failed hiding children, ignored %d\n",
6461+ AuDLNPair(dentry), err);
6462+ }
6463+ au_do_hide(dentry);
6464+}
1facf9fc 6465+
027c5e7a
AM
6466+/*
6467+ * By adding a dirty branch, a cached dentry may be affected in various ways.
6468+ *
6469+ * a dirty branch is added
6470+ * - on the top of layers
6471+ * - in the middle of layers
6472+ * - to the bottom of layers
6473+ *
6474+ * on the added branch there exists
6475+ * - a whiteout
6476+ * - a diropq
6477+ * - a same named entry
6478+ * + exist
6479+ * * negative --> positive
6480+ * * positive --> positive
6481+ * - type is unchanged
6482+ * - type is changed
6483+ * + doesn't exist
6484+ * * negative --> negative
6485+ * * positive --> negative (rejected by au_br_del() for non-dir case)
6486+ * - none
6487+ */
6488+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
6489+ struct au_dinfo *tmp)
6490+{
6491+ int err;
6492+ aufs_bindex_t bindex, bend;
6493+ struct {
6494+ struct dentry *dentry;
6495+ struct inode *inode;
6496+ mode_t mode;
6497+ } orig_h, tmp_h;
6498+ struct au_hdentry *hd;
6499+ struct inode *inode, *h_inode;
6500+ struct dentry *h_dentry;
6501+
6502+ err = 0;
6503+ AuDebugOn(dinfo->di_bstart < 0);
6504+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
6505+ orig_h.inode = orig_h.dentry->d_inode;
6506+ orig_h.mode = 0;
6507+ if (orig_h.inode)
6508+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
6509+ memset(&tmp_h, 0, sizeof(tmp_h));
6510+ if (tmp->di_bstart >= 0) {
6511+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
6512+ tmp_h.inode = tmp_h.dentry->d_inode;
6513+ if (tmp_h.inode)
6514+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
6515+ }
6516+
6517+ inode = dentry->d_inode;
6518+ if (!orig_h.inode) {
6519+ AuDbg("nagative originally\n");
6520+ if (inode) {
6521+ au_hide(dentry);
6522+ goto out;
6523+ }
6524+ AuDebugOn(inode);
6525+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6526+ AuDebugOn(dinfo->di_bdiropq != -1);
6527+
6528+ if (!tmp_h.inode) {
6529+ AuDbg("negative --> negative\n");
6530+ /* should have only one negative lower */
6531+ if (tmp->di_bstart >= 0
6532+ && tmp->di_bstart < dinfo->di_bstart) {
6533+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
6534+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
6535+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
6536+ au_di_cp(dinfo, tmp);
6537+ hd = tmp->di_hdentry + tmp->di_bstart;
6538+ au_set_h_dptr(dentry, tmp->di_bstart,
6539+ dget(hd->hd_dentry));
6540+ }
6541+ au_dbg_verify_dinode(dentry);
6542+ } else {
6543+ AuDbg("negative --> positive\n");
6544+ /*
6545+ * similar to the behaviour of creating with bypassing
6546+ * aufs.
6547+ * unhash it in order to force an error in the
6548+ * succeeding create operation.
6549+ * we should not set S_DEAD here.
6550+ */
6551+ d_drop(dentry);
6552+ /* au_di_swap(tmp, dinfo); */
6553+ au_dbg_verify_dinode(dentry);
6554+ }
6555+ } else {
6556+ AuDbg("positive originally\n");
6557+ /* inode may be NULL */
6558+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
6559+ if (!tmp_h.inode) {
6560+ AuDbg("positive --> negative\n");
6561+ /* or bypassing aufs */
6562+ au_hide(dentry);
6563+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
6564+ dinfo->di_bwh = tmp->di_bwh;
6565+ if (inode)
6566+ err = au_refresh_hinode_self(inode);
6567+ au_dbg_verify_dinode(dentry);
6568+ } else if (orig_h.mode == tmp_h.mode) {
6569+ AuDbg("positive --> positive, same type\n");
6570+ if (!S_ISDIR(orig_h.mode)
6571+ && dinfo->di_bstart > tmp->di_bstart) {
6572+ /*
6573+ * similar to the behaviour of removing and
6574+ * creating.
6575+ */
6576+ au_hide(dentry);
6577+ if (inode)
6578+ err = au_refresh_hinode_self(inode);
6579+ au_dbg_verify_dinode(dentry);
6580+ } else {
6581+ /* fill empty slots */
6582+ if (dinfo->di_bstart > tmp->di_bstart)
6583+ dinfo->di_bstart = tmp->di_bstart;
6584+ if (dinfo->di_bend < tmp->di_bend)
6585+ dinfo->di_bend = tmp->di_bend;
6586+ dinfo->di_bwh = tmp->di_bwh;
6587+ dinfo->di_bdiropq = tmp->di_bdiropq;
6588+ hd = tmp->di_hdentry;
6589+ bend = dinfo->di_bend;
6590+ for (bindex = tmp->di_bstart; bindex <= bend;
6591+ bindex++) {
6592+ if (au_h_dptr(dentry, bindex))
6593+ continue;
6594+ h_dentry = hd[bindex].hd_dentry;
6595+ if (!h_dentry)
6596+ continue;
6597+ h_inode = h_dentry->d_inode;
6598+ AuDebugOn(!h_inode);
6599+ AuDebugOn(orig_h.mode
6600+ != (h_inode->i_mode
6601+ & S_IFMT));
6602+ au_set_h_dptr(dentry, bindex,
6603+ dget(h_dentry));
6604+ }
6605+ err = au_refresh_hinode(inode, dentry);
6606+ au_dbg_verify_dinode(dentry);
6607+ }
6608+ } else {
6609+ AuDbg("positive --> positive, different type\n");
6610+ /* similar to the behaviour of removing and creating */
6611+ au_hide(dentry);
6612+ if (inode)
6613+ err = au_refresh_hinode_self(inode);
6614+ au_dbg_verify_dinode(dentry);
6615+ }
6616+ }
6617+
6618+out:
6619+ return err;
6620+}
6621+
6622+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
6623+{
6624+ int err, ebrange;
6625+ unsigned int sigen;
6626+ struct au_dinfo *dinfo, *tmp;
6627+ struct super_block *sb;
6628+ struct inode *inode;
6629+
6630+ DiMustWriteLock(dentry);
6631+ AuDebugOn(IS_ROOT(dentry));
6632+ AuDebugOn(!parent->d_inode);
6633+
6634+ sb = dentry->d_sb;
6635+ inode = dentry->d_inode;
6636+ sigen = au_sigen(sb);
6637+ err = au_digen_test(parent, sigen);
6638+ if (unlikely(err))
6639+ goto out;
6640+
6641+ dinfo = au_di(dentry);
6642+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
6643+ if (unlikely(err))
6644+ goto out;
6645+ ebrange = au_dbrange_test(dentry);
6646+ if (!ebrange)
6647+ ebrange = au_do_refresh_hdentry(dentry, parent);
6648+
6649+ if (d_unhashed(dentry) || ebrange) {
6650+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
6651+ if (inode)
6652+ err = au_refresh_hinode_self(inode);
6653+ au_dbg_verify_dinode(dentry);
6654+ if (!err)
6655+ goto out_dgen; /* success */
6656+ goto out;
6657+ }
6658+
6659+ /* temporary dinfo */
6660+ AuDbgDentry(dentry);
6661+ err = -ENOMEM;
6662+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
6663+ if (unlikely(!tmp))
6664+ goto out;
6665+ au_di_swap(tmp, dinfo);
6666+ /* returns the number of positive dentries */
6667+ /*
6668+ * if current working dir is removed, it returns an error.
6669+ * but the dentry is legal.
6670+ */
6671+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0, /*nd*/NULL);
6672+ AuDbgDentry(dentry);
6673+ au_di_swap(tmp, dinfo);
6674+ if (err == -ENOENT)
6675+ err = 0;
6676+ if (err >= 0) {
6677+ /* compare/refresh by dinfo */
6678+ AuDbgDentry(dentry);
6679+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
6680+ au_dbg_verify_dinode(dentry);
6681+ AuTraceErr(err);
6682+ }
6683+ au_rw_write_unlock(&tmp->di_rwsem);
6684+ au_di_free(tmp);
6685+ if (unlikely(err))
6686+ goto out;
6687+
6688+out_dgen:
6689+ au_update_digen(dentry);
6690+out:
6691+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
6692+ AuIOErr("failed refreshing %.*s, %d\n",
6693+ AuDLNPair(dentry), err);
6694+ AuDbgDentry(dentry);
6695+ }
6696+ AuTraceErr(err);
6697+ return err;
6698+}
6699+
6700+static noinline_for_stack
6701+int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
6702+ struct dentry *dentry, aufs_bindex_t bindex)
6703+{
6704+ int err, valid;
6705+ int (*reval)(struct dentry *, struct nameidata *);
6706+
6707+ err = 0;
6708+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
6709+ goto out;
6710+ reval = h_dentry->d_op->d_revalidate;
6711+
6712+ AuDbg("b%d\n", bindex);
6713+ if (au_test_fs_null_nd(h_dentry->d_sb))
6714+ /* it may return tri-state */
6715+ valid = reval(h_dentry, NULL);
6716+ else {
6717+ struct nameidata h_nd;
6718+ int locked;
1facf9fc 6719+ struct dentry *parent;
6720+
6721+ au_h_nd(&h_nd, nd);
6722+ parent = nd->path.dentry;
6723+ locked = (nd && nd->path.dentry != dentry);
6724+ if (locked)
6725+ di_read_lock_parent(parent, AuLock_IR);
6726+ BUG_ON(bindex > au_dbend(parent));
6727+ h_nd.path.dentry = au_h_dptr(parent, bindex);
6728+ BUG_ON(!h_nd.path.dentry);
6729+ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
6730+ path_get(&h_nd.path);
6731+ valid = reval(h_dentry, &h_nd);
6732+ path_put(&h_nd.path);
6733+ if (locked)
6734+ di_read_unlock(parent, AuLock_IR);
6735+ }
6736+
6737+ if (unlikely(valid < 0))
6738+ err = valid;
6739+ else if (!valid)
6740+ err = -EINVAL;
6741+
4f0767ce 6742+out:
1facf9fc 6743+ AuTraceErr(err);
6744+ return err;
6745+}
6746+
6747+/* todo: remove this */
6748+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
6749+ struct nameidata *nd, int do_udba)
6750+{
6751+ int err;
6752+ umode_t mode, h_mode;
6753+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
6754+ unsigned char plus, unhashed, is_root, h_plus;
4a4d8108 6755+ struct inode *h_inode, *h_cached_inode;
1facf9fc 6756+ struct dentry *h_dentry;
6757+ struct qstr *name, *h_name;
6758+
6759+ err = 0;
6760+ plus = 0;
6761+ mode = 0;
1facf9fc 6762+ ibs = -1;
6763+ ibe = -1;
6764+ unhashed = !!d_unhashed(dentry);
6765+ is_root = !!IS_ROOT(dentry);
6766+ name = &dentry->d_name;
6767+
6768+ /*
7f207e10
AM
6769+ * Theoretically, REVAL test should be unnecessary in case of
6770+ * {FS,I}NOTIFY.
6771+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 6772+ * IN_ATTRIB for atime/nlink/pageio
6773+ * IN_DELETE for NFS dentry
6774+ * Let's do REVAL test too.
6775+ */
6776+ if (do_udba && inode) {
6777+ mode = (inode->i_mode & S_IFMT);
6778+ plus = (inode->i_nlink > 0);
1facf9fc 6779+ ibs = au_ibstart(inode);
6780+ ibe = au_ibend(inode);
6781+ }
6782+
6783+ bstart = au_dbstart(dentry);
6784+ btail = bstart;
6785+ if (inode && S_ISDIR(inode->i_mode))
6786+ btail = au_dbtaildir(dentry);
6787+ for (bindex = bstart; bindex <= btail; bindex++) {
6788+ h_dentry = au_h_dptr(dentry, bindex);
6789+ if (!h_dentry)
6790+ continue;
6791+
6792+ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
027c5e7a 6793+ spin_lock(&h_dentry->d_lock);
1facf9fc 6794+ h_name = &h_dentry->d_name;
6795+ if (unlikely(do_udba
6796+ && !is_root
6797+ && (unhashed != !!d_unhashed(h_dentry)
6798+ || name->len != h_name->len
6799+ || memcmp(name->name, h_name->name, name->len))
6800+ )) {
6801+ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6802+ unhashed, d_unhashed(h_dentry),
6803+ AuDLNPair(dentry), AuDLNPair(h_dentry));
027c5e7a 6804+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6805+ goto err;
6806+ }
027c5e7a 6807+ spin_unlock(&h_dentry->d_lock);
1facf9fc 6808+
6809+ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6810+ if (unlikely(err))
6811+ /* do not goto err, to keep the errno */
6812+ break;
6813+
6814+ /* todo: plink too? */
6815+ if (!do_udba)
6816+ continue;
6817+
6818+ /* UDBA tests */
6819+ h_inode = h_dentry->d_inode;
6820+ if (unlikely(!!inode != !!h_inode))
6821+ goto err;
6822+
6823+ h_plus = plus;
6824+ h_mode = mode;
6825+ h_cached_inode = h_inode;
6826+ if (h_inode) {
6827+ h_mode = (h_inode->i_mode & S_IFMT);
6828+ h_plus = (h_inode->i_nlink > 0);
6829+ }
6830+ if (inode && ibs <= bindex && bindex <= ibe)
6831+ h_cached_inode = au_h_iptr(inode, bindex);
6832+
6833+ if (unlikely(plus != h_plus
6834+ || mode != h_mode
6835+ || h_cached_inode != h_inode))
6836+ goto err;
6837+ continue;
6838+
6839+ err:
6840+ err = -EINVAL;
6841+ break;
6842+ }
6843+
6844+ return err;
6845+}
6846+
027c5e7a 6847+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 6848+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6849+{
6850+ int err;
6851+ struct dentry *parent;
1facf9fc 6852+
027c5e7a 6853+ if (!au_digen_test(dentry, sigen))
1facf9fc 6854+ return 0;
6855+
6856+ parent = dget_parent(dentry);
6857+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6858+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 6859+ au_dbg_verify_gen(parent, sigen);
027c5e7a 6860+ err = au_refresh_dentry(dentry, parent);
1facf9fc 6861+ di_read_unlock(parent, AuLock_IR);
6862+ dput(parent);
027c5e7a 6863+ AuTraceErr(err);
1facf9fc 6864+ return err;
6865+}
6866+
6867+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6868+{
6869+ int err;
6870+ struct dentry *d, *parent;
6871+ struct inode *inode;
6872+
027c5e7a 6873+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 6874+ return simple_reval_dpath(dentry, sigen);
6875+
6876+ /* slow loop, keep it simple and stupid */
6877+ /* cf: au_cpup_dirs() */
6878+ err = 0;
6879+ parent = NULL;
027c5e7a 6880+ while (au_digen_test(dentry, sigen)) {
1facf9fc 6881+ d = dentry;
6882+ while (1) {
6883+ dput(parent);
6884+ parent = dget_parent(d);
027c5e7a 6885+ if (!au_digen_test(parent, sigen))
1facf9fc 6886+ break;
6887+ d = parent;
6888+ }
6889+
6890+ inode = d->d_inode;
6891+ if (d != dentry)
027c5e7a 6892+ di_write_lock_child2(d);
1facf9fc 6893+
6894+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
6895+ if (au_digen_test(d, sigen)) {
6896+ /*
6897+ * todo: consolidate with simple_reval_dpath(),
6898+ * do_refresh() and au_reval_for_attr().
6899+ */
1facf9fc 6900+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 6901+ err = au_refresh_dentry(d, parent);
1facf9fc 6902+ di_read_unlock(parent, AuLock_IR);
6903+ }
6904+
6905+ if (d != dentry)
6906+ di_write_unlock(d);
6907+ dput(parent);
6908+ if (unlikely(err))
6909+ break;
6910+ }
6911+
6912+ return err;
6913+}
6914+
6915+/*
6916+ * if valid returns 1, otherwise 0.
6917+ */
6918+static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6919+{
6920+ int valid, err;
6921+ unsigned int sigen;
6922+ unsigned char do_udba;
6923+ struct super_block *sb;
6924+ struct inode *inode;
6925+
027c5e7a
AM
6926+ /* todo: support rcu-walk? */
6927+ if (nd && (nd->flags & LOOKUP_RCU))
6928+ return -ECHILD;
6929+
6930+ valid = 0;
6931+ if (unlikely(!au_di(dentry)))
6932+ goto out;
6933+
6934+ inode = dentry->d_inode;
6935+ if (inode && is_bad_inode(inode))
6936+ goto out;
6937+
e49829fe 6938+ valid = 1;
1facf9fc 6939+ sb = dentry->d_sb;
e49829fe
JR
6940+ /*
6941+ * todo: very ugly
6942+ * i_mutex of parent dir may be held,
6943+ * but we should not return 'invalid' due to busy.
6944+ */
6945+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
6946+ if (unlikely(err)) {
6947+ valid = err;
027c5e7a 6948+ AuTraceErr(err);
e49829fe
JR
6949+ goto out;
6950+ }
027c5e7a
AM
6951+ if (unlikely(au_dbrange_test(dentry))) {
6952+ err = -EINVAL;
6953+ AuTraceErr(err);
6954+ goto out_dgrade;
1facf9fc 6955+ }
027c5e7a
AM
6956+
6957+ sigen = au_sigen(sb);
6958+ if (au_digen_test(dentry, sigen)) {
1facf9fc 6959+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
6960+ err = au_reval_dpath(dentry, sigen);
6961+ if (unlikely(err)) {
6962+ AuTraceErr(err);
1facf9fc 6963+ goto out_dgrade;
027c5e7a 6964+ }
1facf9fc 6965+ }
6966+ di_downgrade_lock(dentry, AuLock_IR);
6967+
1facf9fc 6968+ err = -EINVAL;
027c5e7a
AM
6969+ if (inode && (IS_DEADDIR(inode) || !inode->i_nlink))
6970+ goto out_inval;
6971+
1facf9fc 6972+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6973+ if (do_udba && inode) {
6974+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 6975+ struct inode *h_inode;
1facf9fc 6976+
027c5e7a
AM
6977+ if (bstart >= 0) {
6978+ h_inode = au_h_iptr(inode, bstart);
6979+ if (h_inode && au_test_higen(inode, h_inode))
6980+ goto out_inval;
6981+ }
1facf9fc 6982+ }
6983+
6984+ err = h_d_revalidate(dentry, inode, nd, do_udba);
027c5e7a 6985+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 6986+ err = -EIO;
027c5e7a
AM
6987+ AuDbg("both of real entry and whiteout found, %.*s, err %d\n",
6988+ AuDLNPair(dentry), err);
6989+ }
e49829fe 6990+ goto out_inval;
1facf9fc 6991+
4f0767ce 6992+out_dgrade:
1facf9fc 6993+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 6994+out_inval:
1facf9fc 6995+ aufs_read_unlock(dentry, AuLock_IR);
6996+ AuTraceErr(err);
6997+ valid = !err;
e49829fe 6998+out:
027c5e7a 6999+ if (!valid) {
e49829fe 7000+ AuDbg("%.*s invalid, %d\n", AuDLNPair(dentry), valid);
027c5e7a
AM
7001+ d_drop(dentry);
7002+ }
1facf9fc 7003+ return valid;
7004+}
7005+
7006+static void aufs_d_release(struct dentry *dentry)
7007+{
027c5e7a 7008+ if (au_di(dentry)) {
4a4d8108
AM
7009+ au_di_fin(dentry);
7010+ au_hn_di_reinit(dentry);
1facf9fc 7011+ }
1facf9fc 7012+}
7013+
4a4d8108 7014+const struct dentry_operations aufs_dop = {
1facf9fc 7015+ .d_revalidate = aufs_d_revalidate,
7016+ .d_release = aufs_d_release
7017+};
7f207e10
AM
7018diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
7019--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
53392da6 7020+++ linux/fs/aufs/dentry.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7021@@ -0,0 +1,238 @@
1facf9fc 7022+/*
027c5e7a 7023+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7024+ *
7025+ * This program, aufs is free software; you can redistribute it and/or modify
7026+ * it under the terms of the GNU General Public License as published by
7027+ * the Free Software Foundation; either version 2 of the License, or
7028+ * (at your option) any later version.
dece6358
AM
7029+ *
7030+ * This program is distributed in the hope that it will be useful,
7031+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7032+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7033+ * GNU General Public License for more details.
7034+ *
7035+ * You should have received a copy of the GNU General Public License
7036+ * along with this program; if not, write to the Free Software
7037+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7038+ */
7039+
7040+/*
7041+ * lookup and dentry operations
7042+ */
7043+
7044+#ifndef __AUFS_DENTRY_H__
7045+#define __AUFS_DENTRY_H__
7046+
7047+#ifdef __KERNEL__
7048+
dece6358 7049+#include <linux/dcache.h>
1facf9fc 7050+#include <linux/aufs_type.h>
7051+#include "rwsem.h"
7052+
1facf9fc 7053+struct au_hdentry {
7054+ struct dentry *hd_dentry;
027c5e7a 7055+ aufs_bindex_t hd_id;
1facf9fc 7056+};
7057+
7058+struct au_dinfo {
7059+ atomic_t di_generation;
7060+
dece6358 7061+ struct au_rwsem di_rwsem;
1facf9fc 7062+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
7063+ struct au_hdentry *di_hdentry;
4a4d8108 7064+} ____cacheline_aligned_in_smp;
1facf9fc 7065+
7066+/* ---------------------------------------------------------------------- */
7067+
7068+/* dentry.c */
4a4d8108 7069+extern const struct dentry_operations aufs_dop;
1facf9fc 7070+struct au_branch;
7071+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
7072+ struct au_branch *br, struct nameidata *nd);
7073+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
7074+ struct au_branch *br);
7075+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7076+ struct dentry *h_parent, struct au_branch *br);
7077+
7078+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
7079+ struct nameidata *nd);
7080+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
027c5e7a 7081+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 7082+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
7083+
7084+/* dinfo.c */
4a4d8108 7085+void au_di_init_once(void *_di);
027c5e7a
AM
7086+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
7087+void au_di_free(struct au_dinfo *dinfo);
7088+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
7089+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
7090+int au_di_init(struct dentry *dentry);
7091+void au_di_fin(struct dentry *dentry);
1facf9fc 7092+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
7093+
7094+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
7095+void di_read_unlock(struct dentry *d, int flags);
7096+void di_downgrade_lock(struct dentry *d, int flags);
7097+void di_write_lock(struct dentry *d, unsigned int lsc);
7098+void di_write_unlock(struct dentry *d);
7099+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
7100+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
7101+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
7102+
7103+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 7104+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 7105+aufs_bindex_t au_dbtail(struct dentry *dentry);
7106+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
7107+
7108+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7109+ struct dentry *h_dentry);
027c5e7a
AM
7110+int au_digen_test(struct dentry *dentry, unsigned int sigen);
7111+int au_dbrange_test(struct dentry *dentry);
1facf9fc 7112+void au_update_digen(struct dentry *dentry);
7113+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
7114+void au_update_dbstart(struct dentry *dentry);
7115+void au_update_dbend(struct dentry *dentry);
7116+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
7117+
7118+/* ---------------------------------------------------------------------- */
7119+
7120+static inline struct au_dinfo *au_di(struct dentry *dentry)
7121+{
7122+ return dentry->d_fsdata;
7123+}
7124+
7125+/* ---------------------------------------------------------------------- */
7126+
7127+/* lock subclass for dinfo */
7128+enum {
7129+ AuLsc_DI_CHILD, /* child first */
4a4d8108 7130+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 7131+ AuLsc_DI_CHILD3, /* copyup dirs */
7132+ AuLsc_DI_PARENT,
7133+ AuLsc_DI_PARENT2,
027c5e7a
AM
7134+ AuLsc_DI_PARENT3,
7135+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 7136+};
7137+
7138+/*
7139+ * di_read_lock_child, di_write_lock_child,
7140+ * di_read_lock_child2, di_write_lock_child2,
7141+ * di_read_lock_child3, di_write_lock_child3,
7142+ * di_read_lock_parent, di_write_lock_parent,
7143+ * di_read_lock_parent2, di_write_lock_parent2,
7144+ * di_read_lock_parent3, di_write_lock_parent3,
7145+ */
7146+#define AuReadLockFunc(name, lsc) \
7147+static inline void di_read_lock_##name(struct dentry *d, int flags) \
7148+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
7149+
7150+#define AuWriteLockFunc(name, lsc) \
7151+static inline void di_write_lock_##name(struct dentry *d) \
7152+{ di_write_lock(d, AuLsc_DI_##lsc); }
7153+
7154+#define AuRWLockFuncs(name, lsc) \
7155+ AuReadLockFunc(name, lsc) \
7156+ AuWriteLockFunc(name, lsc)
7157+
7158+AuRWLockFuncs(child, CHILD);
7159+AuRWLockFuncs(child2, CHILD2);
7160+AuRWLockFuncs(child3, CHILD3);
7161+AuRWLockFuncs(parent, PARENT);
7162+AuRWLockFuncs(parent2, PARENT2);
7163+AuRWLockFuncs(parent3, PARENT3);
7164+
7165+#undef AuReadLockFunc
7166+#undef AuWriteLockFunc
7167+#undef AuRWLockFuncs
7168+
7169+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
7170+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
7171+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 7172+
7173+/* ---------------------------------------------------------------------- */
7174+
7175+/* todo: memory barrier? */
7176+static inline unsigned int au_digen(struct dentry *d)
7177+{
7178+ return atomic_read(&au_di(d)->di_generation);
7179+}
7180+
7181+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
7182+{
7183+ hdentry->hd_dentry = NULL;
7184+}
7185+
7186+static inline void au_hdput(struct au_hdentry *hd)
7187+{
4a4d8108
AM
7188+ if (hd)
7189+ dput(hd->hd_dentry);
1facf9fc 7190+}
7191+
7192+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
7193+{
1308ab2a 7194+ DiMustAnyLock(dentry);
1facf9fc 7195+ return au_di(dentry)->di_bstart;
7196+}
7197+
7198+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
7199+{
1308ab2a 7200+ DiMustAnyLock(dentry);
1facf9fc 7201+ return au_di(dentry)->di_bend;
7202+}
7203+
7204+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
7205+{
1308ab2a 7206+ DiMustAnyLock(dentry);
1facf9fc 7207+ return au_di(dentry)->di_bwh;
7208+}
7209+
7210+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
7211+{
1308ab2a 7212+ DiMustAnyLock(dentry);
1facf9fc 7213+ return au_di(dentry)->di_bdiropq;
7214+}
7215+
7216+/* todo: hard/soft set? */
7217+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
7218+{
1308ab2a 7219+ DiMustWriteLock(dentry);
1facf9fc 7220+ au_di(dentry)->di_bstart = bindex;
7221+}
7222+
7223+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
7224+{
1308ab2a 7225+ DiMustWriteLock(dentry);
1facf9fc 7226+ au_di(dentry)->di_bend = bindex;
7227+}
7228+
7229+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
7230+{
1308ab2a 7231+ DiMustWriteLock(dentry);
1facf9fc 7232+ /* dbwh can be outside of bstart - bend range */
7233+ au_di(dentry)->di_bwh = bindex;
7234+}
7235+
7236+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
7237+{
1308ab2a 7238+ DiMustWriteLock(dentry);
1facf9fc 7239+ au_di(dentry)->di_bdiropq = bindex;
7240+}
7241+
7242+/* ---------------------------------------------------------------------- */
7243+
4a4d8108 7244+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 7245+static inline void au_digen_dec(struct dentry *d)
7246+{
e49829fe 7247+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 7248+}
7249+
4a4d8108 7250+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 7251+{
7252+ dentry->d_fsdata = NULL;
7253+}
7254+#else
4a4d8108
AM
7255+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
7256+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 7257+
7258+#endif /* __KERNEL__ */
7259+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
7260diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
7261--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 7262+++ linux/fs/aufs/dinfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 7263@@ -0,0 +1,543 @@
1facf9fc 7264+/*
027c5e7a 7265+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7266+ *
7267+ * This program, aufs is free software; you can redistribute it and/or modify
7268+ * it under the terms of the GNU General Public License as published by
7269+ * the Free Software Foundation; either version 2 of the License, or
7270+ * (at your option) any later version.
dece6358
AM
7271+ *
7272+ * This program is distributed in the hope that it will be useful,
7273+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7274+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7275+ * GNU General Public License for more details.
7276+ *
7277+ * You should have received a copy of the GNU General Public License
7278+ * along with this program; if not, write to the Free Software
7279+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7280+ */
7281+
7282+/*
7283+ * dentry private data
7284+ */
7285+
7286+#include "aufs.h"
7287+
e49829fe 7288+void au_di_init_once(void *_dinfo)
4a4d8108 7289+{
e49829fe
JR
7290+ struct au_dinfo *dinfo = _dinfo;
7291+ static struct lock_class_key aufs_di;
4a4d8108 7292+
e49829fe
JR
7293+ au_rw_init(&dinfo->di_rwsem);
7294+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
7295+}
7296+
027c5e7a 7297+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 7298+{
7299+ struct au_dinfo *dinfo;
027c5e7a 7300+ int nbr, i;
1facf9fc 7301+
7302+ dinfo = au_cache_alloc_dinfo();
7303+ if (unlikely(!dinfo))
7304+ goto out;
7305+
1facf9fc 7306+ nbr = au_sbend(sb) + 1;
7307+ if (nbr <= 0)
7308+ nbr = 1;
7309+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
7310+ if (dinfo->di_hdentry) {
7311+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
7312+ dinfo->di_bstart = -1;
7313+ dinfo->di_bend = -1;
7314+ dinfo->di_bwh = -1;
7315+ dinfo->di_bdiropq = -1;
7316+ for (i = 0; i < nbr; i++)
7317+ dinfo->di_hdentry[i].hd_id = -1;
7318+ goto out;
7319+ }
1facf9fc 7320+
1facf9fc 7321+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
7322+ dinfo = NULL;
7323+
4f0767ce 7324+out:
027c5e7a 7325+ return dinfo;
1facf9fc 7326+}
7327+
027c5e7a 7328+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 7329+{
4a4d8108
AM
7330+ struct au_hdentry *p;
7331+ aufs_bindex_t bend, bindex;
7332+
7333+ /* dentry may not be revalidated */
027c5e7a 7334+ bindex = dinfo->di_bstart;
4a4d8108 7335+ if (bindex >= 0) {
027c5e7a
AM
7336+ bend = dinfo->di_bend;
7337+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
7338+ while (bindex++ <= bend)
7339+ au_hdput(p++);
7340+ }
027c5e7a
AM
7341+ kfree(dinfo->di_hdentry);
7342+ au_cache_free_dinfo(dinfo);
7343+}
7344+
7345+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
7346+{
7347+ struct au_hdentry *p;
7348+ aufs_bindex_t bi;
7349+
7350+ AuRwMustWriteLock(&a->di_rwsem);
7351+ AuRwMustWriteLock(&b->di_rwsem);
7352+
7353+#define DiSwap(v, name) \
7354+ do { \
7355+ v = a->di_##name; \
7356+ a->di_##name = b->di_##name; \
7357+ b->di_##name = v; \
7358+ } while (0)
7359+
7360+ DiSwap(p, hdentry);
7361+ DiSwap(bi, bstart);
7362+ DiSwap(bi, bend);
7363+ DiSwap(bi, bwh);
7364+ DiSwap(bi, bdiropq);
7365+ /* smp_mb(); */
7366+
7367+#undef DiSwap
7368+}
7369+
7370+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
7371+{
7372+ AuRwMustWriteLock(&dst->di_rwsem);
7373+ AuRwMustWriteLock(&src->di_rwsem);
7374+
7375+ dst->di_bstart = src->di_bstart;
7376+ dst->di_bend = src->di_bend;
7377+ dst->di_bwh = src->di_bwh;
7378+ dst->di_bdiropq = src->di_bdiropq;
7379+ /* smp_mb(); */
7380+}
7381+
7382+int au_di_init(struct dentry *dentry)
7383+{
7384+ int err;
7385+ struct super_block *sb;
7386+ struct au_dinfo *dinfo;
7387+
7388+ err = 0;
7389+ sb = dentry->d_sb;
7390+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
7391+ if (dinfo) {
7392+ atomic_set(&dinfo->di_generation, au_sigen(sb));
7393+ /* smp_mb(); */ /* atomic_set */
7394+ dentry->d_fsdata = dinfo;
7395+ } else
7396+ err = -ENOMEM;
7397+
7398+ return err;
7399+}
7400+
7401+void au_di_fin(struct dentry *dentry)
7402+{
7403+ struct au_dinfo *dinfo;
7404+
7405+ dinfo = au_di(dentry);
7406+ AuRwDestroy(&dinfo->di_rwsem);
7407+ au_di_free(dinfo);
4a4d8108
AM
7408+}
7409+
1facf9fc 7410+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
7411+{
7412+ int err, sz;
7413+ struct au_hdentry *hdp;
7414+
1308ab2a 7415+ AuRwMustWriteLock(&dinfo->di_rwsem);
7416+
1facf9fc 7417+ err = -ENOMEM;
7418+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
7419+ if (!sz)
7420+ sz = sizeof(*hdp);
7421+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
7422+ if (hdp) {
7423+ dinfo->di_hdentry = hdp;
7424+ err = 0;
7425+ }
7426+
7427+ return err;
7428+}
7429+
7430+/* ---------------------------------------------------------------------- */
7431+
7432+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
7433+{
7434+ switch (lsc) {
7435+ case AuLsc_DI_CHILD:
7436+ ii_write_lock_child(inode);
7437+ break;
7438+ case AuLsc_DI_CHILD2:
7439+ ii_write_lock_child2(inode);
7440+ break;
7441+ case AuLsc_DI_CHILD3:
7442+ ii_write_lock_child3(inode);
7443+ break;
7444+ case AuLsc_DI_PARENT:
7445+ ii_write_lock_parent(inode);
7446+ break;
7447+ case AuLsc_DI_PARENT2:
7448+ ii_write_lock_parent2(inode);
7449+ break;
7450+ case AuLsc_DI_PARENT3:
7451+ ii_write_lock_parent3(inode);
7452+ break;
7453+ default:
7454+ BUG();
7455+ }
7456+}
7457+
7458+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
7459+{
7460+ switch (lsc) {
7461+ case AuLsc_DI_CHILD:
7462+ ii_read_lock_child(inode);
7463+ break;
7464+ case AuLsc_DI_CHILD2:
7465+ ii_read_lock_child2(inode);
7466+ break;
7467+ case AuLsc_DI_CHILD3:
7468+ ii_read_lock_child3(inode);
7469+ break;
7470+ case AuLsc_DI_PARENT:
7471+ ii_read_lock_parent(inode);
7472+ break;
7473+ case AuLsc_DI_PARENT2:
7474+ ii_read_lock_parent2(inode);
7475+ break;
7476+ case AuLsc_DI_PARENT3:
7477+ ii_read_lock_parent3(inode);
7478+ break;
7479+ default:
7480+ BUG();
7481+ }
7482+}
7483+
7484+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
7485+{
dece6358 7486+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7487+ if (d->d_inode) {
7488+ if (au_ftest_lock(flags, IW))
7489+ do_ii_write_lock(d->d_inode, lsc);
7490+ else if (au_ftest_lock(flags, IR))
7491+ do_ii_read_lock(d->d_inode, lsc);
7492+ }
7493+}
7494+
7495+void di_read_unlock(struct dentry *d, int flags)
7496+{
7497+ if (d->d_inode) {
027c5e7a
AM
7498+ if (au_ftest_lock(flags, IW)) {
7499+ au_dbg_verify_dinode(d);
1facf9fc 7500+ ii_write_unlock(d->d_inode);
027c5e7a
AM
7501+ } else if (au_ftest_lock(flags, IR)) {
7502+ au_dbg_verify_dinode(d);
1facf9fc 7503+ ii_read_unlock(d->d_inode);
027c5e7a 7504+ }
1facf9fc 7505+ }
dece6358 7506+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 7507+}
7508+
7509+void di_downgrade_lock(struct dentry *d, int flags)
7510+{
1facf9fc 7511+ if (d->d_inode && au_ftest_lock(flags, IR))
7512+ ii_downgrade_lock(d->d_inode);
dece6358 7513+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 7514+}
7515+
7516+void di_write_lock(struct dentry *d, unsigned int lsc)
7517+{
dece6358 7518+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 7519+ if (d->d_inode)
7520+ do_ii_write_lock(d->d_inode, lsc);
7521+}
7522+
7523+void di_write_unlock(struct dentry *d)
7524+{
027c5e7a 7525+ au_dbg_verify_dinode(d);
1facf9fc 7526+ if (d->d_inode)
7527+ ii_write_unlock(d->d_inode);
dece6358 7528+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 7529+}
7530+
7531+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
7532+{
7533+ AuDebugOn(d1 == d2
7534+ || d1->d_inode == d2->d_inode
7535+ || d1->d_sb != d2->d_sb);
7536+
7537+ if (isdir && au_test_subdir(d1, d2)) {
7538+ di_write_lock_child(d1);
7539+ di_write_lock_child2(d2);
7540+ } else {
7541+ /* there should be no races */
7542+ di_write_lock_child(d2);
7543+ di_write_lock_child2(d1);
7544+ }
7545+}
7546+
7547+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
7548+{
7549+ AuDebugOn(d1 == d2
7550+ || d1->d_inode == d2->d_inode
7551+ || d1->d_sb != d2->d_sb);
7552+
7553+ if (isdir && au_test_subdir(d1, d2)) {
7554+ di_write_lock_parent(d1);
7555+ di_write_lock_parent2(d2);
7556+ } else {
7557+ /* there should be no races */
7558+ di_write_lock_parent(d2);
7559+ di_write_lock_parent2(d1);
7560+ }
7561+}
7562+
7563+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
7564+{
7565+ di_write_unlock(d1);
7566+ if (d1->d_inode == d2->d_inode)
dece6358 7567+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 7568+ else
7569+ di_write_unlock(d2);
7570+}
7571+
7572+/* ---------------------------------------------------------------------- */
7573+
7574+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
7575+{
7576+ struct dentry *d;
7577+
1308ab2a 7578+ DiMustAnyLock(dentry);
7579+
1facf9fc 7580+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7581+ return NULL;
7582+ AuDebugOn(bindex < 0);
7583+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
027c5e7a 7584+ AuDebugOn(d && d->d_count <= 0);
1facf9fc 7585+ return d;
7586+}
7587+
2cbb1c4b
JR
7588+/*
7589+ * extended version of au_h_dptr().
7590+ * returns a hashed and positive h_dentry in bindex, NULL, or error.
7591+ */
7592+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
7593+{
7594+ struct dentry *h_dentry;
7595+ struct inode *inode, *h_inode;
7596+
7597+ inode = dentry->d_inode;
7598+ AuDebugOn(!inode);
7599+
7600+ h_dentry = NULL;
7601+ if (au_dbstart(dentry) <= bindex
7602+ && bindex <= au_dbend(dentry))
7603+ h_dentry = au_h_dptr(dentry, bindex);
7604+ if (h_dentry && !au_d_hashed_positive(h_dentry)) {
7605+ dget(h_dentry);
7606+ goto out; /* success */
7607+ }
7608+
7609+ AuDebugOn(bindex < au_ibstart(inode));
7610+ AuDebugOn(au_ibend(inode) < bindex);
7611+ h_inode = au_h_iptr(inode, bindex);
7612+ h_dentry = d_find_alias(h_inode);
7613+ if (h_dentry) {
7614+ if (!IS_ERR(h_dentry)) {
7615+ if (!au_d_hashed_positive(h_dentry))
7616+ goto out; /* success */
7617+ dput(h_dentry);
7618+ } else
7619+ goto out;
7620+ }
7621+
7622+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
7623+ h_dentry = au_plink_lkup(inode, bindex);
7624+ AuDebugOn(!h_dentry);
7625+ if (!IS_ERR(h_dentry)) {
7626+ if (!au_d_hashed_positive(h_dentry))
7627+ goto out; /* success */
7628+ dput(h_dentry);
7629+ h_dentry = NULL;
7630+ }
7631+ }
7632+
7633+out:
7634+ AuDbgDentry(h_dentry);
7635+ return h_dentry;
7636+}
7637+
1facf9fc 7638+aufs_bindex_t au_dbtail(struct dentry *dentry)
7639+{
7640+ aufs_bindex_t bend, bwh;
7641+
7642+ bend = au_dbend(dentry);
7643+ if (0 <= bend) {
7644+ bwh = au_dbwh(dentry);
7645+ if (!bwh)
7646+ return bwh;
7647+ if (0 < bwh && bwh < bend)
7648+ return bwh - 1;
7649+ }
7650+ return bend;
7651+}
7652+
7653+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
7654+{
7655+ aufs_bindex_t bend, bopq;
7656+
7657+ bend = au_dbtail(dentry);
7658+ if (0 <= bend) {
7659+ bopq = au_dbdiropq(dentry);
7660+ if (0 <= bopq && bopq < bend)
7661+ bend = bopq;
7662+ }
7663+ return bend;
7664+}
7665+
7666+/* ---------------------------------------------------------------------- */
7667+
7668+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
7669+ struct dentry *h_dentry)
7670+{
7671+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 7672+ struct au_branch *br;
1facf9fc 7673+
1308ab2a 7674+ DiMustWriteLock(dentry);
7675+
4a4d8108 7676+ au_hdput(hd);
1facf9fc 7677+ hd->hd_dentry = h_dentry;
027c5e7a
AM
7678+ if (h_dentry) {
7679+ br = au_sbr(dentry->d_sb, bindex);
7680+ hd->hd_id = br->br_id;
7681+ }
7682+}
7683+
7684+int au_dbrange_test(struct dentry *dentry)
7685+{
7686+ int err;
7687+ aufs_bindex_t bstart, bend;
7688+
7689+ err = 0;
7690+ bstart = au_dbstart(dentry);
7691+ bend = au_dbend(dentry);
7692+ if (bstart >= 0)
7693+ AuDebugOn(bend < 0 && bstart > bend);
7694+ else {
7695+ err = -EIO;
7696+ AuDebugOn(bend >= 0);
7697+ }
7698+
7699+ return err;
7700+}
7701+
7702+int au_digen_test(struct dentry *dentry, unsigned int sigen)
7703+{
7704+ int err;
7705+
7706+ err = 0;
7707+ if (unlikely(au_digen(dentry) != sigen
7708+ || au_iigen_test(dentry->d_inode, sigen)))
7709+ err = -EIO;
7710+
7711+ return err;
1facf9fc 7712+}
7713+
7714+void au_update_digen(struct dentry *dentry)
7715+{
7716+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
7717+ /* smp_mb(); */ /* atomic_set */
7718+}
7719+
7720+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
7721+{
7722+ struct au_dinfo *dinfo;
7723+ struct dentry *h_d;
4a4d8108 7724+ struct au_hdentry *hdp;
1facf9fc 7725+
1308ab2a 7726+ DiMustWriteLock(dentry);
7727+
1facf9fc 7728+ dinfo = au_di(dentry);
7729+ if (!dinfo || dinfo->di_bstart < 0)
7730+ return;
7731+
4a4d8108 7732+ hdp = dinfo->di_hdentry;
1facf9fc 7733+ if (do_put_zero) {
7734+ aufs_bindex_t bindex, bend;
7735+
7736+ bend = dinfo->di_bend;
7737+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 7738+ h_d = hdp[0 + bindex].hd_dentry;
1facf9fc 7739+ if (h_d && !h_d->d_inode)
7740+ au_set_h_dptr(dentry, bindex, NULL);
7741+ }
7742+ }
7743+
7744+ dinfo->di_bstart = -1;
7745+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 7746+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 7747+ break;
7748+ if (dinfo->di_bstart > dinfo->di_bend) {
7749+ dinfo->di_bstart = -1;
7750+ dinfo->di_bend = -1;
7751+ return;
7752+ }
7753+
7754+ dinfo->di_bend++;
7755+ while (0 <= --dinfo->di_bend)
4a4d8108 7756+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 7757+ break;
7758+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
7759+}
7760+
7761+void au_update_dbstart(struct dentry *dentry)
7762+{
7763+ aufs_bindex_t bindex, bend;
7764+ struct dentry *h_dentry;
7765+
7766+ bend = au_dbend(dentry);
7767+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
7768+ h_dentry = au_h_dptr(dentry, bindex);
7769+ if (!h_dentry)
7770+ continue;
7771+ if (h_dentry->d_inode) {
7772+ au_set_dbstart(dentry, bindex);
7773+ return;
7774+ }
7775+ au_set_h_dptr(dentry, bindex, NULL);
7776+ }
7777+}
7778+
7779+void au_update_dbend(struct dentry *dentry)
7780+{
7781+ aufs_bindex_t bindex, bstart;
7782+ struct dentry *h_dentry;
7783+
7784+ bstart = au_dbstart(dentry);
7f207e10 7785+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 7786+ h_dentry = au_h_dptr(dentry, bindex);
7787+ if (!h_dentry)
7788+ continue;
7789+ if (h_dentry->d_inode) {
7790+ au_set_dbend(dentry, bindex);
7791+ return;
7792+ }
7793+ au_set_h_dptr(dentry, bindex, NULL);
7794+ }
7795+}
7796+
7797+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
7798+{
7799+ aufs_bindex_t bindex, bend;
7800+
7801+ bend = au_dbend(dentry);
7802+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
7803+ if (au_h_dptr(dentry, bindex) == h_dentry)
7804+ return bindex;
7805+ return -1;
7806+}
7f207e10
AM
7807diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
7808--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
7809+++ linux/fs/aufs/dir.c 2011-08-24 13:30:24.731313534 +0200
7810@@ -0,0 +1,624 @@
1facf9fc 7811+/*
027c5e7a 7812+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 7813+ *
7814+ * This program, aufs is free software; you can redistribute it and/or modify
7815+ * it under the terms of the GNU General Public License as published by
7816+ * the Free Software Foundation; either version 2 of the License, or
7817+ * (at your option) any later version.
dece6358
AM
7818+ *
7819+ * This program is distributed in the hope that it will be useful,
7820+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7821+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7822+ * GNU General Public License for more details.
7823+ *
7824+ * You should have received a copy of the GNU General Public License
7825+ * along with this program; if not, write to the Free Software
7826+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7827+ */
7828+
7829+/*
7830+ * directory operations
7831+ */
7832+
dece6358 7833+#include <linux/file.h>
1facf9fc 7834+#include <linux/fs_stack.h>
7835+#include "aufs.h"
7836+
7837+void au_add_nlink(struct inode *dir, struct inode *h_dir)
7838+{
7839+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7840+
7841+ dir->i_nlink += h_dir->i_nlink - 2;
7842+ if (h_dir->i_nlink < 2)
7843+ dir->i_nlink += 2;
7844+}
7845+
7846+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
7847+{
7848+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
7849+
7850+ dir->i_nlink -= h_dir->i_nlink - 2;
7851+ if (h_dir->i_nlink < 2)
7852+ dir->i_nlink -= 2;
7853+}
7854+
1308ab2a 7855+loff_t au_dir_size(struct file *file, struct dentry *dentry)
7856+{
7857+ loff_t sz;
7858+ aufs_bindex_t bindex, bend;
7859+ struct file *h_file;
7860+ struct dentry *h_dentry;
7861+
7862+ sz = 0;
7863+ if (file) {
7864+ AuDebugOn(!file->f_dentry);
7865+ AuDebugOn(!file->f_dentry->d_inode);
7866+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
7867+
4a4d8108 7868+ bend = au_fbend_dir(file);
1308ab2a 7869+ for (bindex = au_fbstart(file);
7870+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7871+ bindex++) {
4a4d8108 7872+ h_file = au_hf_dir(file, bindex);
1308ab2a 7873+ if (h_file
7874+ && h_file->f_dentry
7875+ && h_file->f_dentry->d_inode)
7876+ sz += i_size_read(h_file->f_dentry->d_inode);
7877+ }
7878+ } else {
7879+ AuDebugOn(!dentry);
7880+ AuDebugOn(!dentry->d_inode);
7881+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
7882+
7883+ bend = au_dbtaildir(dentry);
7884+ for (bindex = au_dbstart(dentry);
7885+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
7886+ bindex++) {
7887+ h_dentry = au_h_dptr(dentry, bindex);
7888+ if (h_dentry && h_dentry->d_inode)
7889+ sz += i_size_read(h_dentry->d_inode);
7890+ }
7891+ }
7892+ if (sz < KMALLOC_MAX_SIZE)
7893+ sz = roundup_pow_of_two(sz);
7894+ if (sz > KMALLOC_MAX_SIZE)
7895+ sz = KMALLOC_MAX_SIZE;
7896+ else if (sz < NAME_MAX) {
7897+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
7898+ sz = AUFS_RDBLK_DEF;
7899+ }
7900+ return sz;
7901+}
7902+
1facf9fc 7903+/* ---------------------------------------------------------------------- */
7904+
7905+static int reopen_dir(struct file *file)
7906+{
7907+ int err;
7908+ unsigned int flags;
7909+ aufs_bindex_t bindex, btail, bstart;
7910+ struct dentry *dentry, *h_dentry;
7911+ struct file *h_file;
7912+
7913+ /* open all lower dirs */
7914+ dentry = file->f_dentry;
7915+ bstart = au_dbstart(dentry);
7916+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
7917+ au_set_h_fptr(file, bindex, NULL);
7918+ au_set_fbstart(file, bstart);
7919+
7920+ btail = au_dbtaildir(dentry);
4a4d8108 7921+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 7922+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 7923+ au_set_fbend_dir(file, btail);
1facf9fc 7924+
4a4d8108 7925+ flags = vfsub_file_flags(file);
1facf9fc 7926+ for (bindex = bstart; bindex <= btail; bindex++) {
7927+ h_dentry = au_h_dptr(dentry, bindex);
7928+ if (!h_dentry)
7929+ continue;
4a4d8108 7930+ h_file = au_hf_dir(file, bindex);
1facf9fc 7931+ if (h_file)
7932+ continue;
7933+
7934+ h_file = au_h_open(dentry, bindex, flags, file);
7935+ err = PTR_ERR(h_file);
7936+ if (IS_ERR(h_file))
7937+ goto out; /* close all? */
7938+ au_set_h_fptr(file, bindex, h_file);
7939+ }
7940+ au_update_figen(file);
7941+ /* todo: necessary? */
7942+ /* file->f_ra = h_file->f_ra; */
7943+ err = 0;
7944+
4f0767ce 7945+out:
1facf9fc 7946+ return err;
7947+}
7948+
7949+static int do_open_dir(struct file *file, int flags)
7950+{
7951+ int err;
7952+ aufs_bindex_t bindex, btail;
7953+ struct dentry *dentry, *h_dentry;
7954+ struct file *h_file;
7955+
1308ab2a 7956+ FiMustWriteLock(file);
7957+
1facf9fc 7958+ dentry = file->f_dentry;
027c5e7a
AM
7959+ err = au_alive_dir(dentry);
7960+ if (unlikely(err))
7961+ goto out;
7962+
1facf9fc 7963+ file->f_version = dentry->d_inode->i_version;
7964+ bindex = au_dbstart(dentry);
7965+ au_set_fbstart(file, bindex);
7966+ btail = au_dbtaildir(dentry);
4a4d8108 7967+ au_set_fbend_dir(file, btail);
1facf9fc 7968+ for (; !err && bindex <= btail; bindex++) {
7969+ h_dentry = au_h_dptr(dentry, bindex);
7970+ if (!h_dentry)
7971+ continue;
7972+
7973+ h_file = au_h_open(dentry, bindex, flags, file);
7974+ if (IS_ERR(h_file)) {
7975+ err = PTR_ERR(h_file);
7976+ break;
7977+ }
7978+ au_set_h_fptr(file, bindex, h_file);
7979+ }
7980+ au_update_figen(file);
7981+ /* todo: necessary? */
7982+ /* file->f_ra = h_file->f_ra; */
7983+ if (!err)
7984+ return 0; /* success */
7985+
7986+ /* close all */
7987+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
7988+ au_set_h_fptr(file, bindex, NULL);
7989+ au_set_fbstart(file, -1);
4a4d8108
AM
7990+ au_set_fbend_dir(file, -1);
7991+
027c5e7a 7992+out:
1facf9fc 7993+ return err;
7994+}
7995+
7996+static int aufs_open_dir(struct inode *inode __maybe_unused,
7997+ struct file *file)
7998+{
4a4d8108
AM
7999+ int err;
8000+ struct super_block *sb;
8001+ struct au_fidir *fidir;
8002+
8003+ err = -ENOMEM;
8004+ sb = file->f_dentry->d_sb;
8005+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 8006+ fidir = au_fidir_alloc(sb);
4a4d8108
AM
8007+ if (fidir) {
8008+ err = au_do_open(file, do_open_dir, fidir);
8009+ if (unlikely(err))
8010+ kfree(fidir);
8011+ }
8012+ si_read_unlock(sb);
8013+ return err;
1facf9fc 8014+}
8015+
8016+static int aufs_release_dir(struct inode *inode __maybe_unused,
8017+ struct file *file)
8018+{
8019+ struct au_vdir *vdir_cache;
4a4d8108
AM
8020+ struct au_finfo *finfo;
8021+ struct au_fidir *fidir;
8022+ aufs_bindex_t bindex, bend;
1facf9fc 8023+
4a4d8108
AM
8024+ finfo = au_fi(file);
8025+ fidir = finfo->fi_hdir;
8026+ if (fidir) {
0c5527e5
AM
8027+ /* remove me from sb->s_files */
8028+ file_sb_list_del(file);
8029+
4a4d8108
AM
8030+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
8031+ if (vdir_cache)
8032+ au_vdir_free(vdir_cache);
8033+
8034+ bindex = finfo->fi_btop;
8035+ if (bindex >= 0) {
8036+ /*
8037+ * calls fput() instead of filp_close(),
8038+ * since no dnotify or lock for the lower file.
8039+ */
8040+ bend = fidir->fd_bbot;
8041+ for (; bindex <= bend; bindex++)
8042+ au_set_h_fptr(file, bindex, NULL);
8043+ }
8044+ kfree(fidir);
8045+ finfo->fi_hdir = NULL;
1facf9fc 8046+ }
1facf9fc 8047+ au_finfo_fin(file);
1facf9fc 8048+ return 0;
8049+}
8050+
8051+/* ---------------------------------------------------------------------- */
8052+
4a4d8108
AM
8053+static int au_do_flush_dir(struct file *file, fl_owner_t id)
8054+{
8055+ int err;
8056+ aufs_bindex_t bindex, bend;
8057+ struct file *h_file;
8058+
8059+ err = 0;
8060+ bend = au_fbend_dir(file);
8061+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8062+ h_file = au_hf_dir(file, bindex);
8063+ if (h_file)
8064+ err = vfsub_flush(h_file, id);
8065+ }
8066+ return err;
8067+}
8068+
8069+static int aufs_flush_dir(struct file *file, fl_owner_t id)
8070+{
8071+ return au_do_flush(file, id, au_do_flush_dir);
8072+}
8073+
8074+/* ---------------------------------------------------------------------- */
8075+
1facf9fc 8076+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
8077+{
8078+ int err;
8079+ aufs_bindex_t bend, bindex;
8080+ struct inode *inode;
8081+ struct super_block *sb;
8082+
8083+ err = 0;
8084+ sb = dentry->d_sb;
8085+ inode = dentry->d_inode;
8086+ IMustLock(inode);
8087+ bend = au_dbend(dentry);
8088+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
8089+ struct path h_path;
1facf9fc 8090+
8091+ if (au_test_ro(sb, bindex, inode))
8092+ continue;
8093+ h_path.dentry = au_h_dptr(dentry, bindex);
8094+ if (!h_path.dentry)
8095+ continue;
1facf9fc 8096+
1facf9fc 8097+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 8098+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 8099+ }
8100+
8101+ return err;
8102+}
8103+
8104+static int au_do_fsync_dir(struct file *file, int datasync)
8105+{
8106+ int err;
8107+ aufs_bindex_t bend, bindex;
8108+ struct file *h_file;
8109+ struct super_block *sb;
8110+ struct inode *inode;
1facf9fc 8111+
8112+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8113+ if (unlikely(err))
8114+ goto out;
8115+
8116+ sb = file->f_dentry->d_sb;
8117+ inode = file->f_dentry->d_inode;
4a4d8108 8118+ bend = au_fbend_dir(file);
1facf9fc 8119+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 8120+ h_file = au_hf_dir(file, bindex);
1facf9fc 8121+ if (!h_file || au_test_ro(sb, bindex, inode))
8122+ continue;
8123+
53392da6 8124+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 8125+ }
8126+
4f0767ce 8127+out:
1facf9fc 8128+ return err;
8129+}
8130+
8131+/*
8132+ * @file may be NULL
8133+ */
b752ccd1 8134+static int aufs_fsync_dir(struct file *file, int datasync)
1facf9fc 8135+{
8136+ int err;
b752ccd1 8137+ struct dentry *dentry;
1facf9fc 8138+ struct super_block *sb;
8139+
b752ccd1 8140+ dentry = file->f_dentry;
1facf9fc 8141+ IMustLock(dentry->d_inode);
8142+
8143+ err = 0;
8144+ sb = dentry->d_sb;
8145+ si_noflush_read_lock(sb);
8146+ if (file)
8147+ err = au_do_fsync_dir(file, datasync);
8148+ else {
8149+ di_write_lock_child(dentry);
8150+ err = au_do_fsync_dir_no_file(dentry, datasync);
8151+ }
8152+ au_cpup_attr_timesizes(dentry->d_inode);
8153+ di_write_unlock(dentry);
8154+ if (file)
8155+ fi_write_unlock(file);
8156+
8157+ si_read_unlock(sb);
8158+ return err;
8159+}
8160+
8161+/* ---------------------------------------------------------------------- */
8162+
8163+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
8164+{
8165+ int err;
8166+ struct dentry *dentry;
8167+ struct inode *inode;
8168+ struct super_block *sb;
8169+
8170+ dentry = file->f_dentry;
8171+ inode = dentry->d_inode;
8172+ IMustLock(inode);
8173+
8174+ sb = dentry->d_sb;
8175+ si_read_lock(sb, AuLock_FLUSH);
8176+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
8177+ if (unlikely(err))
8178+ goto out;
027c5e7a
AM
8179+ err = au_alive_dir(dentry);
8180+ if (!err)
8181+ err = au_vdir_init(file);
1facf9fc 8182+ di_downgrade_lock(dentry, AuLock_IR);
8183+ if (unlikely(err))
8184+ goto out_unlock;
8185+
b752ccd1 8186+ if (!au_test_nfsd()) {
1facf9fc 8187+ err = au_vdir_fill_de(file, dirent, filldir);
8188+ fsstack_copy_attr_atime(inode,
8189+ au_h_iptr(inode, au_ibstart(inode)));
8190+ } else {
8191+ /*
8192+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
8193+ * encode_fh() and others.
8194+ */
8195+ struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
8196+
8197+ di_read_unlock(dentry, AuLock_IR);
8198+ si_read_unlock(sb);
1facf9fc 8199+ err = au_vdir_fill_de(file, dirent, filldir);
1facf9fc 8200+ fsstack_copy_attr_atime(inode, h_inode);
8201+ fi_write_unlock(file);
8202+
8203+ AuTraceErr(err);
8204+ return err;
8205+ }
8206+
4f0767ce 8207+out_unlock:
1facf9fc 8208+ di_read_unlock(dentry, AuLock_IR);
8209+ fi_write_unlock(file);
4f0767ce 8210+out:
1facf9fc 8211+ si_read_unlock(sb);
8212+ return err;
8213+}
8214+
8215+/* ---------------------------------------------------------------------- */
8216+
/*
 * Flag bits for struct test_empty_arg.flags and their accessors.
 * WHONLY: set for every branch except the first one tested (see
 *	   au_test_empty_lower()); test_empty_cb() then reports -ENOTEMPTY
 *	   for a non-whiteout name that is not in the whiteout list.
 * CALLED: set by the readdir callback each time it is invoked.
 * SHWH:   mirrors the SHWH mount option; forced to 0 without
 *	   CONFIG_AUFS_SHWH.
 */
#define AuTestEmpty_WHONLY	1
#define AuTestEmpty_CALLED	(1 << 1)
#define AuTestEmpty_SHWH	(1 << 2)
#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
	do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
	do { (flags) &= ~AuTestEmpty_##name; } while (0)

#ifndef CONFIG_AUFS_SHWH
#undef AuTestEmpty_SHWH
#define AuTestEmpty_SHWH	0
#endif
8230+
1facf9fc 8231+struct test_empty_arg {
1308ab2a 8232+ struct au_nhash *whlist;
1facf9fc 8233+ unsigned int flags;
8234+ int err;
8235+ aufs_bindex_t bindex;
8236+};
8237+
8238+static int test_empty_cb(void *__arg, const char *__name, int namelen,
dece6358
AM
8239+ loff_t offset __maybe_unused, u64 ino,
8240+ unsigned int d_type)
1facf9fc 8241+{
8242+ struct test_empty_arg *arg = __arg;
8243+ char *name = (void *)__name;
8244+
8245+ arg->err = 0;
8246+ au_fset_testempty(arg->flags, CALLED);
8247+ /* smp_mb(); */
8248+ if (name[0] == '.'
8249+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
8250+ goto out; /* success */
8251+
8252+ if (namelen <= AUFS_WH_PFX_LEN
8253+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
8254+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 8255+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8256+ arg->err = -ENOTEMPTY;
8257+ goto out;
8258+ }
8259+
8260+ name += AUFS_WH_PFX_LEN;
8261+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 8262+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 8263+ arg->err = au_nhash_append_wh
1308ab2a 8264+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 8265+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 8266+
4f0767ce 8267+out:
1facf9fc 8268+ /* smp_mb(); */
8269+ AuTraceErr(arg->err);
8270+ return arg->err;
8271+}
8272+
8273+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8274+{
8275+ int err;
8276+ struct file *h_file;
8277+
8278+ h_file = au_h_open(dentry, arg->bindex,
8279+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
8280+ /*file*/NULL);
8281+ err = PTR_ERR(h_file);
8282+ if (IS_ERR(h_file))
8283+ goto out;
8284+
8285+ err = 0;
8286+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
8287+ && !h_file->f_dentry->d_inode->i_nlink)
8288+ goto out_put;
8289+
8290+ do {
8291+ arg->err = 0;
8292+ au_fclr_testempty(arg->flags, CALLED);
8293+ /* smp_mb(); */
8294+ err = vfsub_readdir(h_file, test_empty_cb, arg);
8295+ if (err >= 0)
8296+ err = arg->err;
8297+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
8298+
4f0767ce 8299+out_put:
1facf9fc 8300+ fput(h_file);
8301+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 8302+out:
1facf9fc 8303+ return err;
8304+}
8305+
/* argument bundle for running do_test_empty() through au_wkq_wait() */
struct do_test_empty_args {
	int *errp;			/* where the result is stored */
	struct dentry *dentry;
	struct test_empty_arg *arg;
};

/* workqueue entry point: unpack @args and store the result in *errp */
static void call_do_test_empty(void *args)
{
	struct do_test_empty_args *bundle = args;
	int err;

	err = do_test_empty(bundle->dentry, bundle->arg);
	*bundle->errp = err;
}
8317+
8318+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
8319+{
8320+ int err, wkq_err;
8321+ struct dentry *h_dentry;
8322+ struct inode *h_inode;
8323+
8324+ h_dentry = au_h_dptr(dentry, arg->bindex);
8325+ h_inode = h_dentry->d_inode;
53392da6 8326+ /* todo: i_mode changes anytime? */
1facf9fc 8327+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
8328+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
8329+ mutex_unlock(&h_inode->i_mutex);
8330+ if (!err)
8331+ err = do_test_empty(dentry, arg);
8332+ else {
8333+ struct do_test_empty_args args = {
8334+ .errp = &err,
8335+ .dentry = dentry,
8336+ .arg = arg
8337+ };
8338+ unsigned int flags = arg->flags;
8339+
8340+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
8341+ if (unlikely(wkq_err))
8342+ err = wkq_err;
8343+ arg->flags = flags;
8344+ }
8345+
8346+ return err;
8347+}
8348+
8349+int au_test_empty_lower(struct dentry *dentry)
8350+{
8351+ int err;
1308ab2a 8352+ unsigned int rdhash;
1facf9fc 8353+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 8354+ struct au_nhash whlist;
1facf9fc 8355+ struct test_empty_arg arg;
1facf9fc 8356+
dece6358
AM
8357+ SiMustAnyLock(dentry->d_sb);
8358+
1308ab2a 8359+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
8360+ if (!rdhash)
8361+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
8362+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 8363+ if (unlikely(err))
1facf9fc 8364+ goto out;
8365+
1facf9fc 8366+ arg.flags = 0;
1308ab2a 8367+ arg.whlist = &whlist;
8368+ bstart = au_dbstart(dentry);
dece6358
AM
8369+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8370+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8371+ arg.bindex = bstart;
8372+ err = do_test_empty(dentry, &arg);
8373+ if (unlikely(err))
8374+ goto out_whlist;
8375+
8376+ au_fset_testempty(arg.flags, WHONLY);
8377+ btail = au_dbtaildir(dentry);
8378+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
8379+ struct dentry *h_dentry;
8380+
8381+ h_dentry = au_h_dptr(dentry, bindex);
8382+ if (h_dentry && h_dentry->d_inode) {
8383+ arg.bindex = bindex;
8384+ err = do_test_empty(dentry, &arg);
8385+ }
8386+ }
8387+
4f0767ce 8388+out_whlist:
1308ab2a 8389+ au_nhash_wh_free(&whlist);
4f0767ce 8390+out:
1facf9fc 8391+ return err;
8392+}
8393+
8394+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
8395+{
8396+ int err;
8397+ struct test_empty_arg arg;
8398+ aufs_bindex_t bindex, btail;
8399+
8400+ err = 0;
1308ab2a 8401+ arg.whlist = whlist;
1facf9fc 8402+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
8403+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
8404+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 8405+ btail = au_dbtaildir(dentry);
8406+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
8407+ struct dentry *h_dentry;
8408+
8409+ h_dentry = au_h_dptr(dentry, bindex);
8410+ if (h_dentry && h_dentry->d_inode) {
8411+ arg.bindex = bindex;
8412+ err = sio_test_empty(dentry, &arg);
8413+ }
8414+ }
8415+
8416+ return err;
8417+}
8418+
8419+/* ---------------------------------------------------------------------- */
8420+
8421+const struct file_operations aufs_dir_fop = {
4a4d8108 8422+ .owner = THIS_MODULE,
027c5e7a 8423+ .llseek = default_llseek,
1facf9fc 8424+ .read = generic_read_dir,
8425+ .readdir = aufs_readdir,
8426+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
8427+#ifdef CONFIG_COMPAT
8428+ .compat_ioctl = aufs_compat_ioctl_dir,
8429+#endif
1facf9fc 8430+ .open = aufs_open_dir,
8431+ .release = aufs_release_dir,
4a4d8108 8432+ .flush = aufs_flush_dir,
1facf9fc 8433+ .fsync = aufs_fsync_dir
8434+};
7f207e10
AM
8435diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
8436--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8437+++ linux/fs/aufs/dir.h 2011-08-24 13:30:24.731313534 +0200
b752ccd1 8438@@ -0,0 +1,138 @@
1facf9fc 8439+/*
027c5e7a 8440+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 8441+ *
8442+ * This program, aufs is free software; you can redistribute it and/or modify
8443+ * it under the terms of the GNU General Public License as published by
8444+ * the Free Software Foundation; either version 2 of the License, or
8445+ * (at your option) any later version.
dece6358
AM
8446+ *
8447+ * This program is distributed in the hope that it will be useful,
8448+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8449+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8450+ * GNU General Public License for more details.
8451+ *
8452+ * You should have received a copy of the GNU General Public License
8453+ * along with this program; if not, write to the Free Software
8454+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8455+ */
8456+
8457+/*
8458+ * directory operations
8459+ */
8460+
8461+#ifndef __AUFS_DIR_H__
8462+#define __AUFS_DIR_H__
8463+
8464+#ifdef __KERNEL__
8465+
8466+#include <linux/fs.h>
8467+#include <linux/aufs_type.h>
8468+
8469+/* ---------------------------------------------------------------------- */
8470+
8471+/* need to be faster and smaller */
8472+
8473+struct au_nhash {
dece6358
AM
8474+ unsigned int nh_num;
8475+ struct hlist_head *nh_head;
1facf9fc 8476+};
8477+
8478+struct au_vdir_destr {
8479+ unsigned char len;
8480+ unsigned char name[0];
8481+} __packed;
8482+
8483+struct au_vdir_dehstr {
8484+ struct hlist_node hash;
8485+ struct au_vdir_destr *str;
4a4d8108 8486+} ____cacheline_aligned_in_smp;
1facf9fc 8487+
8488+struct au_vdir_de {
8489+ ino_t de_ino;
8490+ unsigned char de_type;
8491+ /* caution: packed */
8492+ struct au_vdir_destr de_str;
8493+} __packed;
8494+
8495+struct au_vdir_wh {
8496+ struct hlist_node wh_hash;
dece6358
AM
8497+#ifdef CONFIG_AUFS_SHWH
8498+ ino_t wh_ino;
1facf9fc 8499+ aufs_bindex_t wh_bindex;
dece6358
AM
8500+ unsigned char wh_type;
8501+#else
8502+ aufs_bindex_t wh_bindex;
8503+#endif
8504+ /* caution: packed */
1facf9fc 8505+ struct au_vdir_destr wh_str;
8506+} __packed;
8507+
8508+union au_vdir_deblk_p {
8509+ unsigned char *deblk;
8510+ struct au_vdir_de *de;
8511+};
8512+
8513+struct au_vdir {
8514+ unsigned char **vd_deblk;
8515+ unsigned long vd_nblk;
1facf9fc 8516+ struct {
8517+ unsigned long ul;
8518+ union au_vdir_deblk_p p;
8519+ } vd_last;
8520+
8521+ unsigned long vd_version;
dece6358 8522+ unsigned int vd_deblk_sz;
1facf9fc 8523+ unsigned long vd_jiffy;
4a4d8108 8524+} ____cacheline_aligned_in_smp;
1facf9fc 8525+
8526+/* ---------------------------------------------------------------------- */
8527+
8528+/* dir.c */
8529+extern const struct file_operations aufs_dir_fop;
8530+void au_add_nlink(struct inode *dir, struct inode *h_dir);
8531+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 8532+loff_t au_dir_size(struct file *file, struct dentry *dentry);
1facf9fc 8533+int au_test_empty_lower(struct dentry *dentry);
8534+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
8535+
8536+/* vdir.c */
1308ab2a 8537+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
8538+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
8539+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 8540+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
8541+ int limit);
dece6358
AM
8542+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
8543+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
8544+ unsigned int d_type, aufs_bindex_t bindex,
8545+ unsigned char shwh);
1facf9fc 8546+void au_vdir_free(struct au_vdir *vdir);
8547+int au_vdir_init(struct file *file);
8548+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
8549+
8550+/* ioctl.c */
8551+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
8552+
1308ab2a 8553+#ifdef CONFIG_AUFS_RDU
8554+/* rdu.c */
8555+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
8556+#ifdef CONFIG_COMPAT
8557+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8558+ unsigned long arg);
8559+#endif
1308ab2a 8560+#else
8561+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
8562+ unsigned long arg)
8563+{
8564+ return -EINVAL;
8565+}
b752ccd1
AM
8566+#ifdef CONFIG_COMPAT
8567+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
8568+ unsigned long arg)
8569+{
8570+ return -EINVAL;
8571+}
8572+#endif
1308ab2a 8573+#endif
8574+
1facf9fc 8575+#endif /* __KERNEL__ */
8576+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
8577diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
8578--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 8579+++ linux/fs/aufs/dynop.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8580@@ -0,0 +1,377 @@
1facf9fc 8581+/*
027c5e7a 8582+ * Copyright (C) 2010-2011 Junjiro R. Okajima
1facf9fc 8583+ *
8584+ * This program, aufs is free software; you can redistribute it and/or modify
8585+ * it under the terms of the GNU General Public License as published by
8586+ * the Free Software Foundation; either version 2 of the License, or
8587+ * (at your option) any later version.
dece6358
AM
8588+ *
8589+ * This program is distributed in the hope that it will be useful,
8590+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8591+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8592+ * GNU General Public License for more details.
8593+ *
8594+ * You should have received a copy of the GNU General Public License
8595+ * along with this program; if not, write to the Free Software
8596+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8597+ */
8598+
8599+/*
4a4d8108 8600+ * dynamically customizable operations for regular files
1facf9fc 8601+ */
8602+
1facf9fc 8603+#include "aufs.h"
8604+
4a4d8108 8605+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 8606+
4a4d8108
AM
8607+/*
8608+ * How large will these lists be?
8609+ * Usually just a few elements, 20-30 at most for each, I guess.
8610+ */
8611+static struct au_splhead dynop[AuDyLast];
8612+
8613+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 8614+{
4a4d8108
AM
8615+ struct au_dykey *key, *tmp;
8616+ struct list_head *head;
1facf9fc 8617+
4a4d8108
AM
8618+ key = NULL;
8619+ head = &spl->head;
8620+ rcu_read_lock();
8621+ list_for_each_entry_rcu(tmp, head, dk_list)
8622+ if (tmp->dk_op.dy_hop == h_op) {
8623+ key = tmp;
8624+ kref_get(&key->dk_kref);
8625+ break;
8626+ }
8627+ rcu_read_unlock();
8628+
8629+ return key;
1facf9fc 8630+}
8631+
4a4d8108 8632+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 8633+{
4a4d8108
AM
8634+ struct au_dykey **k, *found;
8635+ const void *h_op = key->dk_op.dy_hop;
8636+ int i;
1facf9fc 8637+
4a4d8108
AM
8638+ found = NULL;
8639+ k = br->br_dykey;
8640+ for (i = 0; i < AuBrDynOp; i++)
8641+ if (k[i]) {
8642+ if (k[i]->dk_op.dy_hop == h_op) {
8643+ found = k[i];
8644+ break;
8645+ }
8646+ } else
8647+ break;
8648+ if (!found) {
8649+ spin_lock(&br->br_dykey_lock);
8650+ for (; i < AuBrDynOp; i++)
8651+ if (k[i]) {
8652+ if (k[i]->dk_op.dy_hop == h_op) {
8653+ found = k[i];
8654+ break;
8655+ }
8656+ } else {
8657+ k[i] = key;
8658+ break;
8659+ }
8660+ spin_unlock(&br->br_dykey_lock);
8661+ BUG_ON(i == AuBrDynOp); /* expand the array */
8662+ }
8663+
8664+ return found;
1facf9fc 8665+}
8666+
4a4d8108
AM
8667+/* kref_get() if @key is already added */
8668+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
8669+{
8670+ struct au_dykey *tmp, *found;
8671+ struct list_head *head;
8672+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 8673+
4a4d8108
AM
8674+ found = NULL;
8675+ head = &spl->head;
8676+ spin_lock(&spl->spin);
8677+ list_for_each_entry(tmp, head, dk_list)
8678+ if (tmp->dk_op.dy_hop == h_op) {
8679+ kref_get(&tmp->dk_kref);
8680+ found = tmp;
8681+ break;
8682+ }
8683+ if (!found)
8684+ list_add_rcu(&key->dk_list, head);
8685+ spin_unlock(&spl->spin);
1facf9fc 8686+
4a4d8108
AM
8687+ if (!found)
8688+ DyPrSym(key);
8689+ return found;
8690+}
8691+
8692+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 8693+{
4a4d8108
AM
8694+ struct au_dykey *key;
8695+
8696+ key = container_of(rcu, struct au_dykey, dk_rcu);
8697+ DyPrSym(key);
8698+ kfree(key);
1facf9fc 8699+}
8700+
4a4d8108
AM
8701+static void dy_free(struct kref *kref)
8702+{
8703+ struct au_dykey *key;
8704+ struct au_splhead *spl;
1facf9fc 8705+
4a4d8108
AM
8706+ key = container_of(kref, struct au_dykey, dk_kref);
8707+ spl = dynop + key->dk_op.dy_type;
8708+ au_spl_del_rcu(&key->dk_list, spl);
8709+ call_rcu(&key->dk_rcu, dy_free_rcu);
8710+}
8711+
8712+void au_dy_put(struct au_dykey *key)
1facf9fc 8713+{
4a4d8108
AM
8714+ kref_put(&key->dk_kref, dy_free);
8715+}
1facf9fc 8716+
4a4d8108
AM
8717+/* ---------------------------------------------------------------------- */
8718+
8719+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
8720+
8721+#ifdef CONFIG_AUFS_DEBUG
8722+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 8723+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
8724+#else
8725+#define DyDbgDeclare(cnt) do {} while (0)
8726+#define DyDbgInc(cnt) do {} while (0)
8727+#endif
8728+
8729+#define DySet(func, dst, src, h_op, h_sb) do { \
8730+ DyDbgInc(cnt); \
8731+ if (h_op->func) { \
8732+ if (src.func) \
8733+ dst.func = src.func; \
8734+ else \
8735+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
8736+ } \
8737+} while (0)
8738+
8739+#define DySetForce(func, dst, src) do { \
8740+ AuDebugOn(!src.func); \
8741+ DyDbgInc(cnt); \
8742+ dst.func = src.func; \
8743+} while (0)
8744+
8745+#define DySetAop(func) \
8746+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
8747+#define DySetAopForce(func) \
8748+ DySetForce(func, dyaop->da_op, aufs_aop)
8749+
8750+static void dy_aop(struct au_dykey *key, const void *h_op,
8751+ struct super_block *h_sb __maybe_unused)
8752+{
8753+ struct au_dyaop *dyaop = (void *)key;
8754+ const struct address_space_operations *h_aop = h_op;
8755+ DyDbgDeclare(cnt);
8756+
8757+ AuDbg("%s\n", au_sbtype(h_sb));
8758+
8759+ DySetAop(writepage);
8760+ DySetAopForce(readpage); /* force */
4a4d8108
AM
8761+ DySetAop(writepages);
8762+ DySetAop(set_page_dirty);
8763+ DySetAop(readpages);
8764+ DySetAop(write_begin);
8765+ DySetAop(write_end);
8766+ DySetAop(bmap);
8767+ DySetAop(invalidatepage);
8768+ DySetAop(releasepage);
027c5e7a 8769+ DySetAop(freepage);
4a4d8108
AM
8770+ /* these two will be changed according to an aufs mount option */
8771+ DySetAop(direct_IO);
8772+ DySetAop(get_xip_mem);
8773+ DySetAop(migratepage);
8774+ DySetAop(launder_page);
8775+ DySetAop(is_partially_uptodate);
8776+ DySetAop(error_remove_page);
8777+
8778+ DyDbgSize(cnt, *h_aop);
8779+ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
8780+}
8781+
4a4d8108
AM
8782+/* ---------------------------------------------------------------------- */
8783+
8784+static void dy_bug(struct kref *kref)
8785+{
8786+ BUG();
8787+}
8788+
8789+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
8790+{
8791+ struct au_dykey *key, *old;
8792+ struct au_splhead *spl;
b752ccd1 8793+ struct op {
4a4d8108 8794+ unsigned int sz;
b752ccd1
AM
8795+ void (*set)(struct au_dykey *key, const void *h_op,
8796+ struct super_block *h_sb __maybe_unused);
8797+ };
8798+ static const struct op a[] = {
4a4d8108
AM
8799+ [AuDy_AOP] = {
8800+ .sz = sizeof(struct au_dyaop),
b752ccd1 8801+ .set = dy_aop
4a4d8108 8802+ }
b752ccd1
AM
8803+ };
8804+ const struct op *p;
4a4d8108
AM
8805+
8806+ spl = dynop + op->dy_type;
8807+ key = dy_gfind_get(spl, op->dy_hop);
8808+ if (key)
8809+ goto out_add; /* success */
8810+
8811+ p = a + op->dy_type;
8812+ key = kzalloc(p->sz, GFP_NOFS);
8813+ if (unlikely(!key)) {
8814+ key = ERR_PTR(-ENOMEM);
8815+ goto out;
8816+ }
8817+
8818+ key->dk_op.dy_hop = op->dy_hop;
8819+ kref_init(&key->dk_kref);
b752ccd1 8820+ p->set(key, op->dy_hop, br->br_mnt->mnt_sb);
4a4d8108
AM
8821+ old = dy_gadd(spl, key);
8822+ if (old) {
8823+ kfree(key);
8824+ key = old;
8825+ }
8826+
8827+out_add:
8828+ old = dy_bradd(br, key);
8829+ if (old)
8830+ /* its ref-count should never be zero here */
8831+ kref_put(&key->dk_kref, dy_bug);
8832+out:
8833+ return key;
8834+}
8835+
8836+/* ---------------------------------------------------------------------- */
8837+/*
8838+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
8839+ * This behaviour is neccessary to return an error from open(O_DIRECT) instead
8840+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
8841+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
8842+ * See the aufs manual for details.
8843+ *
8844+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
8845+ * performance of fadvise() and madvise() may be affected.
8846+ */
8847+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
8848+{
8849+ if (!do_dx) {
8850+ dyaop->da_op.direct_IO = NULL;
8851+ dyaop->da_op.get_xip_mem = NULL;
8852+ } else {
8853+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
8854+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
8855+ if (!dyaop->da_get_xip_mem)
8856+ dyaop->da_op.get_xip_mem = NULL;
8857+ }
8858+}
8859+
8860+static struct au_dyaop *dy_aget(struct au_branch *br,
8861+ const struct address_space_operations *h_aop,
8862+ int do_dx)
8863+{
8864+ struct au_dyaop *dyaop;
8865+ struct au_dynop op;
8866+
8867+ op.dy_type = AuDy_AOP;
8868+ op.dy_haop = h_aop;
8869+ dyaop = (void *)dy_get(&op, br);
8870+ if (IS_ERR(dyaop))
8871+ goto out;
8872+ dy_adx(dyaop, do_dx);
8873+
8874+out:
8875+ return dyaop;
8876+}
8877+
8878+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
8879+ struct inode *h_inode)
8880+{
8881+ int err, do_dx;
8882+ struct super_block *sb;
8883+ struct au_branch *br;
8884+ struct au_dyaop *dyaop;
8885+
8886+ AuDebugOn(!S_ISREG(h_inode->i_mode));
8887+ IiMustWriteLock(inode);
8888+
8889+ sb = inode->i_sb;
8890+ br = au_sbr(sb, bindex);
8891+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
8892+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
8893+ err = PTR_ERR(dyaop);
8894+ if (IS_ERR(dyaop))
8895+ /* unnecessary to call dy_fput() */
8896+ goto out;
8897+
8898+ err = 0;
8899+ inode->i_mapping->a_ops = &dyaop->da_op;
8900+
8901+out:
8902+ return err;
8903+}
8904+
b752ccd1
AM
8905+/*
8906+ * Is it safe to replace a_ops while the inode/file is in operation?
8907+ * Yes, I hope so.
8908+ */
8909+int au_dy_irefresh(struct inode *inode)
8910+{
8911+ int err;
8912+ aufs_bindex_t bstart;
8913+ struct inode *h_inode;
8914+
8915+ err = 0;
8916+ if (S_ISREG(inode->i_mode)) {
8917+ bstart = au_ibstart(inode);
8918+ h_inode = au_h_iptr(inode, bstart);
8919+ err = au_dy_iaop(inode, bstart, h_inode);
8920+ }
8921+ return err;
8922+}
8923+
4a4d8108
AM
8924+void au_dy_arefresh(int do_dx)
8925+{
8926+ struct au_splhead *spl;
8927+ struct list_head *head;
8928+ struct au_dykey *key;
8929+
8930+ spl = dynop + AuDy_AOP;
8931+ head = &spl->head;
8932+ spin_lock(&spl->spin);
8933+ list_for_each_entry(key, head, dk_list)
8934+ dy_adx((void *)key, do_dx);
8935+ spin_unlock(&spl->spin);
8936+}
8937+
4a4d8108
AM
8938+/* ---------------------------------------------------------------------- */
8939+
8940+void __init au_dy_init(void)
8941+{
8942+ int i;
8943+
8944+ /* make sure that 'struct au_dykey *' can be any type */
8945+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
8946+
8947+ for (i = 0; i < AuDyLast; i++)
8948+ au_spl_init(dynop + i);
8949+}
8950+
8951+void au_dy_fin(void)
8952+{
8953+ int i;
8954+
8955+ for (i = 0; i < AuDyLast; i++)
8956+ WARN_ON(!list_empty(&dynop[i].head));
8957+}
7f207e10
AM
8958diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
8959--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 8960+++ linux/fs/aufs/dynop.h 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 8961@@ -0,0 +1,80 @@
4a4d8108 8962+/*
027c5e7a 8963+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
8964+ *
8965+ * This program, aufs is free software; you can redistribute it and/or modify
8966+ * it under the terms of the GNU General Public License as published by
8967+ * the Free Software Foundation; either version 2 of the License, or
8968+ * (at your option) any later version.
8969+ *
8970+ * This program is distributed in the hope that it will be useful,
8971+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8972+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8973+ * GNU General Public License for more details.
8974+ *
8975+ * You should have received a copy of the GNU General Public License
8976+ * along with this program; if not, write to the Free Software
8977+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8978+ */
8979+
8980+/*
8981+ * dynamically customizable operations (for regular files only)
8982+ */
8983+
8984+#ifndef __AUFS_DYNOP_H__
8985+#define __AUFS_DYNOP_H__
8986+
8987+#ifdef __KERNEL__
8988+
8989+#include <linux/fs.h>
8990+#include <linux/mm.h>
8991+#include <linux/rcupdate.h>
8992+#include <linux/aufs_type.h>
8993+#include "inode.h"
8994+
2cbb1c4b 8995+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
8996+
8997+struct au_dynop {
8998+ int dy_type;
8999+ union {
9000+ const void *dy_hop;
9001+ const struct address_space_operations *dy_haop;
4a4d8108
AM
9002+ };
9003+};
9004+
9005+struct au_dykey {
9006+ union {
9007+ struct list_head dk_list;
9008+ struct rcu_head dk_rcu;
9009+ };
9010+ struct au_dynop dk_op;
9011+
9012+ /*
9013+ * while this key is in the branch local array, a kref is held. when the
9014+ * branch is removed, the kref is put.
9015+ */
9016+ struct kref dk_kref;
9017+};
9018+
9019+/* stop unioning since their sizes are very different from each other */
9020+struct au_dyaop {
9021+ struct au_dykey da_key;
9022+ struct address_space_operations da_op; /* not const */
9023+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
9024+ void **, unsigned long *);
9025+};
9026+
4a4d8108
AM
9027+/* ---------------------------------------------------------------------- */
9028+
9029+/* dynop.c */
9030+struct au_branch;
9031+void au_dy_put(struct au_dykey *key);
9032+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
9033+ struct inode *h_inode);
b752ccd1 9034+int au_dy_irefresh(struct inode *inode);
4a4d8108 9035+void au_dy_arefresh(int do_dio);
4a4d8108
AM
9036+
9037+void __init au_dy_init(void);
9038+void au_dy_fin(void);
9039+
4a4d8108
AM
9040+#endif /* __KERNEL__ */
9041+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
9042diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
9043--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9044+++ linux/fs/aufs/export.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9045@@ -0,0 +1,805 @@
4a4d8108 9046+/*
027c5e7a 9047+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
9048+ *
9049+ * This program, aufs is free software; you can redistribute it and/or modify
9050+ * it under the terms of the GNU General Public License as published by
9051+ * the Free Software Foundation; either version 2 of the License, or
9052+ * (at your option) any later version.
9053+ *
9054+ * This program is distributed in the hope that it will be useful,
9055+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9056+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9057+ * GNU General Public License for more details.
9058+ *
9059+ * You should have received a copy of the GNU General Public License
9060+ * along with this program; if not, write to the Free Software
9061+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9062+ */
9063+
9064+/*
9065+ * export via nfs
9066+ */
9067+
9068+#include <linux/exportfs.h>
9069+#include <linux/file.h>
9070+#include <linux/mnt_namespace.h>
9071+#include <linux/namei.h>
9072+#include <linux/nsproxy.h>
9073+#include <linux/random.h>
9074+#include <linux/writeback.h>
9075+#include "aufs.h"
9076+
9077+union conv {
9078+#ifdef CONFIG_AUFS_INO_T_64
9079+ __u32 a[2];
9080+#else
9081+ __u32 a[1];
9082+#endif
9083+ ino_t ino;
9084+};
9085+
9086+static ino_t decode_ino(__u32 *a)
9087+{
9088+ union conv u;
9089+
9090+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
9091+ u.a[0] = a[0];
9092+#ifdef CONFIG_AUFS_INO_T_64
9093+ u.a[1] = a[1];
9094+#endif
9095+ return u.ino;
9096+}
9097+
9098+static void encode_ino(__u32 *a, ino_t ino)
9099+{
9100+ union conv u;
9101+
9102+ u.ino = ino;
9103+ a[0] = u.a[0];
9104+#ifdef CONFIG_AUFS_INO_T_64
9105+ a[1] = u.a[1];
9106+#endif
9107+}
9108+
9109+/* NFS file handle */
9110+enum {
9111+ Fh_br_id,
9112+ Fh_sigen,
9113+#ifdef CONFIG_AUFS_INO_T_64
9114+ /* support 64bit inode number */
9115+ Fh_ino1,
9116+ Fh_ino2,
9117+ Fh_dir_ino1,
9118+ Fh_dir_ino2,
9119+#else
9120+ Fh_ino1,
9121+ Fh_dir_ino1,
9122+#endif
9123+ Fh_igen,
9124+ Fh_h_type,
9125+ Fh_tail,
9126+
9127+ Fh_ino = Fh_ino1,
9128+ Fh_dir_ino = Fh_dir_ino1
9129+};
9130+
9131+static int au_test_anon(struct dentry *dentry)
9132+{
027c5e7a 9133+ /* note: read d_flags without d_lock */
4a4d8108
AM
9134+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
9135+}
9136+
9137+/* ---------------------------------------------------------------------- */
9138+/* inode generation external table */
9139+
b752ccd1 9140+void au_xigen_inc(struct inode *inode)
4a4d8108 9141+{
4a4d8108
AM
9142+ loff_t pos;
9143+ ssize_t sz;
9144+ __u32 igen;
9145+ struct super_block *sb;
9146+ struct au_sbinfo *sbinfo;
9147+
4a4d8108 9148+ sb = inode->i_sb;
b752ccd1 9149+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 9150+
b752ccd1 9151+ sbinfo = au_sbi(sb);
1facf9fc 9152+ pos = inode->i_ino;
9153+ pos *= sizeof(igen);
9154+ igen = inode->i_generation + 1;
1facf9fc 9155+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
9156+ sizeof(igen), &pos);
9157+ if (sz == sizeof(igen))
b752ccd1 9158+ return; /* success */
1facf9fc 9159+
b752ccd1 9160+ if (unlikely(sz >= 0))
1facf9fc 9161+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 9162+}
9163+
9164+int au_xigen_new(struct inode *inode)
9165+{
9166+ int err;
9167+ loff_t pos;
9168+ ssize_t sz;
9169+ struct super_block *sb;
9170+ struct au_sbinfo *sbinfo;
9171+ struct file *file;
9172+
9173+ err = 0;
9174+ /* todo: dirty, at mount time */
9175+ if (inode->i_ino == AUFS_ROOT_INO)
9176+ goto out;
9177+ sb = inode->i_sb;
dece6358 9178+ SiMustAnyLock(sb);
1facf9fc 9179+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9180+ goto out;
9181+
9182+ err = -EFBIG;
9183+ pos = inode->i_ino;
9184+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
9185+ AuIOErr1("too large i%lld\n", pos);
9186+ goto out;
9187+ }
9188+ pos *= sizeof(inode->i_generation);
9189+
9190+ err = 0;
9191+ sbinfo = au_sbi(sb);
9192+ file = sbinfo->si_xigen;
9193+ BUG_ON(!file);
9194+
9195+ if (i_size_read(file->f_dentry->d_inode)
9196+ < pos + sizeof(inode->i_generation)) {
9197+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
9198+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
9199+ sizeof(inode->i_generation), &pos);
9200+ } else
9201+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
9202+ sizeof(inode->i_generation), &pos);
9203+ if (sz == sizeof(inode->i_generation))
9204+ goto out; /* success */
9205+
9206+ err = sz;
9207+ if (unlikely(sz >= 0)) {
9208+ err = -EIO;
9209+ AuIOErr("xigen error (%zd)\n", sz);
9210+ }
9211+
4f0767ce 9212+out:
1facf9fc 9213+ return err;
9214+}
9215+
9216+int au_xigen_set(struct super_block *sb, struct file *base)
9217+{
9218+ int err;
9219+ struct au_sbinfo *sbinfo;
9220+ struct file *file;
9221+
dece6358
AM
9222+ SiMustWriteLock(sb);
9223+
1facf9fc 9224+ sbinfo = au_sbi(sb);
9225+ file = au_xino_create2(base, sbinfo->si_xigen);
9226+ err = PTR_ERR(file);
9227+ if (IS_ERR(file))
9228+ goto out;
9229+ err = 0;
9230+ if (sbinfo->si_xigen)
9231+ fput(sbinfo->si_xigen);
9232+ sbinfo->si_xigen = file;
9233+
4f0767ce 9234+out:
1facf9fc 9235+ return err;
9236+}
9237+
9238+void au_xigen_clr(struct super_block *sb)
9239+{
9240+ struct au_sbinfo *sbinfo;
9241+
dece6358
AM
9242+ SiMustWriteLock(sb);
9243+
1facf9fc 9244+ sbinfo = au_sbi(sb);
9245+ if (sbinfo->si_xigen) {
9246+ fput(sbinfo->si_xigen);
9247+ sbinfo->si_xigen = NULL;
9248+ }
9249+}
9250+
9251+/* ---------------------------------------------------------------------- */
9252+
9253+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
9254+ ino_t dir_ino)
9255+{
9256+ struct dentry *dentry, *d;
9257+ struct inode *inode;
9258+ unsigned int sigen;
9259+
9260+ dentry = NULL;
9261+ inode = ilookup(sb, ino);
9262+ if (!inode)
9263+ goto out;
9264+
9265+ dentry = ERR_PTR(-ESTALE);
9266+ sigen = au_sigen(sb);
9267+ if (unlikely(is_bad_inode(inode)
9268+ || IS_DEADDIR(inode)
9269+ || sigen != au_iigen(inode)))
9270+ goto out_iput;
9271+
9272+ dentry = NULL;
9273+ if (!dir_ino || S_ISDIR(inode->i_mode))
9274+ dentry = d_find_alias(inode);
9275+ else {
027c5e7a
AM
9276+ spin_lock(&inode->i_lock);
9277+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
9278+ spin_lock(&d->d_lock);
1facf9fc 9279+ if (!au_test_anon(d)
9280+ && d->d_parent->d_inode->i_ino == dir_ino) {
027c5e7a
AM
9281+ dentry = dget_dlock(d);
9282+ spin_unlock(&d->d_lock);
1facf9fc 9283+ break;
9284+ }
027c5e7a
AM
9285+ spin_unlock(&d->d_lock);
9286+ }
9287+ spin_unlock(&inode->i_lock);
1facf9fc 9288+ }
027c5e7a 9289+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 9290+ /* need to refresh */
1facf9fc 9291+ dput(dentry);
2cbb1c4b 9292+ dentry = NULL;
1facf9fc 9293+ }
9294+
4f0767ce 9295+out_iput:
1facf9fc 9296+ iput(inode);
4f0767ce 9297+out:
2cbb1c4b 9298+ AuTraceErrPtr(dentry);
1facf9fc 9299+ return dentry;
9300+}
9301+
9302+/* ---------------------------------------------------------------------- */
9303+
9304+/* todo: dirty? */
9305+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
9306+
9307+struct au_compare_mnt_args {
9308+ /* input */
9309+ struct super_block *sb;
9310+
9311+ /* output */
9312+ struct vfsmount *mnt;
9313+};
9314+
9315+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
9316+{
9317+ struct au_compare_mnt_args *a = arg;
9318+
9319+ if (mnt->mnt_sb != a->sb)
9320+ return 0;
9321+ a->mnt = mntget(mnt);
9322+ return 1;
9323+}
9324+
1facf9fc 9325+static struct vfsmount *au_mnt_get(struct super_block *sb)
9326+{
4a4d8108
AM
9327+ int err;
9328+ struct au_compare_mnt_args args = {
9329+ .sb = sb
9330+ };
1facf9fc 9331+ struct mnt_namespace *ns;
1facf9fc 9332+
0c5527e5 9333+ br_read_lock(vfsmount_lock);
1facf9fc 9334+ /* no get/put ?? */
9335+ AuDebugOn(!current->nsproxy);
9336+ ns = current->nsproxy->mnt_ns;
9337+ AuDebugOn(!ns);
4a4d8108 9338+ err = iterate_mounts(au_compare_mnt, &args, ns->root);
0c5527e5 9339+ br_read_unlock(vfsmount_lock);
4a4d8108
AM
9340+ AuDebugOn(!err);
9341+ AuDebugOn(!args.mnt);
9342+ return args.mnt;
1facf9fc 9343+}
9344+
9345+struct au_nfsd_si_lock {
4a4d8108 9346+ unsigned int sigen;
027c5e7a 9347+ aufs_bindex_t bindex, br_id;
1facf9fc 9348+ unsigned char force_lock;
9349+};
9350+
027c5e7a
AM
9351+static int si_nfsd_read_lock(struct super_block *sb,
9352+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9353+{
027c5e7a 9354+ int err;
1facf9fc 9355+ aufs_bindex_t bindex;
9356+
9357+ si_read_lock(sb, AuLock_FLUSH);
9358+
9359+ /* branch id may be wrapped around */
027c5e7a 9360+ err = 0;
1facf9fc 9361+ bindex = au_br_index(sb, nsi_lock->br_id);
9362+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
9363+ goto out; /* success */
9364+
027c5e7a
AM
9365+ err = -ESTALE;
9366+ bindex = -1;
1facf9fc 9367+ if (!nsi_lock->force_lock)
9368+ si_read_unlock(sb);
1facf9fc 9369+
4f0767ce 9370+out:
027c5e7a
AM
9371+ nsi_lock->bindex = bindex;
9372+ return err;
1facf9fc 9373+}
9374+
9375+struct find_name_by_ino {
9376+ int called, found;
9377+ ino_t ino;
9378+ char *name;
9379+ int namelen;
9380+};
9381+
9382+static int
9383+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
9384+ u64 ino, unsigned int d_type)
9385+{
9386+ struct find_name_by_ino *a = arg;
9387+
9388+ a->called++;
9389+ if (a->ino != ino)
9390+ return 0;
9391+
9392+ memcpy(a->name, name, namelen);
9393+ a->namelen = namelen;
9394+ a->found = 1;
9395+ return 1;
9396+}
9397+
9398+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
9399+ struct au_nfsd_si_lock *nsi_lock)
9400+{
9401+ struct dentry *dentry, *parent;
9402+ struct file *file;
9403+ struct inode *dir;
9404+ struct find_name_by_ino arg;
9405+ int err;
9406+
9407+ parent = path->dentry;
9408+ if (nsi_lock)
9409+ si_read_unlock(parent->d_sb);
4a4d8108 9410+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 9411+ dentry = (void *)file;
9412+ if (IS_ERR(file))
9413+ goto out;
9414+
9415+ dentry = ERR_PTR(-ENOMEM);
4a4d8108 9416+ arg.name = __getname_gfp(GFP_NOFS);
1facf9fc 9417+ if (unlikely(!arg.name))
9418+ goto out_file;
9419+ arg.ino = ino;
9420+ arg.found = 0;
9421+ do {
9422+ arg.called = 0;
9423+ /* smp_mb(); */
9424+ err = vfsub_readdir(file, find_name_by_ino, &arg);
9425+ } while (!err && !arg.found && arg.called);
9426+ dentry = ERR_PTR(err);
9427+ if (unlikely(err))
9428+ goto out_name;
9429+ dentry = ERR_PTR(-ENOENT);
9430+ if (!arg.found)
9431+ goto out_name;
9432+
9433+ /* do not call au_lkup_one() */
9434+ dir = parent->d_inode;
9435+ mutex_lock(&dir->i_mutex);
9436+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
9437+ mutex_unlock(&dir->i_mutex);
9438+ AuTraceErrPtr(dentry);
9439+ if (IS_ERR(dentry))
9440+ goto out_name;
9441+ AuDebugOn(au_test_anon(dentry));
9442+ if (unlikely(!dentry->d_inode)) {
9443+ dput(dentry);
9444+ dentry = ERR_PTR(-ENOENT);
9445+ }
9446+
4f0767ce 9447+out_name:
1facf9fc 9448+ __putname(arg.name);
4f0767ce 9449+out_file:
1facf9fc 9450+ fput(file);
4f0767ce 9451+out:
1facf9fc 9452+ if (unlikely(nsi_lock
9453+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
9454+ if (!IS_ERR(dentry)) {
9455+ dput(dentry);
9456+ dentry = ERR_PTR(-ESTALE);
9457+ }
9458+ AuTraceErrPtr(dentry);
9459+ return dentry;
9460+}
9461+
9462+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
9463+ ino_t dir_ino,
9464+ struct au_nfsd_si_lock *nsi_lock)
9465+{
9466+ struct dentry *dentry;
9467+ struct path path;
9468+
9469+ if (dir_ino != AUFS_ROOT_INO) {
9470+ path.dentry = decode_by_ino(sb, dir_ino, 0);
9471+ dentry = path.dentry;
9472+ if (!path.dentry || IS_ERR(path.dentry))
9473+ goto out;
9474+ AuDebugOn(au_test_anon(path.dentry));
9475+ } else
9476+ path.dentry = dget(sb->s_root);
9477+
9478+ path.mnt = au_mnt_get(sb);
9479+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
9480+ path_put(&path);
9481+
4f0767ce 9482+out:
1facf9fc 9483+ AuTraceErrPtr(dentry);
9484+ return dentry;
9485+}
9486+
9487+/* ---------------------------------------------------------------------- */
9488+
9489+static int h_acceptable(void *expv, struct dentry *dentry)
9490+{
9491+ return 1;
9492+}
9493+
9494+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
9495+ char *buf, int len, struct super_block *sb)
9496+{
9497+ char *p;
9498+ int n;
9499+ struct path path;
9500+
9501+ p = d_path(h_rootpath, buf, len);
9502+ if (IS_ERR(p))
9503+ goto out;
9504+ n = strlen(p);
9505+
9506+ path.mnt = h_rootpath->mnt;
9507+ path.dentry = h_parent;
9508+ p = d_path(&path, buf, len);
9509+ if (IS_ERR(p))
9510+ goto out;
9511+ if (n != 1)
9512+ p += n;
9513+
9514+ path.mnt = au_mnt_get(sb);
9515+ path.dentry = sb->s_root;
9516+ p = d_path(&path, buf, len - strlen(p));
9517+ mntput(path.mnt);
9518+ if (IS_ERR(p))
9519+ goto out;
9520+ if (n != 1)
9521+ p[strlen(p)] = '/';
9522+
4f0767ce 9523+out:
1facf9fc 9524+ AuTraceErrPtr(p);
9525+ return p;
9526+}
9527+
9528+static
027c5e7a
AM
9529+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
9530+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 9531+{
9532+ struct dentry *dentry, *h_parent, *root;
9533+ struct super_block *h_sb;
9534+ char *pathname, *p;
9535+ struct vfsmount *h_mnt;
9536+ struct au_branch *br;
9537+ int err;
9538+ struct path path;
9539+
027c5e7a 9540+ br = au_sbr(sb, nsi_lock->bindex);
1facf9fc 9541+ h_mnt = br->br_mnt;
9542+ h_sb = h_mnt->mnt_sb;
9543+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
9544+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
9545+ fh_len - Fh_tail, fh[Fh_h_type],
9546+ h_acceptable, /*context*/NULL);
9547+ dentry = h_parent;
9548+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
9549+ AuWarn1("%s decode_fh failed, %ld\n",
9550+ au_sbtype(h_sb), PTR_ERR(h_parent));
9551+ goto out;
9552+ }
9553+ dentry = NULL;
9554+ if (unlikely(au_test_anon(h_parent))) {
9555+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
9556+ au_sbtype(h_sb));
9557+ goto out_h_parent;
9558+ }
9559+
9560+ dentry = ERR_PTR(-ENOMEM);
9561+ pathname = (void *)__get_free_page(GFP_NOFS);
9562+ if (unlikely(!pathname))
9563+ goto out_h_parent;
9564+
9565+ root = sb->s_root;
9566+ path.mnt = h_mnt;
9567+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 9568+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 9569+ di_read_unlock(root, !AuLock_IR);
9570+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
9571+ dentry = (void *)p;
9572+ if (IS_ERR(p))
9573+ goto out_pathname;
9574+
9575+ si_read_unlock(sb);
9576+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
9577+ dentry = ERR_PTR(err);
9578+ if (unlikely(err))
9579+ goto out_relock;
9580+
9581+ dentry = ERR_PTR(-ENOENT);
9582+ AuDebugOn(au_test_anon(path.dentry));
9583+ if (unlikely(!path.dentry->d_inode))
9584+ goto out_path;
9585+
9586+ if (ino != path.dentry->d_inode->i_ino)
9587+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
9588+ else
9589+ dentry = dget(path.dentry);
9590+
4f0767ce 9591+out_path:
1facf9fc 9592+ path_put(&path);
4f0767ce 9593+out_relock:
1facf9fc 9594+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
9595+ if (!IS_ERR(dentry)) {
9596+ dput(dentry);
9597+ dentry = ERR_PTR(-ESTALE);
9598+ }
4f0767ce 9599+out_pathname:
1facf9fc 9600+ free_page((unsigned long)pathname);
4f0767ce 9601+out_h_parent:
1facf9fc 9602+ dput(h_parent);
4f0767ce 9603+out:
1facf9fc 9604+ AuTraceErrPtr(dentry);
9605+ return dentry;
9606+}
9607+
9608+/* ---------------------------------------------------------------------- */
9609+
9610+static struct dentry *
9611+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
9612+ int fh_type)
9613+{
9614+ struct dentry *dentry;
9615+ __u32 *fh = fid->raw;
027c5e7a 9616+ struct au_branch *br;
1facf9fc 9617+ ino_t ino, dir_ino;
1facf9fc 9618+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 9619+ .force_lock = 0
9620+ };
9621+
1facf9fc 9622+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
9623+ /* it should never happen, but the file handle is unreliable */
9624+ if (unlikely(fh_len < Fh_tail))
9625+ goto out;
9626+ nsi_lock.sigen = fh[Fh_sigen];
9627+ nsi_lock.br_id = fh[Fh_br_id];
9628+
1facf9fc 9629+ /* branch id may be wrapped around */
027c5e7a
AM
9630+ br = NULL;
9631+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 9632+ goto out;
9633+ nsi_lock.force_lock = 1;
9634+
9635+ /* is this inode still cached? */
9636+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
9637+ /* it should never happen */
9638+ if (unlikely(ino == AUFS_ROOT_INO))
9639+ goto out;
9640+
1facf9fc 9641+ dir_ino = decode_ino(fh + Fh_dir_ino);
9642+ dentry = decode_by_ino(sb, ino, dir_ino);
9643+ if (IS_ERR(dentry))
9644+ goto out_unlock;
9645+ if (dentry)
9646+ goto accept;
9647+
9648+ /* is the parent dir cached? */
027c5e7a
AM
9649+ br = au_sbr(sb, nsi_lock.bindex);
9650+ atomic_inc(&br->br_count);
1facf9fc 9651+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
9652+ if (IS_ERR(dentry))
9653+ goto out_unlock;
9654+ if (dentry)
9655+ goto accept;
9656+
9657+ /* lookup path */
027c5e7a 9658+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 9659+ if (IS_ERR(dentry))
9660+ goto out_unlock;
9661+ if (unlikely(!dentry))
9662+ /* todo?: make it ESTALE */
9663+ goto out_unlock;
9664+
4f0767ce 9665+accept:
027c5e7a
AM
9666+ if (!au_digen_test(dentry, au_sigen(sb))
9667+ && dentry->d_inode->i_generation == fh[Fh_igen])
1facf9fc 9668+ goto out_unlock; /* success */
9669+
9670+ dput(dentry);
9671+ dentry = ERR_PTR(-ESTALE);
4f0767ce 9672+out_unlock:
027c5e7a
AM
9673+ if (br)
9674+ atomic_dec(&br->br_count);
1facf9fc 9675+ si_read_unlock(sb);
4f0767ce 9676+out:
1facf9fc 9677+ AuTraceErrPtr(dentry);
9678+ return dentry;
9679+}
9680+
9681+#if 0 /* reserved for future use */
9682+/* support subtreecheck option */
9683+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
9684+ int fh_len, int fh_type)
9685+{
9686+ struct dentry *parent;
9687+ __u32 *fh = fid->raw;
9688+ ino_t dir_ino;
9689+
9690+ dir_ino = decode_ino(fh + Fh_dir_ino);
9691+ parent = decode_by_ino(sb, dir_ino, 0);
9692+ if (IS_ERR(parent))
9693+ goto out;
9694+ if (!parent)
9695+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
9696+ dir_ino, fh, fh_len);
9697+
4f0767ce 9698+out:
1facf9fc 9699+ AuTraceErrPtr(parent);
9700+ return parent;
9701+}
9702+#endif
9703+
9704+/* ---------------------------------------------------------------------- */
9705+
9706+static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
9707+ int connectable)
9708+{
9709+ int err;
9710+ aufs_bindex_t bindex, bend;
9711+ struct super_block *sb, *h_sb;
9712+ struct inode *inode;
9713+ struct dentry *parent, *h_parent;
9714+ struct au_branch *br;
9715+
9716+ AuDebugOn(au_test_anon(dentry));
9717+
9718+ parent = NULL;
9719+ err = -ENOSPC;
9720+ if (unlikely(*max_len <= Fh_tail)) {
9721+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
9722+ goto out;
9723+ }
9724+
9725+ err = FILEID_ROOT;
9726+ if (IS_ROOT(dentry)) {
9727+ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
9728+ goto out;
9729+ }
9730+
1facf9fc 9731+ h_parent = NULL;
027c5e7a
AM
9732+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR | AuLock_GEN);
9733+ if (unlikely(err))
9734+ goto out;
9735+
1facf9fc 9736+ inode = dentry->d_inode;
9737+ AuDebugOn(!inode);
027c5e7a 9738+ sb = dentry->d_sb;
1facf9fc 9739+#ifdef CONFIG_AUFS_DEBUG
9740+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
9741+ AuWarn1("NFS-exporting requires xino\n");
9742+#endif
027c5e7a
AM
9743+ err = -EIO;
9744+ parent = dget_parent(dentry);
9745+ di_read_lock_parent(parent, !AuLock_IR);
1facf9fc 9746+ bend = au_dbtaildir(parent);
9747+ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
9748+ h_parent = au_h_dptr(parent, bindex);
9749+ if (h_parent) {
9750+ dget(h_parent);
9751+ break;
9752+ }
9753+ }
9754+ if (unlikely(!h_parent))
9755+ goto out_unlock;
9756+
9757+ err = -EPERM;
9758+ br = au_sbr(sb, bindex);
9759+ h_sb = br->br_mnt->mnt_sb;
9760+ if (unlikely(!h_sb->s_export_op)) {
9761+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
9762+ goto out_dput;
9763+ }
9764+
9765+ fh[Fh_br_id] = br->br_id;
9766+ fh[Fh_sigen] = au_sigen(sb);
9767+ encode_ino(fh + Fh_ino, inode->i_ino);
9768+ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
9769+ fh[Fh_igen] = inode->i_generation;
9770+
9771+ *max_len -= Fh_tail;
9772+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
9773+ max_len,
9774+ /*connectable or subtreecheck*/0);
9775+ err = fh[Fh_h_type];
9776+ *max_len += Fh_tail;
9777+ /* todo: macros? */
9778+ if (err != 255)
9779+ err = 99;
9780+ else
9781+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
9782+
4f0767ce 9783+out_dput:
1facf9fc 9784+ dput(h_parent);
4f0767ce 9785+out_unlock:
1facf9fc 9786+ di_read_unlock(parent, !AuLock_IR);
9787+ dput(parent);
9788+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 9789+out:
1facf9fc 9790+ if (unlikely(err < 0))
9791+ err = 255;
9792+ return err;
9793+}
9794+
9795+/* ---------------------------------------------------------------------- */
9796+
4a4d8108
AM
9797+static int aufs_commit_metadata(struct inode *inode)
9798+{
9799+ int err;
9800+ aufs_bindex_t bindex;
9801+ struct super_block *sb;
9802+ struct inode *h_inode;
9803+ int (*f)(struct inode *inode);
9804+
9805+ sb = inode->i_sb;
e49829fe 9806+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
9807+ ii_write_lock_child(inode);
9808+ bindex = au_ibstart(inode);
9809+ AuDebugOn(bindex < 0);
9810+ h_inode = au_h_iptr(inode, bindex);
9811+
9812+ f = h_inode->i_sb->s_export_op->commit_metadata;
9813+ if (f)
9814+ err = f(h_inode);
9815+ else {
9816+ struct writeback_control wbc = {
9817+ .sync_mode = WB_SYNC_ALL,
9818+ .nr_to_write = 0 /* metadata only */
9819+ };
9820+
9821+ err = sync_inode(h_inode, &wbc);
9822+ }
9823+
9824+ au_cpup_attr_timesizes(inode);
9825+ ii_write_unlock(inode);
9826+ si_read_unlock(sb);
9827+ return err;
9828+}
9829+
9830+/* ---------------------------------------------------------------------- */
9831+
1facf9fc 9832+static struct export_operations aufs_export_op = {
4a4d8108 9833+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 9834+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
9835+ .encode_fh = aufs_encode_fh,
9836+ .commit_metadata = aufs_commit_metadata
1facf9fc 9837+};
9838+
9839+void au_export_init(struct super_block *sb)
9840+{
9841+ struct au_sbinfo *sbinfo;
9842+ __u32 u;
9843+
9844+ sb->s_export_op = &aufs_export_op;
9845+ sbinfo = au_sbi(sb);
9846+ sbinfo->si_xigen = NULL;
9847+ get_random_bytes(&u, sizeof(u));
9848+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
9849+ atomic_set(&sbinfo->si_xigen_next, u);
9850+}
7f207e10
AM
9851diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
9852--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
53392da6 9853+++ linux/fs/aufs/file.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 9854@@ -0,0 +1,676 @@
1facf9fc 9855+/*
027c5e7a 9856+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 9857+ *
9858+ * This program, aufs is free software; you can redistribute it and/or modify
9859+ * it under the terms of the GNU General Public License as published by
9860+ * the Free Software Foundation; either version 2 of the License, or
9861+ * (at your option) any later version.
dece6358
AM
9862+ *
9863+ * This program is distributed in the hope that it will be useful,
9864+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9865+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9866+ * GNU General Public License for more details.
9867+ *
9868+ * You should have received a copy of the GNU General Public License
9869+ * along with this program; if not, write to the Free Software
9870+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 9871+ */
9872+
9873+/*
4a4d8108 9874+ * handling file/dir, and address_space operation
1facf9fc 9875+ */
9876+
dece6358 9877+#include <linux/file.h>
4a4d8108
AM
9878+#include <linux/fsnotify.h>
9879+#include <linux/namei.h>
9880+#include <linux/pagemap.h>
1facf9fc 9881+#include "aufs.h"
9882+
4a4d8108
AM
9883+/* drop flags for writing */
9884+unsigned int au_file_roflags(unsigned int flags)
9885+{
9886+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
9887+ flags |= O_RDONLY | O_NOATIME;
9888+ return flags;
9889+}
9890+
9891+/* common functions to regular file and dir */
9892+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9893+ struct file *file)
1facf9fc 9894+{
1308ab2a 9895+ struct file *h_file;
4a4d8108
AM
9896+ struct dentry *h_dentry;
9897+ struct inode *h_inode;
9898+ struct super_block *sb;
9899+ struct au_branch *br;
9900+ struct path h_path;
9901+ int err, exec_flag;
1facf9fc 9902+
4a4d8108
AM
9903+ /* a race condition can happen between open and unlink/rmdir */
9904+ h_file = ERR_PTR(-ENOENT);
9905+ h_dentry = au_h_dptr(dentry, bindex);
b752ccd1 9906+ if (au_test_nfsd() && !h_dentry)
4a4d8108
AM
9907+ goto out;
9908+ h_inode = h_dentry->d_inode;
b752ccd1 9909+ if (au_test_nfsd() && !h_inode)
4a4d8108 9910+ goto out;
027c5e7a
AM
9911+ spin_lock(&h_dentry->d_lock);
9912+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
9913+ || !h_inode
9914+ /* || !dentry->d_inode->i_nlink */
9915+ ;
9916+ spin_unlock(&h_dentry->d_lock);
9917+ if (unlikely(err))
4a4d8108 9918+ goto out;
1facf9fc 9919+
4a4d8108
AM
9920+ sb = dentry->d_sb;
9921+ br = au_sbr(sb, bindex);
9922+ h_file = ERR_PTR(-EACCES);
2cbb1c4b 9923+ exec_flag = flags & __FMODE_EXEC;
4a4d8108 9924+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
027c5e7a 9925+ goto out;
1facf9fc 9926+
4a4d8108
AM
9927+ /* drop flags for writing */
9928+ if (au_test_ro(sb, bindex, dentry->d_inode))
9929+ flags = au_file_roflags(flags);
9930+ flags &= ~O_CREAT;
9931+ atomic_inc(&br->br_count);
9932+ h_path.dentry = h_dentry;
9933+ h_path.mnt = br->br_mnt;
9934+ if (!au_special_file(h_inode->i_mode))
9935+ h_file = vfsub_dentry_open(&h_path, flags);
9936+ else {
9937+ /* this block depends upon the configuration */
9938+ di_read_unlock(dentry, AuLock_IR);
9939+ fi_write_unlock(file);
9940+ si_read_unlock(sb);
9941+ h_file = vfsub_dentry_open(&h_path, flags);
9942+ si_noflush_read_lock(sb);
9943+ fi_write_lock(file);
9944+ di_read_lock_child(dentry, AuLock_IR);
dece6358 9945+ }
4a4d8108
AM
9946+ if (IS_ERR(h_file))
9947+ goto out_br;
dece6358 9948+
4a4d8108
AM
9949+ if (exec_flag) {
9950+ err = deny_write_access(h_file);
9951+ if (unlikely(err)) {
9952+ fput(h_file);
9953+ h_file = ERR_PTR(err);
9954+ goto out_br;
9955+ }
9956+ }
953406b4 9957+ fsnotify_open(h_file);
4a4d8108 9958+ goto out; /* success */
1facf9fc 9959+
4f0767ce 9960+out_br:
4a4d8108 9961+ atomic_dec(&br->br_count);
4f0767ce 9962+out:
4a4d8108
AM
9963+ return h_file;
9964+}
1308ab2a 9965+
4a4d8108
AM
9966+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
9967+ struct au_fidir *fidir)
1facf9fc 9968+{
dece6358 9969+ int err;
1facf9fc 9970+ struct dentry *dentry;
1308ab2a 9971+
4a4d8108
AM
9972+ err = au_finfo_init(file, fidir);
9973+ if (unlikely(err))
9974+ goto out;
1facf9fc 9975+
9976+ dentry = file->f_dentry;
4a4d8108
AM
9977+ di_read_lock_child(dentry, AuLock_IR);
9978+ err = open(file, vfsub_file_flags(file));
9979+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 9980+
4a4d8108
AM
9981+ fi_write_unlock(file);
9982+ if (unlikely(err)) {
9983+ au_fi(file)->fi_hdir = NULL;
9984+ au_finfo_fin(file);
1308ab2a 9985+ }
4a4d8108 9986+
4f0767ce 9987+out:
1308ab2a 9988+ return err;
9989+}
dece6358 9990+
4a4d8108 9991+int au_reopen_nondir(struct file *file)
1308ab2a 9992+{
4a4d8108
AM
9993+ int err;
9994+ aufs_bindex_t bstart;
9995+ struct dentry *dentry;
9996+ struct file *h_file, *h_file_tmp;
1308ab2a 9997+
4a4d8108
AM
9998+ dentry = file->f_dentry;
9999+ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
10000+ bstart = au_dbstart(dentry);
10001+ h_file_tmp = NULL;
10002+ if (au_fbstart(file) == bstart) {
10003+ h_file = au_hf_top(file);
10004+ if (file->f_mode == h_file->f_mode)
10005+ return 0; /* success */
10006+ h_file_tmp = h_file;
10007+ get_file(h_file_tmp);
10008+ au_set_h_fptr(file, bstart, NULL);
10009+ }
10010+ AuDebugOn(au_fi(file)->fi_hdir);
10011+ AuDebugOn(au_fbstart(file) < bstart);
1308ab2a 10012+
4a4d8108
AM
10013+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
10014+ file);
10015+ err = PTR_ERR(h_file);
10016+ if (IS_ERR(h_file))
10017+ goto out; /* todo: close all? */
10018+
10019+ err = 0;
10020+ au_set_fbstart(file, bstart);
10021+ au_set_h_fptr(file, bstart, h_file);
10022+ au_update_figen(file);
10023+ /* todo: necessary? */
10024+ /* file->f_ra = h_file->f_ra; */
10025+
4f0767ce 10026+out:
4a4d8108
AM
10027+ if (h_file_tmp)
10028+ fput(h_file_tmp);
10029+ return err;
1facf9fc 10030+}
10031+
1308ab2a 10032+/* ---------------------------------------------------------------------- */
10033+
4a4d8108
AM
10034+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
10035+ struct dentry *hi_wh)
1facf9fc 10036+{
4a4d8108
AM
10037+ int err;
10038+ aufs_bindex_t bstart;
10039+ struct au_dinfo *dinfo;
10040+ struct dentry *h_dentry;
10041+ struct au_hdentry *hdp;
1facf9fc 10042+
4a4d8108
AM
10043+ dinfo = au_di(file->f_dentry);
10044+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 10045+
4a4d8108
AM
10046+ bstart = dinfo->di_bstart;
10047+ dinfo->di_bstart = btgt;
10048+ hdp = dinfo->di_hdentry;
10049+ h_dentry = hdp[0 + btgt].hd_dentry;
10050+ hdp[0 + btgt].hd_dentry = hi_wh;
10051+ err = au_reopen_nondir(file);
10052+ hdp[0 + btgt].hd_dentry = h_dentry;
10053+ dinfo->di_bstart = bstart;
1facf9fc 10054+
1facf9fc 10055+ return err;
10056+}
10057+
4a4d8108
AM
10058+static int au_ready_to_write_wh(struct file *file, loff_t len,
10059+ aufs_bindex_t bcpup)
1facf9fc 10060+{
4a4d8108 10061+ int err;
027c5e7a
AM
10062+ struct inode *inode, *h_inode;
10063+ struct dentry *dentry, *h_dentry, *hi_wh;
1facf9fc 10064+
dece6358 10065+ dentry = file->f_dentry;
4a4d8108 10066+ au_update_dbstart(dentry);
dece6358 10067+ inode = dentry->d_inode;
027c5e7a
AM
10068+ h_inode = NULL;
10069+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
10070+ h_dentry = au_h_dptr(dentry, bcpup);
10071+ if (h_dentry)
10072+ h_inode = h_dentry->d_inode;
10073+ }
4a4d8108 10074+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 10075+ if (!hi_wh && !h_inode)
4a4d8108
AM
10076+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
10077+ else
10078+ /* already copied-up after unlink */
10079+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 10080+
4a4d8108
AM
10081+ if (!err
10082+ && inode->i_nlink > 1
10083+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
10084+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
1308ab2a 10085+
dece6358 10086+ return err;
1facf9fc 10087+}
10088+
4a4d8108
AM
10089+/*
10090+ * prepare the @file for writing.
10091+ */
10092+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 10093+{
4a4d8108 10094+ int err;
027c5e7a 10095+ aufs_bindex_t bstart, bcpup, dbstart;
4a4d8108
AM
10096+ struct dentry *dentry, *parent, *h_dentry;
10097+ struct inode *h_inode, *inode;
1facf9fc 10098+ struct super_block *sb;
4a4d8108 10099+ struct file *h_file;
1facf9fc 10100+
10101+ dentry = file->f_dentry;
1facf9fc 10102+ sb = dentry->d_sb;
4a4d8108
AM
10103+ inode = dentry->d_inode;
10104+ AuDebugOn(au_special_file(inode->i_mode));
10105+ bstart = au_fbstart(file);
10106+ err = au_test_ro(sb, bstart, inode);
10107+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
10108+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
1facf9fc 10109+ goto out;
4a4d8108 10110+ }
1facf9fc 10111+
027c5e7a 10112+ /* need to cpup or reopen */
4a4d8108
AM
10113+ parent = dget_parent(dentry);
10114+ di_write_lock_parent(parent);
10115+ err = AuWbrCopyup(au_sbi(sb), dentry);
10116+ bcpup = err;
10117+ if (unlikely(err < 0))
10118+ goto out_dgrade;
10119+ err = 0;
10120+
027c5e7a 10121+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
4a4d8108 10122+ err = au_cpup_dirs(dentry, bcpup);
1facf9fc 10123+ if (unlikely(err))
4a4d8108
AM
10124+ goto out_dgrade;
10125+ }
10126+
10127+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
10128+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10129+ if (unlikely(err))
10130+ goto out_dgrade;
10131+
10132+ h_dentry = au_hf_top(file)->f_dentry;
10133+ h_inode = h_dentry->d_inode;
027c5e7a
AM
10134+ dbstart = au_dbstart(dentry);
10135+ if (dbstart <= bcpup) {
10136+ h_dentry = au_h_dptr(dentry, bcpup);
10137+ AuDebugOn(!h_dentry);
10138+ h_inode = h_dentry->d_inode;
10139+ AuDebugOn(!h_inode);
10140+ bstart = bcpup;
10141+ }
10142+
10143+ if (dbstart <= bcpup /* just reopen */
10144+ || !d_unhashed(dentry) /* copyup and reopen */
10145+ ) {
10146+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10147+ h_file = au_h_open_pre(dentry, bstart);
10148+ if (IS_ERR(h_file)) {
10149+ err = PTR_ERR(h_file);
10150+ h_file = NULL;
10151+ } else {
10152+ di_downgrade_lock(parent, AuLock_IR);
10153+ if (dbstart > bcpup)
10154+ err = au_sio_cpup_simple(dentry, bcpup, len,
10155+ AuCpup_DTIME);
10156+ if (!err)
10157+ err = au_reopen_nondir(file);
10158+ }
10159+ mutex_unlock(&h_inode->i_mutex);
10160+ au_h_open_post(dentry, bstart, h_file);
10161+ } else { /* copyup as wh and reopen */
10162+ /*
10163+ * since writable hfsplus branch is not supported,
10164+ * h_open_pre/post() are unnecessary.
10165+ */
10166+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
4a4d8108
AM
10167+ err = au_ready_to_write_wh(file, len, bcpup);
10168+ di_downgrade_lock(parent, AuLock_IR);
027c5e7a 10169+ mutex_unlock(&h_inode->i_mutex);
4a4d8108 10170+ }
4a4d8108
AM
10171+
10172+ if (!err) {
10173+ au_pin_set_parent_lflag(pin, /*lflag*/0);
10174+ goto out_dput; /* success */
10175+ }
10176+ au_unpin(pin);
10177+ goto out_unlock;
1facf9fc 10178+
4f0767ce 10179+out_dgrade:
4a4d8108 10180+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 10181+out_unlock:
4a4d8108 10182+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10183+out_dput:
4a4d8108 10184+ dput(parent);
4f0767ce 10185+out:
1facf9fc 10186+ return err;
10187+}
10188+
4a4d8108
AM
10189+/* ---------------------------------------------------------------------- */
10190+
10191+int au_do_flush(struct file *file, fl_owner_t id,
10192+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 10193+{
4a4d8108 10194+ int err;
1308ab2a 10195+ struct dentry *dentry;
1facf9fc 10196+ struct super_block *sb;
4a4d8108 10197+ struct inode *inode;
1facf9fc 10198+
1facf9fc 10199+ dentry = file->f_dentry;
10200+ sb = dentry->d_sb;
dece6358 10201+ inode = dentry->d_inode;
4a4d8108
AM
10202+ si_noflush_read_lock(sb);
10203+ fi_read_lock(file);
b752ccd1 10204+ ii_read_lock_child(inode);
1facf9fc 10205+
4a4d8108
AM
10206+ err = flush(file, id);
10207+ au_cpup_attr_timesizes(inode);
1facf9fc 10208+
b752ccd1 10209+ ii_read_unlock(inode);
4a4d8108 10210+ fi_read_unlock(file);
1308ab2a 10211+ si_read_unlock(sb);
dece6358 10212+ return err;
1facf9fc 10213+}
10214+
4a4d8108
AM
10215+/* ---------------------------------------------------------------------- */
10216+
10217+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 10218+{
4a4d8108
AM
10219+ int err;
10220+ aufs_bindex_t bstart;
10221+ struct au_pin pin;
10222+ struct au_finfo *finfo;
10223+ struct dentry *dentry, *parent, *hi_wh;
10224+ struct inode *inode;
1facf9fc 10225+ struct super_block *sb;
10226+
4a4d8108
AM
10227+ FiMustWriteLock(file);
10228+
10229+ err = 0;
10230+ finfo = au_fi(file);
1308ab2a 10231+ dentry = file->f_dentry;
10232+ sb = dentry->d_sb;
4a4d8108
AM
10233+ inode = dentry->d_inode;
10234+ bstart = au_ibstart(inode);
027c5e7a 10235+ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
1308ab2a 10236+ goto out;
dece6358 10237+
4a4d8108
AM
10238+ parent = dget_parent(dentry);
10239+ if (au_test_ro(sb, bstart, inode)) {
10240+ di_read_lock_parent(parent, !AuLock_IR);
10241+ err = AuWbrCopyup(au_sbi(sb), dentry);
10242+ bstart = err;
10243+ di_read_unlock(parent, !AuLock_IR);
10244+ if (unlikely(err < 0))
10245+ goto out_parent;
10246+ err = 0;
1facf9fc 10247+ }
1facf9fc 10248+
4a4d8108
AM
10249+ di_read_lock_parent(parent, AuLock_IR);
10250+ hi_wh = au_hi_wh(inode, bstart);
7f207e10
AM
10251+ if (!S_ISDIR(inode->i_mode)
10252+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108
AM
10253+ && au_plink_test(inode)
10254+ && !d_unhashed(dentry)) {
10255+ err = au_test_and_cpup_dirs(dentry, bstart);
10256+ if (unlikely(err))
10257+ goto out_unlock;
10258+
10259+ /* always superio. */
10260+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
10261+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
10262+ if (!err)
10263+ err = au_sio_cpup_simple(dentry, bstart, -1,
10264+ AuCpup_DTIME);
10265+ au_unpin(&pin);
10266+ } else if (hi_wh) {
10267+ /* already copied-up after unlink */
10268+ err = au_reopen_wh(file, bstart, hi_wh);
10269+ *need_reopen = 0;
10270+ }
1facf9fc 10271+
4f0767ce 10272+out_unlock:
4a4d8108 10273+ di_read_unlock(parent, AuLock_IR);
4f0767ce 10274+out_parent:
4a4d8108 10275+ dput(parent);
4f0767ce 10276+out:
1308ab2a 10277+ return err;
dece6358 10278+}
1facf9fc 10279+
4a4d8108 10280+static void au_do_refresh_dir(struct file *file)
dece6358 10281+{
4a4d8108
AM
10282+ aufs_bindex_t bindex, bend, new_bindex, brid;
10283+ struct au_hfile *p, tmp, *q;
10284+ struct au_finfo *finfo;
1308ab2a 10285+ struct super_block *sb;
4a4d8108 10286+ struct au_fidir *fidir;
1facf9fc 10287+
4a4d8108 10288+ FiMustWriteLock(file);
1facf9fc 10289+
4a4d8108
AM
10290+ sb = file->f_dentry->d_sb;
10291+ finfo = au_fi(file);
10292+ fidir = finfo->fi_hdir;
10293+ AuDebugOn(!fidir);
10294+ p = fidir->fd_hfile + finfo->fi_btop;
10295+ brid = p->hf_br->br_id;
10296+ bend = fidir->fd_bbot;
10297+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
10298+ if (!p->hf_file)
10299+ continue;
1308ab2a 10300+
4a4d8108
AM
10301+ new_bindex = au_br_index(sb, p->hf_br->br_id);
10302+ if (new_bindex == bindex)
10303+ continue;
10304+ if (new_bindex < 0) {
10305+ au_set_h_fptr(file, bindex, NULL);
10306+ continue;
10307+ }
1308ab2a 10308+
4a4d8108
AM
10309+ /* swap two lower inode, and loop again */
10310+ q = fidir->fd_hfile + new_bindex;
10311+ tmp = *q;
10312+ *q = *p;
10313+ *p = tmp;
10314+ if (tmp.hf_file) {
10315+ bindex--;
10316+ p--;
10317+ }
10318+ }
1308ab2a 10319+
4a4d8108 10320+ p = fidir->fd_hfile;
027c5e7a 10321+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
4a4d8108
AM
10322+ bend = au_sbend(sb);
10323+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
10324+ finfo->fi_btop++, p++)
10325+ if (p->hf_file) {
10326+ if (p->hf_file->f_dentry
10327+ && p->hf_file->f_dentry->d_inode)
10328+ break;
10329+ else
10330+ au_hfput(p, file);
10331+ }
10332+ } else {
10333+ bend = au_br_index(sb, brid);
10334+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
10335+ finfo->fi_btop++, p++)
10336+ if (p->hf_file)
10337+ au_hfput(p, file);
10338+ bend = au_sbend(sb);
10339+ }
1308ab2a 10340+
4a4d8108
AM
10341+ p = fidir->fd_hfile + bend;
10342+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
10343+ fidir->fd_bbot--, p--)
10344+ if (p->hf_file) {
10345+ if (p->hf_file->f_dentry
10346+ && p->hf_file->f_dentry->d_inode)
10347+ break;
10348+ else
10349+ au_hfput(p, file);
10350+ }
10351+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 10352+}
10353+
4a4d8108
AM
10354+/*
10355+ * after branch manipulating, refresh the file.
10356+ */
10357+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 10358+{
4a4d8108
AM
10359+ int err, need_reopen;
10360+ aufs_bindex_t bend, bindex;
10361+ struct dentry *dentry;
1308ab2a 10362+ struct au_finfo *finfo;
4a4d8108 10363+ struct au_hfile *hfile;
1facf9fc 10364+
4a4d8108 10365+ dentry = file->f_dentry;
1308ab2a 10366+ finfo = au_fi(file);
4a4d8108
AM
10367+ if (!finfo->fi_hdir) {
10368+ hfile = &finfo->fi_htop;
10369+ AuDebugOn(!hfile->hf_file);
10370+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
10371+ AuDebugOn(bindex < 0);
10372+ if (bindex != finfo->fi_btop)
10373+ au_set_fbstart(file, bindex);
10374+ } else {
10375+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
10376+ if (unlikely(err))
10377+ goto out;
10378+ au_do_refresh_dir(file);
10379+ }
1facf9fc 10380+
4a4d8108
AM
10381+ err = 0;
10382+ need_reopen = 1;
10383+ if (!au_test_mmapped(file))
10384+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 10385+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
10386+ err = reopen(file);
10387+ if (!err) {
10388+ au_update_figen(file);
10389+ goto out; /* success */
10390+ }
10391+
10392+ /* error, close all lower files */
10393+ if (finfo->fi_hdir) {
10394+ bend = au_fbend_dir(file);
10395+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
10396+ au_set_h_fptr(file, bindex, NULL);
10397+ }
1facf9fc 10398+
4f0767ce 10399+out:
1facf9fc 10400+ return err;
10401+}
10402+
4a4d8108
AM
10403+/* common function to regular file and dir */
10404+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10405+ int wlock)
dece6358 10406+{
1308ab2a 10407+ int err;
4a4d8108
AM
10408+ unsigned int sigen, figen;
10409+ aufs_bindex_t bstart;
10410+ unsigned char pseudo_link;
10411+ struct dentry *dentry;
10412+ struct inode *inode;
1facf9fc 10413+
4a4d8108
AM
10414+ err = 0;
10415+ dentry = file->f_dentry;
10416+ inode = dentry->d_inode;
10417+ AuDebugOn(au_special_file(inode->i_mode));
10418+ sigen = au_sigen(dentry->d_sb);
10419+ fi_write_lock(file);
10420+ figen = au_figen(file);
10421+ di_write_lock_child(dentry);
10422+ bstart = au_dbstart(dentry);
10423+ pseudo_link = (bstart != au_ibstart(inode));
10424+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
10425+ if (!wlock) {
10426+ di_downgrade_lock(dentry, AuLock_IR);
10427+ fi_downgrade_lock(file);
10428+ }
10429+ goto out; /* success */
10430+ }
dece6358 10431+
4a4d8108 10432+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 10433+ if (au_digen_test(dentry, sigen)) {
4a4d8108 10434+ err = au_reval_dpath(dentry, sigen);
027c5e7a 10435+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 10436+ }
dece6358 10437+
027c5e7a
AM
10438+ if (!err)
10439+ err = refresh_file(file, reopen);
4a4d8108
AM
10440+ if (!err) {
10441+ if (!wlock) {
10442+ di_downgrade_lock(dentry, AuLock_IR);
10443+ fi_downgrade_lock(file);
10444+ }
10445+ } else {
10446+ di_write_unlock(dentry);
10447+ fi_write_unlock(file);
10448+ }
1facf9fc 10449+
4f0767ce 10450+out:
1308ab2a 10451+ return err;
10452+}
1facf9fc 10453+
4a4d8108
AM
10454+/* ---------------------------------------------------------------------- */
10455+
10456+/* cf. aufs_nopage() */
10457+/* for madvise(2) */
10458+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 10459+{
4a4d8108
AM
10460+ unlock_page(page);
10461+ return 0;
10462+}
1facf9fc 10463+
4a4d8108
AM
10464+/* it will never be called, but necessary to support O_DIRECT */
10465+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
10466+ const struct iovec *iov, loff_t offset,
10467+ unsigned long nr_segs)
10468+{ BUG(); return 0; }
1facf9fc 10469+
4a4d8108
AM
10470+/*
10471+ * it will never be called, but madvise and fadvise behaves differently
10472+ * when get_xip_mem is defined
10473+ */
10474+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
10475+ int create, void **kmem, unsigned long *pfn)
10476+{ BUG(); return 0; }
1facf9fc 10477+
4a4d8108
AM
10478+/* they will never be called. */
10479+#ifdef CONFIG_AUFS_DEBUG
10480+static int aufs_write_begin(struct file *file, struct address_space *mapping,
10481+ loff_t pos, unsigned len, unsigned flags,
10482+ struct page **pagep, void **fsdata)
10483+{ AuUnsupport(); return 0; }
10484+static int aufs_write_end(struct file *file, struct address_space *mapping,
10485+ loff_t pos, unsigned len, unsigned copied,
10486+ struct page *page, void *fsdata)
10487+{ AuUnsupport(); return 0; }
10488+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
10489+{ AuUnsupport(); return 0; }
1308ab2a 10490+
4a4d8108
AM
10491+static int aufs_set_page_dirty(struct page *page)
10492+{ AuUnsupport(); return 0; }
10493+static void aufs_invalidatepage(struct page *page, unsigned long offset)
10494+{ AuUnsupport(); }
10495+static int aufs_releasepage(struct page *page, gfp_t gfp)
10496+{ AuUnsupport(); return 0; }
10497+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
10498+ struct page *page)
10499+{ AuUnsupport(); return 0; }
10500+static int aufs_launder_page(struct page *page)
10501+{ AuUnsupport(); return 0; }
10502+static int aufs_is_partially_uptodate(struct page *page,
10503+ read_descriptor_t *desc,
10504+ unsigned long from)
10505+{ AuUnsupport(); return 0; }
10506+static int aufs_error_remove_page(struct address_space *mapping,
10507+ struct page *page)
10508+{ AuUnsupport(); return 0; }
10509+#endif /* CONFIG_AUFS_DEBUG */
10510+
10511+const struct address_space_operations aufs_aop = {
10512+ .readpage = aufs_readpage,
10513+ .direct_IO = aufs_direct_IO,
10514+ .get_xip_mem = aufs_get_xip_mem,
10515+#ifdef CONFIG_AUFS_DEBUG
10516+ .writepage = aufs_writepage,
4a4d8108
AM
10517+ /* no writepages, because of writepage */
10518+ .set_page_dirty = aufs_set_page_dirty,
10519+ /* no readpages, because of readpage */
10520+ .write_begin = aufs_write_begin,
10521+ .write_end = aufs_write_end,
10522+ /* no bmap, no block device */
10523+ .invalidatepage = aufs_invalidatepage,
10524+ .releasepage = aufs_releasepage,
10525+ .migratepage = aufs_migratepage,
10526+ .launder_page = aufs_launder_page,
10527+ .is_partially_uptodate = aufs_is_partially_uptodate,
10528+ .error_remove_page = aufs_error_remove_page
10529+#endif /* CONFIG_AUFS_DEBUG */
dece6358 10530+};
7f207e10
AM
10531diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
10532--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
10533+++ linux/fs/aufs/file.h 2011-08-24 13:30:24.731313534 +0200
10534@@ -0,0 +1,299 @@
4a4d8108 10535+/*
027c5e7a 10536+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10537+ *
10538+ * This program, aufs is free software; you can redistribute it and/or modify
10539+ * it under the terms of the GNU General Public License as published by
10540+ * the Free Software Foundation; either version 2 of the License, or
10541+ * (at your option) any later version.
10542+ *
10543+ * This program is distributed in the hope that it will be useful,
10544+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10545+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10546+ * GNU General Public License for more details.
10547+ *
10548+ * You should have received a copy of the GNU General Public License
10549+ * along with this program; if not, write to the Free Software
10550+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10551+ */
1facf9fc 10552+
4a4d8108
AM
10553+/*
10554+ * file operations
10555+ */
1facf9fc 10556+
4a4d8108
AM
10557+#ifndef __AUFS_FILE_H__
10558+#define __AUFS_FILE_H__
1facf9fc 10559+
4a4d8108 10560+#ifdef __KERNEL__
1facf9fc 10561+
2cbb1c4b 10562+#include <linux/file.h>
4a4d8108
AM
10563+#include <linux/fs.h>
10564+#include <linux/poll.h>
10565+#include <linux/aufs_type.h>
10566+#include "rwsem.h"
1facf9fc 10567+
4a4d8108
AM
10568+struct au_branch;
10569+struct au_hfile {
10570+ struct file *hf_file;
10571+ struct au_branch *hf_br;
10572+};
1facf9fc 10573+
4a4d8108
AM
10574+struct au_vdir;
10575+struct au_fidir {
10576+ aufs_bindex_t fd_bbot;
10577+ aufs_bindex_t fd_nent;
10578+ struct au_vdir *fd_vdir_cache;
10579+ struct au_hfile fd_hfile[];
10580+};
1facf9fc 10581+
4a4d8108 10582+static inline int au_fidir_sz(int nent)
dece6358 10583+{
4f0767ce
JR
10584+ AuDebugOn(nent < 0);
10585+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 10586+}
1facf9fc 10587+
4a4d8108
AM
10588+struct au_finfo {
10589+ atomic_t fi_generation;
dece6358 10590+
4a4d8108
AM
10591+ struct au_rwsem fi_rwsem;
10592+ aufs_bindex_t fi_btop;
10593+
10594+ /* do not union them */
10595+ struct { /* for non-dir */
10596+ struct au_hfile fi_htop;
2cbb1c4b 10597+ atomic_t fi_mmapped;
4a4d8108
AM
10598+ };
10599+ struct au_fidir *fi_hdir; /* for dir only */
10600+} ____cacheline_aligned_in_smp;
1facf9fc 10601+
4a4d8108 10602+/* ---------------------------------------------------------------------- */
1facf9fc 10603+
4a4d8108
AM
10604+/* file.c */
10605+extern const struct address_space_operations aufs_aop;
10606+unsigned int au_file_roflags(unsigned int flags);
10607+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
10608+ struct file *file);
10609+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
10610+ struct au_fidir *fidir);
10611+int au_reopen_nondir(struct file *file);
10612+struct au_pin;
10613+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
10614+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
10615+ int wlock);
10616+int au_do_flush(struct file *file, fl_owner_t id,
10617+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 10618+
4a4d8108
AM
10619+/* poll.c */
10620+#ifdef CONFIG_AUFS_POLL
10621+unsigned int aufs_poll(struct file *file, poll_table *wait);
10622+#endif
1facf9fc 10623+
4a4d8108
AM
10624+#ifdef CONFIG_AUFS_BR_HFSPLUS
10625+/* hfsplus.c */
10626+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
10627+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
10628+ struct file *h_file);
10629+#else
10630+static inline
10631+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
dece6358 10632+{
4a4d8108
AM
10633+ return NULL;
10634+}
1facf9fc 10635+
4a4d8108
AM
10636+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
10637+ struct file *h_file);
10638+#endif
1facf9fc 10639+
4a4d8108
AM
10640+/* f_op.c */
10641+extern const struct file_operations aufs_file_fop;
4a4d8108
AM
10642+int au_do_open_nondir(struct file *file, int flags);
10643+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
10644+
10645+#ifdef CONFIG_AUFS_SP_IATTR
10646+/* f_op_sp.c */
10647+int au_special_file(umode_t mode);
10648+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
10649+#else
10650+AuStubInt0(au_special_file, umode_t mode)
10651+static inline void au_init_special_fop(struct inode *inode, umode_t mode,
10652+ dev_t rdev)
10653+{
10654+ init_special_inode(inode, mode, rdev);
10655+}
10656+#endif
1facf9fc 10657+
4a4d8108
AM
10658+/* finfo.c */
10659+void au_hfput(struct au_hfile *hf, struct file *file);
10660+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
10661+ struct file *h_file);
1facf9fc 10662+
4a4d8108 10663+void au_update_figen(struct file *file);
4a4d8108
AM
10664+struct au_fidir *au_fidir_alloc(struct super_block *sb);
10665+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 10666+
4a4d8108
AM
10667+void au_fi_init_once(void *_fi);
10668+void au_finfo_fin(struct file *file);
10669+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 10670+
4a4d8108
AM
10671+/* ioctl.c */
10672+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10673+#ifdef CONFIG_COMPAT
10674+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
10675+ unsigned long arg);
10676+#endif
1facf9fc 10677+
4a4d8108 10678+/* ---------------------------------------------------------------------- */
1facf9fc 10679+
4a4d8108
AM
10680+static inline struct au_finfo *au_fi(struct file *file)
10681+{
10682+ return file->private_data;
10683+}
1facf9fc 10684+
4a4d8108 10685+/* ---------------------------------------------------------------------- */
1facf9fc 10686+
4a4d8108
AM
10687+/*
10688+ * fi_read_lock, fi_write_lock,
10689+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
10690+ */
10691+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 10692+
4a4d8108
AM
10693+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
10694+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
10695+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 10696+
1308ab2a 10697+/* ---------------------------------------------------------------------- */
10698+
4a4d8108
AM
10699+/* todo: hard/soft set? */
10700+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 10701+{
4a4d8108
AM
10702+ FiMustAnyLock(file);
10703+ return au_fi(file)->fi_btop;
10704+}
dece6358 10705+
4a4d8108
AM
10706+static inline aufs_bindex_t au_fbend_dir(struct file *file)
10707+{
10708+ FiMustAnyLock(file);
10709+ AuDebugOn(!au_fi(file)->fi_hdir);
10710+ return au_fi(file)->fi_hdir->fd_bbot;
10711+}
1facf9fc 10712+
4a4d8108
AM
10713+static inline struct au_vdir *au_fvdir_cache(struct file *file)
10714+{
10715+ FiMustAnyLock(file);
10716+ AuDebugOn(!au_fi(file)->fi_hdir);
10717+ return au_fi(file)->fi_hdir->fd_vdir_cache;
10718+}
1facf9fc 10719+
4a4d8108
AM
10720+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
10721+{
10722+ FiMustWriteLock(file);
10723+ au_fi(file)->fi_btop = bindex;
10724+}
1facf9fc 10725+
4a4d8108
AM
10726+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
10727+{
10728+ FiMustWriteLock(file);
10729+ AuDebugOn(!au_fi(file)->fi_hdir);
10730+ au_fi(file)->fi_hdir->fd_bbot = bindex;
10731+}
1308ab2a 10732+
4a4d8108
AM
10733+static inline void au_set_fvdir_cache(struct file *file,
10734+ struct au_vdir *vdir_cache)
10735+{
10736+ FiMustWriteLock(file);
10737+ AuDebugOn(!au_fi(file)->fi_hdir);
10738+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
10739+}
dece6358 10740+
4a4d8108
AM
10741+static inline struct file *au_hf_top(struct file *file)
10742+{
10743+ FiMustAnyLock(file);
10744+ AuDebugOn(au_fi(file)->fi_hdir);
10745+ return au_fi(file)->fi_htop.hf_file;
10746+}
1facf9fc 10747+
4a4d8108
AM
10748+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
10749+{
10750+ FiMustAnyLock(file);
10751+ AuDebugOn(!au_fi(file)->fi_hdir);
10752+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
10753+}
10754+
4a4d8108
AM
10755+/* todo: memory barrier? */
10756+static inline unsigned int au_figen(struct file *f)
dece6358 10757+{
4a4d8108
AM
10758+ return atomic_read(&au_fi(f)->fi_generation);
10759+}
dece6358 10760+
2cbb1c4b
JR
10761+static inline void au_set_mmapped(struct file *f)
10762+{
10763+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
10764+ return;
10765+ pr_warning("fi_mmapped wrapped around\n");
10766+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
10767+ ;
10768+}
10769+
10770+static inline void au_unset_mmapped(struct file *f)
10771+{
10772+ atomic_dec(&au_fi(f)->fi_mmapped);
10773+}
10774+
4a4d8108
AM
10775+static inline int au_test_mmapped(struct file *f)
10776+{
2cbb1c4b
JR
10777+ return atomic_read(&au_fi(f)->fi_mmapped);
10778+}
10779+
10780+/* customize vma->vm_file */
10781+
10782+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
10783+ struct file *file)
10784+{
53392da6
AM
10785+ struct file *f;
10786+
10787+ f = vma->vm_file;
2cbb1c4b
JR
10788+ get_file(file);
10789+ vma->vm_file = file;
53392da6 10790+ fput(f);
2cbb1c4b
JR
10791+}
10792+
10793+#ifdef CONFIG_MMU
10794+#define AuDbgVmRegion(file, vma) do {} while (0)
10795+
10796+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10797+ struct file *file)
10798+{
10799+ au_do_vm_file_reset(vma, file);
10800+}
10801+#else
10802+#define AuDbgVmRegion(file, vma) \
10803+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
10804+
10805+static inline void au_vm_file_reset(struct vm_area_struct *vma,
10806+ struct file *file)
10807+{
53392da6
AM
10808+ struct file *f;
10809+
2cbb1c4b 10810+ au_do_vm_file_reset(vma, file);
53392da6 10811+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
10812+ get_file(file);
10813+ vma->vm_region->vm_file = file;
53392da6 10814+ fput(f);
2cbb1c4b
JR
10815+}
10816+#endif /* CONFIG_MMU */
10817+
10818+/* handle vma->vm_prfile */
10819+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
10820+ struct file *file)
10821+{
10822+#ifdef CONFIG_AUFS_PROC_MAP
10823+ get_file(file);
10824+ vma->vm_prfile = file;
10825+#ifndef CONFIG_MMU
10826+ get_file(file);
10827+ vma->vm_region->vm_prfile = file;
10828+#endif
10829+#endif
4a4d8108 10830+}
1308ab2a 10831+
4a4d8108
AM
10832+#endif /* __KERNEL__ */
10833+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
10834diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
10835--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 10836+++ linux/fs/aufs/finfo.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 10837@@ -0,0 +1,153 @@
4a4d8108 10838+/*
027c5e7a 10839+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
10840+ *
10841+ * This program, aufs is free software; you can redistribute it and/or modify
10842+ * it under the terms of the GNU General Public License as published by
10843+ * the Free Software Foundation; either version 2 of the License, or
10844+ * (at your option) any later version.
10845+ *
10846+ * This program is distributed in the hope that it will be useful,
10847+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10848+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10849+ * GNU General Public License for more details.
10850+ *
10851+ * You should have received a copy of the GNU General Public License
10852+ * along with this program; if not, write to the Free Software
10853+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10854+ */
1308ab2a 10855+
4a4d8108
AM
10856+/*
10857+ * file private data
10858+ */
1facf9fc 10859+
4a4d8108
AM
10860+#include <linux/file.h>
10861+#include "aufs.h"
1facf9fc 10862+
4a4d8108
AM
10863+void au_hfput(struct au_hfile *hf, struct file *file)
10864+{
10865+ /* todo: direct access f_flags */
2cbb1c4b 10866+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
10867+ allow_write_access(hf->hf_file);
10868+ fput(hf->hf_file);
10869+ hf->hf_file = NULL;
e49829fe 10870+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
10871+ hf->hf_br = NULL;
10872+}
1facf9fc 10873+
4a4d8108
AM
10874+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
10875+{
10876+ struct au_finfo *finfo = au_fi(file);
10877+ struct au_hfile *hf;
10878+ struct au_fidir *fidir;
10879+
10880+ fidir = finfo->fi_hdir;
10881+ if (!fidir) {
10882+ AuDebugOn(finfo->fi_btop != bindex);
10883+ hf = &finfo->fi_htop;
10884+ } else
10885+ hf = fidir->fd_hfile + bindex;
10886+
10887+ if (hf && hf->hf_file)
10888+ au_hfput(hf, file);
10889+ if (val) {
10890+ FiMustWriteLock(file);
10891+ hf->hf_file = val;
10892+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
1308ab2a 10893+ }
4a4d8108 10894+}
1facf9fc 10895+
4a4d8108
AM
10896+void au_update_figen(struct file *file)
10897+{
10898+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
10899+ /* smp_mb(); */ /* atomic_set */
1facf9fc 10900+}
10901+
4a4d8108
AM
10902+/* ---------------------------------------------------------------------- */
10903+
4a4d8108
AM
10904+struct au_fidir *au_fidir_alloc(struct super_block *sb)
10905+{
10906+ struct au_fidir *fidir;
10907+ int nbr;
10908+
10909+ nbr = au_sbend(sb) + 1;
10910+ if (nbr < 2)
10911+ nbr = 2; /* initial allocate for 2 branches */
10912+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
10913+ if (fidir) {
10914+ fidir->fd_bbot = -1;
10915+ fidir->fd_nent = nbr;
10916+ fidir->fd_vdir_cache = NULL;
10917+ }
10918+
10919+ return fidir;
10920+}
10921+
10922+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
10923+{
10924+ int err;
10925+ struct au_fidir *fidir, *p;
10926+
10927+ AuRwMustWriteLock(&finfo->fi_rwsem);
10928+ fidir = finfo->fi_hdir;
10929+ AuDebugOn(!fidir);
10930+
10931+ err = -ENOMEM;
10932+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
10933+ GFP_NOFS);
10934+ if (p) {
10935+ p->fd_nent = nbr;
10936+ finfo->fi_hdir = p;
10937+ err = 0;
10938+ }
1facf9fc 10939+
dece6358 10940+ return err;
1facf9fc 10941+}
1308ab2a 10942+
10943+/* ---------------------------------------------------------------------- */
10944+
4a4d8108 10945+void au_finfo_fin(struct file *file)
1308ab2a 10946+{
4a4d8108
AM
10947+ struct au_finfo *finfo;
10948+
7f207e10
AM
10949+ au_nfiles_dec(file->f_dentry->d_sb);
10950+
4a4d8108
AM
10951+ finfo = au_fi(file);
10952+ AuDebugOn(finfo->fi_hdir);
10953+ AuRwDestroy(&finfo->fi_rwsem);
10954+ au_cache_free_finfo(finfo);
1308ab2a 10955+}
1308ab2a 10956+
e49829fe 10957+void au_fi_init_once(void *_finfo)
4a4d8108 10958+{
e49829fe 10959+ struct au_finfo *finfo = _finfo;
2cbb1c4b 10960+ static struct lock_class_key aufs_fi;
1308ab2a 10961+
e49829fe
JR
10962+ au_rw_init(&finfo->fi_rwsem);
10963+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 10964+}
1308ab2a 10965+
4a4d8108
AM
10966+int au_finfo_init(struct file *file, struct au_fidir *fidir)
10967+{
10968+ int err;
10969+ struct au_finfo *finfo;
10970+ struct dentry *dentry;
10971+
10972+ err = -ENOMEM;
10973+ dentry = file->f_dentry;
10974+ finfo = au_cache_alloc_finfo();
10975+ if (unlikely(!finfo))
10976+ goto out;
10977+
10978+ err = 0;
7f207e10 10979+ au_nfiles_inc(dentry->d_sb);
4a4d8108
AM
10980+ au_rw_write_lock(&finfo->fi_rwsem);
10981+ finfo->fi_btop = -1;
10982+ finfo->fi_hdir = fidir;
10983+ atomic_set(&finfo->fi_generation, au_digen(dentry));
10984+ /* smp_mb(); */ /* atomic_set */
10985+
10986+ file->private_data = finfo;
10987+
10988+out:
10989+ return err;
10990+}
7f207e10
AM
10991diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
10992--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
10993+++ linux/fs/aufs/f_op.c 2011-08-24 13:30:24.731313534 +0200
10994@@ -0,0 +1,717 @@
dece6358 10995+/*
027c5e7a 10996+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
10997+ *
10998+ * This program, aufs is free software; you can redistribute it and/or modify
10999+ * it under the terms of the GNU General Public License as published by
11000+ * the Free Software Foundation; either version 2 of the License, or
11001+ * (at your option) any later version.
11002+ *
11003+ * This program is distributed in the hope that it will be useful,
11004+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11005+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11006+ * GNU General Public License for more details.
11007+ *
11008+ * You should have received a copy of the GNU General Public License
11009+ * along with this program; if not, write to the Free Software
11010+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11011+ */
1facf9fc 11012+
11013+/*
4a4d8108 11014+ * file and vm operations
1facf9fc 11015+ */
dece6358
AM
11016+
11017+#include <linux/file.h>
4a4d8108
AM
11018+#include <linux/fs_stack.h>
11019+#include <linux/mman.h>
11020+#include <linux/mm.h>
11021+#include <linux/security.h>
dece6358
AM
11022+#include "aufs.h"
11023+
4a4d8108 11024+int au_do_open_nondir(struct file *file, int flags)
1facf9fc 11025+{
4a4d8108
AM
11026+ int err;
11027+ aufs_bindex_t bindex;
11028+ struct file *h_file;
11029+ struct dentry *dentry;
11030+ struct au_finfo *finfo;
11031+
11032+ FiMustWriteLock(file);
11033+
4a4d8108 11034+ dentry = file->f_dentry;
027c5e7a
AM
11035+ err = au_d_alive(dentry);
11036+ if (unlikely(err))
11037+ goto out;
11038+
4a4d8108
AM
11039+ finfo = au_fi(file);
11040+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 11041+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108
AM
11042+ bindex = au_dbstart(dentry);
11043+ h_file = au_h_open(dentry, bindex, flags, file);
11044+ if (IS_ERR(h_file))
11045+ err = PTR_ERR(h_file);
11046+ else {
11047+ au_set_fbstart(file, bindex);
11048+ au_set_h_fptr(file, bindex, h_file);
11049+ au_update_figen(file);
11050+ /* todo: necessary? */
11051+ /* file->f_ra = h_file->f_ra; */
11052+ }
027c5e7a
AM
11053+
11054+out:
4a4d8108 11055+ return err;
1facf9fc 11056+}
11057+
4a4d8108
AM
11058+static int aufs_open_nondir(struct inode *inode __maybe_unused,
11059+ struct file *file)
1facf9fc 11060+{
4a4d8108 11061+ int err;
1308ab2a 11062+ struct super_block *sb;
1facf9fc 11063+
2cbb1c4b 11064+ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
4a4d8108
AM
11065+ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
11066+ file->f_mode);
1facf9fc 11067+
4a4d8108
AM
11068+ sb = file->f_dentry->d_sb;
11069+ si_read_lock(sb, AuLock_FLUSH);
11070+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
11071+ si_read_unlock(sb);
11072+ return err;
11073+}
1facf9fc 11074+
4a4d8108
AM
11075+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
11076+{
11077+ struct au_finfo *finfo;
11078+ aufs_bindex_t bindex;
1facf9fc 11079+
4a4d8108
AM
11080+ finfo = au_fi(file);
11081+ bindex = finfo->fi_btop;
0c5527e5
AM
11082+ if (bindex >= 0) {
11083+ /* remove me from sb->s_files */
11084+ file_sb_list_del(file);
4a4d8108 11085+ au_set_h_fptr(file, bindex, NULL);
0c5527e5 11086+ }
7f207e10 11087+
4a4d8108
AM
11088+ au_finfo_fin(file);
11089+ return 0;
1facf9fc 11090+}
11091+
4a4d8108
AM
11092+/* ---------------------------------------------------------------------- */
11093+
11094+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 11095+{
1308ab2a 11096+ int err;
4a4d8108
AM
11097+ struct file *h_file;
11098+
11099+ err = 0;
11100+ h_file = au_hf_top(file);
11101+ if (h_file)
11102+ err = vfsub_flush(h_file, id);
11103+ return err;
11104+}
11105+
11106+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
11107+{
11108+ return au_do_flush(file, id, au_do_flush_nondir);
11109+}
11110+
11111+/* ---------------------------------------------------------------------- */
11112+
11113+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
11114+ loff_t *ppos)
11115+{
11116+ ssize_t err;
dece6358 11117+ struct dentry *dentry;
4a4d8108 11118+ struct file *h_file;
dece6358 11119+ struct super_block *sb;
1facf9fc 11120+
dece6358
AM
11121+ dentry = file->f_dentry;
11122+ sb = dentry->d_sb;
e49829fe 11123+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 11124+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
dece6358
AM
11125+ if (unlikely(err))
11126+ goto out;
1facf9fc 11127+
4a4d8108
AM
11128+ h_file = au_hf_top(file);
11129+ err = vfsub_read_u(h_file, buf, count, ppos);
11130+ /* todo: necessary? */
11131+ /* file->f_ra = h_file->f_ra; */
11132+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1308ab2a 11133+
4a4d8108
AM
11134+ di_read_unlock(dentry, AuLock_IR);
11135+ fi_read_unlock(file);
4f0767ce 11136+out:
dece6358
AM
11137+ si_read_unlock(sb);
11138+ return err;
11139+}
1facf9fc 11140+
e49829fe
JR
11141+/*
11142+ * todo: very ugly
11143+ * it safely takes both i_mutex and si_rwsem (the latter for read).
11144+ * if the plink maintenance mode continues forever (that is the problem),
11145+ * this may loop forever.
11146+ */
11147+static void au_mtx_and_read_lock(struct inode *inode)
11148+{
11149+ int err;
11150+ struct super_block *sb = inode->i_sb;
11151+
11152+ while (1) {
11153+ mutex_lock(&inode->i_mutex);
11154+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11155+ if (!err)
11156+ break;
11157+ mutex_unlock(&inode->i_mutex);
11158+ si_read_lock(sb, AuLock_NOPLMW);
11159+ si_read_unlock(sb);
11160+ }
11161+}
11162+
4a4d8108
AM
11163+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
11164+ size_t count, loff_t *ppos)
dece6358 11165+{
4a4d8108
AM
11166+ ssize_t err;
11167+ struct au_pin pin;
dece6358 11168+ struct dentry *dentry;
4a4d8108 11169+ struct inode *inode;
4a4d8108
AM
11170+ struct file *h_file;
11171+ char __user *buf = (char __user *)ubuf;
1facf9fc 11172+
dece6358 11173+ dentry = file->f_dentry;
4a4d8108 11174+ inode = dentry->d_inode;
e49829fe 11175+ au_mtx_and_read_lock(inode);
1facf9fc 11176+
4a4d8108
AM
11177+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11178+ if (unlikely(err))
11179+ goto out;
1facf9fc 11180+
4a4d8108
AM
11181+ err = au_ready_to_write(file, -1, &pin);
11182+ di_downgrade_lock(dentry, AuLock_IR);
11183+ if (unlikely(err))
11184+ goto out_unlock;
1facf9fc 11185+
4a4d8108
AM
11186+ h_file = au_hf_top(file);
11187+ au_unpin(&pin);
11188+ err = vfsub_write_u(h_file, buf, count, ppos);
11189+ au_cpup_attr_timesizes(inode);
11190+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11191+
4f0767ce 11192+out_unlock:
4a4d8108
AM
11193+ di_read_unlock(dentry, AuLock_IR);
11194+ fi_write_unlock(file);
4f0767ce 11195+out:
e49829fe 11196+ si_read_unlock(inode->i_sb);
4a4d8108 11197+ mutex_unlock(&inode->i_mutex);
dece6358
AM
11198+ return err;
11199+}
1facf9fc 11200+
4a4d8108
AM
11201+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
11202+ const struct iovec *iov, unsigned long nv, loff_t pos)
dece6358 11203+{
4a4d8108
AM
11204+ ssize_t err;
11205+ struct file *file;
11206+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
11207+ loff_t);
1facf9fc 11208+
4a4d8108
AM
11209+ err = security_file_permission(h_file, rw);
11210+ if (unlikely(err))
11211+ goto out;
1facf9fc 11212+
4a4d8108
AM
11213+ err = -ENOSYS;
11214+ func = NULL;
11215+ if (rw == MAY_READ)
11216+ func = h_file->f_op->aio_read;
11217+ else if (rw == MAY_WRITE)
11218+ func = h_file->f_op->aio_write;
11219+ if (func) {
11220+ file = kio->ki_filp;
11221+ kio->ki_filp = h_file;
2cbb1c4b 11222+ lockdep_off();
4a4d8108 11223+ err = func(kio, iov, nv, pos);
2cbb1c4b 11224+ lockdep_on();
4a4d8108
AM
11225+ kio->ki_filp = file;
11226+ } else
11227+ /* currently there is no such fs */
11228+ WARN_ON_ONCE(1);
1facf9fc 11229+
4f0767ce 11230+out:
dece6358
AM
11231+ return err;
11232+}
1facf9fc 11233+
4a4d8108
AM
11234+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
11235+ unsigned long nv, loff_t pos)
1facf9fc 11236+{
4a4d8108
AM
11237+ ssize_t err;
11238+ struct file *file, *h_file;
11239+ struct dentry *dentry;
dece6358 11240+ struct super_block *sb;
1facf9fc 11241+
4a4d8108 11242+ file = kio->ki_filp;
dece6358 11243+ dentry = file->f_dentry;
1308ab2a 11244+ sb = dentry->d_sb;
e49829fe 11245+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11246+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11247+ if (unlikely(err))
11248+ goto out;
11249+
11250+ h_file = au_hf_top(file);
11251+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
11252+ /* todo: necessary? */
11253+ /* file->f_ra = h_file->f_ra; */
11254+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
11255+ di_read_unlock(dentry, AuLock_IR);
11256+ fi_read_unlock(file);
1facf9fc 11257+
4f0767ce 11258+out:
4a4d8108 11259+ si_read_unlock(sb);
1308ab2a 11260+ return err;
11261+}
1facf9fc 11262+
4a4d8108
AM
11263+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
11264+ unsigned long nv, loff_t pos)
1308ab2a 11265+{
4a4d8108
AM
11266+ ssize_t err;
11267+ struct au_pin pin;
11268+ struct dentry *dentry;
11269+ struct inode *inode;
4a4d8108 11270+ struct file *file, *h_file;
1308ab2a 11271+
4a4d8108 11272+ file = kio->ki_filp;
1308ab2a 11273+ dentry = file->f_dentry;
1308ab2a 11274+ inode = dentry->d_inode;
e49829fe
JR
11275+ au_mtx_and_read_lock(inode);
11276+
4a4d8108
AM
11277+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11278+ if (unlikely(err))
1308ab2a 11279+ goto out;
1facf9fc 11280+
4a4d8108
AM
11281+ err = au_ready_to_write(file, -1, &pin);
11282+ di_downgrade_lock(dentry, AuLock_IR);
dece6358 11283+ if (unlikely(err))
4a4d8108 11284+ goto out_unlock;
1facf9fc 11285+
4a4d8108
AM
11286+ au_unpin(&pin);
11287+ h_file = au_hf_top(file);
11288+ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
11289+ au_cpup_attr_timesizes(inode);
11290+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11291+
4f0767ce 11292+out_unlock:
4a4d8108
AM
11293+ di_read_unlock(dentry, AuLock_IR);
11294+ fi_write_unlock(file);
4f0767ce 11295+out:
e49829fe 11296+ si_read_unlock(inode->i_sb);
4a4d8108 11297+ mutex_unlock(&inode->i_mutex);
dece6358 11298+ return err;
1facf9fc 11299+}
11300+
4a4d8108
AM
11301+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
11302+ struct pipe_inode_info *pipe, size_t len,
11303+ unsigned int flags)
1facf9fc 11304+{
4a4d8108
AM
11305+ ssize_t err;
11306+ struct file *h_file;
11307+ struct dentry *dentry;
dece6358 11308+ struct super_block *sb;
1facf9fc 11309+
dece6358 11310+ dentry = file->f_dentry;
dece6358 11311+ sb = dentry->d_sb;
e49829fe 11312+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11313+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11314+ if (unlikely(err))
dece6358 11315+ goto out;
1facf9fc 11316+
4a4d8108
AM
11317+ err = -EINVAL;
11318+ h_file = au_hf_top(file);
11319+ if (au_test_loopback_kthread()) {
87a755f4
AM
11320+ au_warn_loopback(h_file->f_dentry->d_sb);
11321+ if (file->f_mapping != h_file->f_mapping) {
11322+ file->f_mapping = h_file->f_mapping;
11323+ smp_mb(); /* unnecessary? */
11324+ }
1308ab2a 11325+ }
4a4d8108
AM
11326+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
11327+ /* todo: necessary? */
11328+ /* file->f_ra = h_file->f_ra; */
11329+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1facf9fc 11330+
4a4d8108
AM
11331+ di_read_unlock(dentry, AuLock_IR);
11332+ fi_read_unlock(file);
1facf9fc 11333+
4f0767ce 11334+out:
4a4d8108 11335+ si_read_unlock(sb);
dece6358 11336+ return err;
1facf9fc 11337+}
11338+
4a4d8108
AM
11339+static ssize_t
11340+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
11341+ size_t len, unsigned int flags)
1facf9fc 11342+{
4a4d8108
AM
11343+ ssize_t err;
11344+ struct au_pin pin;
11345+ struct dentry *dentry;
11346+ struct inode *inode;
4a4d8108 11347+ struct file *h_file;
1facf9fc 11348+
4a4d8108
AM
11349+ dentry = file->f_dentry;
11350+ inode = dentry->d_inode;
e49829fe 11351+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11352+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11353+ if (unlikely(err))
11354+ goto out;
1facf9fc 11355+
4a4d8108
AM
11356+ err = au_ready_to_write(file, -1, &pin);
11357+ di_downgrade_lock(dentry, AuLock_IR);
11358+ if (unlikely(err))
11359+ goto out_unlock;
1facf9fc 11360+
4a4d8108
AM
11361+ h_file = au_hf_top(file);
11362+ au_unpin(&pin);
11363+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
11364+ au_cpup_attr_timesizes(inode);
11365+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
1facf9fc 11366+
4f0767ce 11367+out_unlock:
4a4d8108
AM
11368+ di_read_unlock(dentry, AuLock_IR);
11369+ fi_write_unlock(file);
4f0767ce 11370+out:
e49829fe 11371+ si_read_unlock(inode->i_sb);
4a4d8108
AM
11372+ mutex_unlock(&inode->i_mutex);
11373+ return err;
11374+}
1facf9fc 11375+
4a4d8108
AM
11376+/* ---------------------------------------------------------------------- */
11377+
4a4d8108
AM
11378+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
11379+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
1308ab2a 11380+
4a4d8108 11381+static unsigned long au_arch_prot_conv(unsigned long flags)
dece6358 11382+{
4a4d8108
AM
11383+ /* currently ppc64 only */
11384+#ifdef CONFIG_PPC64
11385+ /* cf. linux/arch/powerpc/include/asm/mman.h */
11386+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
11387+ return AuConv_VM_PROT(flags, SAO);
11388+#else
11389+ AuDebugOn(arch_calc_vm_prot_bits(-1));
11390+ return 0;
11391+#endif
dece6358
AM
11392+}
11393+
4a4d8108 11394+static unsigned long au_prot_conv(unsigned long flags)
dece6358 11395+{
4a4d8108
AM
11396+ return AuConv_VM_PROT(flags, READ)
11397+ | AuConv_VM_PROT(flags, WRITE)
11398+ | AuConv_VM_PROT(flags, EXEC)
11399+ | au_arch_prot_conv(flags);
dece6358
AM
11400+}
11401+
4a4d8108
AM
11402+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
11403+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
dece6358 11404+
4a4d8108 11405+static unsigned long au_flag_conv(unsigned long flags)
dece6358 11406+{
4a4d8108
AM
11407+ return AuConv_VM_MAP(flags, GROWSDOWN)
11408+ | AuConv_VM_MAP(flags, DENYWRITE)
11409+ | AuConv_VM_MAP(flags, EXECUTABLE)
11410+ | AuConv_VM_MAP(flags, LOCKED);
dece6358 11411+}
1308ab2a 11412+/*
4a4d8108
AM
11413+ * This is another ugly approach to keep the lock order, particularly
11414+ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
11415+ * find it in git-log, if you want.
1308ab2a 11416+ *
4a4d8108
AM
11417+ * native readdir: i_mutex, copy_to_user, mmap_sem
11418+ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
1308ab2a 11419+ *
4a4d8108
AM
11420+ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
11421+ * acquire aufs rwsem. It introduces a circular locking dependency.
11422+ * To address this problem, aufs_mmap() delegates the part which requires aufs
11423+ * rwsem to its internal workqueue.
1308ab2a 11424+ */
11425+
4a4d8108
AM
11426+struct au_mmap_pre_args {
11427+ /* input */
11428+ struct file *file;
11429+ struct vm_area_struct *vma;
1308ab2a 11430+
4a4d8108
AM
11431+ /* output */
11432+ int *errp;
11433+ struct file *h_file;
11434+ struct au_branch *br;
4a4d8108 11435+};
dece6358 11436+
4a4d8108 11437+static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
2cbb1c4b 11438+ struct file **h_file, struct au_branch **br)
dece6358 11439+{
4a4d8108
AM
11440+ int err;
11441+ aufs_bindex_t bstart;
11442+ const unsigned char wlock
11443+ = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
11444+ struct dentry *dentry;
11445+ struct super_block *sb;
1308ab2a 11446+
4a4d8108
AM
11447+ dentry = file->f_dentry;
11448+ sb = dentry->d_sb;
e49829fe 11449+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108
AM
11450+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11451+ if (unlikely(err))
11452+ goto out;
11453+
4a4d8108
AM
11454+ if (wlock) {
11455+ struct au_pin pin;
11456+
11457+ err = au_ready_to_write(file, -1, &pin);
11458+ di_write_unlock(dentry);
11459+ if (unlikely(err))
11460+ goto out_unlock;
11461+ au_unpin(&pin);
11462+ } else
11463+ di_write_unlock(dentry);
11464+ bstart = au_fbstart(file);
11465+ *br = au_sbr(sb, bstart);
11466+ *h_file = au_hf_top(file);
11467+ get_file(*h_file);
2cbb1c4b 11468+ au_set_mmapped(file);
4a4d8108
AM
11469+
11470+out_unlock:
11471+ fi_write_unlock(file);
11472+out:
11473+ si_read_unlock(sb);
11474+ return err;
dece6358
AM
11475+}
11476+
4a4d8108 11477+static void au_call_mmap_pre(void *args)
dece6358 11478+{
4a4d8108 11479+ struct au_mmap_pre_args *a = args;
2cbb1c4b 11480+ *a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->br);
dece6358
AM
11481+}
11482+
4a4d8108 11483+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 11484+{
4a4d8108 11485+ int err, wkq_err;
2cbb1c4b 11486+ unsigned long prot;
4a4d8108
AM
11487+ struct au_mmap_pre_args args = {
11488+ .file = file,
11489+ .vma = vma,
11490+ .errp = &err
11491+ };
11492+
2cbb1c4b 11493+ AuDbgVmRegion(file, vma);
b752ccd1 11494+ wkq_err = au_wkq_wait_pre(au_call_mmap_pre, &args);
4a4d8108
AM
11495+ if (unlikely(wkq_err))
11496+ err = wkq_err;
11497+ if (unlikely(err))
11498+ goto out;
1308ab2a 11499+
2cbb1c4b
JR
11500+ au_vm_file_reset(vma, args.h_file);
11501+ prot = au_prot_conv(vma->vm_flags);
11502+ err = security_file_mmap(args.h_file, /*reqprot*/prot, prot,
11503+ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
4a4d8108 11504+ if (unlikely(err))
2cbb1c4b 11505+ goto out_reset;
4a4d8108 11506+
2cbb1c4b
JR
11507+ err = args.h_file->f_op->mmap(args.h_file, vma);
11508+ if (unlikely(err))
11509+ goto out_reset;
4a4d8108 11510+
2cbb1c4b 11511+ au_vm_prfile_set(vma, file);
4a4d8108
AM
11512+ vfsub_file_accessed(args.h_file);
11513+ /* update without lock, I don't think it a problem */
2cbb1c4b
JR
11514+ fsstack_copy_attr_atime(file->f_dentry->d_inode,
11515+ args.h_file->f_dentry->d_inode);
11516+ goto out_fput; /* success */
4a4d8108 11517+
2cbb1c4b
JR
11518+out_reset:
11519+ au_unset_mmapped(file);
11520+ au_vm_file_reset(vma, file);
11521+out_fput:
4a4d8108 11522+ fput(args.h_file);
4f0767ce 11523+out:
4a4d8108
AM
11524+ return err;
11525+}
11526+
11527+/* ---------------------------------------------------------------------- */
11528+
b752ccd1 11529+static int aufs_fsync_nondir(struct file *file, int datasync)
4a4d8108
AM
11530+{
11531+ int err;
11532+ struct au_pin pin;
b752ccd1 11533+ struct dentry *dentry;
4a4d8108
AM
11534+ struct inode *inode;
11535+ struct file *h_file;
11536+ struct super_block *sb;
11537+
b752ccd1 11538+ dentry = file->f_dentry;
4a4d8108
AM
11539+ inode = dentry->d_inode;
11540+ IMustLock(file->f_mapping->host);
11541+ if (inode != file->f_mapping->host) {
11542+ mutex_unlock(&file->f_mapping->host->i_mutex);
11543+ mutex_lock(&inode->i_mutex);
11544+ }
11545+ IMustLock(inode);
11546+
11547+ sb = dentry->d_sb;
e49829fe
JR
11548+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
11549+ if (unlikely(err))
11550+ goto out;
4a4d8108
AM
11551+
11552+ err = 0; /* -EBADF; */ /* posix? */
11553+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
e49829fe 11554+ goto out_si;
4a4d8108
AM
11555+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11556+ if (unlikely(err))
e49829fe 11557+ goto out_si;
4a4d8108
AM
11558+
11559+ err = au_ready_to_write(file, -1, &pin);
11560+ di_downgrade_lock(dentry, AuLock_IR);
11561+ if (unlikely(err))
11562+ goto out_unlock;
11563+ au_unpin(&pin);
11564+
11565+ err = -EINVAL;
11566+ h_file = au_hf_top(file);
53392da6
AM
11567+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
11568+ au_cpup_attr_timesizes(inode);
4a4d8108 11569+
4f0767ce 11570+out_unlock:
4a4d8108 11571+ di_read_unlock(dentry, AuLock_IR);
1308ab2a 11572+ fi_write_unlock(file);
e49829fe 11573+out_si:
953406b4 11574+ si_read_unlock(sb);
e49829fe 11575+out:
4a4d8108
AM
11576+ if (inode != file->f_mapping->host) {
11577+ mutex_unlock(&inode->i_mutex);
11578+ mutex_lock(&file->f_mapping->host->i_mutex);
11579+ }
11580+ return err;
dece6358
AM
11581+}
11582+
4a4d8108
AM
11583+/* no one supports this operation, currently */
11584+#if 0
11585+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 11586+{
4a4d8108
AM
11587+ int err;
11588+ struct au_pin pin;
1308ab2a 11589+ struct dentry *dentry;
4a4d8108
AM
11590+ struct inode *inode;
11591+ struct file *file, *h_file;
1308ab2a 11592+
4a4d8108 11593+ file = kio->ki_filp;
1308ab2a 11594+ dentry = file->f_dentry;
4a4d8108 11595+ inode = dentry->d_inode;
e49829fe 11596+ au_mtx_and_read_lock(inode);
4a4d8108
AM
11597+
11598+ err = 0; /* -EBADF; */ /* posix? */
11599+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
11600+ goto out;
11601+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
11602+ if (unlikely(err))
1308ab2a 11603+ goto out;
11604+
4a4d8108
AM
11605+ err = au_ready_to_write(file, -1, &pin);
11606+ di_downgrade_lock(dentry, AuLock_IR);
11607+ if (unlikely(err))
11608+ goto out_unlock;
11609+ au_unpin(&pin);
1308ab2a 11610+
4a4d8108
AM
11611+ err = -ENOSYS;
11612+ h_file = au_hf_top(file);
11613+ if (h_file->f_op && h_file->f_op->aio_fsync) {
11614+ struct dentry *h_d;
11615+ struct mutex *h_mtx;
1308ab2a 11616+
4a4d8108
AM
11617+ h_d = h_file->f_dentry;
11618+ h_mtx = &h_d->d_inode->i_mutex;
11619+ if (!is_sync_kiocb(kio)) {
11620+ get_file(h_file);
11621+ fput(file);
11622+ }
11623+ kio->ki_filp = h_file;
11624+ err = h_file->f_op->aio_fsync(kio, datasync);
11625+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
11626+ if (!err)
11627+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
11628+ /*ignore*/
11629+ au_cpup_attr_timesizes(inode);
11630+ mutex_unlock(h_mtx);
11631+ }
1308ab2a 11632+
4f0767ce 11633+out_unlock:
4a4d8108
AM
11634+ di_read_unlock(dentry, AuLock_IR);
11635+ fi_write_unlock(file);
4f0767ce 11636+out:
e49829fe 11637+ si_read_unlock(inode->i_sb); /* the superblock member is i_sb */
4a4d8108
AM
11638+ mutex_unlock(&inode->i_mutex);
11639+ return err;
dece6358 11640+}
4a4d8108 11641+#endif
dece6358 11642+
4a4d8108 11643+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 11644+{
4a4d8108
AM
11645+ int err;
11646+ struct file *h_file;
11647+ struct dentry *dentry;
11648+ struct super_block *sb;
1308ab2a 11649+
4a4d8108
AM
11650+ dentry = file->f_dentry;
11651+ sb = dentry->d_sb;
e49829fe 11652+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11653+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
11654+ if (unlikely(err))
11655+ goto out;
11656+
11657+ h_file = au_hf_top(file);
11658+ if (h_file->f_op && h_file->f_op->fasync)
11659+ err = h_file->f_op->fasync(fd, h_file, flag);
11660+
11661+ di_read_unlock(dentry, AuLock_IR);
11662+ fi_read_unlock(file);
1308ab2a 11663+
4f0767ce 11664+out:
4a4d8108 11665+ si_read_unlock(sb);
1308ab2a 11666+ return err;
dece6358 11667+}
4a4d8108
AM
11668+
11669+/* ---------------------------------------------------------------------- */
11670+
11671+/* no one supports this operation, currently */
11672+#if 0
11673+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
11674+ size_t len, loff_t *pos , int more)
11675+{
11676+}
11677+#endif
11678+
11679+/* ---------------------------------------------------------------------- */
11680+
11681+const struct file_operations aufs_file_fop = {
11682+ .owner = THIS_MODULE,
2cbb1c4b 11683+
027c5e7a 11684+ .llseek = default_llseek,
4a4d8108
AM
11685+
11686+ .read = aufs_read,
11687+ .write = aufs_write,
11688+ .aio_read = aufs_aio_read,
11689+ .aio_write = aufs_aio_write,
11690+#ifdef CONFIG_AUFS_POLL
11691+ .poll = aufs_poll,
11692+#endif
11693+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1
AM
11694+#ifdef CONFIG_COMPAT
11695+ .compat_ioctl = aufs_ioctl_nondir, /* same */
11696+#endif
4a4d8108
AM
11697+ .mmap = aufs_mmap,
11698+ .open = aufs_open_nondir,
11699+ .flush = aufs_flush_nondir,
11700+ .release = aufs_release_nondir,
11701+ .fsync = aufs_fsync_nondir,
11702+ /* .aio_fsync = aufs_aio_fsync_nondir, */
11703+ .fasync = aufs_fasync,
11704+ /* .sendpage = aufs_sendpage, */
11705+ .splice_write = aufs_splice_write,
11706+ .splice_read = aufs_splice_read,
11707+#if 0
11708+ .aio_splice_write = aufs_aio_splice_write,
11709+ .aio_splice_read = aufs_aio_splice_read
11710+#endif
11711+};
7f207e10
AM
11712diff -urN /usr/share/empty/fs/aufs/f_op_sp.c linux/fs/aufs/f_op_sp.c
11713--- /usr/share/empty/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
53392da6 11714+++ linux/fs/aufs/f_op_sp.c 2011-08-24 13:30:24.731313534 +0200
e49829fe 11715@@ -0,0 +1,299 @@
1308ab2a 11716+/*
027c5e7a 11717+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 11718+ *
11719+ * This program, aufs is free software; you can redistribute it and/or modify
11720+ * it under the terms of the GNU General Public License as published by
11721+ * the Free Software Foundation; either version 2 of the License, or
11722+ * (at your option) any later version.
11723+ *
11724+ * This program is distributed in the hope that it will be useful,
11725+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11726+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11727+ * GNU General Public License for more details.
11728+ *
11729+ * You should have received a copy of the GNU General Public License
11730+ * along with this program; if not, write to the Free Software
11731+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11732+ */
dece6358 11733+
1308ab2a 11734+/*
4a4d8108
AM
11735+ * file operations for special files.
11736+ * while they exist in aufs virtually,
11737+ * their file I/O is handled out of aufs.
1308ab2a 11738+ */
11739+
4a4d8108
AM
11740+#include <linux/fs_stack.h>
11741+#include "aufs.h"
1308ab2a 11742+
4a4d8108
AM
11743+static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
11744+ unsigned long nv, loff_t pos)
dece6358 11745+{
4a4d8108
AM
11746+ ssize_t err;
11747+ aufs_bindex_t bstart;
11748+ unsigned char wbr;
11749+ struct file *file, *h_file;
11750+ struct super_block *sb;
1308ab2a 11751+
4a4d8108
AM
11752+ file = kio->ki_filp;
11753+ sb = file->f_dentry->d_sb;
11754+ si_read_lock(sb, AuLock_FLUSH);
11755+ fi_read_lock(file);
11756+ bstart = au_fbstart(file);
11757+ h_file = au_hf_top(file);
11758+ fi_read_unlock(file);
11759+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11760+ si_read_unlock(sb);
11761+
11762+ /* do not change the file in kio */
11763+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
11764+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
11765+ if (err > 0 && wbr)
11766+ file_accessed(h_file);
11767+
11768+ return err;
11769+}
11770+
11771+static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
11772+ unsigned long nv, loff_t pos)
11773+{
11774+ ssize_t err;
11775+ aufs_bindex_t bstart;
11776+ unsigned char wbr;
11777+ struct super_block *sb;
11778+ struct file *file, *h_file;
11779+
11780+ file = kio->ki_filp;
11781+ sb = file->f_dentry->d_sb;
11782+ si_read_lock(sb, AuLock_FLUSH);
11783+ fi_read_lock(file);
11784+ bstart = au_fbstart(file);
11785+ h_file = au_hf_top(file);
11786+ fi_read_unlock(file);
11787+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
11788+ si_read_unlock(sb);
11789+
11790+ /* do not change the file in kio */
11791+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
11792+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
11793+ if (err > 0 && wbr)
11794+ file_update_time(h_file);
11795+
11796+ return err;
11797+}
11798+
11799+/* ---------------------------------------------------------------------- */
11800+
11801+static int aufs_release_sp(struct inode *inode, struct file *file)
11802+{
11803+ int err;
11804+ struct file *h_file;
11805+
11806+ fi_read_lock(file);
11807+ h_file = au_hf_top(file);
11808+ fi_read_unlock(file);
11809+ /* close this fifo in aufs */
11810+ err = h_file->f_op->release(inode, file); /* ignore */
11811+ aufs_release_nondir(inode, file); /* ignore */
11812+ return err;
11813+}
11814+
11815+/* ---------------------------------------------------------------------- */
11816+
11817+/* currently, support only FIFO */
4f0767ce
JR
11818+enum {
11819+ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
11820+ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
11821+ AuSp_Last
11822+};
4a4d8108
AM
11823+static int aufs_open_sp(struct inode *inode, struct file *file);
11824+static struct au_sp_fop {
11825+ int done;
11826+ struct file_operations fop; /* not 'const' */
11827+ spinlock_t spin;
11828+} au_sp_fop[AuSp_Last] = {
11829+ [AuSp_FIFO] = {
11830+ .fop = {
11831+ .owner = THIS_MODULE,
11832+ .open = aufs_open_sp
11833+ }
11834+ }
11835+};
11836+
11837+static void au_init_fop_sp(struct file *file)
11838+{
11839+ struct au_sp_fop *p;
11840+ int i;
11841+ struct file *h_file;
11842+
11843+ p = au_sp_fop;
11844+ if (unlikely(!p->done)) {
11845+ /* initialize first time only */
11846+ static DEFINE_SPINLOCK(spin);
11847+
11848+ spin_lock(&spin);
11849+ if (!p->done) {
11850+ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
11851+ != AuSp_Last);
11852+ for (i = 0; i < AuSp_Last; i++)
11853+ spin_lock_init(&p[i].spin);
11854+ p->done = 1;
11855+ }
11856+ spin_unlock(&spin);
11857+ }
11858+
11859+ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
11860+ case FMODE_READ:
11861+ i = AuSp_FIFO_R;
11862+ break;
11863+ case FMODE_WRITE:
11864+ i = AuSp_FIFO_W;
11865+ break;
11866+ case FMODE_READ | FMODE_WRITE:
11867+ i = AuSp_FIFO_RW;
11868+ break;
11869+ default:
11870+ BUG();
11871+ }
11872+
11873+ p += i;
11874+ if (unlikely(!p->done)) {
11875+ /* initialize first time only */
11876+ h_file = au_hf_top(file);
11877+ spin_lock(&p->spin);
11878+ if (!p->done) {
11879+ p->fop = *h_file->f_op;
11880+ p->fop.owner = THIS_MODULE;
11881+ if (p->fop.aio_read)
11882+ p->fop.aio_read = aufs_aio_read_sp;
11883+ if (p->fop.aio_write)
11884+ p->fop.aio_write = aufs_aio_write_sp;
11885+ p->fop.release = aufs_release_sp;
11886+ p->done = 1;
11887+ }
11888+ spin_unlock(&p->spin);
11889+ }
11890+ file->f_op = &p->fop;
11891+}
11892+
11893+static int au_cpup_sp(struct dentry *dentry)
11894+{
11895+ int err;
11896+ aufs_bindex_t bcpup;
11897+ struct au_pin pin;
11898+ struct au_wr_dir_args wr_dir_args = {
11899+ .force_btgt = -1,
11900+ .flags = 0
11901+ };
11902+
11903+ AuDbg("%.*s\n", AuDLNPair(dentry));
11904+
11905+ di_read_unlock(dentry, AuLock_IR);
11906+ di_write_lock_child(dentry);
11907+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11908+ if (unlikely(err < 0))
11909+ goto out;
11910+ bcpup = err;
11911+ err = 0;
11912+ if (bcpup == au_dbstart(dentry))
11913+ goto out; /* success */
11914+
11915+ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
11916+ AuPin_MNT_WRITE);
11917+ if (!err) {
11918+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
11919+ au_unpin(&pin);
11920+ }
11921+
4f0767ce 11922+out:
4a4d8108
AM
11923+ di_downgrade_lock(dentry, AuLock_IR);
11924+ return err;
11925+}
11926+
11927+static int au_do_open_sp(struct file *file, int flags)
11928+{
11929+ int err;
11930+ struct dentry *dentry;
11931+ struct super_block *sb;
11932+ struct file *h_file;
11933+ struct inode *h_inode;
11934+
11935+ dentry = file->f_dentry;
11936+ AuDbg("%.*s\n", AuDLNPair(dentry));
11937+
11938+ /*
11939+ * try copying-up.
11940+ * operate on the ro branch is not an error.
11941+ */
11942+ au_cpup_sp(dentry); /* ignore */
11943+
11944+ /* prepare h_file */
11945+ err = au_do_open_nondir(file, vfsub_file_flags(file));
11946+ if (unlikely(err))
11947+ goto out;
11948+
11949+ sb = dentry->d_sb;
11950+ h_file = au_hf_top(file);
11951+ h_inode = h_file->f_dentry->d_inode;
11952+ di_read_unlock(dentry, AuLock_IR);
11953+ fi_write_unlock(file);
11954+ si_read_unlock(sb);
11955+ /* open this fifo in aufs */
11956+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
11957+ si_noflush_read_lock(sb);
11958+ fi_write_lock(file);
11959+ di_read_lock_child(dentry, AuLock_IR);
11960+ if (!err)
11961+ au_init_fop_sp(file);
4a4d8108 11962+
4f0767ce 11963+out:
4a4d8108
AM
11964+ return err;
11965+}
11966+
11967+static int aufs_open_sp(struct inode *inode, struct file *file)
11968+{
11969+ int err;
11970+ struct super_block *sb;
11971+
11972+ sb = file->f_dentry->d_sb;
11973+ si_read_lock(sb, AuLock_FLUSH);
11974+ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
11975+ si_read_unlock(sb);
11976+ return err;
11977+}
11978+
11979+/* ---------------------------------------------------------------------- */
11980+
11981+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
11982+{
11983+ init_special_inode(inode, mode, rdev);
11984+
11985+ switch (mode & S_IFMT) {
11986+ case S_IFIFO:
11987+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
11988+ /*FALLTHROUGH*/
11989+ case S_IFCHR:
11990+ case S_IFBLK:
11991+ case S_IFSOCK:
11992+ break;
11993+ default:
11994+ AuDebugOn(1);
11995+ }
11996+}
11997+
11998+int au_special_file(umode_t mode)
11999+{
12000+ int ret;
12001+
12002+ ret = 0;
12003+ switch (mode & S_IFMT) {
12004+ case S_IFIFO:
12005+#if 0
12006+ case S_IFCHR:
12007+ case S_IFBLK:
12008+ case S_IFSOCK:
12009+#endif
12010+ ret = 1;
12011+ }
12012+
12013+ return ret;
12014+}
7f207e10
AM
12015diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
12016--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
53392da6 12017+++ linux/fs/aufs/fstype.h 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12018@@ -0,0 +1,497 @@
12019+/*
027c5e7a 12020+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
12021+ *
12022+ * This program, aufs is free software; you can redistribute it and/or modify
12023+ * it under the terms of the GNU General Public License as published by
12024+ * the Free Software Foundation; either version 2 of the License, or
12025+ * (at your option) any later version.
12026+ *
12027+ * This program is distributed in the hope that it will be useful,
12028+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12029+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12030+ * GNU General Public License for more details.
12031+ *
12032+ * You should have received a copy of the GNU General Public License
12033+ * along with this program; if not, write to the Free Software
12034+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12035+ */
12036+
12037+/*
12038+ * judging filesystem type
12039+ */
12040+
12041+#ifndef __AUFS_FSTYPE_H__
12042+#define __AUFS_FSTYPE_H__
12043+
12044+#ifdef __KERNEL__
12045+
12046+#include <linux/fs.h>
12047+#include <linux/magic.h>
12048+#include <linux/romfs_fs.h>
12049+#include <linux/aufs_type.h>
12050+
12051+static inline int au_test_aufs(struct super_block *sb)
12052+{
12053+ return sb->s_magic == AUFS_SUPER_MAGIC;
12054+}
12055+
12056+static inline const char *au_sbtype(struct super_block *sb)
12057+{
12058+ return sb->s_type->name;
12059+}
1308ab2a 12060+
12061+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
12062+{
12063+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
12064+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
12065+#else
12066+ return 0;
12067+#endif
12068+}
12069+
1308ab2a 12070+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 12071+{
1308ab2a 12072+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
12073+ return sb->s_magic == ROMFS_MAGIC; /* romfs, not iso9660 */
dece6358
AM
12074+#else
12075+ return 0;
12076+#endif
12077+}
12078+
1308ab2a 12079+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 12080+{
1308ab2a 12081+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
12082+ return sb->s_magic == CRAMFS_MAGIC;
12083+#endif
12084+ return 0;
12085+}
12086+
12087+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
12088+{
12089+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
12090+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
12091+#else
12092+ return 0;
12093+#endif
12094+}
12095+
1308ab2a 12096+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 12097+{
1308ab2a 12098+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
12099+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
12100+#else
12101+ return 0;
12102+#endif
12103+}
12104+
1308ab2a 12105+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 12106+{
1308ab2a 12107+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
12108+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
12109+#else
12110+ return 0;
12111+#endif
12112+}
12113+
1308ab2a 12114+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 12115+{
1308ab2a 12116+#ifdef CONFIG_TMPFS
12117+ return sb->s_magic == TMPFS_MAGIC;
12118+#else
12119+ return 0;
dece6358 12120+#endif
dece6358
AM
12121+}
12122+
1308ab2a 12123+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 12124+{
1308ab2a 12125+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
12126+ return !strcmp(au_sbtype(sb), "ecryptfs");
12127+#else
12128+ return 0;
12129+#endif
1facf9fc 12130+}
12131+
1308ab2a 12132+static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
1facf9fc 12133+{
1308ab2a 12134+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
12135+ return sb->s_magic == SMB_SUPER_MAGIC;
12136+#else
12137+ return 0;
1facf9fc 12138+#endif
1facf9fc 12139+}
12140+
1308ab2a 12141+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
1facf9fc 12142+{
1308ab2a 12143+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
12144+ return sb->s_magic == OCFS2_SUPER_MAGIC;
12145+#else
12146+ return 0;
12147+#endif
1facf9fc 12148+}
12149+
1308ab2a 12150+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
1facf9fc 12151+{
1308ab2a 12152+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
12153+ return sb->s_magic == DLMFS_MAGIC;
12154+#else
12155+ return 0;
12156+#endif
1facf9fc 12157+}
12158+
1308ab2a 12159+static inline int au_test_coda(struct super_block *sb __maybe_unused)
1facf9fc 12160+{
1308ab2a 12161+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
12162+ return sb->s_magic == CODA_SUPER_MAGIC;
12163+#else
12164+ return 0;
12165+#endif
12166+}
12167+
12168+static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
12169+{
12170+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
12171+ return sb->s_magic == V9FS_MAGIC;
12172+#else
12173+ return 0;
12174+#endif
12175+}
12176+
12177+static inline int au_test_ext4(struct super_block *sb __maybe_unused)
12178+{
12179+#if defined(CONFIG_EXT4_FS) || defined(CONFIG_EXT4_FS_MODULE)
12180+ return sb->s_magic == EXT4_SUPER_MAGIC; /* NOTE(review): same value as ext2/ext3 magic */
12181+#else
12182+ return 0;
12183+#endif
12184+}
12185+
12186+static inline int au_test_sysv(struct super_block *sb __maybe_unused)
12187+{
12188+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
12189+ return !strcmp(au_sbtype(sb), "sysv");
12190+#else
12191+ return 0;
12192+#endif
12193+}
12194+
12195+static inline int au_test_ramfs(struct super_block *sb)
12196+{
12197+ return sb->s_magic == RAMFS_MAGIC;
12198+}
12199+
12200+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
12201+{
12202+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
12203+ return sb->s_magic == UBIFS_SUPER_MAGIC;
12204+#else
12205+ return 0;
12206+#endif
12207+}
12208+
12209+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
12210+{
12211+#ifdef CONFIG_PROC_FS
12212+ return sb->s_magic == PROC_SUPER_MAGIC;
12213+#else
12214+ return 0;
12215+#endif
12216+}
12217+
12218+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
12219+{
12220+#ifdef CONFIG_SYSFS
12221+ return sb->s_magic == SYSFS_MAGIC;
12222+#else
12223+ return 0;
12224+#endif
12225+}
12226+
12227+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
12228+{
12229+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
12230+ return sb->s_magic == CONFIGFS_MAGIC;
12231+#else
12232+ return 0;
12233+#endif
12234+}
12235+
12236+static inline int au_test_minix(struct super_block *sb __maybe_unused)
12237+{
12238+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
12239+ return sb->s_magic == MINIX3_SUPER_MAGIC
12240+ || sb->s_magic == MINIX2_SUPER_MAGIC
12241+ || sb->s_magic == MINIX2_SUPER_MAGIC2
12242+ || sb->s_magic == MINIX_SUPER_MAGIC
12243+ || sb->s_magic == MINIX_SUPER_MAGIC2;
12244+#else
12245+ return 0;
12246+#endif
12247+}
12248+
12249+static inline int au_test_cifs(struct super_block *sb __maybe_unused)
12250+{
12251+#if defined(CONFIG_CIFS_FS) || defined(CONFIG_CIFS_FS_MODULE)
12252+ return sb->s_magic == CIFS_MAGIC_NUMBER; /* also when cifs is built as a module */
12253+#else
12254+ return 0;
12255+#endif
12256+}
12257+
12258+static inline int au_test_fat(struct super_block *sb __maybe_unused)
12259+{
12260+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
12261+ return sb->s_magic == MSDOS_SUPER_MAGIC;
12262+#else
12263+ return 0;
12264+#endif
12265+}
12266+
12267+static inline int au_test_msdos(struct super_block *sb)
12268+{
12269+ return au_test_fat(sb);
12270+}
12271+
12272+static inline int au_test_vfat(struct super_block *sb)
12273+{
12274+ return au_test_fat(sb);
12275+}
12276+
12277+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
12278+{
12279+#ifdef CONFIG_SECURITYFS
12280+ return sb->s_magic == SECURITYFS_MAGIC;
12281+#else
12282+ return 0;
12283+#endif
12284+}
12285+
12286+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
12287+{
12288+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
12289+ return sb->s_magic == SQUASHFS_MAGIC;
12290+#else
12291+ return 0;
12292+#endif
12293+}
12294+
12295+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
12296+{
12297+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
12298+ return sb->s_magic == BTRFS_SUPER_MAGIC;
12299+#else
12300+ return 0;
12301+#endif
12302+}
12303+
12304+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
12305+{
12306+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
12307+ return sb->s_magic == XENFS_SUPER_MAGIC;
12308+#else
12309+ return 0;
12310+#endif
12311+}
12312+
12313+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
12314+{
12315+#ifdef CONFIG_DEBUG_FS
12316+ return sb->s_magic == DEBUGFS_MAGIC;
12317+#else
12318+ return 0;
12319+#endif
12320+}
12321+
12322+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
12323+{
12324+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
12325+ return sb->s_magic == NILFS_SUPER_MAGIC;
12326+#else
12327+ return 0;
12328+#endif
12329+}
12330+
4a4d8108
AM
12331+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
12332+{
12333+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
12334+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
12335+#else
12336+ return 0;
12337+#endif
12338+}
12339+
1308ab2a 12340+/* ---------------------------------------------------------------------- */
12341+/*
12342+ * they can't be an aufs branch.
12343+ */
12344+static inline int au_test_fs_unsuppoted(struct super_block *sb)
12345+{
12346+ return
12347+#ifndef CONFIG_AUFS_BR_RAMFS
12348+ au_test_ramfs(sb) ||
12349+#endif
12350+ au_test_procfs(sb)
12351+ || au_test_sysfs(sb)
12352+ || au_test_configfs(sb)
12353+ || au_test_debugfs(sb)
12354+ || au_test_securityfs(sb)
12355+ || au_test_xenfs(sb)
12356+ || au_test_ecryptfs(sb)
12357+ /* || !strcmp(au_sbtype(sb), "unionfs") */
12358+ || au_test_aufs(sb); /* will be supported in next version */
12359+}
12360+
12361+/*
12362+ * If the filesystem supports NFS-export, then it has to support NULL as
12363+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
12364+ * We can apply this principle when we handle a lower filesystem.
12365+ */
12366+static inline int au_test_fs_null_nd(struct super_block *sb)
12367+{
12368+ return !!sb->s_export_op;
12369+}
12370+
12371+static inline int au_test_fs_remote(struct super_block *sb)
12372+{
12373+ return !au_test_tmpfs(sb)
12374+#ifdef CONFIG_AUFS_BR_RAMFS
12375+ && !au_test_ramfs(sb)
12376+#endif
12377+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
12378+}
12379+
12380+/* ---------------------------------------------------------------------- */
12381+
12382+/*
12383+ * Note: these functions (below) are created after reading ->getattr() in all
12384+ * filesystems under linux/fs. it means we have to do so in every update...
12385+ */
12386+
12387+/*
12388+ * some filesystems require getattr to refresh the inode attributes before
12389+ * referencing.
12390+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
12391+ * and leave the work for d_revalidate()
12392+ */
12393+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
12394+{
12395+ return au_test_nfs(sb)
12396+ || au_test_fuse(sb)
12397+ /* || au_test_smbfs(sb) */ /* untested */
12398+ /* || au_test_ocfs2(sb) */ /* untested */
12399+ /* || au_test_btrfs(sb) */ /* untested */
12400+ /* || au_test_coda(sb) */ /* untested */
12401+ /* || au_test_v9fs(sb) */ /* untested */
12402+ ;
12403+}
12404+
12405+/*
12406+ * filesystems which don't maintain i_size or i_blocks.
12407+ */
12408+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
12409+{
12410+ return au_test_xfs(sb)
4a4d8108
AM
12411+ || au_test_btrfs(sb)
12412+ || au_test_ubifs(sb)
12413+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 12414+ /* || au_test_ext4(sb) */ /* untested */
12415+ /* || au_test_ocfs2(sb) */ /* untested */
12416+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
12417+ /* || au_test_sysv(sb) */ /* untested */
1308ab2a 12418+ /* || au_test_minix(sb) */ /* untested */
12419+ ;
12420+}
12421+
12422+/*
12423+ * filesystems which don't store the correct value in some of their inode
12424+ * attributes.
12425+ */
12426+static inline int au_test_fs_bad_iattr(struct super_block *sb)
12427+{
12428+ return au_test_fs_bad_iattr_size(sb)
12429+ /* || au_test_cifs(sb) */ /* untested */
12430+ || au_test_fat(sb)
12431+ || au_test_msdos(sb)
12432+ || au_test_vfat(sb);
1facf9fc 12433+}
12434+
12435+/* they don't check i_nlink in link(2) */
12436+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
12437+{
12438+ return au_test_tmpfs(sb)
12439+#ifdef CONFIG_AUFS_BR_RAMFS
12440+ || au_test_ramfs(sb)
12441+#endif
4a4d8108
AM
12442+ || au_test_ubifs(sb)
12443+ || au_test_btrfs(sb)
12444+ || au_test_hfsplus(sb);
1facf9fc 12445+}
12446+
12447+/*
12448+ * filesystems which set S_NOATIME and S_NOCMTIME.
12449+ */
12450+static inline int au_test_fs_notime(struct super_block *sb)
12451+{
12452+ return au_test_nfs(sb)
12453+ || au_test_fuse(sb)
dece6358 12454+ || au_test_ubifs(sb)
1facf9fc 12455+ /* || au_test_cifs(sb) */ /* untested */
1facf9fc 12456+ ;
12457+}
12458+
12459+/*
12460+ * filesystems which require replacing i_mapping.
12461+ */
12462+static inline int au_test_fs_bad_mapping(struct super_block *sb)
12463+{
dece6358
AM
12464+ return au_test_fuse(sb)
12465+ || au_test_ubifs(sb);
1facf9fc 12466+}
12467+
12468+/* temporary support for i#1 in cramfs */
12469+static inline int au_test_fs_unique_ino(struct inode *inode)
12470+{
12471+ if (au_test_cramfs(inode->i_sb))
12472+ return inode->i_ino != 1;
12473+ return 1;
12474+}
12475+
12476+/* ---------------------------------------------------------------------- */
12477+
12478+/*
12479+ * the filesystem where the xino files are placed must support i/o after unlink and
12480+ * maintain i_size and i_blocks.
12481+ */
12482+static inline int au_test_fs_bad_xino(struct super_block *sb)
12483+{
12484+ return au_test_fs_remote(sb)
12485+ || au_test_fs_bad_iattr_size(sb)
12486+#ifdef CONFIG_AUFS_BR_RAMFS
12487+ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
12488+#else
12489+ || !au_test_fs_null_nd(sb) /* to keep xino code simple */
12490+#endif
12491+ /* don't want unnecessary work for xino */
12492+ || au_test_aufs(sb)
1308ab2a 12493+ || au_test_ecryptfs(sb)
12494+ || au_test_nilfs(sb);
1facf9fc 12495+}
12496+
12497+static inline int au_test_fs_trunc_xino(struct super_block *sb)
12498+{
12499+ return au_test_tmpfs(sb)
12500+ || au_test_ramfs(sb);
12501+}
12502+
12503+/*
12504+ * test if the @sb is real-readonly.
12505+ */
12506+static inline int au_test_fs_rr(struct super_block *sb)
12507+{
12508+ return au_test_squashfs(sb)
12509+ || au_test_iso9660(sb)
12510+ || au_test_cramfs(sb)
12511+ || au_test_romfs(sb);
12512+}
12513+
12514+#endif /* __KERNEL__ */
12515+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
12516diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
12517--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12518+++ linux/fs/aufs/hfsnotify.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 12519@@ -0,0 +1,247 @@
1facf9fc 12520+/*
027c5e7a 12521+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 12522+ *
12523+ * This program, aufs is free software; you can redistribute it and/or modify
12524+ * it under the terms of the GNU General Public License as published by
12525+ * the Free Software Foundation; either version 2 of the License, or
12526+ * (at your option) any later version.
dece6358
AM
12527+ *
12528+ * This program is distributed in the hope that it will be useful,
12529+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12530+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12531+ * GNU General Public License for more details.
12532+ *
12533+ * You should have received a copy of the GNU General Public License
12534+ * along with this program; if not, write to the Free Software
12535+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 12536+ */
12537+
12538+/*
4a4d8108 12539+ * fsnotify for the lower directories
1facf9fc 12540+ */
12541+
12542+#include "aufs.h"
12543+
4a4d8108
AM
12544+/* FS_IN_IGNORED is unnecessary */
12545+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
12546+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 12547+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
1facf9fc 12548+
0c5527e5 12549+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 12550+{
0c5527e5
AM
12551+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
12552+ hn_mark);
4a4d8108 12553+ AuDbg("here\n");
7f207e10
AM
12554+ hn->hn_mark_dead = 1;
12555+ smp_mb();
12556+ wake_up_all(&au_hfsn_wq);
4a4d8108 12557+}
1facf9fc 12558+
027c5e7a 12559+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 12560+{
027c5e7a
AM
12561+ struct au_hnotify *hn;
12562+ struct super_block *sb;
12563+ struct au_branch *br;
0c5527e5 12564+ struct fsnotify_mark *mark;
027c5e7a 12565+ aufs_bindex_t bindex;
1facf9fc 12566+
027c5e7a
AM
12567+ hn = hinode->hi_notify;
12568+ sb = hn->hn_aufs_inode->i_sb;
12569+ bindex = au_br_index(sb, hinode->hi_id);
12570+ br = au_sbr(sb, bindex);
7f207e10 12571+ hn->hn_mark_dead = 0;
0c5527e5
AM
12572+ mark = &hn->hn_mark;
12573+ fsnotify_init_mark(mark, au_hfsn_free_mark);
12574+ mark->mask = AuHfsnMask;
7f207e10
AM
12575+ /*
12576+ * by udba rename or rmdir, aufs assign a new inode to the known
12577+ * h_inode, so specify 1 to allow dups.
12578+ */
027c5e7a
AM
12579+ return fsnotify_add_mark(mark, br->br_hfsn_group, hinode->hi_inode,
12580+ /*mnt*/NULL, /*allow_dups*/1);
1facf9fc 12581+}
12582+
027c5e7a 12583+static void au_hfsn_free(struct au_hinode *hinode)
1facf9fc 12584+{
027c5e7a 12585+ struct au_hnotify *hn;
0c5527e5 12586+ struct fsnotify_mark *mark;
953406b4 12587+
027c5e7a 12588+ hn = hinode->hi_notify;
0c5527e5
AM
12589+ mark = &hn->hn_mark;
12590+ fsnotify_destroy_mark(mark);
12591+ fsnotify_put_mark(mark);
7f207e10
AM
12592+
12593+ /* TODO: bad approach */
12594+ wait_event(au_hfsn_wq, hn->hn_mark_dead);
1facf9fc 12595+}
12596+
12597+/* ---------------------------------------------------------------------- */
12598+
4a4d8108 12599+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 12600+{
0c5527e5 12601+ struct fsnotify_mark *mark;
1facf9fc 12602+
0c5527e5
AM
12603+ mark = &hinode->hi_notify->hn_mark;
12604+ spin_lock(&mark->lock);
1facf9fc 12605+ if (do_set) {
0c5527e5
AM
12606+ AuDebugOn(mark->mask & AuHfsnMask);
12607+ mark->mask |= AuHfsnMask;
1facf9fc 12608+ } else {
0c5527e5
AM
12609+ AuDebugOn(!(mark->mask & AuHfsnMask));
12610+ mark->mask &= ~AuHfsnMask;
1facf9fc 12611+ }
0c5527e5 12612+ spin_unlock(&mark->lock);
4a4d8108 12613+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 12614+}
12615+
4a4d8108 12616+/* ---------------------------------------------------------------------- */
1facf9fc 12617+
4a4d8108
AM
12618+/* #define AuDbgHnotify */
12619+#ifdef AuDbgHnotify
12620+static char *au_hfsn_name(u32 mask)
12621+{
12622+#ifdef CONFIG_AUFS_DEBUG
12623+#define test_ret(flag) if (mask & flag) \
12624+ return #flag;
12625+ test_ret(FS_ACCESS);
12626+ test_ret(FS_MODIFY);
12627+ test_ret(FS_ATTRIB);
12628+ test_ret(FS_CLOSE_WRITE);
12629+ test_ret(FS_CLOSE_NOWRITE);
12630+ test_ret(FS_OPEN);
12631+ test_ret(FS_MOVED_FROM);
12632+ test_ret(FS_MOVED_TO);
12633+ test_ret(FS_CREATE);
12634+ test_ret(FS_DELETE);
12635+ test_ret(FS_DELETE_SELF);
12636+ test_ret(FS_MOVE_SELF);
12637+ test_ret(FS_UNMOUNT);
12638+ test_ret(FS_Q_OVERFLOW);
12639+ test_ret(FS_IN_IGNORED);
12640+ test_ret(FS_IN_ISDIR);
12641+ test_ret(FS_IN_ONESHOT);
12642+ test_ret(FS_EVENT_ON_CHILD);
12643+ return "";
12644+#undef test_ret
12645+#else
12646+ return "??";
12647+#endif
1facf9fc 12648+}
4a4d8108 12649+#endif
1facf9fc 12650+
12651+/* ---------------------------------------------------------------------- */
12652+
4a4d8108 12653+static int au_hfsn_handle_event(struct fsnotify_group *group,
0c5527e5
AM
12654+ struct fsnotify_mark *inode_mark,
12655+ struct fsnotify_mark *vfsmount_mark,
4a4d8108 12656+ struct fsnotify_event *event)
1facf9fc 12657+{
12658+ int err;
4a4d8108
AM
12659+ struct au_hnotify *hnotify;
12660+ struct inode *h_dir, *h_inode;
12661+ __u32 mask;
4a4d8108
AM
12662+ struct qstr h_child_qstr = {
12663+ .name = event->file_name,
12664+ .len = event->name_len
12665+ };
12666+
12667+ AuDebugOn(event->data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 12668+
12669+ err = 0;
0c5527e5 12670+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108
AM
12671+ mask = event->mask;
12672+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 12673+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 12674+ goto out;
1facf9fc 12675+
4a4d8108
AM
12676+ h_dir = event->to_tell;
12677+ h_inode = event->inode;
12678+#ifdef AuDbgHnotify
12679+ au_debug(1);
12680+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
12681+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
12682+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
12683+ h_dir->i_ino, mask, au_hfsn_name(mask),
12684+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
12685+ /* WARN_ON(1); */
1facf9fc 12686+ }
4a4d8108 12687+ au_debug(0);
1facf9fc 12688+#endif
4a4d8108 12689+
0c5527e5
AM
12690+ AuDebugOn(!inode_mark);
12691+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
12692+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 12693+
4a4d8108
AM
12694+out:
12695+ return err;
12696+}
1facf9fc 12697+
027c5e7a 12698+/* isn't it waste to ask every registered 'group'? */
7f207e10 12699+/* copied from linux/fs/notify/inotify/inotify_fsnotify.c */
4a4d8108 12700+/* it should be exported to modules */
7f207e10
AM
12701+static bool au_hfsn_should_send_event(struct fsnotify_group *group,
12702+ struct inode *h_inode,
0c5527e5
AM
12703+ struct fsnotify_mark *inode_mark,
12704+ struct fsnotify_mark *vfsmount_mark,
12705+ __u32 mask, void *data, int data_type)
4a4d8108 12706+{
4a4d8108 12707+ mask = (mask & ~FS_EVENT_ON_CHILD);
7f207e10 12708+ return inode_mark->mask & mask;
4a4d8108
AM
12709+}
12710+
12711+static struct fsnotify_ops au_hfsn_ops = {
12712+ .should_send_event = au_hfsn_should_send_event,
12713+ .handle_event = au_hfsn_handle_event
12714+};
12715+
12716+/* ---------------------------------------------------------------------- */
12717+
027c5e7a
AM
12718+static void au_hfsn_fin_br(struct au_branch *br)
12719+{
12720+ if (br->br_hfsn_group)
12721+ fsnotify_put_group(br->br_hfsn_group);
12722+}
12723+
12724+static int au_hfsn_init_br(struct au_branch *br, int perm)
12725+{
12726+ br->br_hfsn_group = NULL;
12727+ br->br_hfsn_ops = au_hfsn_ops;
12728+ return 0;
12729+}
12730+
12731+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
4a4d8108
AM
12732+{
12733+ int err;
1facf9fc 12734+
4a4d8108 12735+ err = 0;
027c5e7a
AM
12736+ if (udba != AuOpt_UDBA_HNOTIFY
12737+ || !au_br_hnotifyable(perm)) {
12738+ au_hfsn_fin_br(br);
12739+ br->br_hfsn_group = NULL;
12740+ goto out;
12741+ }
12742+
12743+ if (br->br_hfsn_group)
12744+ goto out;
12745+
12746+ br->br_hfsn_group = fsnotify_alloc_group(&br->br_hfsn_ops);
12747+ if (IS_ERR(br->br_hfsn_group)) {
12748+ err = PTR_ERR(br->br_hfsn_group);
0c5527e5 12749+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
027c5e7a 12750+ br->br_hfsn_group = NULL;
4a4d8108 12751+ }
1facf9fc 12752+
027c5e7a 12753+out:
1facf9fc 12754+ AuTraceErr(err);
12755+ return err;
12756+}
12757+
4a4d8108
AM
12758+const struct au_hnotify_op au_hnotify_op = {
12759+ .ctl = au_hfsn_ctl,
12760+ .alloc = au_hfsn_alloc,
12761+ .free = au_hfsn_free,
1facf9fc 12762+
027c5e7a
AM
12763+ .reset_br = au_hfsn_reset_br,
12764+ .fin_br = au_hfsn_fin_br,
12765+ .init_br = au_hfsn_init_br
4a4d8108 12766+};
7f207e10
AM
12767diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
12768--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
53392da6 12769+++ linux/fs/aufs/hfsplus.c 2011-08-24 13:30:24.731313534 +0200
4a4d8108
AM
12770@@ -0,0 +1,58 @@
12771+/*
027c5e7a 12772+ * Copyright (C) 2010-2011 Junjiro R. Okajima
4a4d8108
AM
12773+ *
12774+ * This program, aufs is free software; you can redistribute it and/or modify
12775+ * it under the terms of the GNU General Public License as published by
12776+ * the Free Software Foundation; either version 2 of the License, or
12777+ * (at your option) any later version.
12778+ *
12779+ * This program is distributed in the hope that it will be useful,
12780+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12781+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12782+ * GNU General Public License for more details.
12783+ *
12784+ * You should have received a copy of the GNU General Public License
12785+ * along with this program; if not, write to the Free Software
12786+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12787+ */
1facf9fc 12788+
4a4d8108
AM
12789+/*
12790+ * special support for filesystems which acquire an inode mutex
12791+ * at final closing a file, eg, hfsplus.
12792+ *
12793+ * This trick is very simple and stupid, just to open the file before the really
12794+ * necessary open to tell hfsplus that this is not the final closing.
12795+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
12796+ * and au_h_open_post() after releasing it.
12797+ */
1facf9fc 12798+
4a4d8108
AM
12799+#include <linux/file.h>
12800+#include "aufs.h"
1facf9fc 12801+
4a4d8108
AM
12802+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
12803+{
12804+ struct file *h_file;
12805+ struct dentry *h_dentry;
1facf9fc 12806+
4a4d8108
AM
12807+ h_dentry = au_h_dptr(dentry, bindex);
12808+ AuDebugOn(!h_dentry);
12809+ AuDebugOn(!h_dentry->d_inode);
12810+ IMustLock(h_dentry->d_inode);
12811+
12812+ h_file = NULL;
12813+ if (au_test_hfsplus(h_dentry->d_sb)
12814+ && S_ISREG(h_dentry->d_inode->i_mode))
12815+ h_file = au_h_open(dentry, bindex,
12816+ O_RDONLY | O_NOATIME | O_LARGEFILE,
12817+ /*file*/NULL);
12818+ return h_file;
1facf9fc 12819+}
12820+
4a4d8108
AM
12821+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12822+ struct file *h_file)
12823+{
12824+ if (h_file) {
12825+ fput(h_file);
12826+ au_sbr_put(dentry->d_sb, bindex);
12827+ }
12828+}
7f207e10
AM
12829diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
12830--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
12831+++ linux/fs/aufs/hnotify.c 2011-08-24 13:30:24.731313534 +0200
12832@@ -0,0 +1,712 @@
e49829fe 12833+/*
027c5e7a 12834+ * Copyright (C) 2005-2011 Junjiro R. Okajima
e49829fe
JR
12835+ *
12836+ * This program, aufs is free software; you can redistribute it and/or modify
12837+ * it under the terms of the GNU General Public License as published by
12838+ * the Free Software Foundation; either version 2 of the License, or
12839+ * (at your option) any later version.
12840+ *
12841+ * This program is distributed in the hope that it will be useful,
12842+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12843+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12844+ * GNU General Public License for more details.
12845+ *
12846+ * You should have received a copy of the GNU General Public License
12847+ * along with this program; if not, write to the Free Software
12848+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12849+ */
12850+
12851+/*
7f207e10 12852+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
12853+ */
12854+
12855+#include "aufs.h"
12856+
027c5e7a 12857+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
12858+{
12859+ int err;
7f207e10 12860+ struct au_hnotify *hn;
1facf9fc 12861+
4a4d8108
AM
12862+ err = -ENOMEM;
12863+ hn = au_cache_alloc_hnotify();
12864+ if (hn) {
12865+ hn->hn_aufs_inode = inode;
027c5e7a
AM
12866+ hinode->hi_notify = hn;
12867+ err = au_hnotify_op.alloc(hinode);
12868+ AuTraceErr(err);
12869+ if (unlikely(err)) {
12870+ hinode->hi_notify = NULL;
4a4d8108
AM
12871+ au_cache_free_hnotify(hn);
12872+ /*
12873+ * The upper dir was removed by udba, but a dir with the same
12874+ * name is left. In this case, aufs assigns a new inode
12875+ * number and sets the monitor again.
12876+ * For the lower dir, the old monitor is still left.
12877+ */
12878+ if (err == -EEXIST)
12879+ err = 0;
12880+ }
1308ab2a 12881+ }
1308ab2a 12882+
027c5e7a 12883+ AuTraceErr(err);
1308ab2a 12884+ return err;
dece6358 12885+}
1facf9fc 12886+
4a4d8108 12887+void au_hn_free(struct au_hinode *hinode)
dece6358 12888+{
4a4d8108 12889+ struct au_hnotify *hn;
1facf9fc 12890+
4a4d8108
AM
12891+ hn = hinode->hi_notify;
12892+ if (hn) {
027c5e7a 12893+ au_hnotify_op.free(hinode);
4a4d8108
AM
12894+ au_cache_free_hnotify(hn);
12895+ hinode->hi_notify = NULL;
12896+ }
12897+}
dece6358 12898+
4a4d8108 12899+/* ---------------------------------------------------------------------- */
dece6358 12900+
4a4d8108
AM
12901+void au_hn_ctl(struct au_hinode *hinode, int do_set)
12902+{
12903+ if (hinode->hi_notify)
12904+ au_hnotify_op.ctl(hinode, do_set);
12905+}
12906+
12907+void au_hn_reset(struct inode *inode, unsigned int flags)
12908+{
12909+ aufs_bindex_t bindex, bend;
12910+ struct inode *hi;
12911+ struct dentry *iwhdentry;
1facf9fc 12912+
1308ab2a 12913+ bend = au_ibend(inode);
4a4d8108
AM
12914+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
12915+ hi = au_h_iptr(inode, bindex);
12916+ if (!hi)
12917+ continue;
1308ab2a 12918+
4a4d8108
AM
12919+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
12920+ iwhdentry = au_hi_wh(inode, bindex);
12921+ if (iwhdentry)
12922+ dget(iwhdentry);
12923+ au_igrab(hi);
12924+ au_set_h_iptr(inode, bindex, NULL, 0);
12925+ au_set_h_iptr(inode, bindex, au_igrab(hi),
12926+ flags & ~AuHi_XINO);
12927+ iput(hi);
12928+ dput(iwhdentry);
12929+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 12930+ }
1facf9fc 12931+}
12932+
1308ab2a 12933+/* ---------------------------------------------------------------------- */
1facf9fc 12934+
4a4d8108 12935+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 12936+{
4a4d8108
AM
12937+ int err;
12938+ aufs_bindex_t bindex, bend, bfound, bstart;
12939+ struct inode *h_i;
1facf9fc 12940+
4a4d8108
AM
12941+ err = 0;
12942+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
12943+ pr_warning("branch root dir was changed\n");
12944+ goto out;
12945+ }
1facf9fc 12946+
4a4d8108
AM
12947+ bfound = -1;
12948+ bend = au_ibend(inode);
12949+ bstart = au_ibstart(inode);
12950+#if 0 /* reserved for future use */
12951+ if (bindex == bend) {
12952+ /* keep this ino in rename case */
12953+ goto out;
12954+ }
12955+#endif
12956+ for (bindex = bstart; bindex <= bend; bindex++)
12957+ if (au_h_iptr(inode, bindex) == h_inode) {
12958+ bfound = bindex;
12959+ break;
12960+ }
12961+ if (bfound < 0)
1308ab2a 12962+ goto out;
1facf9fc 12963+
4a4d8108
AM
12964+ for (bindex = bstart; bindex <= bend; bindex++) {
12965+ h_i = au_h_iptr(inode, bindex);
12966+ if (!h_i)
12967+ continue;
1facf9fc 12968+
4a4d8108
AM
12969+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
12970+ /* ignore this error */
12971+ /* bad action? */
1facf9fc 12972+ }
1facf9fc 12973+
4a4d8108 12974+ /* children inode number will be broken */
1facf9fc 12975+
4f0767ce 12976+out:
4a4d8108
AM
12977+ AuTraceErr(err);
12978+ return err;
1facf9fc 12979+}
12980+
4a4d8108 12981+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 12982+{
4a4d8108
AM
12983+ int err, i, j, ndentry;
12984+ struct au_dcsub_pages dpages;
12985+ struct au_dpage *dpage;
12986+ struct dentry **dentries;
1facf9fc 12987+
4a4d8108
AM
12988+ err = au_dpages_init(&dpages, GFP_NOFS);
12989+ if (unlikely(err))
12990+ goto out;
12991+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
12992+ if (unlikely(err))
12993+ goto out_dpages;
1facf9fc 12994+
4a4d8108
AM
12995+ for (i = 0; i < dpages.ndpage; i++) {
12996+ dpage = dpages.dpages + i;
12997+ dentries = dpage->dentries;
12998+ ndentry = dpage->ndentry;
12999+ for (j = 0; j < ndentry; j++) {
13000+ struct dentry *d;
13001+
13002+ d = dentries[j];
13003+ if (IS_ROOT(d))
13004+ continue;
13005+
4a4d8108
AM
13006+ au_digen_dec(d);
13007+ if (d->d_inode)
13008+ /* todo: reset children xino?
13009+ cached children only? */
13010+ au_iigen_dec(d->d_inode);
1308ab2a 13011+ }
dece6358 13012+ }
1facf9fc 13013+
4f0767ce 13014+out_dpages:
4a4d8108 13015+ au_dpages_free(&dpages);
dece6358 13016+
027c5e7a 13017+#if 0
4a4d8108
AM
13018+ /* discard children */
13019+ dentry_unhash(dentry);
13020+ dput(dentry);
027c5e7a 13021+#endif
4f0767ce 13022+out:
dece6358
AM
13023+ return err;
13024+}
13025+
1308ab2a 13026+/*
4a4d8108 13027+ * return 0 if processed; only an alias named exactly @name is handled.
1308ab2a 13028+ */
4a4d8108
AM
13029+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
13030+ const unsigned int isdir)
dece6358 13031+{
1308ab2a 13032+ int err;
4a4d8108
AM
13033+ struct dentry *d;
13034+ struct qstr *dname;
1facf9fc 13035+
4a4d8108
AM
13036+ err = 1;
13037+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13038+ pr_warning("branch root dir was changed\n");
13039+ err = 0;
13040+ goto out;
13041+ }
dece6358 13042+
4a4d8108
AM
13043+ if (!isdir) {
13044+ AuDebugOn(!name);
13045+ au_iigen_dec(inode);
027c5e7a 13046+ spin_lock(&inode->i_lock);
4a4d8108 13047+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
027c5e7a 13048+ spin_lock(&d->d_lock);
4a4d8108
AM
13049+ dname = &d->d_name;
13050+ if (dname->len != nlen
027c5e7a
AM
13051+ || memcmp(dname->name, name, nlen)) {
13052+ spin_unlock(&d->d_lock);
4a4d8108 13053+ continue;
027c5e7a 13054+ }
4a4d8108 13055+ err = 0;
4a4d8108
AM
13056+ au_digen_dec(d);
13057+ spin_unlock(&d->d_lock);
13058+ break;
1facf9fc 13059+ }
027c5e7a 13060+ spin_unlock(&inode->i_lock);
1308ab2a 13061+ } else {
027c5e7a 13062+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13063+ d = d_find_alias(inode);
13064+ if (!d) {
13065+ au_iigen_dec(inode);
13066+ goto out;
13067+ }
1facf9fc 13068+
027c5e7a 13069+ spin_lock(&d->d_lock);
4a4d8108 13070+ dname = &d->d_name;
027c5e7a
AM
13071+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
13072+ spin_unlock(&d->d_lock);
4a4d8108 13073+ err = hn_gen_tree(d);
027c5e7a
AM
13074+ spin_lock(&d->d_lock);
13075+ }
13076+ spin_unlock(&d->d_lock);
4a4d8108
AM
13077+ dput(d);
13078+ }
1facf9fc 13079+
4f0767ce 13080+out:
4a4d8108 13081+ AuTraceErr(err);
1308ab2a 13082+ return err;
13083+}
dece6358 13084+
4a4d8108 13085+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 13086+{
4a4d8108
AM
13087+ int err;
13088+ struct inode *inode;
1facf9fc 13089+
4a4d8108
AM
13090+ inode = dentry->d_inode;
13091+ if (IS_ROOT(dentry)
13092+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
13093+ ) {
13094+ pr_warning("branch root dir was changed\n");
13095+ return 0;
13096+ }
1308ab2a 13097+
4a4d8108
AM
13098+ err = 0;
13099+ if (!isdir) {
4a4d8108
AM
13100+ au_digen_dec(dentry);
13101+ if (inode)
13102+ au_iigen_dec(inode);
13103+ } else {
027c5e7a 13104+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
13105+ if (inode)
13106+ err = hn_gen_tree(dentry);
13107+ }
13108+
13109+ AuTraceErr(err);
13110+ return err;
1facf9fc 13111+}
13112+
4a4d8108 13113+/* ---------------------------------------------------------------------- */
1facf9fc 13114+
4a4d8108
AM
13115+/* hnotify job flags */
13116+#define AuHnJob_XINO0 1
13117+#define AuHnJob_GEN (1 << 1)
13118+#define AuHnJob_DIRENT (1 << 2)
13119+#define AuHnJob_ISDIR (1 << 3)
13120+#define AuHnJob_TRYXINO0 (1 << 4)
13121+#define AuHnJob_MNTPNT (1 << 5)
13122+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
13123+#define au_fset_hnjob(flags, name) \
13124+ do { (flags) |= AuHnJob_##name; } while (0)
13125+#define au_fclr_hnjob(flags, name) \
13126+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 13127+
4a4d8108
AM
13128+enum {
13129+ AuHn_CHILD,
13130+ AuHn_PARENT,
13131+ AuHnLast
13132+};
1facf9fc 13133+
4a4d8108
AM
13134+struct au_hnotify_args {
13135+ struct inode *h_dir, *dir, *h_child_inode;
13136+ u32 mask;
13137+ unsigned int flags[AuHnLast];
13138+ unsigned int h_child_nlen;
13139+ char h_child_name[];
13140+};
1facf9fc 13141+
4a4d8108
AM
13142+struct hn_job_args {
13143+ unsigned int flags;
13144+ struct inode *inode, *h_inode, *dir, *h_dir;
13145+ struct dentry *dentry;
13146+ char *h_name;
13147+ int h_nlen;
13148+};
1308ab2a 13149+
4a4d8108
AM
13150+static int hn_job(struct hn_job_args *a)
13151+{
13152+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
1308ab2a 13153+
4a4d8108
AM
13154+ /* reset xino */
13155+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
13156+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 13157+
4a4d8108
AM
13158+ if (au_ftest_hnjob(a->flags, TRYXINO0)
13159+ && a->inode
13160+ && a->h_inode) {
13161+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
13162+ if (!a->h_inode->i_nlink)
13163+ hn_xino(a->inode, a->h_inode); /* ignore this error */
13164+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 13165+ }
1facf9fc 13166+
4a4d8108
AM
13167+ /* make the generation obsolete */
13168+ if (au_ftest_hnjob(a->flags, GEN)) {
13169+ int err = -1;
13170+ if (a->inode)
13171+ err = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
13172+ isdir);
13173+ if (err && a->dentry)
13174+ hn_gen_by_name(a->dentry, isdir);
13175+ /* ignore this error */
1facf9fc 13176+ }
1facf9fc 13177+
4a4d8108
AM
13178+ /* make dir entries obsolete */
13179+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
13180+ struct au_vdir *vdir;
1facf9fc 13181+
4a4d8108
AM
13182+ vdir = au_ivdir(a->inode);
13183+ if (vdir)
13184+ vdir->vd_jiffy = 0;
13185+ /* IMustLock(a->inode); */
13186+ /* a->inode->i_version++; */
13187+ }
1facf9fc 13188+
4a4d8108
AM
13189+ /* can do nothing but warn */
13190+ if (au_ftest_hnjob(a->flags, MNTPNT)
13191+ && a->dentry
13192+ && d_mountpoint(a->dentry))
13193+ pr_warning("mount-point %.*s is removed or renamed\n",
13194+ AuDLNPair(a->dentry));
1facf9fc 13195+
4a4d8108 13196+ return 0;
1308ab2a 13197+}
1facf9fc 13198+
1308ab2a 13199+/* ---------------------------------------------------------------------- */
1facf9fc 13200+
4a4d8108
AM
13201+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
13202+ struct inode *dir)
1308ab2a 13203+{
4a4d8108
AM
13204+ struct dentry *dentry, *d, *parent;
13205+ struct qstr *dname;
1308ab2a 13206+
4a4d8108
AM
13207+ parent = d_find_alias(dir);
13208+ if (!parent)
13209+ return NULL;
1308ab2a 13210+
4a4d8108 13211+ dentry = NULL;
027c5e7a 13212+ spin_lock(&parent->d_lock);
4a4d8108
AM
13213+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
13214+ /* AuDbg("%.*s\n", AuDLNPair(d)); */
027c5e7a 13215+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
13216+ dname = &d->d_name;
13217+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
13218+ goto cont_unlock;
13219+ if (au_di(d))
13220+ au_digen_dec(d);
13221+ else
13222+ goto cont_unlock;
13223+ if (d->d_count) {
13224+ dentry = dget_dlock(d);
4a4d8108 13225+ spin_unlock(&d->d_lock);
027c5e7a 13226+ break;
dece6358 13227+ }
1facf9fc 13228+
027c5e7a
AM
13229+ cont_unlock:
13230+ spin_unlock(&d->d_lock);
1308ab2a 13231+ }
027c5e7a 13232+ spin_unlock(&parent->d_lock);
4a4d8108 13233+ dput(parent);
1facf9fc 13234+
4a4d8108
AM
13235+ if (dentry)
13236+ di_write_lock_child(dentry);
1308ab2a 13237+
4a4d8108
AM
13238+ return dentry;
13239+}
dece6358 13240+
4a4d8108
AM
13241+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
13242+ aufs_bindex_t bindex, ino_t h_ino)
13243+{
13244+ struct inode *inode;
13245+ ino_t ino;
13246+ int err;
13247+
13248+ inode = NULL;
13249+ err = au_xino_read(sb, bindex, h_ino, &ino);
13250+ if (!err && ino)
13251+ inode = ilookup(sb, ino);
13252+ if (!inode)
13253+ goto out;
13254+
13255+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
13256+ pr_warning("wrong root branch\n");
13257+ iput(inode);
13258+ inode = NULL;
13259+ goto out;
1308ab2a 13260+ }
13261+
4a4d8108 13262+ ii_write_lock_child(inode);
1308ab2a 13263+
4f0767ce 13264+out:
4a4d8108 13265+ return inode;
dece6358
AM
13266+}
13267+
4a4d8108 13268+static void au_hn_bh(void *_args)
1facf9fc 13269+{
4a4d8108
AM
13270+ struct au_hnotify_args *a = _args;
13271+ struct super_block *sb;
13272+ aufs_bindex_t bindex, bend, bfound;
13273+ unsigned char xino, try_iput;
1facf9fc 13274+ int err;
1308ab2a 13275+ struct inode *inode;
4a4d8108
AM
13276+ ino_t h_ino;
13277+ struct hn_job_args args;
13278+ struct dentry *dentry;
13279+ struct au_sbinfo *sbinfo;
1facf9fc 13280+
4a4d8108
AM
13281+ AuDebugOn(!_args);
13282+ AuDebugOn(!a->h_dir);
13283+ AuDebugOn(!a->dir);
13284+ AuDebugOn(!a->mask);
13285+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
13286+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
13287+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 13288+
4a4d8108
AM
13289+ inode = NULL;
13290+ dentry = NULL;
13291+ /*
13292+ * do not lock a->dir->i_mutex here
13293+ * because of d_revalidate() may cause a deadlock.
13294+ */
13295+ sb = a->dir->i_sb;
13296+ AuDebugOn(!sb);
13297+ sbinfo = au_sbi(sb);
13298+ AuDebugOn(!sbinfo);
7f207e10 13299+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 13300+
4a4d8108
AM
13301+ ii_read_lock_parent(a->dir);
13302+ bfound = -1;
13303+ bend = au_ibend(a->dir);
13304+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
13305+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
13306+ bfound = bindex;
13307+ break;
13308+ }
13309+ ii_read_unlock(a->dir);
13310+ if (unlikely(bfound < 0))
13311+ goto out;
1facf9fc 13312+
4a4d8108
AM
13313+ xino = !!au_opt_test(au_mntflags(sb), XINO);
13314+ h_ino = 0;
13315+ if (a->h_child_inode)
13316+ h_ino = a->h_child_inode->i_ino;
1facf9fc 13317+
4a4d8108
AM
13318+ if (a->h_child_nlen
13319+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
13320+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
13321+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
13322+ a->dir);
13323+ try_iput = 0;
13324+ if (dentry)
13325+ inode = dentry->d_inode;
13326+ if (xino && !inode && h_ino
13327+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
13328+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
13329+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
13330+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
13331+ try_iput = 1;
13332+ }
1facf9fc 13333+
4a4d8108
AM
13334+ args.flags = a->flags[AuHn_CHILD];
13335+ args.dentry = dentry;
13336+ args.inode = inode;
13337+ args.h_inode = a->h_child_inode;
13338+ args.dir = a->dir;
13339+ args.h_dir = a->h_dir;
13340+ args.h_name = a->h_child_name;
13341+ args.h_nlen = a->h_child_nlen;
13342+ err = hn_job(&args);
13343+ if (dentry) {
027c5e7a 13344+ if (au_di(dentry))
4a4d8108
AM
13345+ di_write_unlock(dentry);
13346+ dput(dentry);
13347+ }
13348+ if (inode && try_iput) {
13349+ ii_write_unlock(inode);
13350+ iput(inode);
13351+ }
1facf9fc 13352+
4a4d8108
AM
13353+ ii_write_lock_parent(a->dir);
13354+ args.flags = a->flags[AuHn_PARENT];
13355+ args.dentry = NULL;
13356+ args.inode = a->dir;
13357+ args.h_inode = a->h_dir;
13358+ args.dir = NULL;
13359+ args.h_dir = NULL;
13360+ args.h_name = NULL;
13361+ args.h_nlen = 0;
13362+ err = hn_job(&args);
13363+ ii_write_unlock(a->dir);
1facf9fc 13364+
4f0767ce 13365+out:
4a4d8108
AM
13366+ iput(a->h_child_inode);
13367+ iput(a->h_dir);
13368+ iput(a->dir);
027c5e7a
AM
13369+ si_write_unlock(sb);
13370+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 13371+ kfree(a);
dece6358 13372+}
1facf9fc 13373+
4a4d8108
AM
13374+/* ---------------------------------------------------------------------- */
13375+
13376+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
13377+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 13378+{ /* turn one event on a lower dir into parent/child job flags and queue au_hn_bh() with a private copy of the arguments */
4a4d8108 13379+ int err, len;
53392da6 13380+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
13381+ unsigned char isdir, isroot, wh;
13382+ struct inode *dir;
13383+ struct au_hnotify_args *args;
13384+ char *p, *h_child_name;
dece6358 13385+
1308ab2a 13386+ err = 0;
4a4d8108
AM
13387+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
13388+ dir = igrab(hnotify->hn_aufs_inode);
13389+ if (!dir)
13390+ goto out; /* nothing is queued and 0 is returned */
1facf9fc 13391+
4a4d8108
AM
13392+ isroot = (dir->i_ino == AUFS_ROOT_INO); /* NOTE(review): isroot is not read again in this function */
13393+ wh = 0;
13394+ h_child_name = (void *)h_child_qstr->name;
13395+ len = h_child_qstr->len;
13396+ if (h_child_name) {
13397+ if (len > AUFS_WH_PFX_LEN
13398+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
13399+ h_child_name += AUFS_WH_PFX_LEN; /* skip the AUFS_WH_PFX prefix, keep only the rest of the name */
13400+ len -= AUFS_WH_PFX_LEN;
13401+ wh = 1;
13402+ }
1facf9fc 13403+ }
dece6358 13404+
4a4d8108
AM
13405+ isdir = 0;
13406+ if (h_child_inode)
13407+ isdir = !!S_ISDIR(h_child_inode->i_mode);
13408+ flags[AuHn_PARENT] = AuHnJob_ISDIR; /* the parent job always carries ISDIR and DIRENT */
13409+ flags[AuHn_CHILD] = 0;
13410+ if (isdir)
13411+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
13412+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
13413+ au_fset_hnjob(flags[AuHn_CHILD], GEN); /* the child job always carries GEN; the switch below adds more */
13414+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
13415+ case FS_MOVED_FROM:
13416+ case FS_MOVED_TO:
13417+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
13418+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13419+ /*FALLTHROUGH*/
13420+ case FS_CREATE:
13421+ AuDebugOn(!h_child_name || !h_child_inode);
13422+ break;
1facf9fc 13423+
4a4d8108
AM
13424+ case FS_DELETE:
13425+ /*
13426+ * aufs will never be able to get this child inode.
13427+ * revalidation should be done in d_revalidate()
13428+ * by checking i_nlink, i_generation or d_unhashed().
13429+ */
13430+ AuDebugOn(!h_child_name);
13431+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
13432+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
13433+ break;
dece6358 13434+
4a4d8108
AM
13435+ default:
13436+ AuDebugOn(1); /* any other masked event value is unexpected here */
13437+ }
1308ab2a 13438+
4a4d8108
AM
13439+ if (wh)
13440+ h_child_inode = NULL; /* for a name that carried the AUFS_WH_PFX prefix the child inode is not passed on */
1308ab2a 13441+
4a4d8108
AM
13442+ err = -ENOMEM;
13443+ /* on success, iput() and kfree() are left to the queued au_hn_bh() job (presumably; confirm there); the failure path below does them here */
4a4d8108 13444+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); /* one allocation: the struct, then len name bytes and a NUL */
4a4d8108
AM
13445+ if (unlikely(!args)) {
13446+ AuErr1("no memory\n");
13447+ iput(dir);
13448+ goto out;
13449+ }
13450+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
13451+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
13452+ args->mask = mask;
13453+ args->dir = dir;
13454+ args->h_dir = igrab(h_dir); /* result is stored unchecked */
13455+ if (h_child_inode)
13456+ h_child_inode = igrab(h_child_inode); /* can be NULL */
13457+ args->h_child_inode = h_child_inode;
13458+ args->h_child_nlen = len;
13459+ if (len) {
13460+ p = (void *)args;
13461+ p += sizeof(*args); /* the name copy lives right behind *args */
13462+ memcpy(p, h_child_name, len);
13463+ p[len] = 0;
1308ab2a 13464+ }
1308ab2a 13465+
53392da6
AM
13466+ f = 0;
13467+ if (!dir->i_nlink)
13468+ f = AuWkq_NEST; /* chosen only when the aufs dir has no links left */
13469+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
13470+ if (unlikely(err)) { /* queueing failed: drop the three inode references and the buffer here */
13471+ pr_err("wkq %d\n", err);
13472+ iput(args->h_child_inode);
13473+ iput(args->h_dir);
13474+ iput(args->dir);
13475+ kfree(args);
1facf9fc 13476+ }
1facf9fc 13477+
4a4d8108 13478+out: /* err: 0 if the aufs dir could not be grabbed, -ENOMEM on allocation failure, else the au_wkq_nowait() result */
1facf9fc 13479+ return err;
13480+}
13481+
027c5e7a
AM
13482+/* ---------------------------------------------------------------------- */
13483+
13484+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
13485+{ /* forward to the optional au_hnotify_op.reset_br hook; returns 0 when the hook is not set */
13486+ int err;
13487+
13488+ AuDebugOn(!(udba & AuOptMask_UDBA)); /* udba must contain at least one AuOptMask_UDBA bit */
13489+
13490+ err = 0;
13491+ if (au_hnotify_op.reset_br)
13492+ err = au_hnotify_op.reset_br(udba, br, perm);
13493+
13494+ return err;
13495+}
13496+
13497+int au_hnotify_init_br(struct au_branch *br, int perm)
13498+{ /* forward to the optional au_hnotify_op.init_br hook; returns 0 when the hook is not set */
13499+ int err;
13500+
13501+ err = 0;
13502+ if (au_hnotify_op.init_br)
13503+ err = au_hnotify_op.init_br(br, perm);
13504+
13505+ return err;
13506+}
13507+
13508+void au_hnotify_fin_br(struct au_branch *br)
13509+{ /* forward to the optional au_hnotify_op.fin_br hook; a no-op when the hook is not set */
13510+ if (au_hnotify_op.fin_br)
13511+ au_hnotify_op.fin_br(br);
13512+}
13513+
4a4d8108
AM
13514+static void au_hn_destroy_cache(void)
13515+{ /* destroy the AuCache_HNOTIFY slab cache and clear its slot so later checks see NULL */
13516+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
13517+ au_cachep[AuCache_HNOTIFY] = NULL;
13518+}
1308ab2a 13519+
4a4d8108 13520+int __init au_hnotify_init(void)
1facf9fc 13521+{ /* set up the AuCache_HNOTIFY cache, then run the optional au_hnotify_op.init hook */
1308ab2a 13522+ int err;
1308ab2a 13523+
4a4d8108
AM
13524+ err = -ENOMEM; /* returned as is when the cache slot stays NULL */
13525+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
13526+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
13527+ err = 0;
13528+ if (au_hnotify_op.init)
13529+ err = au_hnotify_op.init();
4a4d8108
AM
13530+ if (unlikely(err))
13531+ au_hn_destroy_cache(); /* undo the cache setup when the hook fails */
1308ab2a 13532+ }
1308ab2a 13533+ AuTraceErr(err);
4a4d8108 13534+ return err;
1308ab2a 13535+}
13536+
4a4d8108 13537+void au_hnotify_fin(void)
1308ab2a 13538+{ /* run the optional au_hnotify_op.fin hook, then destroy the cache if it still exists */
027c5e7a
AM
13539+ if (au_hnotify_op.fin)
13540+ au_hnotify_op.fin();
4a4d8108
AM
13541+ /* cf. au_cache_fin() */
13542+ if (au_cachep[AuCache_HNOTIFY])
13543+ au_hn_destroy_cache();
dece6358 13544+}
7f207e10
AM
13545diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
13546--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13547+++ linux/fs/aufs/iinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13548@@ -0,0 +1,264 @@
dece6358 13549+/*
027c5e7a 13550+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
13551+ *
13552+ * This program, aufs is free software; you can redistribute it and/or modify
13553+ * it under the terms of the GNU General Public License as published by
13554+ * the Free Software Foundation; either version 2 of the License, or
13555+ * (at your option) any later version.
13556+ *
13557+ * This program is distributed in the hope that it will be useful,
13558+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13559+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13560+ * GNU General Public License for more details.
13561+ *
13562+ * You should have received a copy of the GNU General Public License
13563+ * along with this program; if not, write to the Free Software
13564+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13565+ */
1facf9fc 13566+
dece6358 13567+/*
4a4d8108 13568+ * inode private data
dece6358 13569+ */
1facf9fc 13570+
1308ab2a 13571+#include "aufs.h"
1facf9fc 13572+
4a4d8108 13573+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 13574+{ /* return the lower inode stored in slot bindex of this aufs inode; the result may be NULL */
4a4d8108 13575+ struct inode *h_inode;
1facf9fc 13576+
4a4d8108 13577+ IiMustAnyLock(inode); /* presumably asserts that the caller holds the iinfo lock in some mode; confirm in the macro */
1facf9fc 13578+
4a4d8108
AM
13579+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
13580+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); /* a stored lower inode must still be referenced */
13581+ return h_inode;
13582+}
1facf9fc 13583+
4a4d8108
AM
13584+/* todo: hard/soft set? */
13585+void au_hiput(struct au_hinode *hinode)
13586+{ /* release what a slot holds: au_hn_free(), then the whiteout dentry and the lower inode; the fields themselves are not cleared here */
13587+ au_hn_free(hinode);
13588+ dput(hinode->hi_whdentry);
13589+ iput(hinode->hi_inode);
13590+}
1facf9fc 13591+
4a4d8108
AM
13592+unsigned int au_hi_flags(struct inode *inode, int isdir)
13593+{ /* derive the au_set_h_iptr() flags from the mount options of inode's superblock */
13594+ unsigned int flags;
13595+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 13596+
4a4d8108
AM
13597+ flags = 0;
13598+ if (au_opt_test(mnt_flags, XINO))
13599+ au_fset_hi(flags, XINO);
13600+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
13601+ au_fset_hi(flags, HNOTIFY); /* HNOTIFY is only requested for directories */
13602+ return flags;
1facf9fc 13603+}
13604+
4a4d8108
AM
13605+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
13606+ struct inode *h_inode, unsigned int flags)
1308ab2a 13607+{ /* replace the lower inode in slot bindex with h_inode (NULL just empties the slot); failures of the follow-up steps are only logged */
4a4d8108
AM
13608+ struct au_hinode *hinode;
13609+ struct inode *hi;
13610+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 13611+
4a4d8108 13612+ IiMustWriteLock(inode);
dece6358 13613+
4a4d8108
AM
13614+ hinode = iinfo->ii_hinode + bindex;
13615+ hi = hinode->hi_inode;
13616+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
13617+
13618+ if (hi)
13619+ au_hiput(hinode); /* release whatever the slot held before */
13620+ hinode->hi_inode = h_inode; /* the slot stores the caller's pointer; no extra reference is taken here */
13621+ if (h_inode) {
13622+ int err;
13623+ struct super_block *sb = inode->i_sb;
13624+ struct au_branch *br;
13625+
027c5e7a
AM
13626+ AuDebugOn(inode->i_mode
13627+ && (h_inode->i_mode & S_IFMT)
13628+ != (inode->i_mode & S_IFMT)); /* once i_mode is set, the file types must agree */
4a4d8108
AM
13629+ if (bindex == iinfo->ii_bstart)
13630+ au_cpup_igen(inode, h_inode);
13631+ br = au_sbr(sb, bindex);
13632+ hinode->hi_id = br->br_id; /* remember the branch by id as well as by index */
13633+ if (au_ftest_hi(flags, XINO)) {
13634+ err = au_xino_write(sb, bindex, h_inode->i_ino,
13635+ inode->i_ino);
13636+ if (unlikely(err))
13637+ AuIOErr1("failed au_xino_write() %d\n", err);
13638+ }
13639+
13640+ if (au_ftest_hi(flags, HNOTIFY)
13641+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 13642+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
13643+ if (unlikely(err))
13644+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 13645+ }
13646+ }
4a4d8108 13647+}
dece6358 13648+
4a4d8108
AM
13649+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
13650+ struct dentry *h_wh)
13651+{ /* store h_wh as the whiteout dentry of slot bindex; the slot must not hold one already */
13652+ struct au_hinode *hinode;
dece6358 13653+
4a4d8108
AM
13654+ IiMustWriteLock(inode);
13655+
13656+ hinode = au_ii(inode)->ii_hinode + bindex;
13657+ AuDebugOn(hinode->hi_whdentry);
13658+ hinode->hi_whdentry = h_wh; /* the pointer is stored as given; au_hiput() later dput()s it */
1facf9fc 13659+}
13660+
4a4d8108 13661+void au_update_iigen(struct inode *inode)
1308ab2a 13662+{ /* copy the superblock's current generation (au_sigen()) into the inode's ii_generation */
4a4d8108
AM
13663+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
13664+ /* smp_mb(); */ /* atomic_set */
13665+}
1facf9fc 13666+
4a4d8108
AM
13667+/* it may be called at remount time, too; recomputes ii_bstart/ii_bend from the slots that hold a lower inode */
13668+void au_update_ibrange(struct inode *inode, int do_put_zero)
13669+{
13670+ struct au_iinfo *iinfo;
027c5e7a 13671+ aufs_bindex_t bindex, bend;
1facf9fc 13672+
4a4d8108 13673+ iinfo = au_ii(inode);
027c5e7a 13674+ if (!iinfo)
4a4d8108 13675+ return; /* au_ii() returns NULL when ii_hinode is not set */
1facf9fc 13676+
4a4d8108 13677+ IiMustWriteLock(inode);
1facf9fc 13678+
027c5e7a 13679+ if (do_put_zero && iinfo->ii_bstart >= 0) { /* optional first pass: empty the slots whose lower inode has no links left */
4a4d8108
AM
13680+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13681+ bindex++) {
13682+ struct inode *h_i;
1facf9fc 13683+
4a4d8108 13684+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
027c5e7a
AM
13685+ if (h_i && !h_i->i_nlink)
13686+ au_set_h_iptr(inode, bindex, NULL, 0);
13687+ }
4a4d8108
AM
13688+ }
13689+
027c5e7a
AM
13690+ iinfo->ii_bstart = -1; /* both stay -1 when no slot is populated */
13691+ iinfo->ii_bend = -1;
13692+ bend = au_sbend(inode->i_sb);
13693+ for (bindex = 0; bindex <= bend; bindex++) /* lowest populated index */
13694+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13695+ iinfo->ii_bstart = bindex;
4a4d8108 13696+ break;
027c5e7a
AM
13697+ }
13698+ if (iinfo->ii_bstart >= 0)
13699+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) /* highest populated index, scanning downwards */
13700+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
13701+ iinfo->ii_bend = bindex;
13702+ break;
13703+ }
13704+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 13705+}
1facf9fc 13706+
dece6358 13707+/* ---------------------------------------------------------------------- */
1facf9fc 13708+
4a4d8108 13709+void au_icntnr_init_once(void *_c)
dece6358 13710+{ /* one-time setup of an au_icntnr object: its iinfo rwsem (with a lockdep class) and the embedded VFS inode */
4a4d8108
AM
13711+ struct au_icntnr *c = _c;
13712+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 13713+ static struct lock_class_key aufs_ii; /* one key shared by every ii_rwsem */
1facf9fc 13714+
4a4d8108 13715+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 13716+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
13717+ inode_init_once(&c->vfs_inode);
13718+}
1facf9fc 13719+
4a4d8108
AM
13720+int au_iinfo_init(struct inode *inode)
13721+{ /* allocate and reset the per-branch slot array of a fresh aufs inode; returns 0 or -ENOMEM */
13722+ struct au_iinfo *iinfo;
13723+ struct super_block *sb;
13724+ int nbr, i;
1facf9fc 13725+
4a4d8108
AM
13726+ sb = inode->i_sb;
13727+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); /* au_ii() is not usable yet: it returns NULL until ii_hinode is set */
13728+ nbr = au_sbend(sb) + 1;
13729+ if (unlikely(nbr <= 0))
13730+ nbr = 1; /* always allocate at least one slot */
13731+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
13732+ if (iinfo->ii_hinode) {
7f207e10 13733+ au_ninodes_inc(sb);
4a4d8108
AM
13734+ for (i = 0; i < nbr; i++)
13735+ iinfo->ii_hinode[i].hi_id = -1; /* -1: slot not bound to a branch id yet */
1facf9fc 13736+
4a4d8108
AM
13737+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
13738+ /* smp_mb(); */ /* atomic_set */
13739+ iinfo->ii_bstart = -1;
13740+ iinfo->ii_bend = -1;
13741+ iinfo->ii_vdir = NULL;
13742+ return 0;
1308ab2a 13743+ }
4a4d8108
AM
13744+ return -ENOMEM;
13745+}
1facf9fc 13746+
4a4d8108
AM
13747+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
13748+{ /* resize the slot array to nbr entries; returns 0, or -ENOMEM leaving ii_hinode untouched */
13749+ int err, sz;
13750+ struct au_hinode *hip;
1facf9fc 13751+
4a4d8108
AM
13752+ AuRwMustWriteLock(&iinfo->ii_rwsem);
13753+
13754+ err = -ENOMEM;
13755+ sz = sizeof(*hip) * (iinfo->ii_bend + 1); /* byte size derived from ii_bend; presumably the "old size" argument of au_kzrealloc() -- confirm */
13756+ if (!sz)
13757+ sz = sizeof(*hip); /* ii_bend == -1: count one entry */
13758+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
13759+ if (hip) {
13760+ iinfo->ii_hinode = hip;
13761+ err = 0;
1308ab2a 13762+ }
4a4d8108 13763+
1308ab2a 13764+ return err;
1facf9fc 13765+}
13766+
4a4d8108 13767+void au_iinfo_fin(struct inode *inode)
1facf9fc 13768+{ /* tear down the aufs-private part of an inode: xino entry, vdir, every populated slot, then the slot array */
4a4d8108
AM
13769+ struct au_iinfo *iinfo;
13770+ struct au_hinode *hi;
13771+ struct super_block *sb;
b752ccd1
AM
13772+ aufs_bindex_t bindex, bend;
13773+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 13774+
4a4d8108
AM
13775+ iinfo = au_ii(inode);
13776+ /* bad_inode case */
13777+ if (!iinfo)
13778+ return;
1308ab2a 13779+
b752ccd1 13780+ sb = inode->i_sb;
7f207e10 13781+ au_ninodes_dec(sb);
b752ccd1
AM
13782+ if (si_pid_test(sb)) /* presumably: this task already holds the sbinfo lock -- confirm in si_pid_test() */
13783+ au_xino_delete_inode(inode, unlinked);
13784+ else {
13785+ /*
13786+ * it is safe to hide the dependency between sbinfo and
13787+ * sb->s_umount.
13788+ */
13789+ lockdep_off();
13790+ si_noflush_read_lock(sb);
13791+ au_xino_delete_inode(inode, unlinked);
13792+ si_read_unlock(sb);
13793+ lockdep_on();
13794+ }
13795+
4a4d8108
AM
13796+ if (iinfo->ii_vdir)
13797+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 13798+
b752ccd1
AM
13799+ bindex = iinfo->ii_bstart;
13800+ if (bindex >= 0) {
13801+ hi = iinfo->ii_hinode + bindex;
4a4d8108 13802+ bend = iinfo->ii_bend;
b752ccd1
AM
13803+ while (bindex++ <= bend) { /* the test uses the pre-increment value, so slots ii_bstart..ii_bend are all visited */
13804+ if (hi->hi_inode)
4a4d8108 13805+ au_hiput(hi);
4a4d8108
AM
13806+ hi++;
13807+ }
13808+ }
4a4d8108 13809+ kfree(iinfo->ii_hinode);
027c5e7a 13810+ iinfo->ii_hinode = NULL; /* from now on au_ii() returns NULL for this inode */
4a4d8108 13811+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 13812+}
7f207e10
AM
13813diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
13814--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
53392da6 13815+++ linux/fs/aufs/inode.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 13816@@ -0,0 +1,471 @@
4a4d8108 13817+/*
027c5e7a 13818+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
13819+ *
13820+ * This program, aufs is free software; you can redistribute it and/or modify
13821+ * it under the terms of the GNU General Public License as published by
13822+ * the Free Software Foundation; either version 2 of the License, or
13823+ * (at your option) any later version.
13824+ *
13825+ * This program is distributed in the hope that it will be useful,
13826+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13827+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13828+ * GNU General Public License for more details.
13829+ *
13830+ * You should have received a copy of the GNU General Public License
13831+ * along with this program; if not, write to the Free Software
13832+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
13833+ */
1facf9fc 13834+
4a4d8108
AM
13835+/*
13836+ * inode functions
13837+ */
1facf9fc 13838+
4a4d8108 13839+#include "aufs.h"
1308ab2a 13840+
4a4d8108
AM
13841+struct inode *au_igrab(struct inode *inode)
13842+{ /* NULL-tolerant ihold(): take one more reference on an already-referenced inode and return the same pointer */
13843+ if (inode) {
13844+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 13845+ ihold(inode);
1facf9fc 13846+ }
4a4d8108
AM
13847+ return inode;
13848+}
1facf9fc 13849+
4a4d8108
AM
13850+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
13851+{ /* final step of a refresh: copy attributes (not forced), stamp the current generation, optionally bump i_version */
13852+ au_cpup_attr_all(inode, /*force*/0);
13853+ au_update_iigen(inode);
13854+ if (do_version)
13855+ inode->i_version++;
dece6358 13856+}
1facf9fc 13857+
027c5e7a 13858+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 13859+{ /* move every lower inode to the slot of its branch's current index (looked up by hi_id); *update becomes 1 if a lower inode was dropped because its branch is gone */
4a4d8108 13860+ int err, e;
027c5e7a 13861+ umode_t type;
4a4d8108 13862+ aufs_bindex_t bindex, new_bindex;
1308ab2a 13863+ struct super_block *sb;
4a4d8108 13864+ struct au_iinfo *iinfo;
027c5e7a 13865+ struct au_hinode *p, *q, tmp;
1facf9fc 13866+
4a4d8108 13867+ IiMustWriteLock(inode);
1facf9fc 13868+
027c5e7a 13869+ *update = 0;
4a4d8108 13870+ sb = inode->i_sb;
027c5e7a 13871+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
13872+ iinfo = au_ii(inode);
13873+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); /* one slot per current branch */
13874+ if (unlikely(err))
1308ab2a 13875+ goto out;
1facf9fc 13876+
027c5e7a 13877+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 13878+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
13879+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
13880+ bindex++, p++) {
13881+ if (!p->hi_inode)
13882+ continue;
1facf9fc 13883+
027c5e7a 13884+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
13885+ new_bindex = au_br_index(sb, p->hi_id);
13886+ if (new_bindex == bindex)
13887+ continue; /* already in the right slot */
1facf9fc 13888+
4a4d8108 13889+ if (new_bindex < 0) { /* no branch has this id any more: drop the lower inode */
027c5e7a 13890+ *update = 1;
4a4d8108
AM
13891+ au_hiput(p);
13892+ p->hi_inode = NULL;
13893+ continue;
1308ab2a 13894+ }
4a4d8108
AM
13895+
13896+ if (new_bindex < iinfo->ii_bstart)
13897+ iinfo->ii_bstart = new_bindex;
13898+ if (iinfo->ii_bend < new_bindex)
13899+ iinfo->ii_bend = new_bindex;
13900+ /* swap the two lower inodes, and loop again */
13901+ q = iinfo->ii_hinode + new_bindex;
13902+ tmp = *q;
13903+ *q = *p;
13904+ *p = tmp;
13905+ if (tmp.hi_inode) { /* the entry swapped into this slot still needs checking: stay on the same index */
13906+ bindex--;
13907+ p--;
1308ab2a 13908+ }
13909+ }
4a4d8108
AM
13910+ au_update_ibrange(inode, /*do_put_zero*/0);
13911+ e = au_dy_irefresh(inode);
13912+ if (unlikely(e && !err))
13913+ err = e;
1facf9fc 13914+
4f0767ce 13915+out:
027c5e7a
AM
13916+ AuTraceErr(err);
13917+ return err;
13918+}
13919+
13920+int au_refresh_hinode_self(struct inode *inode)
13921+{ /* refresh using only the inode's own slots (no dentry); returns the au_ii_refresh() result */
13922+ int err, update;
13923+
13924+ err = au_ii_refresh(inode, &update);
13925+ if (!err)
13926+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); /* i_version is bumped only for a directory that lost a lower inode */
13927+
13928+ AuTraceErr(err);
4a4d8108
AM
13929+ return err;
13930+}
1facf9fc 13931+
4a4d8108
AM
13932+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
13933+{ /* refresh the inode's slots, then add the lower inodes that dentry has and the inode lacks; -EIO if a slot already holds a different lower inode */
027c5e7a 13934+ int err, e, update;
4a4d8108 13935+ unsigned int flags;
027c5e7a 13936+ umode_t mode;
4a4d8108 13937+ aufs_bindex_t bindex, bend;
027c5e7a 13938+ unsigned char isdir;
4a4d8108
AM
13939+ struct au_hinode *p;
13940+ struct au_iinfo *iinfo;
1facf9fc 13941+
027c5e7a 13942+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
13943+ if (unlikely(err))
13944+ goto out;
13945+
13946+ update = 0; /* the value reported by au_ii_refresh() is discarded; from here on update tracks only additions made below */
13947+ iinfo = au_ii(inode);
13948+ p = iinfo->ii_hinode + iinfo->ii_bstart; /* NOTE(review): p is not read again in this function */
027c5e7a
AM
13949+ mode = (inode->i_mode & S_IFMT);
13950+ isdir = S_ISDIR(mode);
4a4d8108
AM
13951+ flags = au_hi_flags(inode, isdir);
13952+ bend = au_dbend(dentry);
13953+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
13954+ struct inode *h_i;
13955+ struct dentry *h_d;
13956+
13957+ h_d = au_h_dptr(dentry, bindex);
13958+ if (!h_d || !h_d->d_inode)
13959+ continue; /* no positive lower dentry on this branch */
13960+
027c5e7a 13961+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
4a4d8108
AM
13962+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
13963+ h_i = au_h_iptr(inode, bindex);
13964+ if (h_i) {
13965+ if (h_i == h_d->d_inode)
13966+ continue; /* inode and dentry already agree */
13967+ err = -EIO; /* the slot holds a different lower inode than the dentry */
13968+ break;
13969+ }
13970+ }
13971+ if (bindex < iinfo->ii_bstart)
13972+ iinfo->ii_bstart = bindex;
13973+ if (iinfo->ii_bend < bindex)
13974+ iinfo->ii_bend = bindex;
13975+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
13976+ update = 1;
1308ab2a 13977+ }
4a4d8108
AM
13978+ au_update_ibrange(inode, /*do_put_zero*/0);
13979+ e = au_dy_irefresh(inode);
13980+ if (unlikely(e && !err))
13981+ err = e; /* an earlier error takes precedence over e */
027c5e7a
AM
13982+ if (!err)
13983+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 13984+
4f0767ce 13985+out:
4a4d8108 13986+ AuTraceErr(err);
1308ab2a 13987+ return err;
dece6358
AM
13988+}
13989+
4a4d8108 13990+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 13991+{ /* set up an aufs inode from dentry: pick the operation tables by file type, then attach the lower inodes and copy their attributes */
4a4d8108
AM
13992+ int err;
13993+ unsigned int flags;
13994+ umode_t mode;
13995+ aufs_bindex_t bindex, bstart, btail;
13996+ unsigned char isdir;
13997+ struct dentry *h_dentry;
13998+ struct inode *h_inode;
13999+ struct au_iinfo *iinfo;
dece6358 14000+
4a4d8108 14001+ IiMustWriteLock(inode);
dece6358 14002+
4a4d8108
AM
14003+ err = 0;
14004+ isdir = 0;
14005+ bstart = au_dbstart(dentry);
14006+ h_inode = au_h_dptr(dentry, bstart)->d_inode; /* the file type is taken from the lower inode at the dentry's start index */
14007+ mode = h_inode->i_mode;
14008+ switch (mode & S_IFMT) {
14009+ case S_IFREG:
14010+ btail = au_dbtail(dentry);
14011+ inode->i_op = &aufs_iop;
14012+ inode->i_fop = &aufs_file_fop;
14013+ err = au_dy_iaop(inode, bstart, h_inode);
14014+ if (unlikely(err))
14015+ goto out;
14016+ break;
14017+ case S_IFDIR:
14018+ isdir = 1;
14019+ btail = au_dbtaildir(dentry);
14020+ inode->i_op = &aufs_dir_iop;
14021+ inode->i_fop = &aufs_dir_fop;
14022+ break;
14023+ case S_IFLNK:
14024+ btail = au_dbtail(dentry);
14025+ inode->i_op = &aufs_symlink_iop;
14026+ break;
14027+ case S_IFBLK:
14028+ case S_IFCHR:
14029+ case S_IFIFO:
14030+ case S_IFSOCK:
14031+ btail = au_dbtail(dentry);
14032+ inode->i_op = &aufs_iop;
14033+ au_init_special_fop(inode, mode, h_inode->i_rdev);
14034+ break;
14035+ default:
14036+ AuIOErr("Unknown file type 0%o\n", mode);
14037+ err = -EIO;
1308ab2a 14038+ goto out;
4a4d8108 14039+ }
dece6358 14040+
4a4d8108
AM
14041+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
14042+ flags = au_hi_flags(inode, isdir);
14043+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14044+ && au_ftest_hi(flags, HNOTIFY)
14045+ && dentry->d_name.len > AUFS_WH_PFX_LEN
14046+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14047+ au_fclr_hi(flags, HNOTIFY);
14048+ iinfo = au_ii(inode);
14049+ iinfo->ii_bstart = bstart;
14050+ iinfo->ii_bend = btail;
14051+ for (bindex = bstart; bindex <= btail; bindex++) {
14052+ h_dentry = au_h_dptr(dentry, bindex);
14053+ if (h_dentry)
14054+ au_set_h_iptr(inode, bindex,
14055+ au_igrab(h_dentry->d_inode), flags); /* each slot gets its own reference via au_igrab() */
14056+ }
14057+ au_cpup_attr_all(inode, /*force*/1);
dece6358 14058+
4f0767ce 14059+out:
4a4d8108
AM
14060+ return err;
14061+}
dece6358 14062+
027c5e7a
AM
14063+/*
14064+ * a return of zero leaves the iinfo write_locked; any other return unlocks it
14065+ * minus: errno
14066+ * zero: success, matched
14067+ * plus: no error, but unmatched
14068+ */
14069+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
14070+{
14071+ int err;
14072+ aufs_bindex_t bindex, bend;
14073+ struct inode *h_inode, *h_dinode;
dece6358 14074+
4a4d8108
AM
14075+ /*
14076+ * before this function, if aufs got any iinfo lock, it must be only
14077+ * one, the parent dir.
14078+ * it can happen by UDBA and the obsoleted inode number.
14079+ */
14080+ err = -EIO;
14081+ if (unlikely(inode->i_ino == parent_ino(dentry)))
14082+ goto out; /* the candidate inode is the parent dir itself: fail without taking its lock */
14083+
027c5e7a 14084+ err = 1; /* "unmatched" unless the loop below finds the dentry's lower inode */
4a4d8108
AM
14085+ ii_write_lock_new_child(inode);
14086+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14087+ bend = au_ibend(inode);
14088+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14089+ h_inode = au_h_iptr(inode, bindex);
14090+ if (h_inode && h_inode == h_dinode) {
4a4d8108 14091+ err = 0;
027c5e7a 14092+ if (au_iigen_test(inode, au_digen(dentry)))
4a4d8108
AM
14093+ err = au_refresh_hinode(inode, dentry); /* presumably taken when the inode generation differs from the dentry's -- confirm in au_iigen_test() */
14094+ break;
1308ab2a 14095+ }
1facf9fc 14096+ }
dece6358 14097+
4a4d8108
AM
14098+ if (unlikely(err))
14099+ ii_write_unlock(inode);
4f0767ce 14100+out:
1facf9fc 14101+ return err;
14102+}
1facf9fc 14103+
4a4d8108
AM
14104+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14105+ unsigned int d_type, ino_t *ino)
1facf9fc 14106+{ /* look up the aufs inode number recorded for (bindex, h_ino); if none is recorded, allocate one and record it. returns 0 or a negative errno */
4a4d8108
AM
14107+ int err;
14108+ struct mutex *mtx;
1facf9fc 14109+
b752ccd1 14110+ /* prevent hardlinked inode number from race condition */
4a4d8108 14111+ mtx = NULL;
b752ccd1 14112+ if (d_type != DT_DIR) {
4a4d8108
AM
14113+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14114+ mutex_lock(mtx);
14115+ }
14116+ err = au_xino_read(sb, bindex, h_ino, ino);
14117+ if (unlikely(err))
14118+ goto out;
1308ab2a 14119+
4a4d8108
AM
14120+ if (!*ino) { /* zero means no number is recorded yet */
14121+ err = -EIO;
14122+ *ino = au_xino_new_ino(sb);
14123+ if (unlikely(!*ino))
1facf9fc 14124+ goto out;
4a4d8108
AM
14125+ err = au_xino_write(sb, bindex, h_ino, *ino);
14126+ if (unlikely(err))
1308ab2a 14127+ goto out;
1308ab2a 14128+ }
1facf9fc 14129+
4f0767ce 14130+out:
b752ccd1 14131+ if (mtx)
4a4d8108 14132+ mutex_unlock(mtx);
1facf9fc 14133+ return err;
14134+}
14135+
4a4d8108
AM
14136+/* successful returns with iinfo write_locked; failure returns an ERR_PTR() */
14137+/* todo: return with unlocked? */
14138+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 14139+{
b752ccd1 14140+ struct inode *inode, *h_inode;
4a4d8108
AM
14141+ struct dentry *h_dentry;
14142+ struct super_block *sb;
b752ccd1 14143+ struct mutex *mtx;
4a4d8108 14144+ ino_t h_ino, ino;
027c5e7a 14145+ int err;
4a4d8108 14146+ aufs_bindex_t bstart;
1facf9fc 14147+
4a4d8108
AM
14148+ sb = dentry->d_sb;
14149+ bstart = au_dbstart(dentry);
14150+ h_dentry = au_h_dptr(dentry, bstart);
b752ccd1
AM
14151+ h_inode = h_dentry->d_inode;
14152+ h_ino = h_inode->i_ino;
14153+
14154+ /*
14155+ * stop 'race'-ing between hardlinks under different
14156+ * parents.
14157+ */
14158+ mtx = NULL;
14159+ if (!S_ISDIR(h_inode->i_mode))
14160+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
14161+
4f0767ce 14162+new_ino: /* retry point: re-entered after the recorded number for h_ino has been reset to 0 */
b752ccd1
AM
14163+ if (mtx)
14164+ mutex_lock(mtx);
4a4d8108
AM
14165+ err = au_xino_read(sb, bstart, h_ino, &ino);
14166+ inode = ERR_PTR(err);
14167+ if (unlikely(err))
14168+ goto out;
b752ccd1 14169+
4a4d8108
AM
14170+ if (!ino) { /* nothing recorded for h_ino: take a fresh aufs inode number */
14171+ ino = au_xino_new_ino(sb);
14172+ if (unlikely(!ino)) {
14173+ inode = ERR_PTR(-EIO);
dece6358
AM
14174+ goto out;
14175+ }
14176+ }
1facf9fc 14177+
4a4d8108
AM
14178+ AuDbg("i%lu\n", (unsigned long)ino);
14179+ inode = au_iget_locked(sb, ino);
14180+ err = PTR_ERR(inode);
14181+ if (IS_ERR(inode))
1facf9fc 14182+ goto out;
1facf9fc 14183+
4a4d8108
AM
14184+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14185+ if (inode->i_state & I_NEW) { /* case 1: a brand-new inode object, to be filled in by set_inode() */
14186+ ii_write_lock_new_child(inode);
14187+ err = set_inode(inode, dentry);
14188+ if (!err) {
14189+ unlock_new_inode(inode);
14190+ goto out; /* success */
14191+ }
1308ab2a 14192+
027c5e7a
AM
14193+ /*
14194+ * iget_failed() calls iput(), but we need to call
14195+ * ii_write_unlock() after iget_failed(). so dirty hack for
14196+ * i_count.
14197+ */
14198+ atomic_inc(&inode->i_count);
4a4d8108 14199+ iget_failed(inode);
027c5e7a
AM
14200+ ii_write_unlock(inode);
14201+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
14202+ /* ignore this error */
14203+ goto out_iput; /* the iput() there drops the extra reference taken above */
14204+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { /* case 2: an existing, live inode may be reused if it matches dentry */
b752ccd1
AM
14205+ /*
14206+ * horrible race condition between lookup, readdir and copyup
14207+ * (or something).
14208+ */
14209+ if (mtx)
14210+ mutex_unlock(mtx);
027c5e7a
AM
14211+ err = reval_inode(inode, dentry);
14212+ if (unlikely(err < 0)) {
14213+ mtx = NULL; /* already unlocked above; keeps the out: label from unlocking again */
14214+ goto out_iput;
14215+ }
14216+
b752ccd1
AM
14217+ if (!err) {
14218+ mtx = NULL; /* already unlocked above */
4a4d8108 14219+ goto out; /* success */
b752ccd1
AM
14220+ } else if (mtx)
14221+ mutex_lock(mtx); /* unmatched (err > 0): re-take the mutex and fall through to the reset-and-retry code */
4a4d8108
AM
14222+ }
14223+
14224+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14225+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14226+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
14227+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14228+ (unsigned long)h_ino, (unsigned long)ino);
14229+ ino = 0;
14230+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); /* forget the recorded number so the retry allocates a new one */
14231+ if (!err) {
14232+ iput(inode);
b752ccd1
AM
14233+ if (mtx)
14234+ mutex_unlock(mtx);
4a4d8108
AM
14235+ goto new_ino;
14236+ }
1308ab2a 14237+
4f0767ce 14238+out_iput:
4a4d8108 14239+ iput(inode);
4a4d8108 14240+ inode = ERR_PTR(err);
4f0767ce 14241+out:
b752ccd1
AM
14242+ if (mtx)
14243+ mutex_unlock(mtx);
4a4d8108 14244+ return inode;
1facf9fc 14245+}
14246+
4a4d8108 14247+/* ---------------------------------------------------------------------- */
1facf9fc 14248+
4a4d8108
AM
14249+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14250+ struct inode *inode)
14251+{ /* returns the au_br_rdonly() result for the branch if non-zero; otherwise -EROFS when the lower inode at bindex is immutable, else 0 */
14252+ int err;
1facf9fc 14253+
4a4d8108 14254+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 14255+
4a4d8108
AM
14256+ /* pseudo-link after flushed may happen out of bounds */
14257+ if (!err
14258+ && inode
14259+ && au_ibstart(inode) <= bindex
14260+ && bindex <= au_ibend(inode)) {
14261+ /*
14262+ * permission check is unnecessary since vfsub routine
14263+ * will be called later
14264+ */
14265+ struct inode *hi = au_h_iptr(inode, bindex);
14266+ if (hi)
14267+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 14268+ }
14269+
4a4d8108
AM
14270+ return err;
14271+}
dece6358 14272+
4a4d8108
AM
14273+int au_test_h_perm(struct inode *h_inode, int mask)
14274+{ /* permission check on a lower inode: fsuid 0 passes without a check, everyone else goes through inode_permission() */
14275+ if (!current_fsuid())
14276+ return 0;
14277+ return inode_permission(h_inode, mask);
14278+}
1facf9fc 14279+
4a4d8108
AM
14280+int au_test_h_perm_sio(struct inode *h_inode, int mask)
14281+{ /* like au_test_h_perm(), but a write check on a directory for which au_test_nfs() is true also asks for MAY_READ */
14282+ if (au_test_nfs(h_inode->i_sb)
14283+ && (mask & MAY_WRITE)
14284+ && S_ISDIR(h_inode->i_mode))
14285+ mask |= MAY_READ; /* force permission check */
14286+ return au_test_h_perm(h_inode, mask);
1facf9fc 14287+}
7f207e10
AM
14288diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
14289--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
14290+++ linux/fs/aufs/inode.h 2011-08-24 13:30:24.734646739 +0200
14291@@ -0,0 +1,556 @@
4a4d8108 14292+/*
027c5e7a 14293+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14294+ *
14295+ * This program, aufs is free software; you can redistribute it and/or modify
14296+ * it under the terms of the GNU General Public License as published by
14297+ * the Free Software Foundation; either version 2 of the License, or
14298+ * (at your option) any later version.
14299+ *
14300+ * This program is distributed in the hope that it will be useful,
14301+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14302+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14303+ * GNU General Public License for more details.
14304+ *
14305+ * You should have received a copy of the GNU General Public License
14306+ * along with this program; if not, write to the Free Software
14307+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14308+ */
1facf9fc 14309+
1308ab2a 14310+/*
4a4d8108 14311+ * inode operations
1308ab2a 14312+ */
dece6358 14313+
4a4d8108
AM
14314+#ifndef __AUFS_INODE_H__
14315+#define __AUFS_INODE_H__
dece6358 14316+
4a4d8108 14317+#ifdef __KERNEL__
1308ab2a 14318+
4a4d8108
AM
14319+#include <linux/fs.h>
14320+#include <linux/fsnotify.h>
14321+#include <linux/aufs_type.h>
14322+#include "rwsem.h"
1308ab2a 14323+
4a4d8108 14324+struct vfsmount;
1facf9fc 14325+
4a4d8108
AM
14326+struct au_hnotify { /* notification state attached to an au_hinode; has no members unless CONFIG_AUFS_HNOTIFY is set */
14327+#ifdef CONFIG_AUFS_HNOTIFY
14328+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 14329+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 14330+ struct fsnotify_mark hn_mark;
7f207e10 14331+ int hn_mark_dead;
4a4d8108 14332+#endif
7f207e10 14333+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
14334+#endif
14335+} ____cacheline_aligned_in_smp;
1facf9fc 14336+
4a4d8108
AM
14337+struct au_hinode { /* one slot of au_iinfo.ii_hinode[]: what an aufs inode holds for one branch */
14338+ struct inode *hi_inode; /* lower inode, or NULL when the slot is empty */
14339+ aufs_bindex_t hi_id; /* branch id (br_id) the slot was filled from; -1 after au_iinfo_init() */
14340+#ifdef CONFIG_AUFS_HNOTIFY
14341+ struct au_hnotify *hi_notify;
14342+#endif
dece6358 14343+
4a4d8108
AM
14344+ /* reference to the copied-up whiteout with get/put */
14345+ struct dentry *hi_whdentry;
14346+};
dece6358 14347+
4a4d8108
AM
14348+struct au_vdir;
14349+struct au_iinfo { /* aufs-private inode data, embedded in struct au_icntnr next to the VFS inode */
14350+ atomic_t ii_generation; /* set from au_sigen() of the superblock */
14351+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 14352+
4a4d8108
AM
14353+ struct au_rwsem ii_rwsem;
14354+ aufs_bindex_t ii_bstart, ii_bend; /* lowest/highest populated slot index; both -1 when none */
14355+ __u32 ii_higen;
14356+ struct au_hinode *ii_hinode; /* slot array, one entry per branch index */
14357+ struct au_vdir *ii_vdir;
14358+};
1facf9fc 14359+
4a4d8108
AM
14360+struct au_icntnr { /* container pairing the aufs-private data with the VFS inode; container_of() on vfs_inode gets back to iinfo */
14361+ struct au_iinfo iinfo;
14362+ struct inode vfs_inode;
14363+} ____cacheline_aligned_in_smp;
1308ab2a 14364+
4a4d8108
AM
14365+/* au_pin flags */
14366+#define AuPin_DI_LOCKED 1
14367+#define AuPin_MNT_WRITE (1 << 1)
14368+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
14369+#define au_fset_pin(flags, name) \
14370+ do { (flags) |= AuPin_##name; } while (0)
14371+#define au_fclr_pin(flags, name) \
14372+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
14373+
14374+struct au_pin {
14375+ /* input */
14376+ struct dentry *dentry;
14377+ unsigned int udba;
14378+ unsigned char lsc_di, lsc_hi, flags;
14379+ aufs_bindex_t bindex;
14380+
14381+ /* output */
14382+ struct dentry *parent;
14383+ struct au_hinode *hdir;
14384+ struct vfsmount *h_mnt;
14385+};
1facf9fc 14386+
1308ab2a 14387+/* ---------------------------------------------------------------------- */
14388+
4a4d8108 14389+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 14390+{
4a4d8108 14391+ struct au_iinfo *iinfo;
1facf9fc 14392+
4a4d8108
AM
14393+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14394+ if (iinfo->ii_hinode)
14395+ return iinfo;
14396+ return NULL; /* debugging bad_inode case */
14397+}
1facf9fc 14398+
4a4d8108 14399+/* ---------------------------------------------------------------------- */
1facf9fc 14400+
4a4d8108
AM
14401+/* inode.c */
14402+struct inode *au_igrab(struct inode *inode);
027c5e7a 14403+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
14404+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
14405+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14406+ unsigned int d_type, ino_t *ino);
14407+struct inode *au_new_inode(struct dentry *dentry, int must_new);
14408+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14409+ struct inode *inode);
14410+int au_test_h_perm(struct inode *h_inode, int mask);
14411+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 14412+
4a4d8108
AM
14413+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
14414+ ino_t h_ino, unsigned int d_type, ino_t *ino)
14415+{
14416+#ifdef CONFIG_AUFS_SHWH
14417+ return au_ino(sb, bindex, h_ino, d_type, ino);
14418+#else
14419+ return 0;
14420+#endif
14421+}
1facf9fc 14422+
4a4d8108
AM
14423+/* i_op.c */
14424+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
1308ab2a 14425+
4a4d8108
AM
14426+/* au_wr_dir flags */
14427+#define AuWrDir_ADD_ENTRY 1
14428+#define AuWrDir_ISDIR (1 << 1)
14429+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
14430+#define au_fset_wrdir(flags, name) \
14431+ do { (flags) |= AuWrDir_##name; } while (0)
14432+#define au_fclr_wrdir(flags, name) \
14433+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 14434+
4a4d8108
AM
14435+struct au_wr_dir_args {
14436+ aufs_bindex_t force_btgt;
14437+ unsigned char flags;
14438+};
14439+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
14440+ struct au_wr_dir_args *args);
dece6358 14441+
4a4d8108
AM
14442+struct dentry *au_pinned_h_parent(struct au_pin *pin);
14443+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
14444+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
14445+ unsigned int udba, unsigned char flags);
14446+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
14447+ unsigned int udba, unsigned char flags) __must_check;
14448+int au_do_pin(struct au_pin *pin) __must_check;
14449+void au_unpin(struct au_pin *pin);
1facf9fc 14450+
4a4d8108
AM
14451+/* i_op_add.c */
14452+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
14453+ struct dentry *h_parent, int isdir);
14454+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
14455+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
14456+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
14457+ struct nameidata *nd);
14458+int aufs_link(struct dentry *src_dentry, struct inode *dir,
14459+ struct dentry *dentry);
14460+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
1facf9fc 14461+
4a4d8108
AM
14462+/* i_op_del.c */
14463+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
14464+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
14465+ struct dentry *h_parent, int isdir);
14466+int aufs_unlink(struct inode *dir, struct dentry *dentry);
14467+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 14468+
4a4d8108
AM
14469+/* i_op_ren.c */
14470+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
14471+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
14472+ struct inode *dir, struct dentry *dentry);
1facf9fc 14473+
4a4d8108
AM
14474+/* iinfo.c */
14475+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
14476+void au_hiput(struct au_hinode *hinode);
14477+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14478+ struct dentry *h_wh);
14479+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 14480+
4a4d8108
AM
14481+/* hinode flags */
14482+#define AuHi_XINO 1
14483+#define AuHi_HNOTIFY (1 << 1)
14484+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
14485+#define au_fset_hi(flags, name) \
14486+ do { (flags) |= AuHi_##name; } while (0)
14487+#define au_fclr_hi(flags, name) \
14488+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 14489+
4a4d8108
AM
14490+#ifndef CONFIG_AUFS_HNOTIFY
14491+#undef AuHi_HNOTIFY
14492+#define AuHi_HNOTIFY 0
14493+#endif
1facf9fc 14494+
4a4d8108
AM
14495+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14496+ struct inode *h_inode, unsigned int flags);
1facf9fc 14497+
4a4d8108
AM
14498+void au_update_iigen(struct inode *inode);
14499+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 14500+
4a4d8108
AM
14501+void au_icntnr_init_once(void *_c);
14502+int au_iinfo_init(struct inode *inode);
14503+void au_iinfo_fin(struct inode *inode);
14504+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 14505+
e49829fe 14506+#ifdef CONFIG_PROC_FS
4a4d8108 14507+/* plink.c */
e49829fe
JR
14508+int au_plink_maint(struct super_block *sb, int flags);
14509+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
14510+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
14511+#ifdef CONFIG_AUFS_DEBUG
14512+void au_plink_list(struct super_block *sb);
14513+#else
14514+AuStubVoid(au_plink_list, struct super_block *sb)
14515+#endif
14516+int au_plink_test(struct inode *inode);
14517+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
14518+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
14519+ struct dentry *h_dentry);
e49829fe
JR
14520+void au_plink_put(struct super_block *sb, int verbose);
14521+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 14522+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
14523+#else
14524+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
14525+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
14526+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
14527+AuStubVoid(au_plink_list, struct super_block *sb);
14528+AuStubInt0(au_plink_test, struct inode *inode);
14529+AuStub(struct dentry *, au_plink_lkup, return NULL,
14530+ struct inode *inode, aufs_bindex_t bindex);
14531+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
14532+ struct dentry *h_dentry);
14533+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
14534+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
14535+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
14536+#endif /* CONFIG_PROC_FS */
1facf9fc 14537+
4a4d8108 14538+/* ---------------------------------------------------------------------- */
1308ab2a 14539+
4a4d8108
AM
14540+/* lock subclass for iinfo */
14541+enum {
14542+ AuLsc_II_CHILD, /* child first */
14543+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
14544+ AuLsc_II_CHILD3, /* copyup dirs */
14545+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
14546+ AuLsc_II_PARENT2,
14547+ AuLsc_II_PARENT3, /* copyup dirs */
14548+ AuLsc_II_NEW_CHILD
14549+};
1308ab2a 14550+
1facf9fc 14551+/*
4a4d8108
AM
14552+ * ii_read_lock_child, ii_write_lock_child,
14553+ * ii_read_lock_child2, ii_write_lock_child2,
14554+ * ii_read_lock_child3, ii_write_lock_child3,
14555+ * ii_read_lock_parent, ii_write_lock_parent,
14556+ * ii_read_lock_parent2, ii_write_lock_parent2,
14557+ * ii_read_lock_parent3, ii_write_lock_parent3,
14558+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 14559+ */
4a4d8108
AM
14560+#define AuReadLockFunc(name, lsc) \
14561+static inline void ii_read_lock_##name(struct inode *i) \
14562+{ \
14563+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14564+}
14565+
14566+#define AuWriteLockFunc(name, lsc) \
14567+static inline void ii_write_lock_##name(struct inode *i) \
14568+{ \
14569+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
14570+}
14571+
14572+#define AuRWLockFuncs(name, lsc) \
14573+ AuReadLockFunc(name, lsc) \
14574+ AuWriteLockFunc(name, lsc)
14575+
14576+AuRWLockFuncs(child, CHILD);
14577+AuRWLockFuncs(child2, CHILD2);
14578+AuRWLockFuncs(child3, CHILD3);
14579+AuRWLockFuncs(parent, PARENT);
14580+AuRWLockFuncs(parent2, PARENT2);
14581+AuRWLockFuncs(parent3, PARENT3);
14582+AuRWLockFuncs(new_child, NEW_CHILD);
14583+
14584+#undef AuReadLockFunc
14585+#undef AuWriteLockFunc
14586+#undef AuRWLockFuncs
1facf9fc 14587+
14588+/*
4a4d8108 14589+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 14590+ */
4a4d8108 14591+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 14592+
4a4d8108
AM
14593+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
14594+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
14595+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 14596+
4a4d8108 14597+/* ---------------------------------------------------------------------- */
1308ab2a 14598+
027c5e7a
AM
14599+static inline void au_icntnr_init(struct au_icntnr *c)
14600+{
14601+#ifdef CONFIG_AUFS_DEBUG
14602+ c->vfs_inode.i_mode = 0;
14603+#endif
14604+}
14605+
4a4d8108
AM
14606+static inline unsigned int au_iigen(struct inode *inode)
14607+{
14608+ return atomic_read(&au_ii(inode)->ii_generation);
14609+}
1308ab2a 14610+
4a4d8108
AM
14611+/* tiny test for inode number */
14612+/* tmpfs generation is too rough */
14613+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
14614+{
14615+ struct au_iinfo *iinfo;
1308ab2a 14616+
4a4d8108
AM
14617+ iinfo = au_ii(inode);
14618+ AuRwMustAnyLock(&iinfo->ii_rwsem);
14619+ return !(iinfo->ii_hsb1 == h_inode->i_sb
14620+ && iinfo->ii_higen == h_inode->i_generation);
14621+}
1308ab2a 14622+
4a4d8108
AM
14623+static inline void au_iigen_dec(struct inode *inode)
14624+{
e49829fe 14625+ atomic_dec(&au_ii(inode)->ii_generation);
027c5e7a
AM
14626+}
14627+
14628+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
14629+{
14630+ int err;
14631+
14632+ err = 0;
14633+ if (unlikely(inode && au_iigen(inode) != sigen))
14634+ err = -EIO;
14635+
14636+ return err;
4a4d8108 14637+}
1308ab2a 14638+
4a4d8108 14639+/* ---------------------------------------------------------------------- */
1308ab2a 14640+
4a4d8108
AM
14641+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
14642+ aufs_bindex_t bindex)
14643+{
14644+ IiMustAnyLock(inode);
14645+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
14646+}
1308ab2a 14647+
4a4d8108
AM
14648+static inline aufs_bindex_t au_ibstart(struct inode *inode)
14649+{
14650+ IiMustAnyLock(inode);
14651+ return au_ii(inode)->ii_bstart;
14652+}
1308ab2a 14653+
4a4d8108
AM
14654+static inline aufs_bindex_t au_ibend(struct inode *inode)
14655+{
14656+ IiMustAnyLock(inode);
14657+ return au_ii(inode)->ii_bend;
14658+}
1308ab2a 14659+
4a4d8108
AM
14660+static inline struct au_vdir *au_ivdir(struct inode *inode)
14661+{
14662+ IiMustAnyLock(inode);
14663+ return au_ii(inode)->ii_vdir;
14664+}
1308ab2a 14665+
4a4d8108
AM
14666+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
14667+{
14668+ IiMustAnyLock(inode);
14669+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
14670+}
1308ab2a 14671+
4a4d8108 14672+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14673+{
4a4d8108
AM
14674+ IiMustWriteLock(inode);
14675+ au_ii(inode)->ii_bstart = bindex;
14676+}
1308ab2a 14677+
4a4d8108
AM
14678+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
14679+{
14680+ IiMustWriteLock(inode);
14681+ au_ii(inode)->ii_bend = bindex;
1308ab2a 14682+}
14683+
4a4d8108
AM
14684+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
14685+{
14686+ IiMustWriteLock(inode);
14687+ au_ii(inode)->ii_vdir = vdir;
14688+}
1facf9fc 14689+
4a4d8108 14690+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 14691+{
4a4d8108
AM
14692+ IiMustAnyLock(inode);
14693+ return au_ii(inode)->ii_hinode + bindex;
14694+}
dece6358 14695+
4a4d8108 14696+/* ---------------------------------------------------------------------- */
1facf9fc 14697+
4a4d8108
AM
14698+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
14699+{
14700+ if (pin)
14701+ return pin->parent;
14702+ return NULL;
1facf9fc 14703+}
14704+
4a4d8108 14705+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 14706+{
4a4d8108
AM
14707+ if (pin && pin->hdir)
14708+ return pin->hdir->hi_inode;
14709+ return NULL;
1308ab2a 14710+}
1facf9fc 14711+
4a4d8108
AM
14712+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
14713+{
14714+ if (pin)
14715+ return pin->hdir;
14716+ return NULL;
14717+}
1facf9fc 14718+
4a4d8108 14719+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 14720+{
4a4d8108
AM
14721+ if (pin)
14722+ pin->dentry = dentry;
14723+}
1308ab2a 14724+
4a4d8108
AM
14725+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
14726+ unsigned char lflag)
14727+{
14728+ if (pin) {
7f207e10 14729+ if (lflag)
4a4d8108 14730+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 14731+ else
4a4d8108 14732+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 14733+ }
4a4d8108
AM
14734+}
14735+
14736+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
14737+{
14738+ if (pin) {
14739+ dput(pin->parent);
14740+ pin->parent = dget(parent);
1facf9fc 14741+ }
4a4d8108 14742+}
1facf9fc 14743+
4a4d8108
AM
14744+/* ---------------------------------------------------------------------- */
14745+
027c5e7a 14746+struct au_branch;
4a4d8108
AM
14747+#ifdef CONFIG_AUFS_HNOTIFY
14748+struct au_hnotify_op {
14749+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a
AM
14750+ int (*alloc)(struct au_hinode *hinode);
14751+ void (*free)(struct au_hinode *hinode);
4a4d8108
AM
14752+
14753+ void (*fin)(void);
14754+ int (*init)(void);
027c5e7a
AM
14755+
14756+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
14757+ void (*fin_br)(struct au_branch *br);
14758+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
14759+};
14760+
14761+/* hnotify.c */
027c5e7a 14762+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
14763+void au_hn_free(struct au_hinode *hinode);
14764+void au_hn_ctl(struct au_hinode *hinode, int do_set);
14765+void au_hn_reset(struct inode *inode, unsigned int flags);
14766+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
14767+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
14768+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
14769+int au_hnotify_init_br(struct au_branch *br, int perm);
14770+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
14771+int __init au_hnotify_init(void);
14772+void au_hnotify_fin(void);
14773+
7f207e10 14774+/* hfsnotify.c */
4a4d8108
AM
14775+extern const struct au_hnotify_op au_hnotify_op;
14776+
14777+static inline
14778+void au_hn_init(struct au_hinode *hinode)
14779+{
14780+ hinode->hi_notify = NULL;
1308ab2a 14781+}
14782+
53392da6
AM
14783+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14784+{
14785+ return hinode->hi_notify;
14786+}
14787+
4a4d8108
AM
14788+#else
14789+static inline
14790+int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
027c5e7a 14791+ struct inode *inode __maybe_unused)
1308ab2a 14792+{
4a4d8108
AM
14793+ return -EOPNOTSUPP;
14794+}
1308ab2a 14795+
53392da6
AM
14796+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
14797+{
14798+ return NULL;
14799+}
14800+
4a4d8108
AM
14801+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
14802+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
14803+ int do_set __maybe_unused)
14804+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
14805+ unsigned int flags __maybe_unused)
027c5e7a
AM
14806+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
14807+ struct au_branch *br __maybe_unused,
14808+ int perm __maybe_unused)
14809+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
14810+ int perm __maybe_unused)
14811+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
14812+AuStubInt0(__init au_hnotify_init, void)
14813+AuStubVoid(au_hnotify_fin, void)
14814+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
14815+#endif /* CONFIG_AUFS_HNOTIFY */
14816+
14817+static inline void au_hn_suspend(struct au_hinode *hdir)
14818+{
14819+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 14820+}
14821+
4a4d8108 14822+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 14823+{
4a4d8108
AM
14824+ au_hn_ctl(hdir, /*do_set*/1);
14825+}
1308ab2a 14826+
4a4d8108
AM
14827+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
14828+{
14829+ mutex_lock(&hdir->hi_inode->i_mutex);
14830+ au_hn_suspend(hdir);
14831+}
dece6358 14832+
4a4d8108
AM
14833+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
14834+ unsigned int sc __maybe_unused)
14835+{
14836+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
14837+ au_hn_suspend(hdir);
1facf9fc 14838+}
1facf9fc 14839+
4a4d8108
AM
14840+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
14841+{
14842+ au_hn_resume(hdir);
14843+ mutex_unlock(&hdir->hi_inode->i_mutex);
14844+}
14845+
14846+#endif /* __KERNEL__ */
14847+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
14848diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
14849--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
53392da6 14850+++ linux/fs/aufs/ioctl.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 14851@@ -0,0 +1,158 @@
4a4d8108 14852+/*
027c5e7a 14853+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
14854+ *
14855+ * This program, aufs is free software; you can redistribute it and/or modify
14856+ * it under the terms of the GNU General Public License as published by
14857+ * the Free Software Foundation; either version 2 of the License, or
14858+ * (at your option) any later version.
14859+ *
14860+ * This program is distributed in the hope that it will be useful,
14861+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14862+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14863+ * GNU General Public License for more details.
14864+ *
14865+ * You should have received a copy of the GNU General Public License
14866+ * along with this program; if not, write to the Free Software
14867+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14868+ */
14869+
14870+/*
14871+ * ioctl
14872+ * plink-management and readdir in userspace.
14873+ * assist the pathconf(3) wrapper library.
14874+ */
14875+
14876+#include <linux/file.h>
14877+#include "aufs.h"
14878+
14879+static int au_wbr_fd(struct path *path)
14880+{
14881+ int err, fd;
14882+ aufs_bindex_t wbi, bindex, bend;
14883+ struct file *h_file;
14884+ struct super_block *sb;
14885+ struct dentry *root;
14886+ struct au_branch *wbr;
14887+
14888+ err = get_unused_fd();
14889+ if (unlikely(err < 0))
14890+ goto out;
14891+ fd = err;
14892+
14893+ wbi = 0;
14894+ sb = path->dentry->d_sb;
14895+ root = sb->s_root;
14896+ aufs_read_lock(root, AuLock_IR);
14897+ wbr = au_sbr(sb, wbi);
14898+ if (!(path->mnt->mnt_flags & MNT_READONLY)
14899+ && !au_br_writable(wbr->br_perm)) {
14900+ bend = au_sbend(sb);
14901+ for (bindex = 1; bindex <= bend; bindex++) {
14902+ wbr = au_sbr(sb, bindex);
14903+ if (au_br_writable(wbr->br_perm)) {
14904+ wbi = bindex;
14905+ break;
14906+ }
14907+ }
14908+ wbr = au_sbr(sb, wbi);
14909+ }
14910+ AuDbg("wbi %d\n", wbi);
14911+ h_file = au_h_open(root, wbi, O_RDONLY | O_DIRECTORY | O_LARGEFILE,
14912+ NULL);
14913+ aufs_read_unlock(root, AuLock_IR);
14914+ err = PTR_ERR(h_file);
14915+ if (IS_ERR(h_file))
14916+ goto out_fd;
14917+
14918+ atomic_dec(&wbr->br_count); /* cf. au_h_open() */
14919+ fd_install(fd, h_file);
14920+ err = fd;
14921+ goto out; /* success */
14922+
4f0767ce 14923+out_fd:
4a4d8108 14924+ put_unused_fd(fd);
4f0767ce 14925+out:
4a4d8108
AM
14926+ return err;
14927+}
14928+
14929+/* ---------------------------------------------------------------------- */
14930+
14931+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
14932+{
14933+ long err;
14934+
14935+ switch (cmd) {
4a4d8108
AM
14936+ case AUFS_CTL_RDU:
14937+ case AUFS_CTL_RDU_INO:
14938+ err = au_rdu_ioctl(file, cmd, arg);
14939+ break;
14940+
14941+ case AUFS_CTL_WBR_FD:
14942+ err = au_wbr_fd(&file->f_path);
14943+ break;
14944+
027c5e7a
AM
14945+ case AUFS_CTL_IBUSY:
14946+ err = au_ibusy_ioctl(file, arg);
14947+ break;
14948+
4a4d8108
AM
14949+ default:
14950+ /* do not call the lower */
14951+ AuDbg("0x%x\n", cmd);
14952+ err = -ENOTTY;
14953+ }
14954+
14955+ AuTraceErr(err);
14956+ return err;
14957+}
14958+
14959+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
14960+{
14961+ long err;
14962+
14963+ switch (cmd) {
14964+ case AUFS_CTL_WBR_FD:
14965+ err = au_wbr_fd(&file->f_path);
14966+ break;
14967+
14968+ default:
14969+ /* do not call the lower */
14970+ AuDbg("0x%x\n", cmd);
14971+ err = -ENOTTY;
14972+ }
14973+
14974+ AuTraceErr(err);
14975+ return err;
14976+}
b752ccd1
AM
14977+
14978+#ifdef CONFIG_COMPAT
14979+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
14980+ unsigned long arg)
14981+{
14982+ long err;
14983+
14984+ switch (cmd) {
14985+ case AUFS_CTL_RDU:
14986+ case AUFS_CTL_RDU_INO:
14987+ err = au_rdu_compat_ioctl(file, cmd, arg);
14988+ break;
14989+
027c5e7a
AM
14990+ case AUFS_CTL_IBUSY:
14991+ err = au_ibusy_compat_ioctl(file, arg);
14992+ break;
14993+
b752ccd1
AM
14994+ default:
14995+ err = aufs_ioctl_dir(file, cmd, arg);
14996+ }
14997+
14998+ AuTraceErr(err);
14999+ return err;
15000+}
15001+
15002+#if 0 /* unused yet */
15003+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15004+ unsigned long arg)
15005+{
15006+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
15007+}
15008+#endif
15009+#endif
7f207e10
AM
15010diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
15011--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
53392da6 15012+++ linux/fs/aufs/i_op_add.c 2011-08-24 13:30:24.731313534 +0200
2cbb1c4b 15013@@ -0,0 +1,711 @@
4a4d8108 15014+/*
027c5e7a 15015+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15016+ *
15017+ * This program, aufs is free software; you can redistribute it and/or modify
15018+ * it under the terms of the GNU General Public License as published by
15019+ * the Free Software Foundation; either version 2 of the License, or
15020+ * (at your option) any later version.
15021+ *
15022+ * This program is distributed in the hope that it will be useful,
15023+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15024+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15025+ * GNU General Public License for more details.
15026+ *
15027+ * You should have received a copy of the GNU General Public License
15028+ * along with this program; if not, write to the Free Software
15029+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15030+ */
15031+
15032+/*
15033+ * inode operations (add entry)
15034+ */
15035+
15036+#include "aufs.h"
15037+
15038+/*
15039+ * final procedure of adding a new entry, except link(2).
15040+ * remove whiteout, instantiate, copyup the parent dir's times and size
15041+ * and update version.
15042+ * if it failed, re-create the removed whiteout.
15043+ */
15044+static int epilog(struct inode *dir, aufs_bindex_t bindex,
15045+ struct dentry *wh_dentry, struct dentry *dentry)
15046+{
15047+ int err, rerr;
15048+ aufs_bindex_t bwh;
15049+ struct path h_path;
15050+ struct inode *inode, *h_dir;
15051+ struct dentry *wh;
15052+
15053+ bwh = -1;
15054+ if (wh_dentry) {
15055+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
15056+ IMustLock(h_dir);
15057+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
15058+ bwh = au_dbwh(dentry);
15059+ h_path.dentry = wh_dentry;
15060+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
15061+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
15062+ dentry);
15063+ if (unlikely(err))
15064+ goto out;
15065+ }
15066+
15067+ inode = au_new_inode(dentry, /*must_new*/1);
15068+ if (!IS_ERR(inode)) {
15069+ d_instantiate(dentry, inode);
15070+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
15071+ IMustLock(dir);
15072+ if (au_ibstart(dir) == au_dbstart(dentry))
15073+ au_cpup_attr_timesizes(dir);
15074+ dir->i_version++;
15075+ return 0; /* success */
15076+ }
15077+
15078+ err = PTR_ERR(inode);
15079+ if (!wh_dentry)
15080+ goto out;
15081+
15082+ /* revert */
15083+ /* dir inode is locked */
15084+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
15085+ rerr = PTR_ERR(wh);
15086+ if (IS_ERR(wh)) {
15087+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
15088+ AuDLNPair(dentry), err, rerr);
15089+ err = -EIO;
15090+ } else
15091+ dput(wh);
15092+
4f0767ce 15093+out:
4a4d8108
AM
15094+ return err;
15095+}
15096+
027c5e7a
AM
15097+static int au_d_may_add(struct dentry *dentry)
15098+{
15099+ int err;
15100+
15101+ err = 0;
15102+ if (unlikely(d_unhashed(dentry)))
15103+ err = -ENOENT;
15104+ if (unlikely(dentry->d_inode))
15105+ err = -EEXIST;
15106+ return err;
15107+}
15108+
4a4d8108
AM
15109+/*
15110+ * simple tests for the adding inode operations.
15111+ * following the checks in vfs, plus the parent-child relationship.
15112+ */
15113+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15114+ struct dentry *h_parent, int isdir)
15115+{
15116+ int err;
15117+ umode_t h_mode;
15118+ struct dentry *h_dentry;
15119+ struct inode *h_inode;
15120+
15121+ err = -ENAMETOOLONG;
15122+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15123+ goto out;
15124+
15125+ h_dentry = au_h_dptr(dentry, bindex);
15126+ h_inode = h_dentry->d_inode;
15127+ if (!dentry->d_inode) {
15128+ err = -EEXIST;
15129+ if (unlikely(h_inode))
15130+ goto out;
15131+ } else {
15132+ /* rename(2) case */
15133+ err = -EIO;
15134+ if (unlikely(!h_inode || !h_inode->i_nlink))
15135+ goto out;
15136+
15137+ h_mode = h_inode->i_mode;
15138+ if (!isdir) {
15139+ err = -EISDIR;
15140+ if (unlikely(S_ISDIR(h_mode)))
15141+ goto out;
15142+ } else if (unlikely(!S_ISDIR(h_mode))) {
15143+ err = -ENOTDIR;
15144+ goto out;
15145+ }
15146+ }
15147+
15148+ err = 0;
15149+ /* expected parent dir is locked */
15150+ if (unlikely(h_parent != h_dentry->d_parent))
15151+ err = -EIO;
15152+
4f0767ce 15153+out:
4a4d8108
AM
15154+ AuTraceErr(err);
15155+ return err;
15156+}
15157+
15158+/*
15159+ * initial procedure of adding a new entry.
15160+ * prepare writable branch and the parent dir, lock it,
15161+ * and lookup whiteout for the new entry.
15162+ */
15163+static struct dentry*
15164+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
15165+ struct dentry *src_dentry, struct au_pin *pin,
15166+ struct au_wr_dir_args *wr_dir_args)
15167+{
15168+ struct dentry *wh_dentry, *h_parent;
15169+ struct super_block *sb;
15170+ struct au_branch *br;
15171+ int err;
15172+ unsigned int udba;
15173+ aufs_bindex_t bcpup;
15174+
15175+ AuDbg("%.*s\n", AuDLNPair(dentry));
15176+
15177+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
15178+ bcpup = err;
15179+ wh_dentry = ERR_PTR(err);
15180+ if (unlikely(err < 0))
15181+ goto out;
15182+
15183+ sb = dentry->d_sb;
15184+ udba = au_opt_udba(sb);
15185+ err = au_pin(pin, dentry, bcpup, udba,
15186+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15187+ wh_dentry = ERR_PTR(err);
15188+ if (unlikely(err))
15189+ goto out;
15190+
15191+ h_parent = au_pinned_h_parent(pin);
15192+ if (udba != AuOpt_UDBA_NONE
15193+ && au_dbstart(dentry) == bcpup)
15194+ err = au_may_add(dentry, bcpup, h_parent,
15195+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
15196+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
15197+ err = -ENAMETOOLONG;
15198+ wh_dentry = ERR_PTR(err);
15199+ if (unlikely(err))
15200+ goto out_unpin;
15201+
15202+ br = au_sbr(sb, bcpup);
15203+ if (dt) {
15204+ struct path tmp = {
15205+ .dentry = h_parent,
15206+ .mnt = br->br_mnt
15207+ };
15208+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
15209+ }
15210+
15211+ wh_dentry = NULL;
15212+ if (bcpup != au_dbwh(dentry))
15213+ goto out; /* success */
15214+
15215+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
15216+
4f0767ce 15217+out_unpin:
4a4d8108
AM
15218+ if (IS_ERR(wh_dentry))
15219+ au_unpin(pin);
4f0767ce 15220+out:
4a4d8108
AM
15221+ return wh_dentry;
15222+}
15223+
15224+/* ---------------------------------------------------------------------- */
15225+
15226+enum { Mknod, Symlink, Creat };
15227+struct simple_arg {
15228+ int type;
15229+ union {
15230+ struct {
15231+ int mode;
15232+ struct nameidata *nd;
15233+ } c;
15234+ struct {
15235+ const char *symname;
15236+ } s;
15237+ struct {
15238+ int mode;
15239+ dev_t dev;
15240+ } m;
15241+ } u;
15242+};
15243+
/*
 * add_simple - shared worker for aufs_mknod(), aufs_symlink() and
 * aufs_create().
 * @dir: parent directory inode; asserted to be locked by IMustLock()
 * @dentry: the entry to be created
 * @arg: what to create (arg->type) and the type specific parameters
 *
 * Creates the lower entry au_h_dptr(dentry, au_dbstart(dentry)) with the
 * vfsub_*() call matching arg->type and then runs epilog().
 * When the lower entry was created but epilog() failed, the lower entry is
 * unlinked again; a failing unlink turns the result into -EIO.
 * Returns zero or a negative error. For every error after aufs_read_lock()
 * succeeded, the dentry is d_drop()ed.
 */
15244+static int add_simple(struct inode *dir, struct dentry *dentry,
15245+ struct simple_arg *arg)
15246+{
15247+ int err;
15248+ aufs_bindex_t bstart;
15249+ unsigned char created;
15250+ struct au_dtime dt;
15251+ struct au_pin pin;
15252+ struct path h_path;
15253+ struct dentry *wh_dentry, *parent;
15254+ struct inode *h_dir;
15255+ struct au_wr_dir_args wr_dir_args = {
15256+ .force_btgt = -1,
15257+ .flags = AuWrDir_ADD_ENTRY
15258+ };
15259+
15260+ AuDbg("%.*s\n", AuDLNPair(dentry));
15261+ IMustLock(dir);
15262+
15263+ parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15264+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15265+ if (unlikely(err))
15266+ goto out;
15267+ err = au_d_may_add(dentry);
15268+ if (unlikely(err))
15269+ goto out_unlock;
4a4d8108
AM
15270+ di_write_lock_parent(parent);
15271+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
15272+ &wr_dir_args);
15273+ err = PTR_ERR(wh_dentry);
15274+ if (IS_ERR(wh_dentry))
027c5e7a 15275+ goto out_parent;
4a4d8108
AM
15276+
/* the lower path of the new entry on the branch being processed */
15277+ bstart = au_dbstart(dentry);
15278+ h_path.dentry = au_h_dptr(dentry, bstart);
15279+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
15280+ h_dir = au_pinned_h_dir(&pin);
15281+ switch (arg->type) {
15282+ case Creat:
15283+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
15284+ break;
15285+ case Symlink:
15286+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
15287+ break;
15288+ case Mknod:
15289+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
15290+ break;
15291+ default:
15292+ BUG();
15293+ }
/* remember whether the lower entry exists, since epilog() may overwrite err */
15294+ created = !err;
15295+ if (!err)
15296+ err = epilog(dir, bstart, wh_dentry, dentry);
15297+
15298+ /* revert */
15299+ if (unlikely(created && err && h_path.dentry->d_inode)) {
15300+ int rerr;
15301+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
15302+ if (rerr) {
15303+ AuIOErr("%.*s revert failure(%d, %d)\n",
15304+ AuDLNPair(dentry), err, rerr);
15305+ err = -EIO;
15306+ }
15307+ au_dtime_revert(&dt);
4a4d8108
AM
15308+ }
15309+
15310+ au_unpin(&pin);
15311+ dput(wh_dentry);
15312+
027c5e7a
AM
15313+out_parent:
15314+ di_write_unlock(parent);
15315+out_unlock:
4a4d8108
AM
15316+ if (unlikely(err)) {
15317+ au_update_dbstart(dentry);
15318+ d_drop(dentry);
15319+ }
4a4d8108 15320+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15321+out:
4a4d8108
AM
15322+ return err;
15323+}
15324+
15325+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
15326+{
15327+ struct simple_arg arg = {
15328+ .type = Mknod,
15329+ .u.m = {
15330+ .mode = mode,
15331+ .dev = dev
15332+ }
15333+ };
15334+ return add_simple(dir, dentry, &arg);
15335+}
15336+
15337+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
15338+{
15339+ struct simple_arg arg = {
15340+ .type = Symlink,
15341+ .u.s.symname = symname
15342+ };
15343+ return add_simple(dir, dentry, &arg);
15344+}
15345+
15346+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15347+ struct nameidata *nd)
15348+{
15349+ struct simple_arg arg = {
15350+ .type = Creat,
15351+ .u.c = {
15352+ .mode = mode,
15353+ .nd = nd
15354+ }
15355+ };
15356+ return add_simple(dir, dentry, &arg);
15357+}
15358+
15359+/* ---------------------------------------------------------------------- */
15360+
/*
 * working state of aufs_link(), shared with au_cpup_before_link() and
 * au_cpup_or_link().
 * bdst: the branch where the new link is made, bsrc: the branch of the source.
 * pin: the pinned parent dir, h_path: the lower path of the new entry.
 * src_parent/parent: the aufs parents of the source and of the new entry.
 */
15361+struct au_link_args {
15362+ aufs_bindex_t bdst, bsrc;
15363+ struct au_pin pin;
15364+ struct path h_path;
15365+ struct dentry *src_parent, *parent;
15366+};
15367+
15368+static int au_cpup_before_link(struct dentry *src_dentry,
15369+ struct au_link_args *a)
15370+{
15371+ int err;
15372+ struct dentry *h_src_dentry;
15373+ struct mutex *h_mtx;
15374+ struct file *h_file;
15375+
15376+ di_read_lock_parent(a->src_parent, AuLock_IR);
15377+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
15378+ if (unlikely(err))
15379+ goto out;
15380+
15381+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
15382+ h_mtx = &h_src_dentry->d_inode->i_mutex;
15383+ err = au_pin(&a->pin, src_dentry, a->bdst,
15384+ au_opt_udba(src_dentry->d_sb),
15385+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15386+ if (unlikely(err))
15387+ goto out;
15388+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15389+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15390+ if (IS_ERR(h_file)) {
15391+ err = PTR_ERR(h_file);
15392+ h_file = NULL;
15393+ } else
15394+ err = au_sio_cpup_simple(src_dentry, a->bdst, a->bsrc,
15395+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
15396+ mutex_unlock(h_mtx);
15397+ au_h_open_post(src_dentry, a->bsrc, h_file);
15398+ au_unpin(&a->pin);
15399+
4f0767ce 15400+out:
4a4d8108
AM
15401+ di_read_unlock(a->src_parent, AuLock_IR);
15402+ return err;
15403+}
15404+
/*
 * au_cpup_or_link - make a->h_path an entry for the inode of @src_dentry
 * on the branch a->bdst (called by aufs_link() when a->bdst < a->bsrc).
 *
 * When the inode has no lower inode on a->bdst, or that lower inode has
 * i_nlink == 0, the source is copied up under the new name.
 * Otherwise a lower dentry for the existing lower inode is searched
 * (an alias first, then the pseudo-link lookup) and vfsub_link()ed;
 * -EIO is returned when none is found.
 * On success, unless the pseudo-link lookup was used, the new lower dentry
 * is passed to au_plink_append().
 * Returns zero or a negative error.
 */
15405+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
15406+{
15407+ int err;
15408+ unsigned char plink;
15409+ struct inode *h_inode, *inode;
15410+ struct dentry *h_src_dentry;
15411+ struct super_block *sb;
15412+ struct file *h_file;
15413+
15414+ plink = 0;
15415+ h_inode = NULL;
/* NOTE(review): sb is assigned here but not read again in this function */
15416+ sb = src_dentry->d_sb;
15417+ inode = src_dentry->d_inode;
15418+ if (au_ibstart(inode) <= a->bdst)
15419+ h_inode = au_h_iptr(inode, a->bdst);
15420+ if (!h_inode || !h_inode->i_nlink) {
15421+ /* copyup src_dentry as the name of dentry. */
/* temporarily set the new lower dentry as src_dentry's one on bdst */
15422+ au_set_dbstart(src_dentry, a->bdst);
15423+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
15424+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
15425+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
15426+ h_file = au_h_open_pre(src_dentry, a->bsrc);
15427+ if (IS_ERR(h_file)) {
15428+ err = PTR_ERR(h_file);
15429+ h_file = NULL;
15430+ } else
15431+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
15432+ -1, AuCpup_KEEPLINO,
15433+ a->parent);
15434+ mutex_unlock(&h_inode->i_mutex);
15435+ au_h_open_post(src_dentry, a->bsrc, h_file);
/* restore src_dentry to its previous branch range */
15436+ au_set_h_dptr(src_dentry, a->bdst, NULL);
15437+ au_set_dbstart(src_dentry, a->bsrc);
15438+ } else {
15439+ /* the inode of src_dentry already exists on a.bdst branch */
15440+ h_src_dentry = d_find_alias(h_inode);
15441+ if (!h_src_dentry && au_plink_test(inode)) {
15442+ plink = 1;
15443+ h_src_dentry = au_plink_lkup(inode, a->bdst);
15444+ err = PTR_ERR(h_src_dentry);
15445+ if (IS_ERR(h_src_dentry))
15446+ goto out;
15447+
15448+ if (unlikely(!h_src_dentry->d_inode)) {
15449+ dput(h_src_dentry);
15450+ h_src_dentry = NULL;
15451+ }
15452+
15453+ }
15454+ if (h_src_dentry) {
15455+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15456+ &a->h_path);
15457+ dput(h_src_dentry);
15458+ } else {
15459+ AuIOErr("no dentry found for hi%lu on b%d\n",
15460+ h_inode->i_ino, a->bdst);
15461+ err = -EIO;
15462+ }
15463+ }
15464+
15465+ if (!err && !plink)
15466+ au_plink_append(inode, a->bdst, a->h_path.dentry);
15467+
15468+out:
2cbb1c4b 15469+ AuTraceErr(err);
4a4d8108
AM
15470+ return err;
15471+}
15472+
15473+int aufs_link(struct dentry *src_dentry, struct inode *dir,
15474+ struct dentry *dentry)
15475+{
15476+ int err, rerr;
15477+ struct au_dtime dt;
15478+ struct au_link_args *a;
15479+ struct dentry *wh_dentry, *h_src_dentry;
15480+ struct inode *inode;
15481+ struct super_block *sb;
15482+ struct au_wr_dir_args wr_dir_args = {
15483+ /* .force_btgt = -1, */
15484+ .flags = AuWrDir_ADD_ENTRY
15485+ };
15486+
15487+ IMustLock(dir);
15488+ inode = src_dentry->d_inode;
15489+ IMustLock(inode);
15490+
4a4d8108
AM
15491+ err = -ENOMEM;
15492+ a = kzalloc(sizeof(*a), GFP_NOFS);
15493+ if (unlikely(!a))
15494+ goto out;
15495+
15496+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
15497+ err = aufs_read_and_write_lock2(dentry, src_dentry,
15498+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
15499+ if (unlikely(err))
15500+ goto out_kfree;
027c5e7a
AM
15501+ err = au_d_hashed_positive(src_dentry);
15502+ if (unlikely(err))
15503+ goto out_unlock;
15504+ err = au_d_may_add(dentry);
15505+ if (unlikely(err))
15506+ goto out_unlock;
e49829fe 15507+
4a4d8108 15508+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 15509+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
15510+
15511+ di_write_lock_parent(a->parent);
15512+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
15513+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
15514+ &wr_dir_args);
15515+ err = PTR_ERR(wh_dentry);
15516+ if (IS_ERR(wh_dentry))
027c5e7a 15517+ goto out_parent;
4a4d8108
AM
15518+
15519+ err = 0;
15520+ sb = dentry->d_sb;
15521+ a->bdst = au_dbstart(dentry);
15522+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
15523+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
15524+ a->bsrc = au_ibstart(inode);
15525+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15526+ if (!h_src_dentry) {
15527+ a->bsrc = au_dbstart(src_dentry);
15528+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
15529+ AuDebugOn(!h_src_dentry);
15530+ } else if (IS_ERR(h_src_dentry))
15531+ goto out_parent;
15532+
4a4d8108
AM
15533+ if (au_opt_test(au_mntflags(sb), PLINK)) {
15534+ if (a->bdst < a->bsrc
15535+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
15536+ err = au_cpup_or_link(src_dentry, a);
2cbb1c4b 15537+ else
4a4d8108
AM
15538+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
15539+ &a->h_path);
2cbb1c4b 15540+ dput(h_src_dentry);
4a4d8108
AM
15541+ } else {
15542+ /*
15543+ * copyup src_dentry to the branch we process,
15544+ * and then link(2) to it.
15545+ */
2cbb1c4b 15546+ dput(h_src_dentry);
4a4d8108
AM
15547+ if (a->bdst < a->bsrc
15548+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
15549+ au_unpin(&a->pin);
15550+ di_write_unlock(a->parent);
15551+ err = au_cpup_before_link(src_dentry, a);
15552+ di_write_lock_parent(a->parent);
15553+ if (!err)
15554+ err = au_pin(&a->pin, dentry, a->bdst,
15555+ au_opt_udba(sb),
15556+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
15557+ if (unlikely(err))
15558+ goto out_wh;
15559+ }
15560+ if (!err) {
15561+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
15562+ err = -ENOENT;
15563+ if (h_src_dentry && h_src_dentry->d_inode)
15564+ err = vfsub_link(h_src_dentry,
15565+ au_pinned_h_dir(&a->pin),
15566+ &a->h_path);
15567+ }
15568+ }
15569+ if (unlikely(err))
15570+ goto out_unpin;
15571+
15572+ if (wh_dentry) {
15573+ a->h_path.dentry = wh_dentry;
15574+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
15575+ dentry);
15576+ if (unlikely(err))
15577+ goto out_revert;
15578+ }
15579+
15580+ dir->i_version++;
15581+ if (au_ibstart(dir) == au_dbstart(dentry))
15582+ au_cpup_attr_timesizes(dir);
15583+ inc_nlink(inode);
15584+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
15585+ d_instantiate(dentry, au_igrab(inode));
15586+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
15587+ /* some filesystem calls d_drop() */
15588+ d_drop(dentry);
15589+ goto out_unpin; /* success */
15590+
4f0767ce 15591+out_revert:
4a4d8108 15592+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
027c5e7a
AM
15593+ if (unlikely(rerr)) {
15594+ AuIOErr("%.*s reverting failed(%d, %d)\n",
15595+ AuDLNPair(dentry), err, rerr);
15596+ err = -EIO;
15597+ }
4a4d8108 15598+ au_dtime_revert(&dt);
4f0767ce 15599+out_unpin:
4a4d8108 15600+ au_unpin(&a->pin);
4f0767ce 15601+out_wh:
4a4d8108 15602+ dput(wh_dentry);
027c5e7a
AM
15603+out_parent:
15604+ di_write_unlock(a->parent);
15605+ dput(a->src_parent);
4f0767ce 15606+out_unlock:
4a4d8108
AM
15607+ if (unlikely(err)) {
15608+ au_update_dbstart(dentry);
15609+ d_drop(dentry);
15610+ }
4a4d8108 15611+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 15612+out_kfree:
4a4d8108 15613+ kfree(a);
4f0767ce 15614+out:
4a4d8108
AM
15615+ return err;
15616+}
15617+
/*
 * aufs_mkdir - create the directory @dentry under @dir with @mode.
 *
 * Makes the lower directory by vfsub_mkdir() on the branch
 * au_dbstart(dentry). The new dir is additionally made opaque
 * (au_diropq_create()) when lock_hdir_lkup_wh() returned a whiteout dentry
 * or the ALWAYS_DIROPQ mount option is set.
 * When a later step fails, the opaque mark and the lower dir are removed
 * again; a failure of either removal is reported as -EIO.
 * Returns zero or a negative error. For every error after aufs_read_lock()
 * succeeded, @dentry is d_drop()ed.
 */
15618+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
15619+{
15620+ int err, rerr;
15621+ aufs_bindex_t bindex;
15622+ unsigned char diropq;
15623+ struct path h_path;
15624+ struct dentry *wh_dentry, *parent, *opq_dentry;
15625+ struct mutex *h_mtx;
15626+ struct super_block *sb;
15627+ struct {
15628+ struct au_pin pin;
15629+ struct au_dtime dt;
15630+ } *a; /* reduce the stack usage */
15631+ struct au_wr_dir_args wr_dir_args = {
15632+ .force_btgt = -1,
15633+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
15634+ };
15635+
15636+ IMustLock(dir);
15637+
15638+ err = -ENOMEM;
15639+ a = kmalloc(sizeof(*a), GFP_NOFS);
15640+ if (unlikely(!a))
15641+ goto out;
15642+
027c5e7a
AM
15643+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
15644+ if (unlikely(err))
15645+ goto out_free;
15646+ err = au_d_may_add(dentry);
15647+ if (unlikely(err))
15648+ goto out_unlock;
15649+
4a4d8108
AM
15650+ parent = dentry->d_parent; /* dir inode is locked */
15651+ di_write_lock_parent(parent);
15652+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
15653+ &a->pin, &wr_dir_args);
15654+ err = PTR_ERR(wh_dentry);
15655+ if (IS_ERR(wh_dentry))
027c5e7a 15656+ goto out_parent;
4a4d8108
AM
15657+
15658+ sb = dentry->d_sb;
15659+ bindex = au_dbstart(dentry);
15660+ h_path.dentry = au_h_dptr(dentry, bindex);
15661+ h_path.mnt = au_sbr_mnt(sb, bindex);
15662+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
15663+ if (unlikely(err))
027c5e7a 15664+ goto out_unpin;
4a4d8108
AM
15665+
15666+ /* make the dir opaque */
15667+ diropq = 0;
15668+ h_mtx = &h_path.dentry->d_inode->i_mutex;
15669+ if (wh_dentry
15670+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
15671+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15672+ opq_dentry = au_diropq_create(dentry, bindex);
15673+ mutex_unlock(h_mtx);
15674+ err = PTR_ERR(opq_dentry);
15675+ if (IS_ERR(opq_dentry))
15676+ goto out_dir;
15677+ dput(opq_dentry);
15678+ diropq = 1;
15679+ }
15680+
15681+ err = epilog(dir, bindex, wh_dentry, dentry);
15682+ if (!err) {
15683+ inc_nlink(dir);
027c5e7a 15684+ goto out_unpin; /* success */
4a4d8108
AM
15685+ }
15686+
15687+ /* revert */
15688+ if (diropq) {
15689+ AuLabel(revert opq);
15690+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
15691+ rerr = au_diropq_remove(dentry, bindex);
15692+ mutex_unlock(h_mtx);
15693+ if (rerr) {
15694+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
15695+ AuDLNPair(dentry), err, rerr);
15696+ err = -EIO;
15697+ }
15698+ }
15699+
/* reached by falling through from the revert above or on diropq failure */
4f0767ce 15700+out_dir:
4a4d8108
AM
15701+ AuLabel(revert dir);
15702+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
15703+ if (rerr) {
15704+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
15705+ AuDLNPair(dentry), err, rerr);
15706+ err = -EIO;
15707+ }
4a4d8108 15708+ au_dtime_revert(&a->dt);
027c5e7a 15709+out_unpin:
4a4d8108
AM
15710+ au_unpin(&a->pin);
15711+ dput(wh_dentry);
027c5e7a
AM
15712+out_parent:
15713+ di_write_unlock(parent);
15714+out_unlock:
4a4d8108
AM
15715+ if (unlikely(err)) {
15716+ au_update_dbstart(dentry);
15717+ d_drop(dentry);
15718+ }
4a4d8108 15719+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 15720+out_free:
4a4d8108 15721+ kfree(a);
4f0767ce 15722+out:
4a4d8108
AM
15723+ return err;
15724+}
7f207e10
AM
15725diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
15726--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
53392da6 15727+++ linux/fs/aufs/i_op.c 2011-08-24 13:30:24.731313534 +0200
027c5e7a 15728@@ -0,0 +1,976 @@
4a4d8108 15729+/*
027c5e7a 15730+ * Copyright (C) 2005-2011 Junjiro R. Okajima
4a4d8108
AM
15731+ *
15732+ * This program, aufs is free software; you can redistribute it and/or modify
15733+ * it under the terms of the GNU General Public License as published by
15734+ * the Free Software Foundation; either version 2 of the License, or
15735+ * (at your option) any later version.
15736+ *
15737+ * This program is distributed in the hope that it will be useful,
15738+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15739+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15740+ * GNU General Public License for more details.
15741+ *
15742+ * You should have received a copy of the GNU General Public License
15743+ * along with this program; if not, write to the Free Software
15744+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15745+ */
1facf9fc 15746+
1308ab2a 15747+/*
4a4d8108 15748+ * inode operations (except add/del/rename)
1308ab2a 15749+ */
4a4d8108
AM
15750+
15751+#include <linux/device_cgroup.h>
15752+#include <linux/fs_stack.h>
15753+#include <linux/mm.h>
15754+#include <linux/namei.h>
15755+#include <linux/security.h>
15756+#include <linux/uaccess.h>
15757+#include "aufs.h"
15758+
/*
 * h_permission - test the access @mask against the lower inode @h_inode.
 * @h_mnt: the mount of the branch, examined for MNT_NOEXEC
 * @brperm: the permission bits of the branch, tested by au_br_writable()
 *
 * Three steps:
 * - immediate -EACCES for a write to an immutable inode, or for MAY_EXEC on
 *   a regular file which is on a noexec mount or has no exec bit at all.
 * - generic_permission() or the ->permission() of the lower inode.
 * - devcgroup_inode_permission() and security_inode_permission().
 * Returns zero or a negative error.
 */
027c5e7a 15759+static int h_permission(struct inode *h_inode, int mask, unsigned int flags,
4a4d8108 15760+ struct vfsmount *h_mnt, int brperm)
1facf9fc 15761+{
1308ab2a 15762+ int err;
4a4d8108 15763+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 15764+
4a4d8108
AM
15765+ err = -EACCES;
15766+ if ((write_mask && IS_IMMUTABLE(h_inode))
15767+ || ((mask & MAY_EXEC)
15768+ && S_ISREG(h_inode->i_mode)
15769+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
15770+ || !(h_inode->i_mode & S_IXUGO))))
15771+ goto out;
15772+
15773+ /*
15774+ * - skip the lower fs test in the case of write to ro branch.
15775+ * - nfs dir permission write check is optimized, but a policy for
15776+ * link/rename requires a real check.
15777+ */
15778+ if ((write_mask && !au_br_writable(brperm))
15779+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
15780+ && write_mask && !(mask & MAY_READ))
15781+ || !h_inode->i_op->permission) {
15782+ /* AuLabel(generic_permission); */
027c5e7a 15783+ err = generic_permission(h_inode, mask, flags,
4a4d8108 15784+ h_inode->i_op->check_acl);
1308ab2a 15785+ } else {
4a4d8108 15786+ /* AuLabel(h_inode->permission); */
027c5e7a 15787+ err = h_inode->i_op->permission(h_inode, mask, flags);
4a4d8108
AM
15788+ AuTraceErr(err);
15789+ }
1facf9fc 15790+
4a4d8108
AM
15791+ if (!err)
15792+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 15793+ if (!err)
4a4d8108 15794+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
15795+
/* disabled sketch; it is incomplete (.dentry has no value) and never built */
15796+#if 0
15797+ if (!err) {
15798+ /* todo: do we need to call ima_path_check()? */
15799+ struct path h_path = {
15800+ .dentry =
15801+ .mnt = h_mnt
15802+ };
15803+ err = ima_path_check(&h_path,
15804+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
15805+ IMA_COUNT_LEAVE);
1308ab2a 15806+ }
4a4d8108 15807+#endif
dece6358 15808+
4f0767ce 15809+out:
1308ab2a 15810+ return err;
15811+}
dece6358 15812+
/*
 * aufs_permission - test the access @mask against the aufs @inode.
 *
 * rcu-walk is not supported; -ECHILD is returned for IPERM_FLAG_RCU.
 * - non-dir, or any write access: only the lower inode on the top branch
 *   (au_ibstart()) is tested by h_permission(). For a successful write test
 *   on a non-special file, a non-readonly branch at an index not greater
 *   than the top one is additionally required, otherwise -EROFS.
 * - non-write access to a dir: every lower inode from au_ibstart() to
 *   au_ibend() is tested, stopping at the first error.
 * au_busy_or_stale() is returned when a lower inode is missing or its file
 * type is unexpected.
 * Runs under si_read_lock() and ii_read_lock_child().
 */
027c5e7a 15813+static int aufs_permission(struct inode *inode, int mask, unsigned int flags)
1308ab2a 15814+{
15815+ int err;
4a4d8108
AM
15816+ aufs_bindex_t bindex, bend;
15817+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
15818+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
15819+ struct inode *h_inode;
15820+ struct super_block *sb;
15821+ struct au_branch *br;
1facf9fc 15822+
027c5e7a
AM
15823+ /* todo: support rcu-walk? */
15824+ if (flags & IPERM_FLAG_RCU)
15825+ return -ECHILD;
15826+
4a4d8108
AM
15827+ sb = inode->i_sb;
15828+ si_read_lock(sb, AuLock_FLUSH);
15829+ ii_read_lock_child(inode);
027c5e7a
AM
15830+#if 0
15831+ err = au_iigen_test(inode, au_sigen(sb));
15832+ if (unlikely(err))
15833+ goto out;
15834+#endif
dece6358 15835+
4a4d8108
AM
15836+ if (!isdir || write_mask) {
/* the error to return when the lower inode does not match */
15837+ err = au_busy_or_stale();
15838+ h_inode = au_h_iptr(inode, au_ibstart(inode));
15839+ if (unlikely(!h_inode
15840+ || (h_inode->i_mode & S_IFMT)
15841+ != (inode->i_mode & S_IFMT)))
15842+ goto out;
1facf9fc 15843+
4a4d8108
AM
15844+ err = 0;
15845+ bindex = au_ibstart(inode);
15846+ br = au_sbr(sb, bindex);
027c5e7a
AM
15847+ err = h_permission(h_inode, mask, flags, br->br_mnt,
15848+ br->br_perm);
4a4d8108
AM
15849+ if (write_mask
15850+ && !err
15851+ && !special_file(h_inode->i_mode)) {
15852+ /* test whether the upper writable branch exists */
15853+ err = -EROFS;
15854+ for (; bindex >= 0; bindex--)
15855+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
15856+ err = 0;
15857+ break;
15858+ }
15859+ }
15860+ goto out;
15861+ }
dece6358 15862+
4a4d8108 15863+ /* non-write to dir */
1308ab2a 15864+ err = 0;
4a4d8108
AM
15865+ bend = au_ibend(inode);
15866+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
15867+ h_inode = au_h_iptr(inode, bindex);
15868+ if (h_inode) {
/* returned as it is when the lower inode is not a dir */
15869+ err = au_busy_or_stale();
15870+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
15871+ break;
15872+
15873+ br = au_sbr(sb, bindex);
027c5e7a 15874+ err = h_permission(h_inode, mask, flags, br->br_mnt,
4a4d8108
AM
15875+ br->br_perm);
15876+ }
15877+ }
1308ab2a 15878+
4f0767ce 15879+out:
4a4d8108
AM
15880+ ii_read_unlock(inode);
15881+ si_read_unlock(sb);
1308ab2a 15882+ return err;
15883+}
15884+
4a4d8108 15885+/* ---------------------------------------------------------------------- */
1facf9fc 15886+
4a4d8108
AM
/*
 * aufs_lookup - look up @dentry under the locked directory @dir.
 *
 * Rejects names longer than AUFS_MAX_NAMELEN with -ENAMETOOLONG, initializes
 * the dentry info (au_di_init()), and looks up the lower dentries by
 * au_lkup_dentry() starting from the top branch of the parent, after
 * au_alive_dir() and au_digen_test() passed for the parent.
 * When au_lkup_dentry() returned non-zero, an aufs inode is obtained by
 * au_new_inode(); otherwise the inode stays NULL. The result of
 * d_splice_alias() is returned, or an ERR_PTR() for a failure.
 */
15887+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
15888+ struct nameidata *nd)
15889+{
15890+ struct dentry *ret, *parent;
b752ccd1 15891+ struct inode *inode;
4a4d8108
AM
15892+ struct super_block *sb;
15893+ int err, npositive;
dece6358 15894+
4a4d8108 15895+ IMustLock(dir);
1308ab2a 15896+
4a4d8108 15897+ sb = dir->i_sb;
7f207e10
AM
15898+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
15899+ ret = ERR_PTR(err);
15900+ if (unlikely(err))
15901+ goto out;
15902+
4a4d8108
AM
15903+ ret = ERR_PTR(-ENAMETOOLONG);
15904+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
7f207e10 15905+ goto out_si;
4a4d8108
AM
15906+ err = au_di_init(dentry);
15907+ ret = ERR_PTR(err);
15908+ if (unlikely(err))
7f207e10 15909+ goto out_si;
1308ab2a 15910+
027c5e7a 15911+ npositive = 0; /* suppress a warning */
4a4d8108
AM
15912+ parent = dentry->d_parent; /* dir inode is locked */
15913+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
15914+ err = au_alive_dir(parent);
15915+ if (!err)
15916+ err = au_digen_test(parent, au_sigen(sb));
15917+ if (!err) {
15918+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
15919+ /*type*/0, nd);
15920+ err = npositive;
15921+ }
4a4d8108 15922+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
15923+ ret = ERR_PTR(err);
15924+ if (unlikely(err < 0))
15925+ goto out_unlock;
1308ab2a 15926+
4a4d8108
AM
15927+ inode = NULL;
15928+ if (npositive) {
b752ccd1 15929+ inode = au_new_inode(dentry, /*must_new*/0);
4a4d8108 15930+ ret = (void *)inode;
1facf9fc 15931+ }
4a4d8108
AM
15932+ if (IS_ERR(inode))
15933+ goto out_unlock;
15934+
15935+ ret = d_splice_alias(inode, dentry);
7f207e10 15936+ if (unlikely(IS_ERR(ret) && inode)) {
4a4d8108 15937+ ii_write_unlock(inode);
7f207e10
AM
15938+ iput(inode);
15939+ }
1facf9fc 15940+
/* presumably pairs with a write lock taken by au_di_init() -- TODO confirm */
4f0767ce 15941+out_unlock:
4a4d8108 15942+ di_write_unlock(dentry);
7f207e10 15943+out_si:
4a4d8108 15944+ si_read_unlock(sb);
7f207e10 15945+out:
4a4d8108
AM
15946+ return ret;
15947+}
1facf9fc 15948+
4a4d8108 15949+/* ---------------------------------------------------------------------- */
1facf9fc 15950+
4a4d8108
AM
/*
 * au_wr_dir_cpup - helper of au_wr_dir(); prepare @parent on the branch
 * @bcpup.
 * @add_entry: non-zero when a new entry is being added; then the caller is
 *	expected to hold the parent inode lock (IMustLock()), otherwise the
 *	parent is write-locked here for the duration.
 *
 * When @parent has no lower dentry on @bcpup, the dirs are copied by
 * au_cpdown_dirs() (@bstart < @bcpup) or au_cpup_dirs().
 * For @add_entry, au_lkup_neg() is then called for @dentry on @bcpup under
 * the i_mutex of the lower parent dir, and the branch range of @dentry is
 * updated.
 * Returns @bcpup on success, or a negative error.
 */
15951+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
15952+ const unsigned char add_entry, aufs_bindex_t bcpup,
15953+ aufs_bindex_t bstart)
15954+{
15955+ int err;
15956+ struct dentry *h_parent;
15957+ struct inode *h_dir;
1facf9fc 15958+
027c5e7a 15959+ if (add_entry)
4a4d8108 15960+ IMustLock(parent->d_inode);
027c5e7a 15961+ else
4a4d8108
AM
15962+ di_write_lock_parent(parent);
15963+
15964+ err = 0;
15965+ if (!au_h_dptr(parent, bcpup)) {
15966+ if (bstart < bcpup)
15967+ err = au_cpdown_dirs(dentry, bcpup);
15968+ else
15969+ err = au_cpup_dirs(dentry, bcpup);
15970+ }
15971+ if (!err && add_entry) {
15972+ h_parent = au_h_dptr(parent, bcpup);
15973+ h_dir = h_parent->d_inode;
15974+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
15975+ err = au_lkup_neg(dentry, bcpup);
15976+ /* todo: no unlock here */
15977+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
15978+
15979+ AuDbg("bcpup %d\n", bcpup);
15980+ if (!err) {
15981+ if (!dentry->d_inode)
15982+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
15983+ au_update_dbrange(dentry, /*do_put_zero*/0);
15984+ }
1308ab2a 15985+ }
1facf9fc 15986+
4a4d8108
AM
15987+ if (!add_entry)
15988+ di_write_unlock(parent);
15989+ if (!err)
15990+ err = bcpup; /* success */
1308ab2a 15991+
027c5e7a 15992+ AuTraceErr(err);
4a4d8108
AM
15993+ return err;
15994+}
1facf9fc 15995+
4a4d8108
AM
15996+/*
15997+ * decide the branch and the parent dir where we will create a new entry.
15998+ * returns new bindex or an error.
15999+ * copyup the parent dir if needed.
16000+ */
/*
 * @src_dentry: may be NULL; when given and args->force_btgt is negative,
 *	its top branch is preferred if it is above the one of @dentry.
 * @args: force_btgt >= 0 selects the target branch directly; flags are
 *	tested for ADD_ENTRY and ISDIR.
 *
 * With a negative force_btgt and no @src_dentry, AuWbrCreate() chooses the
 * branch for ADD_ENTRY. When the chosen branch is negative or fails the
 * au_test_ro() test, AuWbrCopyup() chooses another one.
 * When the resulting branch differs from the current top branch of @dentry,
 * au_wr_dir_cpup() prepares the parent on it.
 * A reference to the parent is held only during this function.
 */
16001+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16002+ struct au_wr_dir_args *args)
16003+{
16004+ int err;
16005+ aufs_bindex_t bcpup, bstart, src_bstart;
16006+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
16007+ ADD_ENTRY);
16008+ struct super_block *sb;
16009+ struct dentry *parent;
16010+ struct au_sbinfo *sbinfo;
1facf9fc 16011+
4a4d8108
AM
16012+ sb = dentry->d_sb;
16013+ sbinfo = au_sbi(sb);
16014+ parent = dget_parent(dentry);
16015+ bstart = au_dbstart(dentry);
16016+ bcpup = bstart;
16017+ if (args->force_btgt < 0) {
16018+ if (src_dentry) {
16019+ src_bstart = au_dbstart(src_dentry);
16020+ if (src_bstart < bstart)
16021+ bcpup = src_bstart;
16022+ } else if (add_entry) {
16023+ err = AuWbrCreate(sbinfo, dentry,
16024+ au_ftest_wrdir(args->flags, ISDIR));
16025+ bcpup = err;
16026+ }
1facf9fc 16027+
4a4d8108
AM
16028+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
16029+ if (add_entry)
16030+ err = AuWbrCopyup(sbinfo, dentry);
16031+ else {
16032+ if (!IS_ROOT(dentry)) {
16033+ di_read_lock_parent(parent, !AuLock_IR);
16034+ err = AuWbrCopyup(sbinfo, dentry);
16035+ di_read_unlock(parent, !AuLock_IR);
16036+ } else
16037+ err = AuWbrCopyup(sbinfo, dentry);
16038+ }
16039+ bcpup = err;
16040+ if (unlikely(err < 0))
16041+ goto out;
16042+ }
16043+ } else {
16044+ bcpup = args->force_btgt;
16045+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1308ab2a 16046+ }
027c5e7a 16047+
4a4d8108
AM
16048+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
16049+ err = bcpup;
16050+ if (bcpup == bstart)
16051+ goto out; /* success */
4a4d8108
AM
16052+
16053+ /* copyup the new parent into the branch we process */
16054+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a
AM
16055+ if (err >= 0) {
/* a negative dentry is moved to the new branch entirely */
16056+ if (!dentry->d_inode) {
16057+ au_set_h_dptr(dentry, bstart, NULL);
16058+ au_set_dbstart(dentry, bcpup);
16059+ au_set_dbend(dentry, bcpup);
16060+ }
16061+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
16062+ }
4a4d8108 16063+
4f0767ce 16064+out:
4a4d8108 16065+ dput(parent);
dece6358
AM
16066+ return err;
16067+}
1facf9fc 16068+
1308ab2a 16069+/* ---------------------------------------------------------------------- */
16070+
4a4d8108 16071+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 16072+{
4a4d8108
AM
16073+ if (pin && pin->parent)
16074+ return au_h_dptr(pin->parent, pin->bindex);
16075+ return NULL;
dece6358 16076+}
1facf9fc 16077+
4a4d8108 16078+void au_unpin(struct au_pin *p)
dece6358 16079+{
e49829fe 16080+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
4a4d8108
AM
16081+ mnt_drop_write(p->h_mnt);
16082+ if (!p->hdir)
16083+ return;
1facf9fc 16084+
4a4d8108
AM
16085+ au_hn_imtx_unlock(p->hdir);
16086+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16087+ di_read_unlock(p->parent, AuLock_IR);
16088+ iput(p->hdir->hi_inode);
16089+ dput(p->parent);
16090+ p->parent = NULL;
16091+ p->hdir = NULL;
16092+ p->h_mnt = NULL;
16093+}
1308ab2a 16094+
4a4d8108
AM
/*
 * au_do_pin - pin the parent of p->dentry on the branch p->bindex.
 *
 * For the root dentry, only the write access to the branch mount is taken
 * (when MNT_WRITE is requested).
 * Otherwise a reference to the parent dentry is taken (and it is
 * read-locked unless DI_LOCKED is set), the lower dir inode is grabbed and
 * locked by au_hn_imtx_lock_nested(), the lower parent is verified against
 * it, the lower dentry of p->dentry (if any) is verified by au_h_verify(),
 * and finally the write access to the branch mount is taken when requested.
 * Returns zero on success. On any failure the original error is only
 * printed and the value of au_busy_or_stale() is returned instead.
 */
16095+int au_do_pin(struct au_pin *p)
16096+{
16097+ int err;
16098+ struct super_block *sb;
16099+ struct dentry *h_dentry, *h_parent;
16100+ struct au_branch *br;
16101+ struct inode *h_dir;
16102+
16103+ err = 0;
16104+ sb = p->dentry->d_sb;
16105+ br = au_sbr(sb, p->bindex);
16106+ if (IS_ROOT(p->dentry)) {
16107+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16108+ p->h_mnt = br->br_mnt;
16109+ err = mnt_want_write(p->h_mnt);
16110+ if (unlikely(err)) {
16111+ au_fclr_pin(p->flags, MNT_WRITE);
16112+ goto out_err;
16113+ }
16114+ }
dece6358 16115+ goto out;
1facf9fc 16116+ }
16117+
4a4d8108
AM
16118+ h_dentry = NULL;
16119+ if (p->bindex <= au_dbend(p->dentry))
16120+ h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 16121+
4a4d8108
AM
16122+ p->parent = dget_parent(p->dentry);
16123+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16124+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 16125+
4a4d8108
AM
16126+ h_dir = NULL;
16127+ h_parent = au_h_dptr(p->parent, p->bindex);
16128+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
16129+ if (p->hdir)
16130+ h_dir = p->hdir->hi_inode;
dece6358 16131+
b752ccd1
AM
16132+ /*
16133+ * udba case, or
16134+ * if DI_LOCKED is not set, then p->parent may be different
16135+ * and h_parent can be NULL.
16136+ */
16137+ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
e49829fe 16138+ err = -EBUSY;
4a4d8108
AM
16139+ if (!au_ftest_pin(p->flags, DI_LOCKED))
16140+ di_read_unlock(p->parent, AuLock_IR);
16141+ dput(p->parent);
16142+ p->parent = NULL;
16143+ goto out_err;
16144+ }
1308ab2a 16145+
4a4d8108
AM
16146+ au_igrab(h_dir);
16147+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
1308ab2a 16148+
4a4d8108
AM
16149+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
16150+ err = -EBUSY;
16151+ goto out_unpin;
16152+ }
16153+ if (h_dentry) {
16154+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
16155+ if (unlikely(err)) {
/* the write access is not taken yet, so au_unpin() must not drop it */
16156+ au_fclr_pin(p->flags, MNT_WRITE);
16157+ goto out_unpin;
16158+ }
1facf9fc 16159+ }
dece6358 16160+
4a4d8108
AM
16161+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
16162+ p->h_mnt = br->br_mnt;
16163+ err = mnt_want_write(p->h_mnt);
dece6358 16164+ if (unlikely(err)) {
4a4d8108
AM
16165+ au_fclr_pin(p->flags, MNT_WRITE);
16166+ goto out_unpin;
dece6358
AM
16167+ }
16168+ }
4a4d8108
AM
16169+ goto out; /* success */
16170+
4f0767ce 16171+out_unpin:
4a4d8108 16172+ au_unpin(p);
4f0767ce 16173+out_err:
4a4d8108
AM
16174+ pr_err("err %d\n", err);
16175+ err = au_busy_or_stale();
4f0767ce 16176+out:
1facf9fc 16177+ return err;
16178+}
16179+
4a4d8108
AM
16180+void au_pin_init(struct au_pin *p, struct dentry *dentry,
16181+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16182+ unsigned int udba, unsigned char flags)
16183+{
16184+ p->dentry = dentry;
16185+ p->udba = udba;
16186+ p->lsc_di = lsc_di;
16187+ p->lsc_hi = lsc_hi;
16188+ p->flags = flags;
16189+ p->bindex = bindex;
16190+
16191+ p->parent = NULL;
16192+ p->hdir = NULL;
16193+ p->h_mnt = NULL;
16194+}
16195+
16196+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16197+ unsigned int udba, unsigned char flags)
16198+{
16199+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
16200+ udba, flags);
16201+ return au_do_pin(pin);
16202+}
16203+
dece6358
AM
16204+/* ---------------------------------------------------------------------- */
16205+
1308ab2a 16206+/*
4a4d8108
AM
16207+ * ->setattr() and ->getattr() are called in various cases.
16208+ * chmod, stat: dentry is revalidated.
16209+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
16210+ * unhashed.
16211+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 16212+ */
027c5e7a 16213+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
4a4d8108 16214+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 16215+{
4a4d8108
AM
16216+ int err;
16217+ struct inode *inode;
16218+ struct dentry *parent;
1facf9fc 16219+
1308ab2a 16220+ err = 0;
4a4d8108 16221+ inode = dentry->d_inode;
027c5e7a 16222+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
16223+ parent = dget_parent(dentry);
16224+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 16225+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
16226+ di_read_unlock(parent, AuLock_IR);
16227+ dput(parent);
dece6358 16228+ }
1facf9fc 16229+
4a4d8108 16230+ AuTraceErr(err);
1308ab2a 16231+ return err;
16232+}
dece6358 16233+
4a4d8108
AM
/*
 * flag bits for au_icpup_args.flags and their test/set/clear helpers.
 * 'name' is the suffix after AuIcpup_, e.g. au_fset_icpup(flags, DID_CPUP).
 */
16234+#define AuIcpup_DID_CPUP 1
16235+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
7f207e10
AM
16236+#define au_fset_icpup(flags, name) \
16237+ do { (flags) |= AuIcpup_##name; } while (0)
16238+#define au_fclr_icpup(flags, name) \
16239+ do { (flags) &= ~AuIcpup_##name; } while (0)
1308ab2a 16240+
4a4d8108
AM
/*
 * working state of au_pin_and_icpup().
 * flags: AuIcpup_* bits, pin_flags: the flags passed to au_pin().
 * btgt: the target branch returned by au_wr_dir().
 * udba: passed to au_pin() as it is.
 * pin: the pinned parent, h_path/h_inode: the lower dentry and inode to be
 * operated on.
 */
16241+struct au_icpup_args {
16242+ unsigned char flags;
16243+ unsigned char pin_flags;
16244+ aufs_bindex_t btgt;
16245+ unsigned int udba;
16246+ struct au_pin pin;
16247+ struct path h_path;
16248+ struct inode *h_inode;
16249+};
1308ab2a 16250+
4a4d8108
AM
16251+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16252+ struct au_icpup_args *a)
1308ab2a 16253+{
16254+ int err;
4a4d8108 16255+ loff_t sz;
e49829fe 16256+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
16257+ struct dentry *hi_wh, *parent;
16258+ struct inode *inode;
16259+ struct file *h_file;
16260+ struct au_wr_dir_args wr_dir_args = {
16261+ .force_btgt = -1,
16262+ .flags = 0
16263+ };
16264+
16265+ bstart = au_dbstart(dentry);
16266+ inode = dentry->d_inode;
16267+ if (S_ISDIR(inode->i_mode))
16268+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
16269+ /* plink or hi_wh() case */
e49829fe 16270+ ibstart = au_ibstart(inode);
027c5e7a 16271+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 16272+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
16273+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
16274+ if (unlikely(err < 0))
16275+ goto out;
16276+ a->btgt = err;
16277+ if (err != bstart)
16278+ au_fset_icpup(a->flags, DID_CPUP);
16279+
16280+ err = 0;
16281+ a->pin_flags = AuPin_MNT_WRITE;
16282+ parent = NULL;
16283+ if (!IS_ROOT(dentry)) {
16284+ au_fset_pin(a->pin_flags, DI_LOCKED);
16285+ parent = dget_parent(dentry);
16286+ di_write_lock_parent(parent);
16287+ }
16288+
16289+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
16290+ if (unlikely(err))
16291+ goto out_parent;
16292+
16293+ a->h_path.dentry = au_h_dptr(dentry, bstart);
16294+ a->h_inode = a->h_path.dentry->d_inode;
16295+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16296+ sz = -1;
16297+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
16298+ sz = ia->ia_size;
16299+
16300+ h_file = NULL;
16301+ hi_wh = NULL;
027c5e7a 16302+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
16303+ hi_wh = au_hi_wh(inode, a->btgt);
16304+ if (!hi_wh) {
16305+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
16306+ if (unlikely(err))
16307+ goto out_unlock;
16308+ hi_wh = au_hi_wh(inode, a->btgt);
16309+ /* todo: revalidate hi_wh? */
16310+ }
16311+ }
16312+
16313+ if (parent) {
16314+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
16315+ di_downgrade_lock(parent, AuLock_IR);
16316+ dput(parent);
16317+ parent = NULL;
16318+ }
16319+ if (!au_ftest_icpup(a->flags, DID_CPUP))
16320+ goto out; /* success */
16321+
16322+ if (!d_unhashed(dentry)) {
16323+ h_file = au_h_open_pre(dentry, bstart);
16324+ if (IS_ERR(h_file)) {
16325+ err = PTR_ERR(h_file);
16326+ h_file = NULL;
16327+ } else
16328+ err = au_sio_cpup_simple(dentry, a->btgt, sz,
16329+ AuCpup_DTIME);
16330+ if (!err)
16331+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16332+ } else if (!hi_wh)
16333+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
16334+ else
16335+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 16336+
4f0767ce 16337+out_unlock:
4a4d8108
AM
16338+ mutex_unlock(&a->h_inode->i_mutex);
16339+ au_h_open_post(dentry, bstart, h_file);
16340+ a->h_inode = a->h_path.dentry->d_inode;
dece6358 16341+ if (!err) {
4a4d8108 16342+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
dece6358 16343+ goto out; /* success */
1facf9fc 16344+ }
dece6358 16345+
4a4d8108 16346+ au_unpin(&a->pin);
4f0767ce 16347+out_parent:
4a4d8108
AM
16348+ if (parent) {
16349+ di_write_unlock(parent);
16350+ dput(parent);
16351+ }
4f0767ce 16352+out:
1facf9fc 16353+ return err;
16354+}
16355+
4a4d8108 16356+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 16357+{
4a4d8108
AM
16358+ int err;
16359+ struct inode *inode;
16360+ struct super_block *sb;
16361+ struct file *file;
16362+ struct au_icpup_args *a;
1facf9fc 16363+
4a4d8108
AM
16364+ inode = dentry->d_inode;
16365+ IMustLock(inode);
dece6358 16366+
4a4d8108
AM
16367+ err = -ENOMEM;
16368+ a = kzalloc(sizeof(*a), GFP_NOFS);
16369+ if (unlikely(!a))
16370+ goto out;
1facf9fc 16371+
4a4d8108
AM
16372+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
16373+ ia->ia_valid &= ~ATTR_MODE;
dece6358 16374+
4a4d8108
AM
16375+ file = NULL;
16376+ sb = dentry->d_sb;
e49829fe
JR
16377+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16378+ if (unlikely(err))
16379+ goto out_kfree;
16380+
4a4d8108
AM
16381+ if (ia->ia_valid & ATTR_FILE) {
16382+ /* currently ftruncate(2) only */
16383+ AuDebugOn(!S_ISREG(inode->i_mode));
16384+ file = ia->ia_file;
16385+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
16386+ if (unlikely(err))
16387+ goto out_si;
16388+ ia->ia_file = au_hf_top(file);
16389+ a->udba = AuOpt_UDBA_NONE;
16390+ } else {
16391+ /* fchmod() doesn't pass ia_file */
16392+ a->udba = au_opt_udba(sb);
027c5e7a
AM
16393+ di_write_lock_child(dentry);
16394+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
16395+ if (d_unhashed(dentry))
16396+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
16397+ if (a->udba != AuOpt_UDBA_NONE) {
16398+ AuDebugOn(IS_ROOT(dentry));
16399+ err = au_reval_for_attr(dentry, au_sigen(sb));
16400+ if (unlikely(err))
16401+ goto out_dentry;
16402+ }
dece6358 16403+ }
dece6358 16404+
4a4d8108
AM
16405+ err = au_pin_and_icpup(dentry, ia, a);
16406+ if (unlikely(err < 0))
16407+ goto out_dentry;
16408+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
16409+ ia->ia_file = NULL;
16410+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 16411+ }
dece6358 16412+
4a4d8108
AM
16413+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
16414+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
16415+ == (ATTR_MODE | ATTR_CTIME)) {
16416+ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
16417+ ia->ia_mode);
16418+ if (unlikely(err))
16419+ goto out_unlock;
16420+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
16421+ && (ia->ia_valid & ATTR_CTIME)) {
16422+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
16423+ if (unlikely(err))
16424+ goto out_unlock;
16425+ }
dece6358 16426+
4a4d8108
AM
16427+ if (ia->ia_valid & ATTR_SIZE) {
16428+ struct file *f;
1308ab2a 16429+
953406b4 16430+ if (ia->ia_size < i_size_read(inode))
4a4d8108 16431+ /* unmap only */
953406b4 16432+ truncate_setsize(inode, ia->ia_size);
1308ab2a 16433+
4a4d8108
AM
16434+ f = NULL;
16435+ if (ia->ia_valid & ATTR_FILE)
16436+ f = ia->ia_file;
16437+ mutex_unlock(&a->h_inode->i_mutex);
16438+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
16439+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
16440+ } else
16441+ err = vfsub_notify_change(&a->h_path, ia);
16442+ if (!err)
16443+ au_cpup_attr_changeable(inode);
1308ab2a 16444+
4f0767ce 16445+out_unlock:
4a4d8108
AM
16446+ mutex_unlock(&a->h_inode->i_mutex);
16447+ au_unpin(&a->pin);
027c5e7a
AM
16448+ if (unlikely(err))
16449+ au_update_dbstart(dentry);
4f0767ce 16450+out_dentry:
4a4d8108
AM
16451+ di_write_unlock(dentry);
16452+ if (file) {
16453+ fi_write_unlock(file);
16454+ ia->ia_file = file;
16455+ ia->ia_valid |= ATTR_FILE;
16456+ }
4f0767ce 16457+out_si:
4a4d8108 16458+ si_read_unlock(sb);
e49829fe 16459+out_kfree:
4a4d8108 16460+ kfree(a);
4f0767ce 16461+out:
4a4d8108
AM
16462+ AuTraceErr(err);
16463+ return err;
1facf9fc 16464+}
16465+
4a4d8108
AM
16466+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
16467+ unsigned int nlink)
1facf9fc 16468+{
4a4d8108
AM
16469+ inode->i_mode = st->mode;
16470+ inode->i_uid = st->uid;
16471+ inode->i_gid = st->gid;
16472+ inode->i_atime = st->atime;
16473+ inode->i_mtime = st->mtime;
16474+ inode->i_ctime = st->ctime;
1facf9fc 16475+
4a4d8108
AM
16476+ au_cpup_attr_nlink(inode, /*force*/0);
16477+ if (S_ISDIR(inode->i_mode)) {
16478+ inode->i_nlink -= nlink;
16479+ inode->i_nlink += st->nlink;
16480+ }
1facf9fc 16481+
4a4d8108
AM
16482+ spin_lock(&inode->i_lock);
16483+ inode->i_blocks = st->blocks;
16484+ i_size_write(inode, st->size);
16485+ spin_unlock(&inode->i_lock);
1facf9fc 16486+}
16487+
4a4d8108
AM
16488+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
16489+ struct dentry *dentry, struct kstat *st)
1facf9fc 16490+{
4a4d8108
AM
16491+ int err;
16492+ unsigned int mnt_flags;
16493+ aufs_bindex_t bindex;
16494+ unsigned char udba_none, positive;
16495+ struct super_block *sb, *h_sb;
16496+ struct inode *inode;
16497+ struct vfsmount *h_mnt;
16498+ struct dentry *h_dentry;
1facf9fc 16499+
4a4d8108
AM
16500+ sb = dentry->d_sb;
16501+ inode = dentry->d_inode;
7f207e10
AM
16502+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16503+ if (unlikely(err))
16504+ goto out;
4a4d8108
AM
16505+ mnt_flags = au_mntflags(sb);
16506+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 16507+
4a4d8108 16508+ /* support fstat(2) */
027c5e7a 16509+ if (!d_unlinked(dentry) && !udba_none) {
4a4d8108 16510+ unsigned int sigen = au_sigen(sb);
027c5e7a
AM
16511+ err = au_digen_test(dentry, sigen);
16512+ if (!err) {
4a4d8108 16513+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a
AM
16514+ err = au_dbrange_test(dentry);
16515+ if (unlikely(err))
16516+ goto out_unlock;
16517+ } else {
4a4d8108
AM
16518+ AuDebugOn(IS_ROOT(dentry));
16519+ di_write_lock_child(dentry);
027c5e7a
AM
16520+ err = au_dbrange_test(dentry);
16521+ if (!err)
16522+ err = au_reval_for_attr(dentry, sigen);
4a4d8108
AM
16523+ di_downgrade_lock(dentry, AuLock_IR);
16524+ if (unlikely(err))
7f207e10 16525+ goto out_unlock;
4a4d8108
AM
16526+ }
16527+ } else
16528+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 16529+
4a4d8108
AM
16530+ bindex = au_ibstart(inode);
16531+ h_mnt = au_sbr_mnt(sb, bindex);
16532+ h_sb = h_mnt->mnt_sb;
16533+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
16534+ goto out_fill; /* success */
1facf9fc 16535+
4a4d8108
AM
16536+ h_dentry = NULL;
16537+ if (au_dbstart(dentry) == bindex)
16538+ h_dentry = dget(au_h_dptr(dentry, bindex));
16539+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
16540+ h_dentry = au_plink_lkup(inode, bindex);
16541+ if (IS_ERR(h_dentry))
16542+ goto out_fill; /* pretending success */
16543+ }
16544+ /* illegally overlapped or something */
16545+ if (unlikely(!h_dentry))
16546+ goto out_fill; /* pretending success */
16547+
16548+ positive = !!h_dentry->d_inode;
16549+ if (positive)
16550+ err = vfs_getattr(h_mnt, h_dentry, st);
16551+ dput(h_dentry);
16552+ if (!err) {
16553+ if (positive)
16554+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
16555+ goto out_fill; /* success */
1facf9fc 16556+ }
7f207e10
AM
16557+ AuTraceErr(err);
16558+ goto out_unlock;
4a4d8108 16559+
4f0767ce 16560+out_fill:
4a4d8108 16561+ generic_fillattr(inode, st);
7f207e10 16562+out_unlock:
4a4d8108
AM
16563+ di_read_unlock(dentry, AuLock_IR);
16564+ si_read_unlock(sb);
7f207e10
AM
16565+out:
16566+ AuTraceErr(err);
4a4d8108 16567+ return err;
1facf9fc 16568+}
16569+
16570+/* ---------------------------------------------------------------------- */
16571+
4a4d8108
AM
16572+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
16573+ int bufsiz)
1facf9fc 16574+{
16575+ int err;
4a4d8108
AM
16576+ struct super_block *sb;
16577+ struct dentry *h_dentry;
1facf9fc 16578+
4a4d8108
AM
16579+ err = -EINVAL;
16580+ h_dentry = au_h_dptr(dentry, bindex);
16581+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
16582+ goto out;
1facf9fc 16583+
4a4d8108
AM
16584+ err = security_inode_readlink(h_dentry);
16585+ if (unlikely(err))
dece6358 16586+ goto out;
1facf9fc 16587+
4a4d8108
AM
16588+ sb = dentry->d_sb;
16589+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
16590+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
16591+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
1facf9fc 16592+ }
4a4d8108 16593+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 16594+
4f0767ce 16595+out:
4a4d8108
AM
16596+ return err;
16597+}
1facf9fc 16598+
4a4d8108
AM
16599+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
16600+{
16601+ int err;
1facf9fc 16602+
027c5e7a
AM
16603+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16604+ if (unlikely(err))
16605+ goto out;
16606+ err = au_d_hashed_positive(dentry);
16607+ if (!err)
16608+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 16609+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16610+
027c5e7a 16611+out:
4a4d8108
AM
16612+ return err;
16613+}
1facf9fc 16614+
4a4d8108
AM
16615+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
16616+{
16617+ int err;
4a4d8108 16618+ mm_segment_t old_fs;
b752ccd1
AM
16619+ union {
16620+ char *k;
16621+ char __user *u;
16622+ } buf;
1facf9fc 16623+
4a4d8108 16624+ err = -ENOMEM;
b752ccd1
AM
16625+ buf.k = __getname_gfp(GFP_NOFS);
16626+ if (unlikely(!buf.k))
4a4d8108 16627+ goto out;
1facf9fc 16628+
027c5e7a
AM
16629+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
16630+ if (unlikely(err))
16631+ goto out_name;
16632+
16633+ err = au_d_hashed_positive(dentry);
16634+ if (!err) {
16635+ old_fs = get_fs();
16636+ set_fs(KERNEL_DS);
16637+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
16638+ set_fs(old_fs);
16639+ }
4a4d8108 16640+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 16641+
4a4d8108 16642+ if (err >= 0) {
b752ccd1 16643+ buf.k[err] = 0;
4a4d8108 16644+ /* will be freed by put_link */
b752ccd1 16645+ nd_set_link(nd, buf.k);
4a4d8108 16646+ return NULL; /* success */
1308ab2a 16647+ }
1facf9fc 16648+
027c5e7a
AM
16649+out_name:
16650+ __putname(buf.k);
4f0767ce 16651+out:
4a4d8108
AM
16652+ path_put(&nd->path);
16653+ AuTraceErr(err);
16654+ return ERR_PTR(err);
16655+}
1facf9fc 16656+
4a4d8108
AM
16657+static void aufs_put_link(struct dentry *dentry __maybe_unused,
16658+ struct nameidata *nd, void *cookie __maybe_unused)
16659+{
16660+ __putname(nd_get_link(nd));
16661+}
1facf9fc 16662+
4a4d8108 16663+/* ---------------------------------------------------------------------- */
1facf9fc 16664+
4a4d8108
AM
16665+static void aufs_truncate_range(struct inode *inode __maybe_unused,
16666+ loff_t start __maybe_unused,
16667+ loff_t end __maybe_unused)
16668+{
16669+ AuUnsupport();
16670+}
1facf9fc 16671+
4a4d8108 16672+/* ---------------------------------------------------------------------- */
1308ab2a 16673+
4a4d8108
AM
16674+struct inode_operations aufs_symlink_iop = {
16675+ .permission = aufs_permission,
16676+ .setattr = aufs_setattr,
16677+ .getattr = aufs_getattr,
16678+ .readlink = aufs_readlink,
16679+ .follow_link = aufs_follow_link,
16680+ .put_link = aufs_put_link
16681+};
16682+
16683+struct inode_operations aufs_dir_iop = {
16684+ .create = aufs_create,
16685+ .lookup = aufs_lookup,
16686+ .link = aufs_link,
16687+ .unlink = aufs_unlink,
16688+ .symlink = aufs_symlink,
16689+ .mkdir = aufs_mkdir,
16690+ .rmdir = aufs_rmdir,
16691+ .mknod = aufs_mknod,
16692+ .rename = aufs_rename,
16693+
16694+ .permission = aufs_permission,
16695+ .setattr = aufs_setattr,
16696+ .getattr = aufs_getattr
16697+};
16698+
16699+struct inode_operations aufs_iop = {
16700+ .permission = aufs_permission,
16701+ .setattr = aufs_setattr,
16702+ .getattr = aufs_getattr,
16703+ .truncate_range = aufs_truncate_range
16704+};
7f207e10
AM
16705diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
16706--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
16707+++ linux/fs/aufs/i_op_del.c 2011-08-24 13:30:24.731313534 +0200
16708@@ -0,0 +1,478 @@
1facf9fc 16709+/*
027c5e7a 16710+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 16711+ *
16712+ * This program, aufs is free software; you can redistribute it and/or modify
16713+ * it under the terms of the GNU General Public License as published by
16714+ * the Free Software Foundation; either version 2 of the License, or
16715+ * (at your option) any later version.
dece6358
AM
16716+ *
16717+ * This program is distributed in the hope that it will be useful,
16718+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16719+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16720+ * GNU General Public License for more details.
16721+ *
16722+ * You should have received a copy of the GNU General Public License
16723+ * along with this program; if not, write to the Free Software
16724+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 16725+ */
16726+
16727+/*
4a4d8108 16728+ * inode operations (del entry)
1308ab2a 16729+ */
dece6358 16730+
1308ab2a 16731+#include "aufs.h"
dece6358 16732+
4a4d8108
AM
16733+/*
16734+ * decide if a new whiteout for @dentry is necessary or not.
16735+ * when it is necessary, prepare the parent dir for the upper branch whose
16736+ * branch index is @bcpup for creation. the actual creation of the whiteout will
16737+ * be done by caller.
16738+ * return value:
16739+ * 0: wh is unnecessary
16740+ * plus: wh is necessary
16741+ * minus: error
16742+ */
16743+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 16744+{
4a4d8108
AM
16745+ int need_wh, err;
16746+ aufs_bindex_t bstart;
16747+ struct super_block *sb;
dece6358 16748+
4a4d8108
AM
16749+ sb = dentry->d_sb;
16750+ bstart = au_dbstart(dentry);
16751+ if (*bcpup < 0) {
16752+ *bcpup = bstart;
16753+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
16754+ err = AuWbrCopyup(au_sbi(sb), dentry);
16755+ *bcpup = err;
16756+ if (unlikely(err < 0))
16757+ goto out;
16758+ }
16759+ } else
16760+ AuDebugOn(bstart < *bcpup
16761+ || au_test_ro(sb, *bcpup, dentry->d_inode));
16762+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 16763+
4a4d8108
AM
16764+ if (*bcpup != bstart) {
16765+ err = au_cpup_dirs(dentry, *bcpup);
16766+ if (unlikely(err))
16767+ goto out;
16768+ need_wh = 1;
16769+ } else {
027c5e7a 16770+ struct au_dinfo *dinfo, *tmp;
4a4d8108 16771+
027c5e7a
AM
16772+ need_wh = -ENOMEM;
16773+ dinfo = au_di(dentry);
16774+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
16775+ if (tmp) {
16776+ au_di_cp(tmp, dinfo);
16777+ au_di_swap(tmp, dinfo);
16778+ /* returns the number of positive dentries */
16779+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
16780+ /*nd*/NULL);
16781+ au_di_swap(tmp, dinfo);
16782+ au_rw_write_unlock(&tmp->di_rwsem);
16783+ au_di_free(tmp);
4a4d8108
AM
16784+ }
16785+ }
16786+ AuDbg("need_wh %d\n", need_wh);
16787+ err = need_wh;
16788+
4f0767ce 16789+out:
4a4d8108 16790+ return err;
1facf9fc 16791+}
16792+
4a4d8108
AM
16793+/*
16794+ * simple tests for the del-entry operations.
16795+ * following the checks in vfs, plus the parent-child relationship.
16796+ */
16797+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16798+ struct dentry *h_parent, int isdir)
1facf9fc 16799+{
4a4d8108
AM
16800+ int err;
16801+ umode_t h_mode;
16802+ struct dentry *h_dentry, *h_latest;
1308ab2a 16803+ struct inode *h_inode;
1facf9fc 16804+
4a4d8108
AM
16805+ h_dentry = au_h_dptr(dentry, bindex);
16806+ h_inode = h_dentry->d_inode;
16807+ if (dentry->d_inode) {
16808+ err = -ENOENT;
16809+ if (unlikely(!h_inode || !h_inode->i_nlink))
16810+ goto out;
1facf9fc 16811+
4a4d8108
AM
16812+ h_mode = h_inode->i_mode;
16813+ if (!isdir) {
16814+ err = -EISDIR;
16815+ if (unlikely(S_ISDIR(h_mode)))
16816+ goto out;
16817+ } else if (unlikely(!S_ISDIR(h_mode))) {
16818+ err = -ENOTDIR;
16819+ goto out;
16820+ }
16821+ } else {
16822+ /* rename(2) case */
16823+ err = -EIO;
16824+ if (unlikely(h_inode))
16825+ goto out;
16826+ }
1facf9fc 16827+
4a4d8108
AM
16828+ err = -ENOENT;
16829+ /* expected parent dir is locked */
16830+ if (unlikely(h_parent != h_dentry->d_parent))
16831+ goto out;
16832+ err = 0;
16833+
16834+ /*
16835+ * rmdir a dir may break the consistency on some filesystem.
16836+ * let's try heavy test.
16837+ */
16838+ err = -EACCES;
16839+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
16840+ goto out;
16841+
16842+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
16843+ au_sbr(dentry->d_sb, bindex));
16844+ err = -EIO;
16845+ if (IS_ERR(h_latest))
16846+ goto out;
16847+ if (h_latest == h_dentry)
16848+ err = 0;
16849+ dput(h_latest);
16850+
4f0767ce 16851+out:
4a4d8108 16852+ return err;
1308ab2a 16853+}
1facf9fc 16854+
4a4d8108
AM
16855+/*
16856+ * decide the branch where we operate for @dentry. the branch index will be set
16857+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
16858+ * dir for reverting.
16859+ * when a new whiteout is necessary, create it.
16860+ */
16861+static struct dentry*
16862+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
16863+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 16864+{
4a4d8108
AM
16865+ struct dentry *wh_dentry;
16866+ struct super_block *sb;
16867+ struct path h_path;
16868+ int err, need_wh;
16869+ unsigned int udba;
16870+ aufs_bindex_t bcpup;
dece6358 16871+
4a4d8108
AM
16872+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
16873+ wh_dentry = ERR_PTR(need_wh);
16874+ if (unlikely(need_wh < 0))
16875+ goto out;
16876+
16877+ sb = dentry->d_sb;
16878+ udba = au_opt_udba(sb);
16879+ bcpup = *rbcpup;
16880+ err = au_pin(pin, dentry, bcpup, udba,
16881+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
16882+ wh_dentry = ERR_PTR(err);
16883+ if (unlikely(err))
16884+ goto out;
16885+
16886+ h_path.dentry = au_pinned_h_parent(pin);
16887+ if (udba != AuOpt_UDBA_NONE
16888+ && au_dbstart(dentry) == bcpup) {
16889+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
16890+ wh_dentry = ERR_PTR(err);
16891+ if (unlikely(err))
16892+ goto out_unpin;
16893+ }
16894+
16895+ h_path.mnt = au_sbr_mnt(sb, bcpup);
16896+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
16897+ wh_dentry = NULL;
16898+ if (!need_wh)
16899+ goto out; /* success, no need to create whiteout */
16900+
16901+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
16902+ if (IS_ERR(wh_dentry))
16903+ goto out_unpin;
16904+
16905+ /* returns with the parent is locked and wh_dentry is dget-ed */
16906+ goto out; /* success */
16907+
4f0767ce 16908+out_unpin:
4a4d8108 16909+ au_unpin(pin);
4f0767ce 16910+out:
4a4d8108 16911+ return wh_dentry;
1facf9fc 16912+}
16913+
4a4d8108
AM
16914+/*
16915+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
16916+ * in order to be revertible and save time for removing many child whiteouts
16917+ * under the dir.
16918+ * returns 1 when there are too many child whiteout and caller should remove
16919+ * them asynchronously. returns 0 when the number of children is enough small to
16920+ * remove now or the branch fs is a remote fs.
16921+ * otherwise return an error.
16922+ */
16923+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
16924+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 16925+{
4a4d8108
AM
16926+ int rmdir_later, err, dirwh;
16927+ struct dentry *h_dentry;
16928+ struct super_block *sb;
16929+
16930+ sb = dentry->d_sb;
16931+ SiMustAnyLock(sb);
16932+ h_dentry = au_h_dptr(dentry, bindex);
16933+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
16934+ if (unlikely(err))
16935+ goto out;
16936+
16937+ /* stop monitoring */
16938+ au_hn_free(au_hi(dentry->d_inode, bindex));
16939+
16940+ if (!au_test_fs_remote(h_dentry->d_sb)) {
16941+ dirwh = au_sbi(sb)->si_dirwh;
16942+ rmdir_later = (dirwh <= 1);
16943+ if (!rmdir_later)
16944+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
16945+ dirwh);
16946+ if (rmdir_later)
16947+ return rmdir_later;
16948+ }
1facf9fc 16949+
4a4d8108
AM
16950+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
16951+ if (unlikely(err)) {
16952+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
16953+ AuDLNPair(h_dentry), bindex, err);
16954+ err = 0;
16955+ }
dece6358 16956+
4f0767ce 16957+out:
4a4d8108
AM
16958+ AuTraceErr(err);
16959+ return err;
16960+}
1308ab2a 16961+
4a4d8108
AM
16962+/*
16963+ * final procedure for deleting a entry.
16964+ * maintain dentry and iattr.
16965+ */
16966+static void epilog(struct inode *dir, struct dentry *dentry,
16967+ aufs_bindex_t bindex)
16968+{
16969+ struct inode *inode;
1308ab2a 16970+
4a4d8108
AM
16971+ inode = dentry->d_inode;
16972+ d_drop(dentry);
16973+ inode->i_ctime = dir->i_ctime;
1308ab2a 16974+
4a4d8108
AM
16975+ if (au_ibstart(dir) == bindex)
16976+ au_cpup_attr_timesizes(dir);
16977+ dir->i_version++;
1facf9fc 16978+}
16979+
4a4d8108
AM
16980+/*
16981+ * when an error happened, remove the created whiteout and revert everything.
16982+ */
7f207e10
AM
16983+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
16984+ aufs_bindex_t bwh, struct dentry *wh_dentry,
16985+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 16986+{
4a4d8108
AM
16987+ int rerr;
16988+ struct path h_path = {
16989+ .dentry = wh_dentry,
7f207e10 16990+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 16991+ };
dece6358 16992+
7f207e10 16993+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
16994+ if (!rerr) {
16995+ au_set_dbwh(dentry, bwh);
16996+ au_dtime_revert(dt);
16997+ return 0;
16998+ }
dece6358 16999+
4a4d8108
AM
17000+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
17001+ AuDLNPair(dentry), err, rerr);
17002+ return -EIO;
1facf9fc 17003+}
17004+
4a4d8108 17005+/* ---------------------------------------------------------------------- */
1facf9fc 17006+
4a4d8108 17007+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 17008+{
4a4d8108
AM
17009+ int err;
17010+ aufs_bindex_t bwh, bindex, bstart;
17011+ struct au_dtime dt;
17012+ struct au_pin pin;
17013+ struct path h_path;
17014+ struct inode *inode, *h_dir;
17015+ struct dentry *parent, *wh_dentry;
1facf9fc 17016+
4a4d8108 17017+ IMustLock(dir);
027c5e7a
AM
17018+
17019+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17020+ if (unlikely(err))
17021+ goto out;
17022+ err = au_d_hashed_positive(dentry);
17023+ if (unlikely(err))
17024+ goto out_unlock;
4a4d8108 17025+ inode = dentry->d_inode;
4a4d8108 17026+ IMustLock(inode);
027c5e7a
AM
17027+ err = -EISDIR;
17028+ if (unlikely(S_ISDIR(inode->i_mode)))
17029+ goto out_unlock; /* possible? */
1facf9fc 17030+
4a4d8108
AM
17031+ bstart = au_dbstart(dentry);
17032+ bwh = au_dbwh(dentry);
17033+ bindex = -1;
027c5e7a
AM
17034+ parent = dentry->d_parent; /* dir inode is locked */
17035+ di_write_lock_parent(parent);
4a4d8108
AM
17036+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
17037+ err = PTR_ERR(wh_dentry);
17038+ if (IS_ERR(wh_dentry))
027c5e7a 17039+ goto out_parent;
1facf9fc 17040+
4a4d8108
AM
17041+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
17042+ h_path.dentry = au_h_dptr(dentry, bstart);
17043+ dget(h_path.dentry);
17044+ if (bindex == bstart) {
17045+ h_dir = au_pinned_h_dir(&pin);
17046+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17047+ } else {
17048+ /* dir inode is locked */
17049+ h_dir = wh_dentry->d_parent->d_inode;
17050+ IMustLock(h_dir);
17051+ err = 0;
17052+ }
dece6358 17053+
4a4d8108 17054+ if (!err) {
7f207e10 17055+ vfsub_drop_nlink(inode);
4a4d8108
AM
17056+ epilog(dir, dentry, bindex);
17057+
17058+ /* update target timestamps */
17059+ if (bindex == bstart) {
17060+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
17061+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
17062+ } else
17063+ /* todo: this timestamp may be reverted later */
17064+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 17065+ goto out_unpin; /* success */
1facf9fc 17066+ }
17067+
4a4d8108
AM
17068+ /* revert */
17069+ if (wh_dentry) {
17070+ int rerr;
17071+
7f207e10 17072+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17073+ if (rerr)
17074+ err = rerr;
dece6358 17075+ }
1facf9fc 17076+
027c5e7a 17077+out_unpin:
4a4d8108
AM
17078+ au_unpin(&pin);
17079+ dput(wh_dentry);
17080+ dput(h_path.dentry);
027c5e7a 17081+out_parent:
4a4d8108 17082+ di_write_unlock(parent);
027c5e7a 17083+out_unlock:
4a4d8108 17084+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 17085+out:
4a4d8108 17086+ return err;
dece6358
AM
17087+}
17088+
4a4d8108 17089+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 17090+{
4a4d8108
AM
17091+ int err, rmdir_later;
17092+ aufs_bindex_t bwh, bindex, bstart;
17093+ struct au_dtime dt;
17094+ struct au_pin pin;
17095+ struct inode *inode;
17096+ struct dentry *parent, *wh_dentry, *h_dentry;
17097+ struct au_whtmp_rmdir *args;
1facf9fc 17098+
4a4d8108 17099+ IMustLock(dir);
027c5e7a
AM
17100+
17101+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
17102+ if (unlikely(err))
4a4d8108 17103+ goto out;
53392da6
AM
17104+ err = au_alive_dir(dentry);
17105+ if (unlikely(err))
027c5e7a 17106+ goto out_unlock;
53392da6 17107+ inode = dentry->d_inode;
4a4d8108 17108+ IMustLock(inode);
027c5e7a
AM
17109+ err = -ENOTDIR;
17110+ if (unlikely(!S_ISDIR(inode->i_mode)))
17111+ goto out_unlock; /* possible? */
dece6358 17112+
4a4d8108
AM
17113+ err = -ENOMEM;
17114+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
17115+ if (unlikely(!args))
17116+ goto out_unlock;
dece6358 17117+
4a4d8108
AM
17118+ parent = dentry->d_parent; /* dir inode is locked */
17119+ di_write_lock_parent(parent);
17120+ err = au_test_empty(dentry, &args->whlist);
17121+ if (unlikely(err))
027c5e7a 17122+ goto out_parent;
1facf9fc 17123+
4a4d8108
AM
17124+ bstart = au_dbstart(dentry);
17125+ bwh = au_dbwh(dentry);
17126+ bindex = -1;
17127+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
17128+ err = PTR_ERR(wh_dentry);
17129+ if (IS_ERR(wh_dentry))
027c5e7a 17130+ goto out_parent;
1facf9fc 17131+
4a4d8108
AM
17132+ h_dentry = au_h_dptr(dentry, bstart);
17133+ dget(h_dentry);
17134+ rmdir_later = 0;
17135+ if (bindex == bstart) {
17136+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
17137+ if (err > 0) {
17138+ rmdir_later = err;
17139+ err = 0;
17140+ }
17141+ } else {
17142+ /* stop monitoring */
17143+ au_hn_free(au_hi(inode, bstart));
17144+
17145+ /* dir inode is locked */
17146+ IMustLock(wh_dentry->d_parent->d_inode);
1facf9fc 17147+ err = 0;
17148+ }
17149+
4a4d8108 17150+ if (!err) {
027c5e7a 17151+ vfsub_dead_dir(inode);
4a4d8108
AM
17152+ au_set_dbdiropq(dentry, -1);
17153+ epilog(dir, dentry, bindex);
1308ab2a 17154+
4a4d8108
AM
17155+ if (rmdir_later) {
17156+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
17157+ args = NULL;
17158+ }
1308ab2a 17159+
4a4d8108 17160+ goto out_unpin; /* success */
1facf9fc 17161+ }
17162+
4a4d8108
AM
17163+ /* revert */
17164+ AuLabel(revert);
17165+ if (wh_dentry) {
17166+ int rerr;
1308ab2a 17167+
7f207e10 17168+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
4a4d8108
AM
17169+ if (rerr)
17170+ err = rerr;
1facf9fc 17171+ }
17172+
4f0767ce 17173+out_unpin:
4a4d8108
AM
17174+ au_unpin(&pin);
17175+ dput(wh_dentry);
17176+ dput(h_dentry);
027c5e7a 17177+out_parent:
4a4d8108
AM
17178+ di_write_unlock(parent);
17179+ if (args)
17180+ au_whtmp_rmdir_free(args);
4f0767ce 17181+out_unlock:
4a4d8108 17182+ aufs_read_unlock(dentry, AuLock_DW);
4f0767ce 17183+out:
4a4d8108
AM
17184+ AuTraceErr(err);
17185+ return err;
dece6358 17186+}
7f207e10
AM
17187diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
17188--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
53392da6 17189+++ linux/fs/aufs/i_op_ren.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 17190@@ -0,0 +1,1017 @@
1facf9fc 17191+/*
027c5e7a 17192+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 17193+ *
17194+ * This program, aufs is free software; you can redistribute it and/or modify
17195+ * it under the terms of the GNU General Public License as published by
17196+ * the Free Software Foundation; either version 2 of the License, or
17197+ * (at your option) any later version.
dece6358
AM
17198+ *
17199+ * This program is distributed in the hope that it will be useful,
17200+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17201+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17202+ * GNU General Public License for more details.
17203+ *
17204+ * You should have received a copy of the GNU General Public License
17205+ * along with this program; if not, write to the Free Software
17206+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17207+ */
17208+
17209+/*
4a4d8108
AM
17210+ * inode operation (rename entry)
17211+ * todo: this is crazy monster
1facf9fc 17212+ */
17213+
17214+#include "aufs.h"
17215+
4a4d8108
AM
17216+enum { AuSRC, AuDST, AuSrcDst };
17217+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 17218+
4a4d8108
AM
17219+#define AuRen_ISDIR 1
17220+#define AuRen_ISSAMEDIR (1 << 1)
17221+#define AuRen_WHSRC (1 << 2)
17222+#define AuRen_WHDST (1 << 3)
17223+#define AuRen_MNT_WRITE (1 << 4)
17224+#define AuRen_DT_DSTDIR (1 << 5)
17225+#define AuRen_DIROPQ (1 << 6)
17226+#define AuRen_CPUP (1 << 7)
17227+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
17228+#define au_fset_ren(flags, name) \
17229+ do { (flags) |= AuRen_##name; } while (0)
17230+#define au_fclr_ren(flags, name) \
17231+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 17232+
4a4d8108
AM
17233+struct au_ren_args {
17234+ struct {
17235+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
17236+ *wh_dentry;
17237+ struct inode *dir, *inode;
17238+ struct au_hinode *hdir;
17239+ struct au_dtime dt[AuParentChild];
17240+ aufs_bindex_t bstart;
17241+ } sd[AuSrcDst];
1facf9fc 17242+
4a4d8108
AM
17243+#define src_dentry sd[AuSRC].dentry
17244+#define src_dir sd[AuSRC].dir
17245+#define src_inode sd[AuSRC].inode
17246+#define src_h_dentry sd[AuSRC].h_dentry
17247+#define src_parent sd[AuSRC].parent
17248+#define src_h_parent sd[AuSRC].h_parent
17249+#define src_wh_dentry sd[AuSRC].wh_dentry
17250+#define src_hdir sd[AuSRC].hdir
17251+#define src_h_dir sd[AuSRC].hdir->hi_inode
17252+#define src_dt sd[AuSRC].dt
17253+#define src_bstart sd[AuSRC].bstart
1facf9fc 17254+
4a4d8108
AM
17255+#define dst_dentry sd[AuDST].dentry
17256+#define dst_dir sd[AuDST].dir
17257+#define dst_inode sd[AuDST].inode
17258+#define dst_h_dentry sd[AuDST].h_dentry
17259+#define dst_parent sd[AuDST].parent
17260+#define dst_h_parent sd[AuDST].h_parent
17261+#define dst_wh_dentry sd[AuDST].wh_dentry
17262+#define dst_hdir sd[AuDST].hdir
17263+#define dst_h_dir sd[AuDST].hdir->hi_inode
17264+#define dst_dt sd[AuDST].dt
17265+#define dst_bstart sd[AuDST].bstart
17266+
17267+ struct dentry *h_trap;
17268+ struct au_branch *br;
17269+ struct au_hinode *src_hinode;
17270+ struct path h_path;
17271+ struct au_nhash whlist;
027c5e7a 17272+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 17273+
1308ab2a 17274+ unsigned int flags;
1facf9fc 17275+
4a4d8108
AM
17276+ struct au_whtmp_rmdir *thargs;
17277+ struct dentry *h_dst;
17278+};
1308ab2a 17279+
4a4d8108 17280+/* ---------------------------------------------------------------------- */
1308ab2a 17281+
4a4d8108
AM
17282+/*
17283+ * functions for reverting.
17284+ * when an error happens in a single rename system call, we should revert
17285+ * everything as if nothing happened.
17286+ * we don't need to revert the copy-up/down of the parent dir since it is
17287+ * harmless.
17288+ */
1facf9fc 17289+
4a4d8108
AM
17290+#define RevertFailure(fmt, ...) do { \
17291+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
17292+ ##__VA_ARGS__, err, rerr); \
17293+ err = -EIO; \
17294+} while (0)
1facf9fc 17295+
4a4d8108 17296+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 17297+{
4a4d8108 17298+ int rerr;
1facf9fc 17299+
4a4d8108
AM
17300+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17301+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
17302+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 17303+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108
AM
17304+ if (rerr)
17305+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
17306+}
1facf9fc 17307+
4a4d8108
AM
17308+static void au_ren_rev_rename(int err, struct au_ren_args *a)
17309+{
17310+ int rerr;
1facf9fc 17311+
4a4d8108
AM
17312+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
17313+ a->br, /*nd*/NULL);
17314+ rerr = PTR_ERR(a->h_path.dentry);
17315+ if (IS_ERR(a->h_path.dentry)) {
17316+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
17317+ return;
1facf9fc 17318+ }
17319+
4a4d8108
AM
17320+ rerr = vfsub_rename(a->dst_h_dir,
17321+ au_h_dptr(a->src_dentry, a->btgt),
17322+ a->src_h_dir, &a->h_path);
17323+ d_drop(a->h_path.dentry);
17324+ dput(a->h_path.dentry);
17325+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
17326+ if (rerr)
17327+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
1facf9fc 17328+}
17329+
4a4d8108 17330+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
1facf9fc 17331+{
4a4d8108 17332+ int rerr;
1facf9fc 17333+
4a4d8108
AM
17334+ a->h_path.dentry = a->dst_h_dentry;
17335+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
17336+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
17337+ au_set_dbstart(a->src_dentry, a->src_bstart);
17338+ if (rerr)
17339+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
1facf9fc 17340+}
17341+
4a4d8108 17342+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 17343+{
4a4d8108 17344+ int rerr;
dece6358 17345+
4a4d8108
AM
17346+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
17347+ a->br, /*nd*/NULL);
17348+ rerr = PTR_ERR(a->h_path.dentry);
17349+ if (IS_ERR(a->h_path.dentry)) {
17350+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
17351+ return;
17352+ }
17353+ if (a->h_path.dentry->d_inode) {
17354+ d_drop(a->h_path.dentry);
17355+ dput(a->h_path.dentry);
17356+ return;
dece6358
AM
17357+ }
17358+
4a4d8108
AM
17359+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
17360+ d_drop(a->h_path.dentry);
17361+ dput(a->h_path.dentry);
17362+ if (!rerr)
17363+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
17364+ else
17365+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
17366+}
1308ab2a 17367+
4a4d8108
AM
17368+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
17369+{
17370+ int rerr;
1308ab2a 17371+
4a4d8108
AM
17372+ a->h_path.dentry = a->src_wh_dentry;
17373+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 17374+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108
AM
17375+ if (rerr)
17376+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
17377+}
4a4d8108 17378+#undef RevertFailure
1facf9fc 17379+
1308ab2a 17380+/* ---------------------------------------------------------------------- */
17381+
4a4d8108
AM
17382+/*
17383+ * when we have to copyup the renaming entry, do it with the rename-target name
17384+ * in order to minimize the cost (the later actual rename is unnecessary).
17385+ * otherwise rename it on the target branch.
17386+ */
17387+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 17388+{
dece6358 17389+ int err;
4a4d8108 17390+ struct dentry *d;
1facf9fc 17391+
4a4d8108
AM
17392+ d = a->src_dentry;
17393+ if (au_dbstart(d) == a->btgt) {
17394+ a->h_path.dentry = a->dst_h_dentry;
17395+ if (au_ftest_ren(a->flags, DIROPQ)
17396+ && au_dbdiropq(d) == a->btgt)
17397+ au_fclr_ren(a->flags, DIROPQ);
17398+ AuDebugOn(au_dbstart(d) != a->btgt);
17399+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
17400+ a->dst_h_dir, &a->h_path);
17401+ } else {
17402+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17403+ struct file *h_file;
1308ab2a 17404+
4a4d8108
AM
17405+ au_fset_ren(a->flags, CPUP);
17406+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17407+ au_set_dbstart(d, a->btgt);
17408+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
17409+ h_file = au_h_open_pre(d, a->src_bstart);
17410+ if (IS_ERR(h_file)) {
17411+ err = PTR_ERR(h_file);
17412+ h_file = NULL;
17413+ } else
17414+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
17415+ !AuCpup_DTIME, a->dst_parent);
17416+ mutex_unlock(h_mtx);
17417+ au_h_open_post(d, a->src_bstart, h_file);
17418+ if (!err) {
17419+ d = a->dst_dentry;
17420+ au_set_h_dptr(d, a->btgt, NULL);
17421+ au_update_dbstart(d);
17422+ } else {
17423+ au_set_h_dptr(d, a->btgt, NULL);
17424+ au_set_dbstart(d, a->src_bstart);
17425+ }
1308ab2a 17426+ }
027c5e7a
AM
17427+ if (!err && a->h_dst)
17428+ /* it will be set to dinfo later */
17429+ dget(a->h_dst);
1facf9fc 17430+
dece6358
AM
17431+ return err;
17432+}
1facf9fc 17433+
4a4d8108
AM
17434+/* cf. aufs_rmdir() */
17435+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 17436+{
4a4d8108
AM
17437+ int err;
17438+ struct inode *dir;
1facf9fc 17439+
4a4d8108
AM
17440+ dir = a->dst_dir;
17441+ SiMustAnyLock(dir->i_sb);
17442+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
17443+ au_sbi(dir->i_sb)->si_dirwh)
17444+ || au_test_fs_remote(a->h_dst->d_sb)) {
17445+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
17446+ if (unlikely(err))
17447+ pr_warning("failed removing whtmp dir %.*s (%d), "
17448+ "ignored.\n", AuDLNPair(a->h_dst), err);
17449+ } else {
17450+ au_nhash_wh_free(&a->thargs->whlist);
17451+ a->thargs->whlist = a->whlist;
17452+ a->whlist.nh_num = 0;
17453+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
17454+ dput(a->h_dst);
17455+ a->thargs = NULL;
17456+ }
17457+
17458+ return 0;
1308ab2a 17459+}
1facf9fc 17460+
4a4d8108
AM
17461+/* make the source dir 'opaque' on the target branch; returns 0 or -errno. */
17462+static int au_ren_diropq(struct au_ren_args *a)
17463+{
17464+ int err;
17465+ struct dentry *diropq;
1facf9fc 17466+
4a4d8108 17467+ err = 0;
027c5e7a 17468+ a->src_bdiropq = au_dbdiropq(a->src_dentry); /* saved for au_ren_rev_diropq() */
4a4d8108
AM
17469+ a->src_hinode = au_hi(a->src_inode, a->btgt);
17470+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
17471+ diropq = au_diropq_create(a->src_dentry, a->btgt);
17472+ au_hn_imtx_unlock(a->src_hinode);
17473+ if (IS_ERR(diropq))
17474+ return PTR_ERR(diropq); /* an ERR_PTR must never be passed to dput() */
17475+ dput(diropq);
1facf9fc 17476+
4a4d8108
AM
17477+ return err;
17478+}
1facf9fc 17479+
4a4d8108
AM
17480+static int do_rename(struct au_ren_args *a)
17481+{
17482+ int err;
17483+ struct dentry *d, *h_d;
1facf9fc 17484+
4a4d8108
AM
17485+ /* prepare workqueue args for asynchronous rmdir */
17486+ h_d = a->dst_h_dentry;
17487+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
17488+ err = -ENOMEM;
17489+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
17490+ if (unlikely(!a->thargs))
17491+ goto out;
17492+ a->h_dst = dget(h_d);
17493+ }
1facf9fc 17494+
4a4d8108
AM
17495+ /* create whiteout for src_dentry */
17496+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
17497+ a->src_bwh = au_dbwh(a->src_dentry);
17498+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
17499+ a->src_wh_dentry
17500+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
17501+ err = PTR_ERR(a->src_wh_dentry);
17502+ if (IS_ERR(a->src_wh_dentry))
17503+ goto out_thargs;
17504+ }
1facf9fc 17505+
4a4d8108
AM
17506+ /* lookup whiteout for dentry */
17507+ if (au_ftest_ren(a->flags, WHDST)) {
17508+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
17509+ a->br);
17510+ err = PTR_ERR(h_d);
17511+ if (IS_ERR(h_d))
17512+ goto out_whsrc;
17513+ if (!h_d->d_inode)
17514+ dput(h_d);
17515+ else
17516+ a->dst_wh_dentry = h_d;
17517+ }
1facf9fc 17518+
4a4d8108
AM
17519+ /* rename dentry to tmpwh */
17520+ if (a->thargs) {
17521+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
17522+ if (unlikely(err))
17523+ goto out_whdst;
dece6358 17524+
4a4d8108
AM
17525+ d = a->dst_dentry;
17526+ au_set_h_dptr(d, a->btgt, NULL);
17527+ err = au_lkup_neg(d, a->btgt);
17528+ if (unlikely(err))
17529+ goto out_whtmp;
17530+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
17531+ }
1facf9fc 17532+
4a4d8108
AM
17533+ /* cpup src */
17534+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
17535+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
17536+ struct file *h_file;
1facf9fc 17537+
4a4d8108
AM
17538+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
17539+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
17540+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
17541+ if (IS_ERR(h_file)) {
17542+ err = PTR_ERR(h_file);
17543+ h_file = NULL;
17544+ } else
17545+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
17546+ !AuCpup_DTIME);
17547+ mutex_unlock(h_mtx);
17548+ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
17549+ if (unlikely(err))
17550+ goto out_whtmp;
17551+ }
1facf9fc 17552+
4a4d8108
AM
17553+ /* rename by vfs_rename or cpup */
17554+ d = a->dst_dentry;
17555+ if (au_ftest_ren(a->flags, ISDIR)
17556+ && (a->dst_wh_dentry
17557+ || au_dbdiropq(d) == a->btgt
17558+ /* hide the lower to keep xino */
17559+ || a->btgt < au_dbend(d)
17560+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
17561+ au_fset_ren(a->flags, DIROPQ);
17562+ err = au_ren_or_cpup(a);
17563+ if (unlikely(err))
17564+ /* leave the copied-up one */
17565+ goto out_whtmp;
1308ab2a 17566+
4a4d8108
AM
17567+ /* make dir opaque */
17568+ if (au_ftest_ren(a->flags, DIROPQ)) {
17569+ err = au_ren_diropq(a);
17570+ if (unlikely(err))
17571+ goto out_rename;
17572+ }
1308ab2a 17573+
4a4d8108
AM
17574+ /* update target timestamps */
17575+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
17576+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
17577+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
17578+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
1facf9fc 17579+
4a4d8108
AM
17580+ /* remove whiteout for dentry */
17581+ if (a->dst_wh_dentry) {
17582+ a->h_path.dentry = a->dst_wh_dentry;
17583+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
17584+ a->dst_dentry);
17585+ if (unlikely(err))
17586+ goto out_diropq;
17587+ }
1facf9fc 17588+
4a4d8108
AM
17589+ /* remove whtmp */
17590+ if (a->thargs)
17591+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 17592+
4a4d8108
AM
17593+ err = 0;
17594+ goto out_success;
17595+
4f0767ce 17596+out_diropq:
4a4d8108
AM
17597+ if (au_ftest_ren(a->flags, DIROPQ))
17598+ au_ren_rev_diropq(err, a);
4f0767ce 17599+out_rename:
4a4d8108
AM
17600+ if (!au_ftest_ren(a->flags, CPUP))
17601+ au_ren_rev_rename(err, a);
17602+ else
17603+ au_ren_rev_cpup(err, a);
027c5e7a 17604+ dput(a->h_dst);
4f0767ce 17605+out_whtmp:
4a4d8108
AM
17606+ if (a->thargs)
17607+ au_ren_rev_whtmp(err, a);
4f0767ce 17608+out_whdst:
4a4d8108
AM
17609+ dput(a->dst_wh_dentry);
17610+ a->dst_wh_dentry = NULL;
4f0767ce 17611+out_whsrc:
4a4d8108
AM
17612+ if (a->src_wh_dentry)
17613+ au_ren_rev_whsrc(err, a);
4f0767ce 17614+out_success:
4a4d8108
AM
17615+ dput(a->src_wh_dentry);
17616+ dput(a->dst_wh_dentry);
4f0767ce 17617+out_thargs:
4a4d8108
AM
17618+ if (a->thargs) {
17619+ dput(a->h_dst);
17620+ au_whtmp_rmdir_free(a->thargs);
17621+ a->thargs = NULL;
17622+ }
4f0767ce 17623+out:
4a4d8108 17624+ return err;
dece6358 17625+}
1facf9fc 17626+
1308ab2a 17627+/* ---------------------------------------------------------------------- */
1facf9fc 17628+
4a4d8108
AM
17629+/*
17630+ * test if @dentry dir can be rename destination or not.
17631+ * success means, it is a logically empty dir.
17632+ */
17633+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 17634+{
4a4d8108 17635+ return au_test_empty(dentry, whlist);
1308ab2a 17636+}
1facf9fc 17637+
4a4d8108
AM
17638+/*
17639+ * test if @dentry dir can be rename source or not.
17640+ * if it can, return 0.
17641+ * success means,
17642+ * - it is a logically empty dir.
17643+ * - or, it exists on writable branch and has no children including whiteouts
17644+ * on the lower branch.
17645+ */
17646+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
17647+{
17648+ int err;
17649+ unsigned int rdhash;
17650+ aufs_bindex_t bstart;
1facf9fc 17651+
4a4d8108
AM
17652+ bstart = au_dbstart(dentry);
17653+ if (bstart != btgt) {
17654+ struct au_nhash whlist;
dece6358 17655+
4a4d8108
AM
17656+ SiMustAnyLock(dentry->d_sb);
17657+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
17658+ if (!rdhash)
17659+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
17660+ dentry));
17661+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
17662+ if (unlikely(err))
17663+ goto out;
17664+ err = au_test_empty(dentry, &whlist);
17665+ au_nhash_wh_free(&whlist);
17666+ goto out;
17667+ }
dece6358 17668+
4a4d8108
AM
17669+ if (bstart == au_dbtaildir(dentry))
17670+ return 0; /* success */
dece6358 17671+
4a4d8108 17672+ err = au_test_empty_lower(dentry);
1facf9fc 17673+
4f0767ce 17674+out:
4a4d8108
AM
17675+ if (err == -ENOTEMPTY) {
17676+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
17677+ " is not supported\n");
17678+ err = -EXDEV;
17679+ }
17680+ return err;
17681+}
1308ab2a 17682+
4a4d8108
AM
17683+/* side effect: sets whlist and h_dentry */
17684+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 17685+{
4a4d8108
AM
17686+ int err;
17687+ unsigned int rdhash;
17688+ struct dentry *d;
1facf9fc 17689+
4a4d8108
AM
17690+ d = a->dst_dentry;
17691+ SiMustAnyLock(d->d_sb);
1facf9fc 17692+
4a4d8108
AM
17693+ err = 0;
17694+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
17695+ rdhash = au_sbi(d->d_sb)->si_rdhash;
17696+ if (!rdhash)
17697+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
17698+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
17699+ if (unlikely(err))
17700+ goto out;
1308ab2a 17701+
4a4d8108
AM
17702+ au_set_dbstart(d, a->dst_bstart);
17703+ err = may_rename_dstdir(d, &a->whlist);
17704+ au_set_dbstart(d, a->btgt);
17705+ }
17706+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
17707+ if (unlikely(err))
17708+ goto out;
17709+
17710+ d = a->src_dentry;
17711+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
17712+ if (au_ftest_ren(a->flags, ISDIR)) {
17713+ err = may_rename_srcdir(d, a->btgt);
17714+ if (unlikely(err)) {
17715+ au_nhash_wh_free(&a->whlist);
17716+ a->whlist.nh_num = 0;
17717+ }
17718+ }
4f0767ce 17719+out:
4a4d8108 17720+ return err;
1facf9fc 17721+}
17722+
4a4d8108 17723+/* ---------------------------------------------------------------------- */
1facf9fc 17724+
4a4d8108
AM
17725+/*
17726+ * simple tests for rename.
17727+ * following the checks in vfs, plus the parent-child relationship.
17728+ */
17729+static int au_may_ren(struct au_ren_args *a)
17730+{
17731+ int err, isdir;
17732+ struct inode *h_inode;
1facf9fc 17733+
4a4d8108
AM
17734+ if (a->src_bstart == a->btgt) {
17735+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
17736+ au_ftest_ren(a->flags, ISDIR));
17737+ if (unlikely(err))
17738+ goto out;
17739+ err = -EINVAL;
17740+ if (unlikely(a->src_h_dentry == a->h_trap))
17741+ goto out;
17742+ }
1facf9fc 17743+
4a4d8108
AM
17744+ err = 0;
17745+ if (a->dst_bstart != a->btgt)
17746+ goto out;
1facf9fc 17747+
027c5e7a
AM
17748+ err = -ENOTEMPTY;
17749+ if (unlikely(a->dst_h_dentry == a->h_trap))
17750+ goto out;
17751+
4a4d8108
AM
17752+ err = -EIO;
17753+ h_inode = a->dst_h_dentry->d_inode;
17754+ isdir = !!au_ftest_ren(a->flags, ISDIR);
17755+ if (!a->dst_dentry->d_inode) {
17756+ if (unlikely(h_inode))
17757+ goto out;
17758+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
17759+ isdir);
17760+ } else {
17761+ if (unlikely(!h_inode || !h_inode->i_nlink))
17762+ goto out;
17763+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
17764+ isdir);
17765+ if (unlikely(err))
17766+ goto out;
4a4d8108 17767+ }
1facf9fc 17768+
4f0767ce 17769+out:
4a4d8108
AM
17770+ if (unlikely(err == -ENOENT || err == -EEXIST))
17771+ err = -EIO;
17772+ AuTraceErr(err);
17773+ return err;
17774+}
1facf9fc 17775+
1308ab2a 17776+/* ---------------------------------------------------------------------- */
1facf9fc 17777+
4a4d8108
AM
17778+/*
17779+ * locking order
17780+ * (VFS)
17781+ * - src_dir and dir by lock_rename()
17782+ * - inode if it exists
17783+ * (aufs)
17784+ * - lock all
17785+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
17786+ * + si_read_lock
17787+ * + di_write_lock2_child()
17788+ * + di_write_lock_child()
17789+ * + ii_write_lock_child()
17790+ * + di_write_lock_child2()
17791+ * + ii_write_lock_child2()
17792+ * + src_parent and parent
17793+ * + di_write_lock_parent()
17794+ * + ii_write_lock_parent()
17795+ * + di_write_lock_parent2()
17796+ * + ii_write_lock_parent2()
17797+ * + lower src_dir and dir by vfsub_lock_rename()
17798+ * + verify every relationship between child and parent. if any
17799+ * of them fails, unlock all and return -EBUSY.
17800+ */
17801+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 17802+{
4a4d8108
AM
17803+ /* undo au_ren_lock() in reverse order: drop the write access to the
17804+ * branch mount (only when MNT_WRITE is set), then release the lower
17805+ * parent dirs locked by vfsub_lock_rename(). */
17806+ if (au_ftest_ren(a->flags, MNT_WRITE))
17807+ mnt_drop_write(a->br->br_mnt);
17808+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
17809+ a->dst_h_parent, a->dst_hdir);
1308ab2a 17810+}
17811+
4a4d8108 17812+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 17813+{
4a4d8108
AM
17814+ int err;
17815+ unsigned int udba;
1308ab2a 17816+
4a4d8108
AM
17817+ err = 0;
17818+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
17819+ a->src_hdir = au_hi(a->src_dir, a->btgt);
17820+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
17821+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
17822+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
17823+ a->dst_h_parent, a->dst_hdir);
17824+ udba = au_opt_udba(a->src_dentry->d_sb);
17825+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
17826+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
17827+ err = au_busy_or_stale();
17828+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
17829+ err = au_h_verify(a->src_h_dentry, udba,
17830+ a->src_h_parent->d_inode, a->src_h_parent,
17831+ a->br);
17832+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
17833+ err = au_h_verify(a->dst_h_dentry, udba,
17834+ a->dst_h_parent->d_inode, a->dst_h_parent,
17835+ a->br);
17836+ if (!err) {
17837+ err = mnt_want_write(a->br->br_mnt);
17838+ if (unlikely(err))
17839+ goto out_unlock;
17840+ au_fset_ren(a->flags, MNT_WRITE);
17841+ goto out; /* success */
17842+ }
17843+
17844+ err = au_busy_or_stale();
17845+
4f0767ce 17846+out_unlock:
4a4d8108 17847+ au_ren_unlock(a);
4f0767ce 17848+out:
4a4d8108 17849+ return err;
1facf9fc 17850+}
17851+
17852+/* ---------------------------------------------------------------------- */
17853+
4a4d8108 17854+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 17855+{
4a4d8108 17856+ struct inode *dir;
dece6358 17857+
4a4d8108
AM
17858+ dir = a->dst_dir;
17859+ dir->i_version++;
17860+ if (au_ftest_ren(a->flags, ISDIR)) {
17861+ /* is this updating defined in POSIX? */
17862+ au_cpup_attr_timesizes(a->src_inode);
17863+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 17864+ }
027c5e7a 17865+
4a4d8108
AM
17866+ if (au_ibstart(dir) == a->btgt)
17867+ au_cpup_attr_timesizes(dir);
dece6358 17868+
4a4d8108
AM
17869+ if (au_ftest_ren(a->flags, ISSAMEDIR))
17870+ return;
dece6358 17871+
4a4d8108
AM
17872+ dir = a->src_dir;
17873+ dir->i_version++;
17874+ if (au_ftest_ren(a->flags, ISDIR))
17875+ au_cpup_attr_nlink(dir, /*force*/1);
17876+ if (au_ibstart(dir) == a->btgt)
17877+ au_cpup_attr_timesizes(dir);
1facf9fc 17878+}
17879+
4a4d8108 17880+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 17881+{
4a4d8108
AM
17882+ aufs_bindex_t bend, bindex;
17883+ struct dentry *d, *h_d;
17884+ struct inode *i, *h_i;
17885+ struct super_block *sb;
dece6358 17886+
027c5e7a
AM
17887+ d = a->dst_dentry;
17888+ d_drop(d);
17889+ if (a->h_dst)
17890+ /* already dget-ed by au_ren_or_cpup() */
17891+ au_set_h_dptr(d, a->btgt, a->h_dst);
17892+
17893+ i = a->dst_inode;
17894+ if (i) {
17895+ if (!au_ftest_ren(a->flags, ISDIR))
17896+ vfsub_drop_nlink(i);
17897+ else {
17898+ vfsub_dead_dir(i);
17899+ au_cpup_attr_timesizes(i);
17900+ }
17901+ au_update_dbrange(d, /*do_put_zero*/1);
17902+ } else {
17903+ bend = a->btgt;
17904+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
17905+ au_set_h_dptr(d, bindex, NULL);
17906+ bend = au_dbend(d);
17907+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
17908+ au_set_h_dptr(d, bindex, NULL);
17909+ au_update_dbrange(d, /*do_put_zero*/0);
17910+ }
17911+
4a4d8108
AM
17912+ d = a->src_dentry;
17913+ au_set_dbwh(d, -1);
17914+ bend = au_dbend(d);
17915+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17916+ h_d = au_h_dptr(d, bindex);
17917+ if (h_d)
17918+ au_set_h_dptr(d, bindex, NULL);
17919+ }
17920+ au_set_dbend(d, a->btgt);
17921+
17922+ sb = d->d_sb;
17923+ i = a->src_inode;
17924+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
17925+ return; /* success */
17926+
17927+ bend = au_ibend(i);
17928+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
17929+ h_i = au_h_iptr(i, bindex);
17930+ if (h_i) {
17931+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
17932+ /* ignore this error */
17933+ au_set_h_iptr(i, bindex, NULL, 0);
17934+ }
17935+ }
17936+ au_set_ibend(i, a->btgt);
1308ab2a 17937+}
dece6358 17938+
4a4d8108
AM
17939+/* ---------------------------------------------------------------------- */
17940+
17941+/* mainly for link(2) and rename(2) */
17942+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 17943+{
4a4d8108
AM
17944+ aufs_bindex_t bdiropq, bwh;
17945+ struct dentry *parent;
17946+ struct au_branch *br;
17947+
17948+ parent = dentry->d_parent;
17949+ IMustLock(parent->d_inode); /* dir is locked */
17950+
17951+ bdiropq = au_dbdiropq(parent);
17952+ bwh = au_dbwh(dentry);
17953+ br = au_sbr(dentry->d_sb, btgt);
17954+ if (au_br_rdonly(br)
17955+ || (0 <= bdiropq && bdiropq < btgt)
17956+ || (0 <= bwh && bwh < btgt))
17957+ btgt = -1;
17958+
17959+ AuDbg("btgt %d\n", btgt);
17960+ return btgt;
1facf9fc 17961+}
17962+
4a4d8108
AM
17963+/* sets src_bstart, dst_bstart and btgt */
17964+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 17965+{
4a4d8108
AM
17966+ int err;
17967+ struct au_wr_dir_args wr_dir_args = {
17968+ /* .force_btgt = -1, */
17969+ .flags = AuWrDir_ADD_ENTRY
17970+ };
dece6358 17971+
4a4d8108
AM
17972+ a->src_bstart = au_dbstart(a->src_dentry);
17973+ a->dst_bstart = au_dbstart(a->dst_dentry);
17974+ if (au_ftest_ren(a->flags, ISDIR))
17975+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
17976+ wr_dir_args.force_btgt = a->src_bstart;
17977+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
17978+ wr_dir_args.force_btgt = a->dst_bstart;
17979+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
17980+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
17981+ a->btgt = err;
dece6358 17982+
4a4d8108 17983+ return err;
1facf9fc 17984+}
17985+
4a4d8108 17986+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 17987+{
4a4d8108
AM
17988+ a->h_path.dentry = a->src_h_parent;
17989+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
17990+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
17991+ a->h_path.dentry = a->dst_h_parent;
17992+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
17993+ }
1facf9fc 17994+
4a4d8108
AM
17995+ au_fclr_ren(a->flags, DT_DSTDIR);
17996+ if (!au_ftest_ren(a->flags, ISDIR))
17997+ return;
dece6358 17998+
4a4d8108
AM
17999+ a->h_path.dentry = a->src_h_dentry;
18000+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
18001+ if (a->dst_h_dentry->d_inode) {
18002+ au_fset_ren(a->flags, DT_DSTDIR);
18003+ a->h_path.dentry = a->dst_h_dentry;
18004+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
18005+ }
1308ab2a 18006+}
dece6358 18007+
4a4d8108 18008+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 18009+{
4a4d8108
AM
18010+ struct dentry *h_d;
18011+ struct mutex *h_mtx;
18012+
18013+ au_dtime_revert(a->src_dt + AuPARENT);
18014+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
18015+ au_dtime_revert(a->dst_dt + AuPARENT);
18016+
18017+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
18018+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
18019+ h_mtx = &h_d->d_inode->i_mutex;
18020+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18021+ au_dtime_revert(a->src_dt + AuCHILD);
18022+ mutex_unlock(h_mtx);
18023+
18024+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
18025+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
18026+ h_mtx = &h_d->d_inode->i_mutex;
18027+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18028+ au_dtime_revert(a->dst_dt + AuCHILD);
18029+ mutex_unlock(h_mtx);
1facf9fc 18030+ }
18031+ }
18032+}
18033+
4a4d8108
AM
18034+/* ---------------------------------------------------------------------- */
18035+
18036+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
18037+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 18038+{
e49829fe 18039+ int err, flags;
4a4d8108
AM
18040+ /* reduce stack space */
18041+ struct au_ren_args *a;
18042+
18043+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
18044+ IMustLock(_src_dir);
18045+ IMustLock(_dst_dir);
18046+
18047+ err = -ENOMEM;
18048+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
18049+ a = kzalloc(sizeof(*a), GFP_NOFS);
18050+ if (unlikely(!a))
18051+ goto out;
18052+
18053+ a->src_dir = _src_dir;
18054+ a->src_dentry = _src_dentry;
18055+ a->src_inode = a->src_dentry->d_inode;
18056+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
18057+ a->dst_dir = _dst_dir;
18058+ a->dst_dentry = _dst_dentry;
18059+ a->dst_inode = a->dst_dentry->d_inode;
18060+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
18061+ if (a->dst_inode) {
18062+ IMustLock(a->dst_inode);
18063+ au_igrab(a->dst_inode);
1facf9fc 18064+ }
1facf9fc 18065+
4a4d8108 18066+ err = -ENOTDIR;
027c5e7a 18067+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
4a4d8108
AM
18068+ if (S_ISDIR(a->src_inode->i_mode)) {
18069+ au_fset_ren(a->flags, ISDIR);
18070+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
18071+ goto out_free;
e49829fe
JR
18072+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18073+ AuLock_DIR | flags);
4a4d8108 18074+ } else
e49829fe
JR
18075+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
18076+ flags);
18077+ if (unlikely(err))
18078+ goto out_free;
1facf9fc 18079+
027c5e7a
AM
18080+ err = au_d_hashed_positive(a->src_dentry);
18081+ if (unlikely(err))
18082+ goto out_unlock;
18083+ err = -ENOENT;
18084+ if (a->dst_inode) {
18085+ /*
18086+ * If it is a dir, VFS unhash dst_dentry before this
18087+ * function. It means we cannot rely upon d_unhashed().
18088+ */
18089+ if (unlikely(!a->dst_inode->i_nlink))
18090+ goto out_unlock;
18091+ if (!S_ISDIR(a->dst_inode->i_mode)) {
18092+ err = au_d_hashed_positive(a->dst_dentry);
18093+ if (unlikely(err))
18094+ goto out_unlock;
18095+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
18096+ goto out_unlock;
18097+ } else if (unlikely(d_unhashed(a->dst_dentry)))
18098+ goto out_unlock;
18099+
4a4d8108
AM
18100+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
18101+ di_write_lock_parent(a->dst_parent);
1facf9fc 18102+
4a4d8108
AM
18103+ /* which branch we process */
18104+ err = au_ren_wbr(a);
18105+ if (unlikely(err < 0))
027c5e7a 18106+ goto out_parent;
4a4d8108
AM
18107+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
18108+ a->h_path.mnt = a->br->br_mnt;
1facf9fc 18109+
4a4d8108
AM
18110+ /* are they available to be renamed */
18111+ err = au_ren_may_dir(a);
18112+ if (unlikely(err))
18113+ goto out_children;
1facf9fc 18114+
4a4d8108
AM
18115+ /* prepare the writable parent dir on the same branch */
18116+ if (a->dst_bstart == a->btgt) {
18117+ au_fset_ren(a->flags, WHDST);
18118+ } else {
18119+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
18120+ if (unlikely(err))
18121+ goto out_children;
18122+ }
1facf9fc 18123+
4a4d8108
AM
18124+ if (a->src_dir != a->dst_dir) {
18125+ /*
18126+ * this temporary unlock is safe,
18127+ * because both dir->i_mutex are locked.
18128+ */
18129+ di_write_unlock(a->dst_parent);
18130+ di_write_lock_parent(a->src_parent);
18131+ err = au_wr_dir_need_wh(a->src_dentry,
18132+ au_ftest_ren(a->flags, ISDIR),
18133+ &a->btgt);
18134+ di_write_unlock(a->src_parent);
18135+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
18136+ au_fclr_ren(a->flags, ISSAMEDIR);
18137+ } else
18138+ err = au_wr_dir_need_wh(a->src_dentry,
18139+ au_ftest_ren(a->flags, ISDIR),
18140+ &a->btgt);
18141+ if (unlikely(err < 0))
18142+ goto out_children;
18143+ if (err)
18144+ au_fset_ren(a->flags, WHSRC);
1facf9fc 18145+
4a4d8108
AM
18146+ /* lock them all */
18147+ err = au_ren_lock(a);
18148+ if (unlikely(err))
18149+ goto out_children;
1facf9fc 18150+
4a4d8108
AM
18151+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
18152+ err = au_may_ren(a);
18153+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
18154+ err = -ENAMETOOLONG;
18155+ if (unlikely(err))
18156+ goto out_hdir;
1facf9fc 18157+
4a4d8108
AM
18158+ /* store timestamps to be revertible */
18159+ au_ren_dt(a);
1facf9fc 18160+
4a4d8108
AM
18161+ /* here we go */
18162+ err = do_rename(a);
18163+ if (unlikely(err))
18164+ goto out_dt;
18165+
18166+ /* update dir attributes */
18167+ au_ren_refresh_dir(a);
18168+
18169+ /* dput/iput all lower dentries */
18170+ au_ren_refresh(a);
18171+
18172+ goto out_hdir; /* success */
18173+
4f0767ce 18174+out_dt:
4a4d8108 18175+ au_ren_rev_dt(err, a);
4f0767ce 18176+out_hdir:
4a4d8108 18177+ au_ren_unlock(a);
4f0767ce 18178+out_children:
4a4d8108 18179+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
18180+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
18181+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
18182+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
18183+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 18184+ }
027c5e7a 18185+out_parent:
4a4d8108
AM
18186+ if (!err)
18187+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
18188+ else {
18189+ au_update_dbstart(a->dst_dentry);
18190+ if (!a->dst_inode)
18191+ d_drop(a->dst_dentry);
18192+ }
4a4d8108
AM
18193+ if (au_ftest_ren(a->flags, ISSAMEDIR))
18194+ di_write_unlock(a->dst_parent);
18195+ else
18196+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 18197+out_unlock:
4a4d8108 18198+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 18199+out_free:
4a4d8108
AM
18200+ iput(a->dst_inode);
18201+ if (a->thargs)
18202+ au_whtmp_rmdir_free(a->thargs);
18203+ kfree(a);
4f0767ce 18204+out:
4a4d8108
AM
18205+ AuTraceErr(err);
18206+ return err;
1308ab2a 18207+}
7f207e10
AM
18208diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
18209--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
53392da6 18210+++ linux/fs/aufs/Kconfig 2011-08-24 13:30:24.727980364 +0200
2cbb1c4b 18211@@ -0,0 +1,203 @@
4a4d8108
AM
18212+config AUFS_FS
18213+ tristate "Aufs (Advanced multi layered unification filesystem) support"
18214+ depends on EXPERIMENTAL
18215+ help
18216+ Aufs is a stackable unification filesystem such as Unionfs,
18217+ which unifies several directories and provides a merged single
18218+ directory.
18219+ In the early days, aufs was entirely re-designed and
18220+ re-implemented Unionfs Version 1.x series. Introducing many
18221+ original ideas, approaches and improvements, it becomes totally
18222+ different from Unionfs while keeping the basic features.
1facf9fc 18223+
4a4d8108
AM
18224+if AUFS_FS
18225+choice
18226+ prompt "Maximum number of branches"
18227+ default AUFS_BRANCH_MAX_127
18228+ help
18229+ Specifies the maximum number of branches (or member directories)
18230+ in a single aufs. The larger value consumes more system
18231+ resources and has a minor impact to performance.
18232+config AUFS_BRANCH_MAX_127
18233+ bool "127"
18234+ help
18235+ Specifies the maximum number of branches (or member directories)
18236+ in a single aufs. The larger value consumes more system
18237+ resources and has a minor impact to performance.
18238+config AUFS_BRANCH_MAX_511
18239+ bool "511"
18240+ help
18241+ Specifies the maximum number of branches (or member directories)
18242+ in a single aufs. The larger value consumes more system
18243+ resources and has a minor impact to performance.
18244+config AUFS_BRANCH_MAX_1023
18245+ bool "1023"
18246+ help
18247+ Specifies the maximum number of branches (or member directories)
18248+ in a single aufs. The larger value consumes more system
18249+ resources and has a minor impact to performance.
18250+config AUFS_BRANCH_MAX_32767
18251+ bool "32767"
18252+ help
18253+ Specifies the maximum number of branches (or member directories)
18254+ in a single aufs. The larger value consumes more system
18255+ resources and has a minor impact to performance.
18256+endchoice
1facf9fc 18257+
e49829fe
JR
18258+config AUFS_SBILIST
18259+ bool
18260+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
18261+ default y
18262+ help
18263+ Automatic configuration for internal use.
18264+ When aufs supports Magic SysRq or /proc, enabled automatically.
18265+
4a4d8108
AM
18266+config AUFS_HNOTIFY
18267+ bool "Detect direct branch access (bypassing aufs)"
18268+ help
18269+ If you want to modify files on branches directly, eg. bypassing aufs,
18270+ and want aufs to detect the changes of them fully, then enable this
18271+ option and use 'udba=notify' mount option.
7f207e10 18272+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
18273+ It will have a negative impact to the performance.
18274+ See detail in aufs.5.
dece6358 18275+
4a4d8108
AM
18276+choice
18277+ prompt "method" if AUFS_HNOTIFY
18278+ default AUFS_HFSNOTIFY
18279+config AUFS_HFSNOTIFY
18280+ bool "fsnotify"
18281+ select FSNOTIFY
4a4d8108 18282+endchoice
1facf9fc 18283+
4a4d8108
AM
18284+config AUFS_EXPORT
18285+ bool "NFS-exportable aufs"
2cbb1c4b 18286+ depends on EXPORTFS
4a4d8108
AM
18287+ help
18288+ If you want to export your mounted aufs via NFS, then enable this
18289+ option. There are several requirements for this configuration.
18290+ See detail in aufs.5.
1facf9fc 18291+
4a4d8108
AM
18292+config AUFS_INO_T_64
18293+ bool
18294+ depends on AUFS_EXPORT
18295+ depends on 64BIT && !(ALPHA || S390)
18296+ default y
18297+ help
18298+ Automatic configuration for internal use.
18299+ /* typedef unsigned long/int __kernel_ino_t */
18300+ /* alpha and s390x are int */
1facf9fc 18301+
4a4d8108
AM
18302+config AUFS_RDU
18303+ bool "Readdir in userspace"
18304+ help
18305+ Aufs has two methods to provide a merged view for a directory,
18306+ by a user-space library and by kernel-space natively. The latter
18307+ is always enabled but sometimes large and slow.
18308+ If you enable this option, install the library in aufs2-util
18309+ package, and set some environment variables for your readdir(3),
18310+ then the work will be handled in user-space which generally
18311+ shows better performance in most cases.
18312+ See detail in aufs.5.
1facf9fc 18313+
2cbb1c4b
JR
18314+config AUFS_PROC_MAP
18315+ bool "support for /proc/maps and lsof(1)"
18316+ depends on PROC_FS
18317+ help
18318+ When you issue mmap(2) in aufs, it is actually a direct mmap(2)
18319+ call to the file on the branch fs since the file in aufs is
18320+ purely virtual. And the file path printed in /proc/maps (and
18321+ others) will be the path on the branch fs. In most cases, it
18322+ does no harm. But some utilities like lsof(1) may get confused since
18323+ the utility or user may expect the file path in aufs to be
18324+ printed.
18325+ To address this issue, aufs provides a patch which introduces a
18326+ new member called vm_prfile into struct vm_area_struct. The patch
18327+ is meaningless without enabling this configuration since nobody
18328+ sets the new vm_prfile member.
18329+ If you don't apply the patch, then enabling this configuration
18330+ will cause a compile error.
18331+ This approach is fragile since if someone else makes some changes
18332+ around vm_file, then vm_prfile may not work anymore. As a
18333+ workaround such case, aufs provides this configuration. If you
18334+ disable it, then lsof(1) may produce incorrect result but the
18335+ problem will be gone even if the aufs patch is applied (I hope).
18336+
4a4d8108
AM
18337+config AUFS_SP_IATTR
18338+ bool "Respect the attributes (mtime/ctime mainly) of special files"
18339+ help
18340+ When you write something to a special file, some attributes of it
18341+ (mtime/ctime mainly) may be updated. Generally such updates are
18342+ less important (actually some device drivers and NFS ignore
18343+ it). But some applications (such as test programs) require
18344+ such updates. If you need these updates, then enable this
18345+ configuration which introduces some overhead.
18346+ Currently this configuration handles FIFO only.
1facf9fc 18347+
4a4d8108
AM
18348+config AUFS_SHWH
18349+ bool "Show whiteouts"
18350+ help
18351+ If you want to make the whiteouts in aufs visible, then enable
18352+ this option and specify 'shwh' mount option. Although it may
18353+ sound like philosophy or something, technically it
18354+ simply shows the names of whiteouts while keeping their behaviour.
1facf9fc 18355+
4a4d8108
AM
18356+config AUFS_BR_RAMFS
18357+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
18358+ help
18359+ If you want to use ramfs as an aufs branch fs, then enable this
18360+ option. Generally tmpfs is recommended.
18361+ Aufs prohibited them to be a branch fs by default, because
18362+ initramfs becomes unusable after switch_root or something
18363+ generally. If you set initramfs as an aufs branch and boot your
18364+ system by switch_root, you will meet a problem easily since the
18365+ files in initramfs may be inaccessible.
18366+ Unless you are going to use ramfs as an aufs branch fs without
18367+ switch_root or something, leave it N.
1facf9fc 18368+
4a4d8108
AM
18369+config AUFS_BR_FUSE
18370+ bool "Fuse fs as an aufs branch"
18371+ depends on FUSE_FS
18372+ select AUFS_POLL
18373+ help
18374+ If you want to use fuse-based userspace filesystem as an aufs
18375+ branch fs, then enable this option.
18376+ It implements the internal poll(2) operation which is
18377+ implemented by fuse only (currently).
1facf9fc 18378+
4a4d8108
AM
18379+config AUFS_POLL
18380+ bool
18381+ help
18382+ Automatic configuration for internal use.
1facf9fc 18383+
4a4d8108
AM
18384+config AUFS_BR_HFSPLUS
18385+ bool "Hfsplus as an aufs branch"
18386+ depends on HFSPLUS_FS
18387+ default y
18388+ help
18389+ If you want to use hfsplus fs as an aufs branch fs, then enable
18390+ this option. This option introduces a small overhead at
18391+ copying-up a file on hfsplus.
1facf9fc 18392+
4a4d8108
AM
18393+config AUFS_BDEV_LOOP
18394+ bool
18395+ depends on BLK_DEV_LOOP
18396+ default y
18397+ help
18398+ Automatic configuration for internal use.
18399+ Convert =[ym] into =y.
1308ab2a 18400+
4a4d8108
AM
18401+config AUFS_DEBUG
18402+ bool "Debug aufs"
18403+ help
18404+ Enable this to compile aufs internal debug code.
18405+ It will have a negative impact to the performance.
18406+
18407+config AUFS_MAGIC_SYSRQ
18408+ bool
18409+ depends on AUFS_DEBUG && MAGIC_SYSRQ
18410+ default y
18411+ help
18412+ Automatic configuration for internal use.
18413+ When aufs supports Magic SysRq, enabled automatically.
18414+endif
7f207e10
AM
18415diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
18416--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18417+++ linux/fs/aufs/loop.c 2011-08-24 13:30:24.734646739 +0200
87a755f4 18418@@ -0,0 +1,133 @@
1facf9fc 18419+/*
027c5e7a 18420+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18421+ *
18422+ * This program, aufs is free software; you can redistribute it and/or modify
18423+ * it under the terms of the GNU General Public License as published by
18424+ * the Free Software Foundation; either version 2 of the License, or
18425+ * (at your option) any later version.
dece6358
AM
18426+ *
18427+ * This program is distributed in the hope that it will be useful,
18428+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18429+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18430+ * GNU General Public License for more details.
18431+ *
18432+ * You should have received a copy of the GNU General Public License
18433+ * along with this program; if not, write to the Free Software
18434+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18435+ */
18436+
18437+/*
18438+ * support for loopback block device as a branch
18439+ */
18440+
18441+#include <linux/loop.h>
18442+#include "aufs.h"
18443+
18444+/*
18445+ * test if two lower dentries have overlapping branches.
18446+ */
b752ccd1 18447+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 18448+{
b752ccd1 18449+ struct super_block *h_sb;
1facf9fc 18450+ struct loop_device *l;
18451+
b752ccd1
AM
18452+ h_sb = h_adding->d_sb;
18453+ if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
1facf9fc 18454+ return 0;
18455+
b752ccd1
AM
18456+ l = h_sb->s_bdev->bd_disk->private_data;
18457+ h_adding = l->lo_backing_file->f_dentry;
18458+ /*
18459+ * h_adding can be local NFS.
18460+ * in this case aufs cannot detect the loop.
18461+ */
18462+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 18463+ return 1;
b752ccd1 18464+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 18465+}
18466+
18467+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
18468+int au_test_loopback_kthread(void)
18469+{
b752ccd1
AM
18470+ int ret;
18471+ struct task_struct *tsk = current;
18472+
18473+ ret = 0;
18474+ if (tsk->flags & PF_KTHREAD) {
18475+ const char c = tsk->comm[4];
18476+ ret = ('0' <= c && c <= '9'
18477+ && !strncmp(tsk->comm, "loop", 4));
18478+ }
1facf9fc 18479+
b752ccd1 18480+ return ret;
1facf9fc 18481+}
87a755f4
AM
18482+
18483+/* ---------------------------------------------------------------------- */
18484+
18485+#define au_warn_loopback_step 16
18486+static int au_warn_loopback_nelem = au_warn_loopback_step;
18487+static unsigned long *au_warn_loopback_array;
18488+
18489+void au_warn_loopback(struct super_block *h_sb)
18490+{
18491+ int i, new_nelem;
18492+ unsigned long *a, magic;
18493+ static DEFINE_SPINLOCK(spin);
18494+
18495+ magic = h_sb->s_magic;
18496+ spin_lock(&spin);
18497+ a = au_warn_loopback_array;
18498+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
18499+ if (a[i] == magic) {
18500+ spin_unlock(&spin);
18501+ return;
18502+ }
18503+
18504+ /* h_sb is new to us: remember it in the first unused slot and print */
18505+ if (i < au_warn_loopback_nelem) {
18506+ a[i] = magic;
18507+ goto pr;
18508+ }
18509+
18510+ /* no unused slot is left, expand the array */
18511+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
18512+ a = au_kzrealloc(au_warn_loopback_array,
18513+ au_warn_loopback_nelem * sizeof(unsigned long),
18514+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
18515+ if (a) {
18516+ au_warn_loopback_nelem = new_nelem;
18517+ au_warn_loopback_array = a;
18518+ a[i] = magic;
18519+ goto pr;
18520+ }
18521+
18522+ spin_unlock(&spin);
18523+ AuWarn1("realloc failed, ignored\n");
18524+ return;
18525+
18526+pr:
18527+ spin_unlock(&spin);
18528+ pr_warning("you may want to try another patch for loopback file "
18529+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
18530+}
18531+
18532+int au_loopback_init(void)
18533+{
18534+ int err;
18535+ struct super_block *sb __maybe_unused;
18536+
18537+ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long));
18538+
18539+ err = 0;
18540+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
18541+ sizeof(unsigned long), GFP_NOFS);
18542+ if (unlikely(!au_warn_loopback_array))
18543+ err = -ENOMEM;
18544+
18545+ return err;
18546+}
18547+
18548+void au_loopback_fin(void)
18549+{
18550+ kfree(au_warn_loopback_array);
18551+}
7f207e10
AM
18552diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
18553--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18554+++ linux/fs/aufs/loop.h 2011-08-24 13:30:24.734646739 +0200
87a755f4 18555@@ -0,0 +1,50 @@
1facf9fc 18556+/*
027c5e7a 18557+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18558+ *
18559+ * This program, aufs is free software; you can redistribute it and/or modify
18560+ * it under the terms of the GNU General Public License as published by
18561+ * the Free Software Foundation; either version 2 of the License, or
18562+ * (at your option) any later version.
dece6358
AM
18563+ *
18564+ * This program is distributed in the hope that it will be useful,
18565+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18566+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18567+ * GNU General Public License for more details.
18568+ *
18569+ * You should have received a copy of the GNU General Public License
18570+ * along with this program; if not, write to the Free Software
18571+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18572+ */
18573+
18574+/*
18575+ * support for loopback mount as a branch
18576+ */
18577+
18578+#ifndef __AUFS_LOOP_H__
18579+#define __AUFS_LOOP_H__
18580+
18581+#ifdef __KERNEL__
18582+
dece6358
AM
18583+struct dentry;
18584+struct super_block;
1facf9fc 18585+
18586+#ifdef CONFIG_AUFS_BDEV_LOOP
18587+/* loop.c */
b752ccd1 18588+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 18589+int au_test_loopback_kthread(void);
87a755f4
AM
18590+void au_warn_loopback(struct super_block *h_sb);
18591+
18592+int au_loopback_init(void);
18593+void au_loopback_fin(void);
1facf9fc 18594+#else
4a4d8108 18595+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 18596+ struct dentry *h_adding)
4a4d8108 18597+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
18598+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
18599+
18600+AuStubInt0(au_loopback_init, void)
18601+AuStubVoid(au_loopback_fin, void)
1facf9fc 18602+#endif /* CONFIG_AUFS_BDEV_LOOP */
18603+
18604+#endif /* __KERNEL__ */
18605+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
18606diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
18607--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
53392da6 18608+++ linux/fs/aufs/magic.mk 2011-08-24 13:30:24.734646739 +0200
4a4d8108 18609@@ -0,0 +1,54 @@
1facf9fc 18610+
18611+# defined in ${srctree}/fs/fuse/inode.c
18612+# tristate
18613+ifdef CONFIG_FUSE_FS
18614+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
18615+endif
18616+
18617+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
18618+# tristate
18619+ifdef CONFIG_OCFS2_FS
18620+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
18621+endif
18622+
18623+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
18624+# tristate
18625+ifdef CONFIG_OCFS2_FS_O2CB
18626+ccflags-y += -DDLMFS_MAGIC=0x76a9f425
18627+endif
18628+
1facf9fc 18629+# defined in ${srctree}/fs/cifs/cifsfs.c
18630+# tristate
18631+ifdef CONFIG_CIFS_FS
18632+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
18633+endif
18634+
18635+# defined in ${srctree}/fs/xfs/xfs_sb.h
18636+# tristate
18637+ifdef CONFIG_XFS_FS
18638+ccflags-y += -DXFS_SB_MAGIC=0x58465342
18639+endif
18640+
18641+# defined in ${srctree}/fs/configfs/mount.c
18642+# tristate
18643+ifdef CONFIG_CONFIGFS_FS
18644+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
18645+endif
18646+
18647+# defined in ${srctree}/fs/9p/v9fs.h
18648+# tristate
18649+ifdef CONFIG_9P_FS
18650+ccflags-y += -DV9FS_MAGIC=0x01021997
18651+endif
18652+
18653+# defined in ${srctree}/fs/ubifs/ubifs.h
18654+# tristate
18655+ifdef CONFIG_UBIFS_FS
18656+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
18657+endif
4a4d8108
AM
18658+
18659+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
18660+# tristate
18661+ifdef CONFIG_HFSPLUS_FS
18662+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
18663+endif
7f207e10
AM
18664diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
18665--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
53392da6 18666+++ linux/fs/aufs/Makefile 2011-08-24 13:30:24.727980364 +0200
7f207e10 18667@@ -0,0 +1,38 @@
4a4d8108
AM
18668+
18669+include ${src}/magic.mk
18670+ifeq (${CONFIG_AUFS_FS},m)
18671+include ${src}/conf.mk
18672+endif
18673+-include ${src}/priv_def.mk
18674+
18675+# cf. include/linux/kernel.h
18676+# enable pr_debug
18677+ccflags-y += -DDEBUG
7f207e10
AM
18678+# sparse doesn't allow spaces
18679+ccflags-y += -D'pr_fmt(fmt)=AUFS_NAME"\040%s:%d:%s[%d]:\040"fmt,__func__,__LINE__,current->comm,current->pid'
4a4d8108
AM
18680+
18681+obj-$(CONFIG_AUFS_FS) += aufs.o
18682+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
18683+ wkq.o vfsub.o dcsub.o \
e49829fe 18684+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
18685+ dinfo.o dentry.o \
18686+ dynop.o \
18687+ finfo.o file.o f_op.o \
18688+ dir.o vdir.o \
18689+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
18690+ ioctl.o
18691+
18692+# all are boolean
e49829fe 18693+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
18694+aufs-$(CONFIG_SYSFS) += sysfs.o
18695+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
18696+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
18697+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
18698+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108
AM
18699+aufs-$(CONFIG_AUFS_EXPORT) += export.o
18700+aufs-$(CONFIG_AUFS_POLL) += poll.o
18701+aufs-$(CONFIG_AUFS_RDU) += rdu.o
18702+aufs-$(CONFIG_AUFS_SP_IATTR) += f_op_sp.o
18703+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
18704+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
18705+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
18706diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
18707--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18708+++ linux/fs/aufs/module.c 2011-08-24 13:30:24.734646739 +0200
87a755f4 18709@@ -0,0 +1,189 @@
1facf9fc 18710+/*
027c5e7a 18711+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18712+ *
18713+ * This program, aufs is free software; you can redistribute it and/or modify
18714+ * it under the terms of the GNU General Public License as published by
18715+ * the Free Software Foundation; either version 2 of the License, or
18716+ * (at your option) any later version.
dece6358
AM
18717+ *
18718+ * This program is distributed in the hope that it will be useful,
18719+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18720+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18721+ * GNU General Public License for more details.
18722+ *
18723+ * You should have received a copy of the GNU General Public License
18724+ * along with this program; if not, write to the Free Software
18725+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18726+ */
18727+
18728+/*
18729+ * module global variables and operations
18730+ */
18731+
18732+#include <linux/module.h>
18733+#include <linux/seq_file.h>
18734+#include "aufs.h"
18735+
18736+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
18737+{
18738+ if (new_sz <= nused)
18739+ return p;
18740+
18741+ p = krealloc(p, new_sz, gfp);
18742+ if (p)
18743+ memset(p + nused, 0, new_sz - nused);
18744+ return p;
18745+}
18746+
18747+/* ---------------------------------------------------------------------- */
18748+
18749+/*
18750+ * aufs caches
18751+ */
18752+struct kmem_cache *au_cachep[AuCache_Last];
18753+static int __init au_cache_init(void)
18754+{
4a4d8108 18755+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 18756+ if (au_cachep[AuCache_DINFO])
027c5e7a 18757+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
18758+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
18759+ au_icntnr_init_once);
1facf9fc 18760+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
18761+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
18762+ au_fi_init_once);
1facf9fc 18763+ if (au_cachep[AuCache_FINFO])
18764+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
18765+ if (au_cachep[AuCache_VDIR])
18766+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
18767+ if (au_cachep[AuCache_DEHSTR])
18768+ return 0;
18769+
18770+ return -ENOMEM;
18771+}
18772+
18773+static void au_cache_fin(void)
18774+{
18775+ int i;
4a4d8108
AM
18776+
18777+ /* including AuCache_HNOTIFY */
1facf9fc 18778+ for (i = 0; i < AuCache_Last; i++)
18779+ if (au_cachep[i]) {
18780+ kmem_cache_destroy(au_cachep[i]);
18781+ au_cachep[i] = NULL;
18782+ }
18783+}
18784+
18785+/* ---------------------------------------------------------------------- */
18786+
18787+int au_dir_roflags;
18788+
e49829fe
JR
18789+#ifdef CONFIG_AUFS_SBILIST
18790+struct au_splhead au_sbilist;
18791+#endif
18792+
1facf9fc 18793+/*
18794+ * functions for module interface.
18795+ */
18796+MODULE_LICENSE("GPL");
18797+/* MODULE_LICENSE("GPL v2"); */
dece6358 18798+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 18799+MODULE_DESCRIPTION(AUFS_NAME
18800+ " -- Advanced multi layered unification filesystem");
18801+MODULE_VERSION(AUFS_VERSION);
18802+
1facf9fc 18803+/* this module parameter has no meaning when SYSFS is disabled */
18804+int sysaufs_brs = 1;
18805+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
18806+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
18807+
18808+/* ---------------------------------------------------------------------- */
18809+
18810+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
18811+
18812+int au_seq_path(struct seq_file *seq, struct path *path)
18813+{
18814+ return seq_path(seq, path, au_esc_chars);
18815+}
18816+
18817+/* ---------------------------------------------------------------------- */
18818+
18819+static int __init aufs_init(void)
18820+{
18821+ int err, i;
18822+ char *p;
18823+
18824+ p = au_esc_chars;
18825+ for (i = 1; i <= ' '; i++)
18826+ *p++ = i;
18827+ *p++ = '\\';
18828+ *p++ = '\x7f';
18829+ *p = 0;
18830+
18831+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
18832+
e49829fe 18833+ au_sbilist_init();
1facf9fc 18834+ sysaufs_brs_init();
18835+ au_debug_init();
4a4d8108 18836+ au_dy_init();
1facf9fc 18837+ err = sysaufs_init();
18838+ if (unlikely(err))
18839+ goto out;
e49829fe 18840+ err = au_procfs_init();
4f0767ce 18841+ if (unlikely(err))
953406b4 18842+ goto out_sysaufs;
e49829fe
JR
18843+ err = au_wkq_init();
18844+ if (unlikely(err))
18845+ goto out_procfs;
87a755f4 18846+ err = au_loopback_init();
1facf9fc 18847+ if (unlikely(err))
18848+ goto out_wkq;
87a755f4
AM
18849+ err = au_hnotify_init();
18850+ if (unlikely(err))
18851+ goto out_loopback;
1facf9fc 18852+ err = au_sysrq_init();
18853+ if (unlikely(err))
18854+ goto out_hin;
18855+ err = au_cache_init();
18856+ if (unlikely(err))
18857+ goto out_sysrq;
18858+ err = register_filesystem(&aufs_fs_type);
18859+ if (unlikely(err))
18860+ goto out_cache;
4a4d8108
AM
18861+ /* since we define pr_fmt, call printk directly */
18862+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 18863+ goto out; /* success */
18864+
4f0767ce 18865+out_cache:
1facf9fc 18866+ au_cache_fin();
4f0767ce 18867+out_sysrq:
1facf9fc 18868+ au_sysrq_fin();
4f0767ce 18869+out_hin:
4a4d8108 18870+ au_hnotify_fin();
87a755f4
AM
18871+out_loopback:
18872+ au_loopback_fin();
4f0767ce 18873+out_wkq:
1facf9fc 18874+ au_wkq_fin();
e49829fe
JR
18875+out_procfs:
18876+ au_procfs_fin();
4f0767ce 18877+out_sysaufs:
1facf9fc 18878+ sysaufs_fin();
4a4d8108 18879+ au_dy_fin();
4f0767ce 18880+out:
1facf9fc 18881+ return err;
18882+}
18883+
18884+static void __exit aufs_exit(void)
18885+{
18886+ unregister_filesystem(&aufs_fs_type);
18887+ au_cache_fin();
18888+ au_sysrq_fin();
4a4d8108 18889+ au_hnotify_fin();
87a755f4 18890+ au_loopback_fin();
1facf9fc 18891+ au_wkq_fin();
e49829fe 18892+ au_procfs_fin();
1facf9fc 18893+ sysaufs_fin();
4a4d8108 18894+ au_dy_fin();
1facf9fc 18895+}
18896+
18897+module_init(aufs_init);
18898+module_exit(aufs_exit);
7f207e10
AM
18899diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
18900--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
53392da6 18901+++ linux/fs/aufs/module.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 18902@@ -0,0 +1,91 @@
1facf9fc 18903+/*
027c5e7a 18904+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 18905+ *
18906+ * This program, aufs is free software; you can redistribute it and/or modify
18907+ * it under the terms of the GNU General Public License as published by
18908+ * the Free Software Foundation; either version 2 of the License, or
18909+ * (at your option) any later version.
dece6358
AM
18910+ *
18911+ * This program is distributed in the hope that it will be useful,
18912+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18913+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18914+ * GNU General Public License for more details.
18915+ *
18916+ * You should have received a copy of the GNU General Public License
18917+ * along with this program; if not, write to the Free Software
18918+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18919+ */
18920+
18921+/*
18922+ * module initialization and module-global
18923+ */
18924+
18925+#ifndef __AUFS_MODULE_H__
18926+#define __AUFS_MODULE_H__
18927+
18928+#ifdef __KERNEL__
18929+
18930+#include <linux/slab.h>
18931+
dece6358
AM
18932+struct path;
18933+struct seq_file;
18934+
1facf9fc 18935+/* module parameters */
1facf9fc 18936+extern int sysaufs_brs;
18937+
18938+/* ---------------------------------------------------------------------- */
18939+
18940+extern int au_dir_roflags;
18941+
18942+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
18943+int au_seq_path(struct seq_file *seq, struct path *path);
18944+
e49829fe
JR
18945+#ifdef CONFIG_PROC_FS
18946+/* procfs.c */
18947+int __init au_procfs_init(void);
18948+void au_procfs_fin(void);
18949+#else
18950+AuStubInt0(au_procfs_init, void);
18951+AuStubVoid(au_procfs_fin, void);
18952+#endif
18953+
4f0767ce
JR
18954+/* ---------------------------------------------------------------------- */
18955+
18956+/* kmem cache */
1facf9fc 18957+enum {
18958+ AuCache_DINFO,
18959+ AuCache_ICNTNR,
18960+ AuCache_FINFO,
18961+ AuCache_VDIR,
18962+ AuCache_DEHSTR,
4a4d8108
AM
18963+#ifdef CONFIG_AUFS_HNOTIFY
18964+ AuCache_HNOTIFY,
1facf9fc 18965+#endif
18966+ AuCache_Last
18967+};
18968+
4a4d8108
AM
18969+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
18970+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
18971+#define AuCacheCtor(type, ctor) \
18972+ kmem_cache_create(#type, sizeof(struct type), \
18973+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 18974+
18975+extern struct kmem_cache *au_cachep[];
18976+
18977+#define AuCacheFuncs(name, index) \
4a4d8108 18978+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 18979+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 18980+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 18981+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
18982+
18983+AuCacheFuncs(dinfo, DINFO);
18984+AuCacheFuncs(icntnr, ICNTNR);
18985+AuCacheFuncs(finfo, FINFO);
18986+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
18987+AuCacheFuncs(vdir_dehstr, DEHSTR);
18988+#ifdef CONFIG_AUFS_HNOTIFY
18989+AuCacheFuncs(hnotify, HNOTIFY);
18990+#endif
1facf9fc 18991+
4a4d8108
AM
18992+#endif /* __KERNEL__ */
18993+#endif /* __AUFS_MODULE_H__ */
7f207e10
AM
18994diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
18995--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
53392da6 18996+++ linux/fs/aufs/opts.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 18997@@ -0,0 +1,1595 @@
1facf9fc 18998+/*
027c5e7a 18999+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 19000+ *
19001+ * This program, aufs is free software; you can redistribute it and/or modify
19002+ * it under the terms of the GNU General Public License as published by
19003+ * the Free Software Foundation; either version 2 of the License, or
19004+ * (at your option) any later version.
dece6358
AM
19005+ *
19006+ * This program is distributed in the hope that it will be useful,
19007+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19008+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19009+ * GNU General Public License for more details.
19010+ *
19011+ * You should have received a copy of the GNU General Public License
19012+ * along with this program; if not, write to the Free Software
19013+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19014+ */
19015+
19016+/*
19017+ * mount options/flags
19018+ */
19019+
dece6358 19020+#include <linux/file.h>
e49829fe 19021+#include <linux/jiffies.h>
dece6358 19022+#include <linux/namei.h>
1facf9fc 19023+#include <linux/types.h> /* a distribution requires */
19024+#include <linux/parser.h>
19025+#include "aufs.h"
19026+
19027+/* ---------------------------------------------------------------------- */
19028+
/*
 * Token ids for the mount option parser.
 * Opt_tail marks the end of a parsed struct au_opt array; the order of the
 * enumerators is kept as it was since their numeric values follow from it.
 */
enum {
	/* branch management */
	Opt_br,
	Opt_add,
	Opt_del,
	Opt_mod,
	Opt_reorder,
	Opt_append,
	Opt_prepend,
	Opt_idel,
	Opt_imod,
	Opt_ireorder,

	/* readdir and whiteout tunables */
	Opt_dirwh,
	Opt_rdcache,
	Opt_rdblk,
	Opt_rdhash,
	Opt_rendir,
	Opt_rdblk_def,
	Opt_rdhash_def,

	/* xino and xib */
	Opt_xino,
	Opt_zxino,
	Opt_noxino,
	Opt_trunc_xino,
	Opt_trunc_xino_v,
	Opt_notrunc_xino,
	Opt_trunc_xino_path,
	Opt_itrunc_xino,
	Opt_trunc_xib,
	Opt_notrunc_xib,

	/* simple switches */
	Opt_shwh,
	Opt_noshwh,
	Opt_plink,
	Opt_noplink,
	Opt_list_plink,
	Opt_udba,
	Opt_dio,
	Opt_nodio,
	/* Opt_lock, Opt_unlock, */
	Opt_cmd,
	Opt_cmd_args,
	Opt_diropq_a,
	Opt_diropq_w,
	Opt_warn_perm,
	Opt_nowarn_perm,
	Opt_wbr_copyup,
	Opt_wbr_create,
	Opt_refrof,
	Opt_norefrof,
	Opt_verbose,
	Opt_noverbose,
	Opt_sum,
	Opt_nosum,
	Opt_wsum,

	/* parser bookkeeping */
	Opt_tail,
	Opt_ignore,
	Opt_ignore_silent,
	Opt_err
};
19053+
19054+static match_table_t options = {
19055+ {Opt_br, "br=%s"},
19056+ {Opt_br, "br:%s"},
19057+
19058+ {Opt_add, "add=%d:%s"},
19059+ {Opt_add, "add:%d:%s"},
19060+ {Opt_add, "ins=%d:%s"},
19061+ {Opt_add, "ins:%d:%s"},
19062+ {Opt_append, "append=%s"},
19063+ {Opt_append, "append:%s"},
19064+ {Opt_prepend, "prepend=%s"},
19065+ {Opt_prepend, "prepend:%s"},
19066+
19067+ {Opt_del, "del=%s"},
19068+ {Opt_del, "del:%s"},
19069+ /* {Opt_idel, "idel:%d"}, */
19070+ {Opt_mod, "mod=%s"},
19071+ {Opt_mod, "mod:%s"},
19072+ /* {Opt_imod, "imod:%d:%s"}, */
19073+
19074+ {Opt_dirwh, "dirwh=%d"},
19075+
19076+ {Opt_xino, "xino=%s"},
19077+ {Opt_noxino, "noxino"},
19078+ {Opt_trunc_xino, "trunc_xino"},
19079+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
19080+ {Opt_notrunc_xino, "notrunc_xino"},
19081+ {Opt_trunc_xino_path, "trunc_xino=%s"},
19082+ {Opt_itrunc_xino, "itrunc_xino=%d"},
19083+ /* {Opt_zxino, "zxino=%s"}, */
19084+ {Opt_trunc_xib, "trunc_xib"},
19085+ {Opt_notrunc_xib, "notrunc_xib"},
19086+
e49829fe 19087+#ifdef CONFIG_PROC_FS
1facf9fc 19088+ {Opt_plink, "plink"},
e49829fe
JR
19089+#else
19090+ {Opt_ignore_silent, "plink"},
19091+#endif
19092+
1facf9fc 19093+ {Opt_noplink, "noplink"},
e49829fe 19094+
1facf9fc 19095+#ifdef CONFIG_AUFS_DEBUG
19096+ {Opt_list_plink, "list_plink"},
19097+#endif
19098+
19099+ {Opt_udba, "udba=%s"},
19100+
4a4d8108
AM
19101+ {Opt_dio, "dio"},
19102+ {Opt_nodio, "nodio"},
19103+
1facf9fc 19104+ {Opt_diropq_a, "diropq=always"},
19105+ {Opt_diropq_a, "diropq=a"},
19106+ {Opt_diropq_w, "diropq=whiteouted"},
19107+ {Opt_diropq_w, "diropq=w"},
19108+
19109+ {Opt_warn_perm, "warn_perm"},
19110+ {Opt_nowarn_perm, "nowarn_perm"},
19111+
19112+ /* keep them temporary */
19113+ {Opt_ignore_silent, "coo=%s"},
19114+ {Opt_ignore_silent, "nodlgt"},
19115+ {Opt_ignore_silent, "nodirperm1"},
1facf9fc 19116+ {Opt_ignore_silent, "clean_plink"},
19117+
dece6358
AM
19118+#ifdef CONFIG_AUFS_SHWH
19119+ {Opt_shwh, "shwh"},
19120+#endif
19121+ {Opt_noshwh, "noshwh"},
19122+
1facf9fc 19123+ {Opt_rendir, "rendir=%d"},
19124+
19125+ {Opt_refrof, "refrof"},
19126+ {Opt_norefrof, "norefrof"},
19127+
19128+ {Opt_verbose, "verbose"},
19129+ {Opt_verbose, "v"},
19130+ {Opt_noverbose, "noverbose"},
19131+ {Opt_noverbose, "quiet"},
19132+ {Opt_noverbose, "q"},
19133+ {Opt_noverbose, "silent"},
19134+
19135+ {Opt_sum, "sum"},
19136+ {Opt_nosum, "nosum"},
19137+ {Opt_wsum, "wsum"},
19138+
19139+ {Opt_rdcache, "rdcache=%d"},
19140+ {Opt_rdblk, "rdblk=%d"},
dece6358 19141+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 19142+ {Opt_rdhash, "rdhash=%d"},
dece6358 19143+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 19144+
19145+ {Opt_wbr_create, "create=%s"},
19146+ {Opt_wbr_create, "create_policy=%s"},
19147+ {Opt_wbr_copyup, "cpup=%s"},
19148+ {Opt_wbr_copyup, "copyup=%s"},
19149+ {Opt_wbr_copyup, "copyup_policy=%s"},
19150+
19151+ /* internal use for the scripts */
19152+ {Opt_ignore_silent, "si=%s"},
19153+
19154+ {Opt_br, "dirs=%s"},
19155+ {Opt_ignore, "debug=%d"},
19156+ {Opt_ignore, "delete=whiteout"},
19157+ {Opt_ignore, "delete=all"},
19158+ {Opt_ignore, "imap=%s"},
19159+
1308ab2a 19160+ /* temporary workaround, due to old mount(8)? */
19161+ {Opt_ignore_silent, "relatime"},
19162+
1facf9fc 19163+ {Opt_err, NULL}
19164+};
19165+
19166+/* ---------------------------------------------------------------------- */
19167+
19168+static const char *au_parser_pattern(int val, struct match_token *token)
19169+{
19170+ while (token->pattern) {
19171+ if (token->token == val)
19172+ return token->pattern;
19173+ token++;
19174+ }
19175+ BUG();
19176+ return "??";
19177+}
19178+
19179+/* ---------------------------------------------------------------------- */
19180+
19181+static match_table_t brperms = {
19182+ {AuBrPerm_RO, AUFS_BRPERM_RO},
19183+ {AuBrPerm_RR, AUFS_BRPERM_RR},
19184+ {AuBrPerm_RW, AUFS_BRPERM_RW},
19185+
19186+ {AuBrPerm_ROWH, AUFS_BRPERM_ROWH},
19187+ {AuBrPerm_RRWH, AUFS_BRPERM_RRWH},
19188+ {AuBrPerm_RWNoLinkWH, AUFS_BRPERM_RWNLWH},
19189+
19190+ {AuBrPerm_ROWH, "nfsro"},
19191+ {AuBrPerm_RO, NULL}
19192+};
19193+
4a4d8108 19194+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 19195+{
19196+ int val;
19197+ substring_t args[MAX_OPT_ARGS];
19198+
19199+ val = match_token(perm, brperms, args);
19200+ return val;
19201+}
19202+
19203+const char *au_optstr_br_perm(int brperm)
19204+{
19205+ return au_parser_pattern(brperm, (void *)brperms);
19206+}
19207+
19208+/* ---------------------------------------------------------------------- */
19209+
19210+static match_table_t udbalevel = {
19211+ {AuOpt_UDBA_REVAL, "reval"},
19212+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
19213+#ifdef CONFIG_AUFS_HNOTIFY
19214+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
19215+#ifdef CONFIG_AUFS_HFSNOTIFY
19216+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 19217+#endif
1facf9fc 19218+#endif
19219+ {-1, NULL}
19220+};
19221+
4a4d8108 19222+static int noinline_for_stack udba_val(char *str)
1facf9fc 19223+{
19224+ substring_t args[MAX_OPT_ARGS];
19225+
7f207e10 19226+ return match_token(str, udbalevel, args);
1facf9fc 19227+}
19228+
19229+const char *au_optstr_udba(int udba)
19230+{
19231+ return au_parser_pattern(udba, (void *)udbalevel);
19232+}
19233+
19234+/* ---------------------------------------------------------------------- */
19235+
19236+static match_table_t au_wbr_create_policy = {
19237+ {AuWbrCreate_TDP, "tdp"},
19238+ {AuWbrCreate_TDP, "top-down-parent"},
19239+ {AuWbrCreate_RR, "rr"},
19240+ {AuWbrCreate_RR, "round-robin"},
19241+ {AuWbrCreate_MFS, "mfs"},
19242+ {AuWbrCreate_MFS, "most-free-space"},
19243+ {AuWbrCreate_MFSV, "mfs:%d"},
19244+ {AuWbrCreate_MFSV, "most-free-space:%d"},
19245+
19246+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
19247+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
19248+ {AuWbrCreate_PMFS, "pmfs"},
19249+ {AuWbrCreate_PMFSV, "pmfs:%d"},
19250+
19251+ {-1, NULL}
19252+};
19253+
dece6358
AM
19254+/*
19255+ * cf. linux/lib/parser.c and cmdline.c
19256+ * gave up calling memparse() since it uses simple_strtoull() instead of
19257+ * strict_...().
19258+ */
4a4d8108
AM
19259+static int noinline_for_stack
19260+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 19261+{
19262+ int err;
19263+ unsigned int len;
19264+ char a[32];
19265+
19266+ err = -ERANGE;
19267+ len = s->to - s->from;
19268+ if (len + 1 <= sizeof(a)) {
19269+ memcpy(a, s->from, len);
19270+ a[len] = '\0';
19271+ err = strict_strtoull(a, 0, result);
19272+ }
19273+ return err;
19274+}
19275+
19276+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
19277+ struct au_opt_wbr_create *create)
19278+{
19279+ int err;
19280+ unsigned long long ull;
19281+
19282+ err = 0;
19283+ if (!au_match_ull(arg, &ull))
19284+ create->mfsrr_watermark = ull;
19285+ else {
4a4d8108 19286+ pr_err("bad integer in %s\n", str);
1facf9fc 19287+ err = -EINVAL;
19288+ }
19289+
19290+ return err;
19291+}
19292+
19293+static int au_wbr_mfs_sec(substring_t *arg, char *str,
19294+ struct au_opt_wbr_create *create)
19295+{
19296+ int n, err;
19297+
19298+ err = 0;
027c5e7a 19299+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 19300+ create->mfs_second = n;
19301+ else {
4a4d8108 19302+ pr_err("bad integer in %s\n", str);
1facf9fc 19303+ err = -EINVAL;
19304+ }
19305+
19306+ return err;
19307+}
19308+
4a4d8108
AM
19309+static int noinline_for_stack
19310+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 19311+{
19312+ int err, e;
19313+ substring_t args[MAX_OPT_ARGS];
19314+
19315+ err = match_token(str, au_wbr_create_policy, args);
19316+ create->wbr_create = err;
19317+ switch (err) {
19318+ case AuWbrCreate_MFSRRV:
19319+ e = au_wbr_mfs_wmark(&args[0], str, create);
19320+ if (!e)
19321+ e = au_wbr_mfs_sec(&args[1], str, create);
19322+ if (unlikely(e))
19323+ err = e;
19324+ break;
19325+ case AuWbrCreate_MFSRR:
19326+ e = au_wbr_mfs_wmark(&args[0], str, create);
19327+ if (unlikely(e)) {
19328+ err = e;
19329+ break;
19330+ }
19331+ /*FALLTHROUGH*/
19332+ case AuWbrCreate_MFS:
19333+ case AuWbrCreate_PMFS:
027c5e7a 19334+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 19335+ break;
19336+ case AuWbrCreate_MFSV:
19337+ case AuWbrCreate_PMFSV:
19338+ e = au_wbr_mfs_sec(&args[0], str, create);
19339+ if (unlikely(e))
19340+ err = e;
19341+ break;
19342+ }
19343+
19344+ return err;
19345+}
19346+
19347+const char *au_optstr_wbr_create(int wbr_create)
19348+{
19349+ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
19350+}
19351+
19352+static match_table_t au_wbr_copyup_policy = {
19353+ {AuWbrCopyup_TDP, "tdp"},
19354+ {AuWbrCopyup_TDP, "top-down-parent"},
19355+ {AuWbrCopyup_BUP, "bup"},
19356+ {AuWbrCopyup_BUP, "bottom-up-parent"},
19357+ {AuWbrCopyup_BU, "bu"},
19358+ {AuWbrCopyup_BU, "bottom-up"},
19359+ {-1, NULL}
19360+};
19361+
4a4d8108 19362+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 19363+{
19364+ substring_t args[MAX_OPT_ARGS];
19365+
19366+ return match_token(str, au_wbr_copyup_policy, args);
19367+}
19368+
19369+const char *au_optstr_wbr_copyup(int wbr_copyup)
19370+{
19371+ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
19372+}
19373+
19374+/* ---------------------------------------------------------------------- */
19375+
19376+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
19377+
/*
 * Print every parsed option in @opts, up to the Opt_tail terminator, as a
 * debug message.  Compiled to an empty function without CONFIG_AUFS_DEBUG.
 * An entry whose type is not handled here is a bug.
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;
	struct au_opt_wbr_create *create;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch management */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* numeric tunables */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino and xib */
		case Opt_xino:
			AuDbg("xino {%s %.*s}\n",
			      opt->xino.path,
			      AuDLNPair(opt->xino.file->f_dentry));
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;

		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* simple switches */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* policies, with their parameters if any */
		case Opt_wbr_create:
			create = &opt->wbr_create;
			AuDbg("create %d, %s\n", create->wbr_create,
			      au_optstr_wbr_create(create->wbr_create));
			switch (create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      create->mfsrr_watermark,
				      create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		default:
			BUG();
		}
	}
#endif
}
19559+
19560+void au_opts_free(struct au_opts *opts)
19561+{
19562+ struct au_opt *opt;
19563+
19564+ opt = opts->opt;
19565+ while (opt->type != Opt_tail) {
19566+ switch (opt->type) {
19567+ case Opt_add:
19568+ case Opt_append:
19569+ case Opt_prepend:
19570+ path_put(&opt->add.path);
19571+ break;
19572+ case Opt_del:
19573+ case Opt_idel:
19574+ path_put(&opt->del.h_path);
19575+ break;
19576+ case Opt_mod:
19577+ case Opt_imod:
19578+ dput(opt->mod.h_root);
19579+ break;
19580+ case Opt_xino:
19581+ fput(opt->xino.file);
19582+ break;
19583+ }
19584+ opt++;
19585+ }
19586+}
19587+
19588+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
19589+ aufs_bindex_t bindex)
19590+{
19591+ int err;
19592+ struct au_opt_add *add = &opt->add;
19593+ char *p;
19594+
19595+ add->bindex = bindex;
19596+ add->perm = AuBrPerm_Last;
19597+ add->pathname = opt_str;
19598+ p = strchr(opt_str, '=');
19599+ if (p) {
19600+ *p++ = 0;
19601+ if (*p)
19602+ add->perm = br_perm_val(p);
19603+ }
19604+
19605+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
19606+ if (!err) {
19607+ if (!p) {
19608+ add->perm = AuBrPerm_RO;
19609+ if (au_test_fs_rr(add->path.dentry->d_sb))
19610+ add->perm = AuBrPerm_RR;
19611+ else if (!bindex && !(sb_flags & MS_RDONLY))
19612+ add->perm = AuBrPerm_RW;
19613+ }
19614+ opt->type = Opt_add;
19615+ goto out;
19616+ }
4a4d8108 19617+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 19618+ err = -EINVAL;
19619+
4f0767ce 19620+out:
1facf9fc 19621+ return err;
19622+}
19623+
19624+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
19625+{
19626+ int err;
19627+
19628+ del->pathname = args[0].from;
19629+ AuDbg("del path %s\n", del->pathname);
19630+
19631+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
19632+ if (unlikely(err))
4a4d8108 19633+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 19634+
19635+ return err;
19636+}
19637+
#if 0 /* reserved for future use */
/*
 * "del" by a branch index instead of a path (currently compiled out).
 * On success @del->h_path holds references to the dentry and the mount of
 * the branch at @bindex.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		err = 0;
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
	} else {
		err = -EINVAL;
		pr_err("out of bounds, %d\n", bindex);
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19662+
4a4d8108
AM
19663+static int noinline_for_stack
19664+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 19665+{
19666+ int err;
19667+ struct path path;
19668+ char *p;
19669+
19670+ err = -EINVAL;
19671+ mod->path = args[0].from;
19672+ p = strchr(mod->path, '=');
19673+ if (unlikely(!p)) {
4a4d8108 19674+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 19675+ goto out;
19676+ }
19677+
19678+ *p++ = 0;
19679+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
19680+ if (unlikely(err)) {
4a4d8108 19681+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 19682+ goto out;
19683+ }
19684+
19685+ mod->perm = br_perm_val(p);
19686+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
19687+ mod->h_root = dget(path.dentry);
19688+ path_put(&path);
19689+
4f0767ce 19690+out:
1facf9fc 19691+ return err;
19692+}
19693+
#if 0 /* reserved for future use */
/*
 * "mod" by a branch index instead of a path (currently compiled out).
 * The permission string is taken from args[1], and on success @mod->h_root
 * holds a reference to the root dentry of the branch at @bindex.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		err = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	} else {
		err = -EINVAL;
		pr_err("out of bounds, %d\n", bindex);
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
19720+
19721+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
19722+ substring_t args[])
19723+{
19724+ int err;
19725+ struct file *file;
19726+
19727+ file = au_xino_create(sb, args[0].from, /*silent*/0);
19728+ err = PTR_ERR(file);
19729+ if (IS_ERR(file))
19730+ goto out;
19731+
19732+ err = -EINVAL;
19733+ if (unlikely(file->f_dentry->d_sb == sb)) {
19734+ fput(file);
4a4d8108 19735+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 19736+ goto out;
19737+ }
19738+
19739+ err = 0;
19740+ xino->file = file;
19741+ xino->path = args[0].from;
19742+
4f0767ce 19743+out:
1facf9fc 19744+ return err;
19745+}
19746+
4a4d8108
AM
19747+static int noinline_for_stack
19748+au_opts_parse_xino_itrunc_path(struct super_block *sb,
19749+ struct au_opt_xino_itrunc *xino_itrunc,
19750+ substring_t args[])
1facf9fc 19751+{
19752+ int err;
19753+ aufs_bindex_t bend, bindex;
19754+ struct path path;
19755+ struct dentry *root;
19756+
19757+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
19758+ if (unlikely(err)) {
4a4d8108 19759+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 19760+ goto out;
19761+ }
19762+
19763+ xino_itrunc->bindex = -1;
19764+ root = sb->s_root;
19765+ aufs_read_lock(root, AuLock_FLUSH);
19766+ bend = au_sbend(sb);
19767+ for (bindex = 0; bindex <= bend; bindex++) {
19768+ if (au_h_dptr(root, bindex) == path.dentry) {
19769+ xino_itrunc->bindex = bindex;
19770+ break;
19771+ }
19772+ }
19773+ aufs_read_unlock(root, !AuLock_IR);
19774+ path_put(&path);
19775+
19776+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 19777+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 19778+ err = -EINVAL;
19779+ }
19780+
4f0767ce 19781+out:
1facf9fc 19782+ return err;
19783+}
19784+
19785+/* called without aufs lock */
19786+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
19787+{
19788+ int err, n, token;
19789+ aufs_bindex_t bindex;
19790+ unsigned char skipped;
19791+ struct dentry *root;
19792+ struct au_opt *opt, *opt_tail;
19793+ char *opt_str;
19794+ /* reduce the stack space */
19795+ union {
19796+ struct au_opt_xino_itrunc *xino_itrunc;
19797+ struct au_opt_wbr_create *create;
19798+ } u;
19799+ struct {
19800+ substring_t args[MAX_OPT_ARGS];
19801+ } *a;
19802+
19803+ err = -ENOMEM;
19804+ a = kmalloc(sizeof(*a), GFP_NOFS);
19805+ if (unlikely(!a))
19806+ goto out;
19807+
19808+ root = sb->s_root;
19809+ err = 0;
19810+ bindex = 0;
19811+ opt = opts->opt;
19812+ opt_tail = opt + opts->max_opt - 1;
19813+ opt->type = Opt_tail;
19814+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
19815+ err = -EINVAL;
19816+ skipped = 0;
19817+ token = match_token(opt_str, options, a->args);
19818+ switch (token) {
19819+ case Opt_br:
19820+ err = 0;
19821+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
19822+ && *opt_str) {
19823+ err = opt_add(opt, opt_str, opts->sb_flags,
19824+ bindex++);
19825+ if (unlikely(!err && ++opt > opt_tail)) {
19826+ err = -E2BIG;
19827+ break;
19828+ }
19829+ opt->type = Opt_tail;
19830+ skipped = 1;
19831+ }
19832+ break;
19833+ case Opt_add:
19834+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19835+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19836+ break;
19837+ }
19838+ bindex = n;
19839+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
19840+ bindex);
19841+ if (!err)
19842+ opt->type = token;
19843+ break;
19844+ case Opt_append:
19845+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
19846+ /*dummy bindex*/1);
19847+ if (!err)
19848+ opt->type = token;
19849+ break;
19850+ case Opt_prepend:
19851+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
19852+ /*bindex*/0);
19853+ if (!err)
19854+ opt->type = token;
19855+ break;
19856+ case Opt_del:
19857+ err = au_opts_parse_del(&opt->del, a->args);
19858+ if (!err)
19859+ opt->type = token;
19860+ break;
19861+#if 0 /* reserved for future use */
19862+ case Opt_idel:
19863+ del->pathname = "(indexed)";
19864+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 19865+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19866+ break;
19867+ }
19868+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
19869+ if (!err)
19870+ opt->type = token;
19871+ break;
19872+#endif
19873+ case Opt_mod:
19874+ err = au_opts_parse_mod(&opt->mod, a->args);
19875+ if (!err)
19876+ opt->type = token;
19877+ break;
19878+#ifdef IMOD /* reserved for future use */
19879+ case Opt_imod:
19880+ u.mod->path = "(indexed)";
19881+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19882+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19883+ break;
19884+ }
19885+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
19886+ if (!err)
19887+ opt->type = token;
19888+ break;
19889+#endif
19890+ case Opt_xino:
19891+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
19892+ if (!err)
19893+ opt->type = token;
19894+ break;
19895+
19896+ case Opt_trunc_xino_path:
19897+ err = au_opts_parse_xino_itrunc_path
19898+ (sb, &opt->xino_itrunc, a->args);
19899+ if (!err)
19900+ opt->type = token;
19901+ break;
19902+
19903+ case Opt_itrunc_xino:
19904+ u.xino_itrunc = &opt->xino_itrunc;
19905+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 19906+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19907+ break;
19908+ }
19909+ u.xino_itrunc->bindex = n;
19910+ aufs_read_lock(root, AuLock_FLUSH);
19911+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 19912+ pr_err("out of bounds, %d\n", n);
1facf9fc 19913+ aufs_read_unlock(root, !AuLock_IR);
19914+ break;
19915+ }
19916+ aufs_read_unlock(root, !AuLock_IR);
19917+ err = 0;
19918+ opt->type = token;
19919+ break;
19920+
19921+ case Opt_dirwh:
19922+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
19923+ break;
19924+ err = 0;
19925+ opt->type = token;
19926+ break;
19927+
19928+ case Opt_rdcache:
027c5e7a
AM
19929+ if (unlikely(match_int(&a->args[0], &n))) {
19930+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19931+ break;
027c5e7a
AM
19932+ }
19933+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
19934+ pr_err("rdcache must be smaller than %d\n",
19935+ AUFS_RDCACHE_MAX);
19936+ break;
19937+ }
19938+ opt->rdcache = n;
1facf9fc 19939+ err = 0;
19940+ opt->type = token;
19941+ break;
19942+ case Opt_rdblk:
19943+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 19944+ || n < 0
1facf9fc 19945+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 19946+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19947+ break;
19948+ }
1308ab2a 19949+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
19950+ pr_err("rdblk must be larger than %d\n",
19951+ NAME_MAX);
1facf9fc 19952+ break;
19953+ }
19954+ opt->rdblk = n;
19955+ err = 0;
19956+ opt->type = token;
19957+ break;
19958+ case Opt_rdhash:
19959+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 19960+ || n < 0
1facf9fc 19961+ || n * sizeof(struct hlist_head)
19962+ > KMALLOC_MAX_SIZE)) {
4a4d8108 19963+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 19964+ break;
19965+ }
19966+ opt->rdhash = n;
19967+ err = 0;
19968+ opt->type = token;
19969+ break;
19970+
19971+ case Opt_trunc_xino:
19972+ case Opt_notrunc_xino:
19973+ case Opt_noxino:
19974+ case Opt_trunc_xib:
19975+ case Opt_notrunc_xib:
dece6358
AM
19976+ case Opt_shwh:
19977+ case Opt_noshwh:
1facf9fc 19978+ case Opt_plink:
19979+ case Opt_noplink:
19980+ case Opt_list_plink:
4a4d8108
AM
19981+ case Opt_dio:
19982+ case Opt_nodio:
1facf9fc 19983+ case Opt_diropq_a:
19984+ case Opt_diropq_w:
19985+ case Opt_warn_perm:
19986+ case Opt_nowarn_perm:
19987+ case Opt_refrof:
19988+ case Opt_norefrof:
19989+ case Opt_verbose:
19990+ case Opt_noverbose:
19991+ case Opt_sum:
19992+ case Opt_nosum:
19993+ case Opt_wsum:
dece6358
AM
19994+ case Opt_rdblk_def:
19995+ case Opt_rdhash_def:
1facf9fc 19996+ err = 0;
19997+ opt->type = token;
19998+ break;
19999+
20000+ case Opt_udba:
20001+ opt->udba = udba_val(a->args[0].from);
20002+ if (opt->udba >= 0) {
20003+ err = 0;
20004+ opt->type = token;
20005+ } else
4a4d8108 20006+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20007+ break;
20008+
20009+ case Opt_wbr_create:
20010+ u.create = &opt->wbr_create;
20011+ u.create->wbr_create
20012+ = au_wbr_create_val(a->args[0].from, u.create);
20013+ if (u.create->wbr_create >= 0) {
20014+ err = 0;
20015+ opt->type = token;
20016+ } else
4a4d8108 20017+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20018+ break;
20019+ case Opt_wbr_copyup:
20020+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
20021+ if (opt->wbr_copyup >= 0) {
20022+ err = 0;
20023+ opt->type = token;
20024+ } else
4a4d8108 20025+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 20026+ break;
20027+
20028+ case Opt_ignore:
4a4d8108 20029+ pr_warning("ignored %s\n", opt_str);
1facf9fc 20030+ /*FALLTHROUGH*/
20031+ case Opt_ignore_silent:
20032+ skipped = 1;
20033+ err = 0;
20034+ break;
20035+ case Opt_err:
4a4d8108 20036+ pr_err("unknown option %s\n", opt_str);
1facf9fc 20037+ break;
20038+ }
20039+
20040+ if (!err && !skipped) {
20041+ if (unlikely(++opt > opt_tail)) {
20042+ err = -E2BIG;
20043+ opt--;
20044+ opt->type = Opt_tail;
20045+ break;
20046+ }
20047+ opt->type = Opt_tail;
20048+ }
20049+ }
20050+
20051+ kfree(a);
20052+ dump_opts(opts);
20053+ if (unlikely(err))
20054+ au_opts_free(opts);
20055+
4f0767ce 20056+out:
1facf9fc 20057+ return err;
20058+}
20059+
20060+static int au_opt_wbr_create(struct super_block *sb,
20061+ struct au_opt_wbr_create *create)
20062+{
20063+ int err;
20064+ struct au_sbinfo *sbinfo;
20065+
dece6358
AM
20066+ SiMustWriteLock(sb);
20067+
1facf9fc 20068+ err = 1; /* handled */
20069+ sbinfo = au_sbi(sb);
20070+ if (sbinfo->si_wbr_create_ops->fin) {
20071+ err = sbinfo->si_wbr_create_ops->fin(sb);
20072+ if (!err)
20073+ err = 1;
20074+ }
20075+
20076+ sbinfo->si_wbr_create = create->wbr_create;
20077+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
20078+ switch (create->wbr_create) {
20079+ case AuWbrCreate_MFSRRV:
20080+ case AuWbrCreate_MFSRR:
20081+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
20082+ /*FALLTHROUGH*/
20083+ case AuWbrCreate_MFS:
20084+ case AuWbrCreate_MFSV:
20085+ case AuWbrCreate_PMFS:
20086+ case AuWbrCreate_PMFSV:
e49829fe
JR
20087+ sbinfo->si_wbr_mfs.mfs_expire
20088+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 20089+ break;
20090+ }
20091+
20092+ if (sbinfo->si_wbr_create_ops->init)
20093+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
20094+
20095+ return err;
20096+}
20097+
20098+/*
20099+ * returns,
20100+ * plus: processed without an error
20101+ * zero: unprocessed
20102+ */
20103+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
20104+ struct au_opts *opts)
20105+{
20106+ int err;
20107+ struct au_sbinfo *sbinfo;
20108+
dece6358
AM
20109+ SiMustWriteLock(sb);
20110+
1facf9fc 20111+ err = 1; /* handled */
20112+ sbinfo = au_sbi(sb);
20113+ switch (opt->type) {
20114+ case Opt_udba:
20115+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
20116+ sbinfo->si_mntflags |= opt->udba;
20117+ opts->given_udba |= opt->udba;
20118+ break;
20119+
20120+ case Opt_plink:
20121+ au_opt_set(sbinfo->si_mntflags, PLINK);
20122+ break;
20123+ case Opt_noplink:
20124+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 20125+ au_plink_put(sb, /*verbose*/1);
1facf9fc 20126+ au_opt_clr(sbinfo->si_mntflags, PLINK);
20127+ break;
20128+ case Opt_list_plink:
20129+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
20130+ au_plink_list(sb);
20131+ break;
20132+
4a4d8108
AM
20133+ case Opt_dio:
20134+ au_opt_set(sbinfo->si_mntflags, DIO);
20135+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20136+ break;
20137+ case Opt_nodio:
20138+ au_opt_clr(sbinfo->si_mntflags, DIO);
20139+ au_fset_opts(opts->flags, REFRESH_DYAOP);
20140+ break;
20141+
1facf9fc 20142+ case Opt_diropq_a:
20143+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20144+ break;
20145+ case Opt_diropq_w:
20146+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
20147+ break;
20148+
20149+ case Opt_warn_perm:
20150+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
20151+ break;
20152+ case Opt_nowarn_perm:
20153+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
20154+ break;
20155+
20156+ case Opt_refrof:
20157+ au_opt_set(sbinfo->si_mntflags, REFROF);
20158+ break;
20159+ case Opt_norefrof:
20160+ au_opt_clr(sbinfo->si_mntflags, REFROF);
20161+ break;
20162+
20163+ case Opt_verbose:
20164+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
20165+ break;
20166+ case Opt_noverbose:
20167+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
20168+ break;
20169+
20170+ case Opt_sum:
20171+ au_opt_set(sbinfo->si_mntflags, SUM);
20172+ break;
20173+ case Opt_wsum:
20174+ au_opt_clr(sbinfo->si_mntflags, SUM);
20175+ au_opt_set(sbinfo->si_mntflags, SUM_W);
20176+ case Opt_nosum:
20177+ au_opt_clr(sbinfo->si_mntflags, SUM);
20178+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
20179+ break;
20180+
20181+ case Opt_wbr_create:
20182+ err = au_opt_wbr_create(sb, &opt->wbr_create);
20183+ break;
20184+ case Opt_wbr_copyup:
20185+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
20186+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
20187+ break;
20188+
20189+ case Opt_dirwh:
20190+ sbinfo->si_dirwh = opt->dirwh;
20191+ break;
20192+
20193+ case Opt_rdcache:
e49829fe
JR
20194+ sbinfo->si_rdcache
20195+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 20196+ break;
20197+ case Opt_rdblk:
20198+ sbinfo->si_rdblk = opt->rdblk;
20199+ break;
dece6358
AM
20200+ case Opt_rdblk_def:
20201+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
20202+ break;
1facf9fc 20203+ case Opt_rdhash:
20204+ sbinfo->si_rdhash = opt->rdhash;
20205+ break;
dece6358
AM
20206+ case Opt_rdhash_def:
20207+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
20208+ break;
20209+
20210+ case Opt_shwh:
20211+ au_opt_set(sbinfo->si_mntflags, SHWH);
20212+ break;
20213+ case Opt_noshwh:
20214+ au_opt_clr(sbinfo->si_mntflags, SHWH);
20215+ break;
1facf9fc 20216+
20217+ case Opt_trunc_xino:
20218+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
20219+ break;
20220+ case Opt_notrunc_xino:
20221+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
20222+ break;
20223+
20224+ case Opt_trunc_xino_path:
20225+ case Opt_itrunc_xino:
20226+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
20227+ if (!err)
20228+ err = 1;
20229+ break;
20230+
20231+ case Opt_trunc_xib:
20232+ au_fset_opts(opts->flags, TRUNC_XIB);
20233+ break;
20234+ case Opt_notrunc_xib:
20235+ au_fclr_opts(opts->flags, TRUNC_XIB);
20236+ break;
20237+
20238+ default:
20239+ err = 0;
20240+ break;
20241+ }
20242+
20243+ return err;
20244+}
20245+
20246+/*
20247+ * returns tri-state.
20248+ * plus: processed without an error
20249+ * zero: unprocessed
20250+ * minus: error
20251+ */
20252+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
20253+ struct au_opts *opts)
20254+{
20255+ int err, do_refresh;
20256+
20257+ err = 0;
20258+ switch (opt->type) {
20259+ case Opt_append:
20260+ opt->add.bindex = au_sbend(sb) + 1;
20261+ if (opt->add.bindex < 0)
20262+ opt->add.bindex = 0;
20263+ goto add;
20264+ case Opt_prepend:
20265+ opt->add.bindex = 0;
20266+ add:
20267+ case Opt_add:
20268+ err = au_br_add(sb, &opt->add,
20269+ au_ftest_opts(opts->flags, REMOUNT));
20270+ if (!err) {
20271+ err = 1;
027c5e7a 20272+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20273+ }
20274+ break;
20275+
20276+ case Opt_del:
20277+ case Opt_idel:
20278+ err = au_br_del(sb, &opt->del,
20279+ au_ftest_opts(opts->flags, REMOUNT));
20280+ if (!err) {
20281+ err = 1;
20282+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 20283+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20284+ }
20285+ break;
20286+
20287+ case Opt_mod:
20288+ case Opt_imod:
20289+ err = au_br_mod(sb, &opt->mod,
20290+ au_ftest_opts(opts->flags, REMOUNT),
20291+ &do_refresh);
20292+ if (!err) {
20293+ err = 1;
027c5e7a
AM
20294+ if (do_refresh)
20295+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20296+ }
20297+ break;
20298+ }
20299+
20300+ return err;
20301+}
20302+
20303+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
20304+ struct au_opt_xino **opt_xino,
20305+ struct au_opts *opts)
20306+{
20307+ int err;
20308+ aufs_bindex_t bend, bindex;
20309+ struct dentry *root, *parent, *h_root;
20310+
20311+ err = 0;
20312+ switch (opt->type) {
20313+ case Opt_xino:
20314+ err = au_xino_set(sb, &opt->xino,
20315+ !!au_ftest_opts(opts->flags, REMOUNT));
20316+ if (unlikely(err))
20317+ break;
20318+
20319+ *opt_xino = &opt->xino;
20320+ au_xino_brid_set(sb, -1);
20321+
20322+ /* safe d_parent access */
20323+ parent = opt->xino.file->f_dentry->d_parent;
20324+ root = sb->s_root;
20325+ bend = au_sbend(sb);
20326+ for (bindex = 0; bindex <= bend; bindex++) {
20327+ h_root = au_h_dptr(root, bindex);
20328+ if (h_root == parent) {
20329+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
20330+ break;
20331+ }
20332+ }
20333+ break;
20334+
20335+ case Opt_noxino:
20336+ au_xino_clr(sb);
20337+ au_xino_brid_set(sb, -1);
20338+ *opt_xino = (void *)-1;
20339+ break;
20340+ }
20341+
20342+ return err;
20343+}
20344+
20345+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20346+ unsigned int pending)
20347+{
20348+ int err;
20349+ aufs_bindex_t bindex, bend;
20350+ unsigned char do_plink, skip, do_free;
20351+ struct au_branch *br;
20352+ struct au_wbr *wbr;
20353+ struct dentry *root;
20354+ struct inode *dir, *h_dir;
20355+ struct au_sbinfo *sbinfo;
20356+ struct au_hinode *hdir;
20357+
dece6358
AM
20358+ SiMustAnyLock(sb);
20359+
1facf9fc 20360+ sbinfo = au_sbi(sb);
20361+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
20362+
dece6358
AM
20363+ if (!(sb_flags & MS_RDONLY)) {
20364+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
4a4d8108 20365+ pr_warning("first branch should be rw\n");
dece6358 20366+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
4a4d8108 20367+ pr_warning("shwh should be used with ro\n");
dece6358 20368+ }
1facf9fc 20369+
4a4d8108 20370+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 20371+ && !au_opt_test(sbinfo->si_mntflags, XINO))
4a4d8108 20372+ pr_warning("udba=*notify requires xino\n");
1facf9fc 20373+
20374+ err = 0;
20375+ root = sb->s_root;
4a4d8108 20376+ dir = root->d_inode;
1facf9fc 20377+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
20378+ bend = au_sbend(sb);
20379+ for (bindex = 0; !err && bindex <= bend; bindex++) {
20380+ skip = 0;
20381+ h_dir = au_h_iptr(dir, bindex);
20382+ br = au_sbr(sb, bindex);
20383+ do_free = 0;
20384+
20385+ wbr = br->br_wbr;
20386+ if (wbr)
20387+ wbr_wh_read_lock(wbr);
20388+
20389+ switch (br->br_perm) {
20390+ case AuBrPerm_RO:
20391+ case AuBrPerm_ROWH:
20392+ case AuBrPerm_RR:
20393+ case AuBrPerm_RRWH:
20394+ do_free = !!wbr;
20395+ skip = (!wbr
20396+ || (!wbr->wbr_whbase
20397+ && !wbr->wbr_plink
20398+ && !wbr->wbr_orph));
20399+ break;
20400+
20401+ case AuBrPerm_RWNoLinkWH:
20402+ /* skip = (!br->br_whbase && !br->br_orph); */
20403+ skip = (!wbr || !wbr->wbr_whbase);
20404+ if (skip && wbr) {
20405+ if (do_plink)
20406+ skip = !!wbr->wbr_plink;
20407+ else
20408+ skip = !wbr->wbr_plink;
20409+ }
20410+ break;
20411+
20412+ case AuBrPerm_RW:
20413+ /* skip = (br->br_whbase && br->br_ohph); */
20414+ skip = (wbr && wbr->wbr_whbase);
20415+ if (skip) {
20416+ if (do_plink)
20417+ skip = !!wbr->wbr_plink;
20418+ else
20419+ skip = !wbr->wbr_plink;
20420+ }
20421+ break;
20422+
20423+ default:
20424+ BUG();
20425+ }
20426+ if (wbr)
20427+ wbr_wh_read_unlock(wbr);
20428+
20429+ if (skip)
20430+ continue;
20431+
20432+ hdir = au_hi(dir, bindex);
4a4d8108 20433+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 20434+ if (wbr)
20435+ wbr_wh_write_lock(wbr);
20436+ err = au_wh_init(au_h_dptr(root, bindex), br, sb);
20437+ if (wbr)
20438+ wbr_wh_write_unlock(wbr);
4a4d8108 20439+ au_hn_imtx_unlock(hdir);
1facf9fc 20440+
20441+ if (!err && do_free) {
20442+ kfree(wbr);
20443+ br->br_wbr = NULL;
20444+ }
20445+ }
20446+
20447+ return err;
20448+}
20449+
20450+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
20451+{
20452+ int err;
20453+ unsigned int tmp;
027c5e7a 20454+ aufs_bindex_t bindex, bend;
1facf9fc 20455+ struct au_opt *opt;
20456+ struct au_opt_xino *opt_xino, xino;
20457+ struct au_sbinfo *sbinfo;
027c5e7a 20458+ struct au_branch *br;
1facf9fc 20459+
dece6358
AM
20460+ SiMustWriteLock(sb);
20461+
1facf9fc 20462+ err = 0;
20463+ opt_xino = NULL;
20464+ opt = opts->opt;
20465+ while (err >= 0 && opt->type != Opt_tail)
20466+ err = au_opt_simple(sb, opt++, opts);
20467+ if (err > 0)
20468+ err = 0;
20469+ else if (unlikely(err < 0))
20470+ goto out;
20471+
20472+ /* disable xino and udba temporary */
20473+ sbinfo = au_sbi(sb);
20474+ tmp = sbinfo->si_mntflags;
20475+ au_opt_clr(sbinfo->si_mntflags, XINO);
20476+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
20477+
20478+ opt = opts->opt;
20479+ while (err >= 0 && opt->type != Opt_tail)
20480+ err = au_opt_br(sb, opt++, opts);
20481+ if (err > 0)
20482+ err = 0;
20483+ else if (unlikely(err < 0))
20484+ goto out;
20485+
20486+ bend = au_sbend(sb);
20487+ if (unlikely(bend < 0)) {
20488+ err = -EINVAL;
4a4d8108 20489+ pr_err("no branches\n");
1facf9fc 20490+ goto out;
20491+ }
20492+
20493+ if (au_opt_test(tmp, XINO))
20494+ au_opt_set(sbinfo->si_mntflags, XINO);
20495+ opt = opts->opt;
20496+ while (!err && opt->type != Opt_tail)
20497+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
20498+ if (unlikely(err))
20499+ goto out;
20500+
20501+ err = au_opts_verify(sb, sb->s_flags, tmp);
20502+ if (unlikely(err))
20503+ goto out;
20504+
20505+ /* restore xino */
20506+ if (au_opt_test(tmp, XINO) && !opt_xino) {
20507+ xino.file = au_xino_def(sb);
20508+ err = PTR_ERR(xino.file);
20509+ if (IS_ERR(xino.file))
20510+ goto out;
20511+
20512+ err = au_xino_set(sb, &xino, /*remount*/0);
20513+ fput(xino.file);
20514+ if (unlikely(err))
20515+ goto out;
20516+ }
20517+
20518+ /* restore udba */
027c5e7a 20519+ tmp &= AuOptMask_UDBA;
1facf9fc 20520+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
20521+ sbinfo->si_mntflags |= tmp;
20522+ bend = au_sbend(sb);
20523+ for (bindex = 0; bindex <= bend; bindex++) {
20524+ br = au_sbr(sb, bindex);
20525+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
20526+ if (unlikely(err))
20527+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
20528+ bindex, err);
20529+ /* go on even if err */
20530+ }
4a4d8108 20531+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
1facf9fc 20532+ struct inode *dir = sb->s_root->d_inode;
4a4d8108 20533+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 20534+ }
20535+
4f0767ce 20536+out:
1facf9fc 20537+ return err;
20538+}
20539+
20540+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
20541+{
20542+ int err, rerr;
20543+ struct inode *dir;
20544+ struct au_opt_xino *opt_xino;
20545+ struct au_opt *opt;
20546+ struct au_sbinfo *sbinfo;
20547+
dece6358
AM
20548+ SiMustWriteLock(sb);
20549+
1facf9fc 20550+ dir = sb->s_root->d_inode;
20551+ sbinfo = au_sbi(sb);
20552+ err = 0;
20553+ opt_xino = NULL;
20554+ opt = opts->opt;
20555+ while (err >= 0 && opt->type != Opt_tail) {
20556+ err = au_opt_simple(sb, opt, opts);
20557+ if (!err)
20558+ err = au_opt_br(sb, opt, opts);
20559+ if (!err)
20560+ err = au_opt_xino(sb, opt, &opt_xino, opts);
20561+ opt++;
20562+ }
20563+ if (err > 0)
20564+ err = 0;
20565+ AuTraceErr(err);
20566+ /* go on even err */
20567+
20568+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
20569+ if (unlikely(rerr && !err))
20570+ err = rerr;
20571+
20572+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
20573+ rerr = au_xib_trunc(sb);
20574+ if (unlikely(rerr && !err))
20575+ err = rerr;
20576+ }
20577+
20578+ /* will be handled by the caller */
027c5e7a 20579+ if (!au_ftest_opts(opts->flags, REFRESH)
1facf9fc 20580+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
027c5e7a 20581+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 20582+
20583+ AuDbg("status 0x%x\n", opts->flags);
20584+ return err;
20585+}
20586+
20587+/* ---------------------------------------------------------------------- */
20588+
20589+unsigned int au_opt_udba(struct super_block *sb)
20590+{
20591+ return au_mntflags(sb) & AuOptMask_UDBA;
20592+}
7f207e10
AM
20593diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
20594--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
53392da6 20595+++ linux/fs/aufs/opts.h 2011-08-24 13:30:24.734646739 +0200
027c5e7a 20596@@ -0,0 +1,210 @@
1facf9fc 20597+/*
027c5e7a 20598+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20599+ *
20600+ * This program, aufs is free software; you can redistribute it and/or modify
20601+ * it under the terms of the GNU General Public License as published by
20602+ * the Free Software Foundation; either version 2 of the License, or
20603+ * (at your option) any later version.
dece6358
AM
20604+ *
20605+ * This program is distributed in the hope that it will be useful,
20606+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20607+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20608+ * GNU General Public License for more details.
20609+ *
20610+ * You should have received a copy of the GNU General Public License
20611+ * along with this program; if not, write to the Free Software
20612+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20613+ */
20614+
20615+/*
20616+ * mount options/flags
20617+ */
20618+
20619+#ifndef __AUFS_OPTS_H__
20620+#define __AUFS_OPTS_H__
20621+
20622+#ifdef __KERNEL__
20623+
dece6358 20624+#include <linux/path.h>
1facf9fc 20625+#include <linux/aufs_type.h>
20626+
dece6358
AM
20627+struct file;
20628+struct super_block;
20629+
1facf9fc 20630+/* ---------------------------------------------------------------------- */
20631+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* unimplemented */
#define AuOpt_REFROF		(1 << 8)	/* unimplemented */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/*
 * a feature which is not configured gets the value zero, so that testing
 * its flag is always false and setting/clearing it changes nothing.
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* the default mount flags */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all the udba flags; au_opt_set_udba() keeps at most one of them set */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test the flag AuOpt_<name> in @flags.  evaluates to the masked value,
 * i.e. non-zero when the flag is set.
 * @flags is parenthesized since '&' binds tighter than '|', and a caller
 * passing an expression like "a | b" must get "(a | b) & flag" instead of
 * "a | (b & flag)".
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
/*
 * set the non-udba flag AuOpt_<name>.  passing a udba flag breaks the
 * build; those have to go through au_opt_set_udba().
 */
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* replace the current udba flag(s) by AuOpt_<name> */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
/* clear the flag AuOpt_<name> */
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
1facf9fc 20680+
e49829fe
JR
20681+static inline unsigned int au_opts_plink(unsigned int mntflags)
20682+{
20683+#ifdef CONFIG_PROC_FS
20684+ return mntflags;
20685+#else
20686+ return mntflags & ~AuOpt_PLINK;
20687+#endif
20688+}
20689+
1facf9fc 20690+/* ---------------------------------------------------------------------- */
20691+
/*
 * policies to select one among multiple writable branches.
 * the values start at zero and are consecutive.
 */
enum {
	AuWbrCreate_TDP		= 0,	/* top down parent */
	AuWbrCreate_RR		= 1,	/* round robin */
	AuWbrCreate_MFS		= 2,	/* most free space */
	AuWbrCreate_MFSV	= 3,	/* mfs with seconds */
	AuWbrCreate_MFSRR	= 4,	/* mfs then rr */
	AuWbrCreate_MFSRRV	= 5,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS	= 6,	/* parent and mfs */
	AuWbrCreate_PMFSV	= 7,	/* parent and mfs with seconds */

	/* the default policy */
	AuWbrCreate_Def		= AuWbrCreate_TDP
};
20705+
/*
 * policies to select the branch for copy-up.
 * the value is used as an index into au_wbr_copyup_ops[].
 */
enum {
	AuWbrCopyup_TDP	= 0,	/* top down parent */
	AuWbrCopyup_BUP	= 1,	/* bottom up parent */
	AuWbrCopyup_BU	= 2,	/* bottom up */

	/* the default policy */
	AuWbrCopyup_Def	= AuWbrCopyup_TDP
};
20713+
20714+/* ---------------------------------------------------------------------- */
20715+
20716+struct au_opt_add {
20717+ aufs_bindex_t bindex;
20718+ char *pathname;
20719+ int perm;
20720+ struct path path;
20721+};
20722+
20723+struct au_opt_del {
20724+ char *pathname;
20725+ struct path h_path;
20726+};
20727+
/* argument of the mod/imod options, handed to au_br_mod() */
struct au_opt_mod {
	char		*path;
	int		perm;
	struct dentry	*h_root;
};
20733+
/* argument of the xino option, handed to au_xino_set() */
struct au_opt_xino {
	char		*path;
	/* the opened xino file */
	struct file	*file;
};
20738+
20739+struct au_opt_xino_itrunc {
20740+ aufs_bindex_t bindex;
20741+};
20742+
/* argument of the create policy option, handed to au_opt_wbr_create() */
struct au_opt_wbr_create {
	/* presumably one of AuWbrCreate_XXX -- TODO confirm */
	int			wbr_create;
	int			mfs_second;
	unsigned long long	mfsrr_watermark;
};
20748+
20749+struct au_opt {
20750+ int type;
20751+ union {
20752+ struct au_opt_xino xino;
20753+ struct au_opt_xino_itrunc xino_itrunc;
20754+ struct au_opt_add add;
20755+ struct au_opt_del del;
20756+ struct au_opt_mod mod;
20757+ int dirwh;
20758+ int rdcache;
20759+ unsigned int rdblk;
20760+ unsigned int rdhash;
20761+ int udba;
20762+ struct au_opt_wbr_create wbr_create;
20763+ int wbr_copyup;
20764+ };
20765+};
20766+
/* opts flags, kept in au_opts.flags */
#define AuOpts_REMOUNT		(1 << 0)
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)

/* test (evaluates to the masked value), set and clear AuOpts_<name> */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) do { \
	(flags) |= AuOpts_##name; \
} while (0)
#define au_fclr_opts(flags, name) do { \
	(flags) &= ~AuOpts_##name; \
} while (0)
1facf9fc 20777+
20778+struct au_opts {
20779+ struct au_opt *opt;
20780+ int max_opt;
20781+
20782+ unsigned int given_udba;
20783+ unsigned int flags;
20784+ unsigned long sb_flags;
20785+};
20786+
20787+/* ---------------------------------------------------------------------- */
20788+
20789+const char *au_optstr_br_perm(int brperm);
20790+const char *au_optstr_udba(int udba);
20791+const char *au_optstr_wbr_copyup(int wbr_copyup);
20792+const char *au_optstr_wbr_create(int wbr_create);
20793+
20794+void au_opts_free(struct au_opts *opts);
20795+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
20796+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
20797+ unsigned int pending);
20798+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
20799+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
20800+
20801+unsigned int au_opt_udba(struct super_block *sb);
20802+
20803+/* ---------------------------------------------------------------------- */
20804+
20805+#endif /* __KERNEL__ */
20806+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
20807diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
20808--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
53392da6 20809+++ linux/fs/aufs/plink.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 20810@@ -0,0 +1,515 @@
1facf9fc 20811+/*
027c5e7a 20812+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 20813+ *
20814+ * This program, aufs is free software; you can redistribute it and/or modify
20815+ * it under the terms of the GNU General Public License as published by
20816+ * the Free Software Foundation; either version 2 of the License, or
20817+ * (at your option) any later version.
dece6358
AM
20818+ *
20819+ * This program is distributed in the hope that it will be useful,
20820+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20821+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20822+ * GNU General Public License for more details.
20823+ *
20824+ * You should have received a copy of the GNU General Public License
20825+ * along with this program; if not, write to the Free Software
20826+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20827+ */
20828+
20829+/*
20830+ * pseudo-link
20831+ */
20832+
20833+#include "aufs.h"
20834+
20835+/*
e49829fe 20836+ * the pseudo-link maintenance mode.
1facf9fc 20837+ * during a user process maintains the pseudo-links,
20838+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
20839+ *
20840+ * Flags
20841+ * NOPLM:
20842+ * For entry functions which will handle plink, and i_mutex is already held
20843+ * in VFS.
20844+ * They cannot wait and should return an error at once.
20845+ * Callers has to check the error.
20846+ * NOPLMW:
20847+ * For entry functions which will handle plink, but i_mutex is not held
20848+ * in VFS.
20849+ * They can wait the plink maintenance mode to finish.
20850+ *
20851+ * They behave like F_SETLK and F_SETLKW.
20852+ * If the caller never handle plink, then both flags are unnecessary.
1facf9fc 20853+ */
e49829fe
JR
20854+
20855+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 20856+{
e49829fe
JR
20857+ int err;
20858+ pid_t pid, ppid;
20859+ struct au_sbinfo *sbi;
dece6358
AM
20860+
20861+ SiMustAnyLock(sb);
20862+
e49829fe
JR
20863+ err = 0;
20864+ if (!au_opt_test(au_mntflags(sb), PLINK))
20865+ goto out;
20866+
20867+ sbi = au_sbi(sb);
20868+ pid = sbi->si_plink_maint_pid;
20869+ if (!pid || pid == current->pid)
20870+ goto out;
20871+
20872+ /* todo: it highly depends upon /sbin/mount.aufs */
20873+ rcu_read_lock();
20874+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
20875+ rcu_read_unlock();
20876+ if (pid == ppid)
20877+ goto out;
20878+
20879+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
20880+ /* if there is no i_mutex lock in VFS, we don't need to wait */
20881+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
20882+ while (sbi->si_plink_maint_pid) {
20883+ si_read_unlock(sb);
20884+ /* gave up wake_up_bit() */
20885+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
20886+
20887+ if (au_ftest_lock(flags, FLUSH))
20888+ au_nwt_flush(&sbi->si_nowait);
20889+ si_noflush_read_lock(sb);
20890+ }
20891+ } else if (au_ftest_lock(flags, NOPLM)) {
20892+ AuDbg("ppid %d, pid %d\n", ppid, pid);
20893+ err = -EAGAIN;
20894+ }
20895+
20896+out:
20897+ return err;
4a4d8108
AM
20898+}
20899+
e49829fe 20900+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 20901+{
4a4d8108 20902+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 20903+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 20904+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 20905+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
20906+}
20907+
e49829fe 20908+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
20909+{
20910+ int err;
4a4d8108
AM
20911+ struct au_sbinfo *sbinfo;
20912+
20913+ err = 0;
4a4d8108
AM
20914+ sbinfo = au_sbi(sb);
20915+ /* make sure i am the only one in this fs */
e49829fe
JR
20916+ si_write_lock(sb, AuLock_FLUSH);
20917+ if (au_opt_test(au_mntflags(sb), PLINK)) {
20918+ spin_lock(&sbinfo->si_plink_maint_lock);
20919+ if (!sbinfo->si_plink_maint_pid)
20920+ sbinfo->si_plink_maint_pid = current->pid;
20921+ else
20922+ err = -EBUSY;
20923+ spin_unlock(&sbinfo->si_plink_maint_lock);
20924+ }
4a4d8108
AM
20925+ si_write_unlock(sb);
20926+
20927+ return err;
1facf9fc 20928+}
20929+
20930+/* ---------------------------------------------------------------------- */
20931+
20932+struct pseudo_link {
4a4d8108
AM
20933+ union {
20934+ struct list_head list;
20935+ struct rcu_head rcu;
20936+ };
1facf9fc 20937+ struct inode *inode;
20938+};
20939+
#ifdef CONFIG_AUFS_DEBUG
/* debugging aid: print the inode number of every pseudo-linked inode */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbinfo;
	struct pseudo_link *entry;

	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &sbinfo->si_plink.head, list)
		AuDbg("%lu\n", entry->inode->i_ino);
	rcu_read_unlock();
}
#endif
20960+
20961+/* is the inode pseudo-linked? */
20962+int au_plink_test(struct inode *inode)
20963+{
20964+ int found;
20965+ struct au_sbinfo *sbinfo;
20966+ struct list_head *plink_list;
20967+ struct pseudo_link *plink;
20968+
20969+ sbinfo = au_sbi(inode->i_sb);
dece6358 20970+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 20971+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 20972+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 20973+
20974+ found = 0;
20975+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
20976+ rcu_read_lock();
20977+ list_for_each_entry_rcu(plink, plink_list, list)
1facf9fc 20978+ if (plink->inode == inode) {
20979+ found = 1;
20980+ break;
20981+ }
4a4d8108 20982+ rcu_read_unlock();
1facf9fc 20983+ return found;
20984+}
20985+
20986+/* ---------------------------------------------------------------------- */
20987+
20988+/*
20989+ * generate a name for plink.
20990+ * the file will be stored under AUFS_WH_PLINKDIR.
20991+ */
20992+/* 20 is max digits length of ulong 64 */
20993+#define PLINK_NAME_LEN ((20 + 1) * 2)
20994+
20995+static int plink_name(char *name, int len, struct inode *inode,
20996+ aufs_bindex_t bindex)
20997+{
20998+ int rlen;
20999+ struct inode *h_inode;
21000+
21001+ h_inode = au_h_iptr(inode, bindex);
21002+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
21003+ return rlen;
21004+}
21005+
7f207e10
AM
/* the arguments of au_do_plink_lkup(), packed for au_call_do_plink_lkup() */
struct au_do_plink_lkup_args {
	struct dentry		**errp;		/* where the result goes */
	struct qstr		*tgtname;
	struct dentry		*h_parent;
	struct au_branch	*br;
};
21012+
21013+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
21014+ struct dentry *h_parent,
21015+ struct au_branch *br)
21016+{
21017+ struct dentry *h_dentry;
21018+ struct mutex *h_mtx;
21019+
21020+ h_mtx = &h_parent->d_inode->i_mutex;
21021+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
21022+ h_dentry = au_lkup_one(tgtname, h_parent, br, /*nd*/NULL);
21023+ mutex_unlock(h_mtx);
21024+ return h_dentry;
21025+}
21026+
21027+static void au_call_do_plink_lkup(void *args)
21028+{
21029+ struct au_do_plink_lkup_args *a = args;
21030+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
21031+}
21032+
1facf9fc 21033+/* lookup the plink-ed @inode under the branch at @bindex */
21034+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
21035+{
21036+ struct dentry *h_dentry, *h_parent;
21037+ struct au_branch *br;
21038+ struct inode *h_dir;
7f207e10 21039+ int wkq_err;
1facf9fc 21040+ char a[PLINK_NAME_LEN];
21041+ struct qstr tgtname = {
21042+ .name = a
21043+ };
21044+
e49829fe
JR
21045+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
21046+
1facf9fc 21047+ br = au_sbr(inode->i_sb, bindex);
21048+ h_parent = br->br_wbr->wbr_plink;
21049+ h_dir = h_parent->d_inode;
21050+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21051+
7f207e10
AM
21052+ if (current_fsuid()) {
21053+ struct au_do_plink_lkup_args args = {
21054+ .errp = &h_dentry,
21055+ .tgtname = &tgtname,
21056+ .h_parent = h_parent,
21057+ .br = br
21058+ };
21059+
21060+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
21061+ if (unlikely(wkq_err))
21062+ h_dentry = ERR_PTR(wkq_err);
21063+ } else
21064+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
21065+
1facf9fc 21066+ return h_dentry;
21067+}
21068+
21069+/* create a pseudo-link */
21070+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
21071+ struct dentry *h_dentry, struct au_branch *br)
21072+{
21073+ int err;
21074+ struct path h_path = {
21075+ .mnt = br->br_mnt
21076+ };
21077+ struct inode *h_dir;
21078+
21079+ h_dir = h_parent->d_inode;
7f207e10 21080+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 21081+again:
1facf9fc 21082+ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
21083+ err = PTR_ERR(h_path.dentry);
21084+ if (IS_ERR(h_path.dentry))
21085+ goto out;
21086+
21087+ err = 0;
21088+ /* wh.plink dir is not monitored */
7f207e10 21089+ /* todo: is it really safe? */
1facf9fc 21090+ if (h_path.dentry->d_inode
21091+ && h_path.dentry->d_inode != h_dentry->d_inode) {
21092+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
21093+ dput(h_path.dentry);
21094+ h_path.dentry = NULL;
21095+ if (!err)
21096+ goto again;
21097+ }
21098+ if (!err && !h_path.dentry->d_inode)
21099+ err = vfsub_link(h_dentry, h_dir, &h_path);
21100+ dput(h_path.dentry);
21101+
4f0767ce 21102+out:
7f207e10 21103+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 21104+ return err;
21105+}
21106+
/* the arguments of do_whplink(), packed for call_do_whplink() */
struct do_whplink_args {
	int			*errp;		/* where the result goes */
	struct qstr		*tgt;
	struct dentry		*h_parent;
	struct dentry		*h_dentry;
	struct au_branch	*br;
};
21114+
21115+static void call_do_whplink(void *args)
21116+{
21117+ struct do_whplink_args *a = args;
21118+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
21119+}
21120+
21121+static int whplink(struct dentry *h_dentry, struct inode *inode,
21122+ aufs_bindex_t bindex, struct au_branch *br)
21123+{
21124+ int err, wkq_err;
21125+ struct au_wbr *wbr;
21126+ struct dentry *h_parent;
21127+ struct inode *h_dir;
21128+ char a[PLINK_NAME_LEN];
21129+ struct qstr tgtname = {
21130+ .name = a
21131+ };
21132+
21133+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
21134+ h_parent = wbr->wbr_plink;
21135+ h_dir = h_parent->d_inode;
21136+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
21137+
21138+ /* always superio. */
b752ccd1 21139+ if (current_fsuid()) {
1facf9fc 21140+ struct do_whplink_args args = {
21141+ .errp = &err,
21142+ .tgt = &tgtname,
21143+ .h_parent = h_parent,
21144+ .h_dentry = h_dentry,
21145+ .br = br
21146+ };
21147+ wkq_err = au_wkq_wait(call_do_whplink, &args);
21148+ if (unlikely(wkq_err))
21149+ err = wkq_err;
21150+ } else
21151+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 21152+
21153+ return err;
21154+}
21155+
21156+/* free a single plink */
21157+static void do_put_plink(struct pseudo_link *plink, int do_del)
21158+{
1facf9fc 21159+ if (do_del)
21160+ list_del(&plink->list);
4a4d8108
AM
21161+ iput(plink->inode);
21162+ kfree(plink);
21163+}
21164+
21165+static void do_put_plink_rcu(struct rcu_head *rcu)
21166+{
21167+ struct pseudo_link *plink;
21168+
21169+ plink = container_of(rcu, struct pseudo_link, rcu);
21170+ iput(plink->inode);
1facf9fc 21171+ kfree(plink);
21172+}
21173+
21174+/*
21175+ * create a new pseudo-link for @h_dentry on @bindex.
21176+ * the linked inode is held in aufs @inode.
21177+ */
21178+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
21179+ struct dentry *h_dentry)
21180+{
21181+ struct super_block *sb;
21182+ struct au_sbinfo *sbinfo;
21183+ struct list_head *plink_list;
4a4d8108 21184+ struct pseudo_link *plink, *tmp;
1facf9fc 21185+ int found, err, cnt;
21186+
21187+ sb = inode->i_sb;
21188+ sbinfo = au_sbi(sb);
21189+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21190+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21191+
1facf9fc 21192+ cnt = 0;
21193+ found = 0;
21194+ plink_list = &sbinfo->si_plink.head;
4a4d8108
AM
21195+ rcu_read_lock();
21196+ list_for_each_entry_rcu(plink, plink_list, list) {
1facf9fc 21197+ cnt++;
21198+ if (plink->inode == inode) {
21199+ found = 1;
21200+ break;
21201+ }
21202+ }
4a4d8108
AM
21203+ rcu_read_unlock();
21204+ if (found)
1facf9fc 21205+ return;
4a4d8108
AM
21206+
21207+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
21208+ if (tmp)
21209+ tmp->inode = au_igrab(inode);
21210+ else {
21211+ err = -ENOMEM;
21212+ goto out;
1facf9fc 21213+ }
21214+
4a4d8108
AM
21215+ spin_lock(&sbinfo->si_plink.spin);
21216+ list_for_each_entry(plink, plink_list, list) {
21217+ if (plink->inode == inode) {
21218+ found = 1;
21219+ break;
21220+ }
1facf9fc 21221+ }
4a4d8108
AM
21222+ if (!found)
21223+ list_add_rcu(&tmp->list, plink_list);
1facf9fc 21224+ spin_unlock(&sbinfo->si_plink.spin);
4a4d8108
AM
21225+ if (!found) {
21226+ cnt++;
21227+ WARN_ONCE(cnt > AUFS_PLINK_WARN,
21228+ "unexpectedly many pseudo links, %d\n", cnt);
1facf9fc 21229+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
21230+ } else {
21231+ do_put_plink(tmp, 0);
21232+ return;
1facf9fc 21233+ }
21234+
4a4d8108 21235+out:
1facf9fc 21236+ if (unlikely(err)) {
4a4d8108
AM
21237+ pr_warning("err %d, damaged pseudo link.\n", err);
21238+ if (tmp) {
21239+ au_spl_del_rcu(&tmp->list, &sbinfo->si_plink);
21240+ call_rcu(&tmp->rcu, do_put_plink_rcu);
21241+ }
1facf9fc 21242+ }
21243+}
21244+
21245+/* free all plinks */
e49829fe 21246+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 21247+{
21248+ struct au_sbinfo *sbinfo;
21249+ struct list_head *plink_list;
21250+ struct pseudo_link *plink, *tmp;
21251+
dece6358
AM
21252+ SiMustWriteLock(sb);
21253+
1facf9fc 21254+ sbinfo = au_sbi(sb);
21255+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21256+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21257+
21258+ plink_list = &sbinfo->si_plink.head;
21259+ /* no spin_lock since sbinfo is write-locked */
e49829fe 21260+ WARN(verbose && !list_empty(plink_list), "pseudo-link is not flushed");
1facf9fc 21261+ list_for_each_entry_safe(plink, tmp, plink_list, list)
21262+ do_put_plink(plink, 0);
21263+ INIT_LIST_HEAD(plink_list);
21264+}
21265+
e49829fe
JR
21266+void au_plink_clean(struct super_block *sb, int verbose)
21267+{
21268+ struct dentry *root;
21269+
21270+ root = sb->s_root;
21271+ aufs_write_lock(root);
21272+ if (au_opt_test(au_mntflags(sb), PLINK))
21273+ au_plink_put(sb, verbose);
21274+ aufs_write_unlock(root);
21275+}
21276+
1facf9fc 21277+/* free the plinks on a branch specified by @br_id */
21278+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
21279+{
21280+ struct au_sbinfo *sbinfo;
21281+ struct list_head *plink_list;
21282+ struct pseudo_link *plink, *tmp;
21283+ struct inode *inode;
21284+ aufs_bindex_t bstart, bend, bindex;
21285+ unsigned char do_put;
21286+
dece6358
AM
21287+ SiMustWriteLock(sb);
21288+
1facf9fc 21289+ sbinfo = au_sbi(sb);
21290+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 21291+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 21292+
21293+ plink_list = &sbinfo->si_plink.head;
21294+ /* no spin_lock since sbinfo is write-locked */
21295+ list_for_each_entry_safe(plink, tmp, plink_list, list) {
21296+ do_put = 0;
21297+ inode = au_igrab(plink->inode);
21298+ ii_write_lock_child(inode);
21299+ bstart = au_ibstart(inode);
21300+ bend = au_ibend(inode);
21301+ if (bstart >= 0) {
21302+ for (bindex = bstart; bindex <= bend; bindex++) {
21303+ if (!au_h_iptr(inode, bindex)
21304+ || au_ii_br_id(inode, bindex) != br_id)
21305+ continue;
21306+ au_set_h_iptr(inode, bindex, NULL, 0);
21307+ do_put = 1;
21308+ break;
21309+ }
21310+ } else
21311+ do_put_plink(plink, 1);
21312+
dece6358
AM
21313+ if (do_put) {
21314+ for (bindex = bstart; bindex <= bend; bindex++)
21315+ if (au_h_iptr(inode, bindex)) {
21316+ do_put = 0;
21317+ break;
21318+ }
21319+ if (do_put)
21320+ do_put_plink(plink, 1);
21321+ }
21322+ ii_write_unlock(inode);
21323+ iput(inode);
21324+ }
21325+}
7f207e10
AM
21326diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
21327--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21328+++ linux/fs/aufs/poll.c 2011-08-24 13:30:24.734646739 +0200
dece6358
AM
21329@@ -0,0 +1,56 @@
21330+/*
027c5e7a 21331+ * Copyright (C) 2005-2011 Junjiro R. Okajima
dece6358
AM
21332+ *
21333+ * This program, aufs is free software; you can redistribute it and/or modify
21334+ * it under the terms of the GNU General Public License as published by
21335+ * the Free Software Foundation; either version 2 of the License, or
21336+ * (at your option) any later version.
21337+ *
21338+ * This program is distributed in the hope that it will be useful,
21339+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21340+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21341+ * GNU General Public License for more details.
21342+ *
21343+ * You should have received a copy of the GNU General Public License
21344+ * along with this program; if not, write to the Free Software
21345+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21346+ */
21347+
1308ab2a 21348+/*
21349+ * poll operation
21350+ * There is only one filesystem which implements ->poll operation, currently.
21351+ */
21352+
21353+#include "aufs.h"
21354+
21355+unsigned int aufs_poll(struct file *file, poll_table *wait)
21356+{
21357+ unsigned int mask;
21358+ int err;
21359+ struct file *h_file;
21360+ struct dentry *dentry;
21361+ struct super_block *sb;
21362+
21363+ /* We should pretend an error happened. */
21364+ mask = POLLERR /* | POLLIN | POLLOUT */;
21365+ dentry = file->f_dentry;
21366+ sb = dentry->d_sb;
e49829fe 21367+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
1308ab2a 21368+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
21369+ if (unlikely(err))
21370+ goto out;
21371+
21372+ /* it is not an error if h_file has no operation */
21373+ mask = DEFAULT_POLLMASK;
4a4d8108 21374+ h_file = au_hf_top(file);
1308ab2a 21375+ if (h_file->f_op && h_file->f_op->poll)
21376+ mask = h_file->f_op->poll(h_file, wait);
21377+
21378+ di_read_unlock(dentry, AuLock_IR);
21379+ fi_read_unlock(file);
21380+
4f0767ce 21381+out:
1308ab2a 21382+ si_read_unlock(sb);
21383+ AuTraceErr((int)mask);
21384+ return mask;
21385+}
7f207e10
AM
21386diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
21387--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
21388+++ linux/fs/aufs/procfs.c 2011-08-24 13:30:24.734646739 +0200
21389@@ -0,0 +1,170 @@
e49829fe 21390+/*
027c5e7a 21391+ * Copyright (C) 2010-2011 Junjiro R. Okajima
e49829fe
JR
21392+ *
21393+ * This program, aufs is free software; you can redistribute it and/or modify
21394+ * it under the terms of the GNU General Public License as published by
21395+ * the Free Software Foundation; either version 2 of the License, or
21396+ * (at your option) any later version.
21397+ *
21398+ * This program is distributed in the hope that it will be useful,
21399+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21400+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21401+ * GNU General Public License for more details.
21402+ *
21403+ * You should have received a copy of the GNU General Public License
21404+ * along with this program; if not, write to the Free Software
21405+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21406+ */
21407+
21408+/*
21409+ * procfs interfaces
21410+ */
21411+
21412+#include <linux/proc_fs.h>
21413+#include "aufs.h"
21414+
21415+static int au_procfs_plm_release(struct inode *inode, struct file *file)
21416+{
21417+ struct au_sbinfo *sbinfo;
21418+
21419+ sbinfo = file->private_data;
21420+ if (sbinfo) {
21421+ au_plink_maint_leave(sbinfo);
21422+ kobject_put(&sbinfo->si_kobj);
21423+ }
21424+
21425+ return 0;
21426+}
21427+
21428+static void au_procfs_plm_write_clean(struct file *file)
21429+{
21430+ struct au_sbinfo *sbinfo;
21431+
21432+ sbinfo = file->private_data;
21433+ if (sbinfo)
21434+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
21435+}
21436+
21437+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
21438+{
21439+ int err;
21440+ struct super_block *sb;
21441+ struct au_sbinfo *sbinfo;
21442+
21443+ err = -EBUSY;
21444+ if (unlikely(file->private_data))
21445+ goto out;
21446+
21447+ sb = NULL;
53392da6 21448+ /* don't use au_sbilist_lock() here */
e49829fe
JR
21449+ spin_lock(&au_sbilist.spin);
21450+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
21451+ if (id == sysaufs_si_id(sbinfo)) {
21452+ kobject_get(&sbinfo->si_kobj);
21453+ sb = sbinfo->si_sb;
21454+ break;
21455+ }
21456+ spin_unlock(&au_sbilist.spin);
21457+
21458+ err = -EINVAL;
21459+ if (unlikely(!sb))
21460+ goto out;
21461+
21462+ err = au_plink_maint_enter(sb);
21463+ if (!err)
21464+ /* keep kobject_get() */
21465+ file->private_data = sbinfo;
21466+ else
21467+ kobject_put(&sbinfo->si_kobj);
21468+out:
21469+ return err;
21470+}
21471+
21472+/*
21473+ * Accept a valid "si=xxxx" only.
21474+ * Once it is accepted successfully, accept "clean" too.
21475+ */
21476+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
21477+ size_t count, loff_t *ppos)
21478+{
21479+ ssize_t err;
21480+ unsigned long id;
21481+ /* last newline is allowed */
21482+ char buf[3 + sizeof(unsigned long) * 2 + 1];
21483+
21484+ err = -EACCES;
21485+ if (unlikely(!capable(CAP_SYS_ADMIN)))
21486+ goto out;
21487+
21488+ err = -EINVAL;
21489+ if (unlikely(count > sizeof(buf)))
21490+ goto out;
21491+
21492+ err = copy_from_user(buf, ubuf, count);
21493+ if (unlikely(err)) {
21494+ err = -EFAULT;
21495+ goto out;
21496+ }
21497+ buf[count] = 0;
21498+
21499+ err = -EINVAL;
21500+ if (!strcmp("clean", buf)) {
21501+ au_procfs_plm_write_clean(file);
21502+ goto out_success;
21503+ } else if (unlikely(strncmp("si=", buf, 3)))
21504+ goto out;
21505+
21506+ err = strict_strtoul(buf + 3, 16, &id);
21507+ if (unlikely(err))
21508+ goto out;
21509+
21510+ err = au_procfs_plm_write_si(file, id);
21511+ if (unlikely(err))
21512+ goto out;
21513+
21514+out_success:
21515+ err = count; /* success */
21516+out:
21517+ return err;
21518+}
21519+
21520+static const struct file_operations au_procfs_plm_fop = {
21521+ .write = au_procfs_plm_write,
21522+ .release = au_procfs_plm_release,
21523+ .owner = THIS_MODULE
21524+};
21525+
21526+/* ---------------------------------------------------------------------- */
21527+
21528+static struct proc_dir_entry *au_procfs_dir;
21529+
21530+void au_procfs_fin(void)
21531+{
21532+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
21533+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21534+}
21535+
21536+int __init au_procfs_init(void)
21537+{
21538+ int err;
21539+ struct proc_dir_entry *entry;
21540+
21541+ err = -ENOMEM;
21542+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
21543+ if (unlikely(!au_procfs_dir))
21544+ goto out;
21545+
21546+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
21547+ au_procfs_dir, &au_procfs_plm_fop);
21548+ if (unlikely(!entry))
21549+ goto out_dir;
21550+
21551+ err = 0;
21552+ goto out; /* success */
21553+
21554+
21555+out_dir:
21556+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
21557+out:
21558+ return err;
21559+}
7f207e10
AM
21560diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
21561--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
53392da6 21562+++ linux/fs/aufs/rdu.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 21563@@ -0,0 +1,383 @@
1308ab2a 21564+/*
027c5e7a 21565+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1308ab2a 21566+ *
21567+ * This program, aufs is free software; you can redistribute it and/or modify
21568+ * it under the terms of the GNU General Public License as published by
21569+ * the Free Software Foundation; either version 2 of the License, or
21570+ * (at your option) any later version.
21571+ *
21572+ * This program is distributed in the hope that it will be useful,
21573+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21574+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21575+ * GNU General Public License for more details.
21576+ *
21577+ * You should have received a copy of the GNU General Public License
21578+ * along with this program; if not, write to the Free Software
21579+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21580+ */
21581+
21582+/*
21583+ * readdir in userspace.
21584+ */
21585+
#include <linux/compat.h>
#include <linux/fs_stack.h>
#include <linux/security.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/aufs_type.h>
#include "aufs.h"
21592+
/* bits for struct aufs_rdu.flags */
#define AuRdu_CALLED	(1 << 0)
#define AuRdu_CONT	(1 << 1)
#define AuRdu_FULL	(1 << 2)

/* test, set and clear one of the bits above, e.g. au_fset_rdu(f, FULL) */
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
	do { \
		(flags) |= AuRdu_##name; \
	} while (0)
#define au_fclr_rdu(flags, name) \
	do { \
		(flags) &= ~AuRdu_##name; \
	} while (0)
1308ab2a 21602+
21603+struct au_rdu_arg {
21604+ struct aufs_rdu *rdu;
21605+ union au_rdu_ent_ul ent;
21606+ unsigned long end;
21607+
21608+ struct super_block *sb;
21609+ int err;
21610+};
21611+
21612+static int au_rdu_fill(void *__arg, const char *name, int nlen,
21613+ loff_t offset, u64 h_ino, unsigned int d_type)
21614+{
21615+ int err, len;
21616+ struct au_rdu_arg *arg = __arg;
21617+ struct aufs_rdu *rdu = arg->rdu;
21618+ struct au_rdu_ent ent;
21619+
21620+ err = 0;
21621+ arg->err = 0;
21622+ au_fset_rdu(rdu->cookie.flags, CALLED);
21623+ len = au_rdu_len(nlen);
21624+ if (arg->ent.ul + len < arg->end) {
21625+ ent.ino = h_ino;
21626+ ent.bindex = rdu->cookie.bindex;
21627+ ent.type = d_type;
21628+ ent.nlen = nlen;
4a4d8108
AM
21629+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
21630+ ent.type = DT_UNKNOWN;
1308ab2a 21631+
21632+ err = -EFAULT;
21633+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
21634+ goto out;
21635+ if (copy_to_user(arg->ent.e->name, name, nlen))
21636+ goto out;
21637+ /* the terminating NULL */
21638+ if (__put_user(0, arg->ent.e->name + nlen))
21639+ goto out;
21640+ err = 0;
21641+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
21642+ arg->ent.ul += len;
21643+ rdu->rent++;
21644+ } else {
21645+ err = -EFAULT;
21646+ au_fset_rdu(rdu->cookie.flags, FULL);
21647+ rdu->full = 1;
21648+ rdu->tail = arg->ent;
21649+ }
21650+
4f0767ce 21651+out:
1308ab2a 21652+ /* AuTraceErr(err); */
21653+ return err;
21654+}
21655+
21656+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
21657+{
21658+ int err;
21659+ loff_t offset;
21660+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
21661+
21662+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
21663+ err = offset;
21664+ if (unlikely(offset != cookie->h_pos))
21665+ goto out;
21666+
21667+ err = 0;
21668+ do {
21669+ arg->err = 0;
21670+ au_fclr_rdu(cookie->flags, CALLED);
21671+ /* smp_mb(); */
21672+ err = vfsub_readdir(h_file, au_rdu_fill, arg);
21673+ if (err >= 0)
21674+ err = arg->err;
21675+ } while (!err
21676+ && au_ftest_rdu(cookie->flags, CALLED)
21677+ && !au_ftest_rdu(cookie->flags, FULL));
21678+ cookie->h_pos = h_file->f_pos;
21679+
4f0767ce 21680+out:
1308ab2a 21681+ AuTraceErr(err);
21682+ return err;
21683+}
21684+
21685+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
21686+{
21687+ int err;
21688+ aufs_bindex_t bend;
21689+ struct au_rdu_arg arg;
21690+ struct dentry *dentry;
21691+ struct inode *inode;
21692+ struct file *h_file;
21693+ struct au_rdu_cookie *cookie = &rdu->cookie;
21694+
21695+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
21696+ if (unlikely(err)) {
21697+ err = -EFAULT;
21698+ AuTraceErr(err);
21699+ goto out;
21700+ }
21701+ rdu->rent = 0;
21702+ rdu->tail = rdu->ent;
21703+ rdu->full = 0;
21704+ arg.rdu = rdu;
21705+ arg.ent = rdu->ent;
21706+ arg.end = arg.ent.ul;
21707+ arg.end += rdu->sz;
21708+
21709+ err = -ENOTDIR;
21710+ if (unlikely(!file->f_op || !file->f_op->readdir))
21711+ goto out;
21712+
21713+ err = security_file_permission(file, MAY_READ);
21714+ AuTraceErr(err);
21715+ if (unlikely(err))
21716+ goto out;
21717+
21718+ dentry = file->f_dentry;
21719+ inode = dentry->d_inode;
21720+#if 1
21721+ mutex_lock(&inode->i_mutex);
21722+#else
21723+ err = mutex_lock_killable(&inode->i_mutex);
21724+ AuTraceErr(err);
21725+ if (unlikely(err))
21726+ goto out;
21727+#endif
1308ab2a 21728+
21729+ arg.sb = inode->i_sb;
e49829fe
JR
21730+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
21731+ if (unlikely(err))
21732+ goto out_mtx;
027c5e7a
AM
21733+ err = au_alive_dir(dentry);
21734+ if (unlikely(err))
21735+ goto out_si;
e49829fe 21736+ /* todo: reval? */
1308ab2a 21737+ fi_read_lock(file);
21738+
21739+ err = -EAGAIN;
21740+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
21741+ && cookie->generation != au_figen(file)))
21742+ goto out_unlock;
21743+
21744+ err = 0;
21745+ if (!rdu->blk) {
21746+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
21747+ if (!rdu->blk)
21748+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
21749+ }
21750+ bend = au_fbstart(file);
21751+ if (cookie->bindex < bend)
21752+ cookie->bindex = bend;
4a4d8108 21753+ bend = au_fbend_dir(file);
1308ab2a 21754+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
21755+ for (; !err && cookie->bindex <= bend;
21756+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 21757+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 21758+ if (!h_file)
21759+ continue;
21760+
21761+ au_fclr_rdu(cookie->flags, FULL);
21762+ err = au_rdu_do(h_file, &arg);
21763+ AuTraceErr(err);
21764+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
21765+ break;
21766+ }
21767+ AuDbg("rent %llu\n", rdu->rent);
21768+
21769+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
21770+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
21771+ au_fset_rdu(cookie->flags, CONT);
21772+ cookie->generation = au_figen(file);
21773+ }
21774+
21775+ ii_read_lock_child(inode);
21776+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
21777+ ii_read_unlock(inode);
21778+
4f0767ce 21779+out_unlock:
1308ab2a 21780+ fi_read_unlock(file);
027c5e7a 21781+out_si:
1308ab2a 21782+ si_read_unlock(arg.sb);
4f0767ce 21783+out_mtx:
1308ab2a 21784+ mutex_unlock(&inode->i_mutex);
4f0767ce 21785+out:
1308ab2a 21786+ AuTraceErr(err);
21787+ return err;
21788+}
21789+
21790+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
21791+{
21792+ int err;
21793+ ino_t ino;
21794+ unsigned long long nent;
21795+ union au_rdu_ent_ul *u;
21796+ struct au_rdu_ent ent;
21797+ struct super_block *sb;
21798+
21799+ err = 0;
21800+ nent = rdu->nent;
21801+ u = &rdu->ent;
21802+ sb = file->f_dentry->d_sb;
21803+ si_read_lock(sb, AuLock_FLUSH);
21804+ while (nent-- > 0) {
1308ab2a 21805+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
21806+ if (!err)
21807+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 21808+ if (unlikely(err)) {
21809+ err = -EFAULT;
21810+ AuTraceErr(err);
21811+ break;
21812+ }
21813+
21814+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
21815+ if (!ent.wh)
21816+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
21817+ else
21818+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
21819+ &ino);
21820+ if (unlikely(err)) {
21821+ AuTraceErr(err);
21822+ break;
21823+ }
21824+
21825+ err = __put_user(ino, &u->e->ino);
21826+ if (unlikely(err)) {
21827+ err = -EFAULT;
21828+ AuTraceErr(err);
21829+ break;
21830+ }
21831+ u->ul += au_rdu_len(ent.nlen);
21832+ }
21833+ si_read_unlock(sb);
21834+
21835+ return err;
21836+}
21837+
21838+/* ---------------------------------------------------------------------- */
21839+
21840+static int au_rdu_verify(struct aufs_rdu *rdu)
21841+{
b752ccd1 21842+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 21843+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 21844+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 21845+ rdu->blk,
21846+ rdu->rent, rdu->shwh, rdu->full,
21847+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
21848+ rdu->cookie.generation);
dece6358 21849+
b752ccd1 21850+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 21851+ return 0;
dece6358 21852+
b752ccd1
AM
21853+ AuDbg("%u:%u\n",
21854+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 21855+ return -EINVAL;
21856+}
21857+
21858+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 21859+{
1308ab2a 21860+ long err, e;
21861+ struct aufs_rdu rdu;
21862+ void __user *p = (void __user *)arg;
dece6358 21863+
1308ab2a 21864+ err = copy_from_user(&rdu, p, sizeof(rdu));
21865+ if (unlikely(err)) {
21866+ err = -EFAULT;
21867+ AuTraceErr(err);
21868+ goto out;
21869+ }
21870+ err = au_rdu_verify(&rdu);
dece6358
AM
21871+ if (unlikely(err))
21872+ goto out;
21873+
1308ab2a 21874+ switch (cmd) {
21875+ case AUFS_CTL_RDU:
21876+ err = au_rdu(file, &rdu);
21877+ if (unlikely(err))
21878+ break;
dece6358 21879+
1308ab2a 21880+ e = copy_to_user(p, &rdu, sizeof(rdu));
21881+ if (unlikely(e)) {
21882+ err = -EFAULT;
21883+ AuTraceErr(err);
21884+ }
21885+ break;
21886+ case AUFS_CTL_RDU_INO:
21887+ err = au_rdu_ino(file, &rdu);
21888+ break;
21889+
21890+ default:
4a4d8108 21891+ /* err = -ENOTTY; */
1308ab2a 21892+ err = -EINVAL;
21893+ }
dece6358 21894+
4f0767ce 21895+out:
1308ab2a 21896+ AuTraceErr(err);
21897+ return err;
1facf9fc 21898+}
b752ccd1
AM
21899+
#ifdef CONFIG_COMPAT
/*
 * compat variant of au_rdu_ioctl(): the user pointers inside the request
 * are converted with compat_ptr() on the way in and with ptr_to_compat() on
 * the way out.
 */
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long err;
	struct aufs_rdu rdu;
	void __user *p = compat_ptr(arg);

	/* todo: get_user()? */
	if (unlikely(copy_from_user(&rdu, p, sizeof(rdu)))) {
		err = -EFAULT;
		AuTraceErr(err);
		goto out;
	}
	rdu.ent.e = compat_ptr(rdu.ent.ul);
	err = au_rdu_verify(&rdu);
	if (unlikely(err))
		goto out;

	if (cmd == AUFS_CTL_RDU) {
		err = au_rdu(file, &rdu);
		if (!err) {
			rdu.ent.ul = ptr_to_compat(rdu.ent.e);
			rdu.tail.ul = ptr_to_compat(rdu.tail.e);
			if (unlikely(copy_to_user(p, &rdu, sizeof(rdu)))) {
				err = -EFAULT;
				AuTraceErr(err);
			}
		}
	} else if (cmd == AUFS_CTL_RDU_INO) {
		err = au_rdu_ino(file, &rdu);
	} else {
		/* err = -ENOTTY; */
		err = -EINVAL;
	}

out:
	AuTraceErr(err);
	return err;
}
#endif
7f207e10
AM
21947diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
21948--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
53392da6 21949+++ linux/fs/aufs/rwsem.h 2011-08-24 13:30:24.734646739 +0200
e49829fe 21950@@ -0,0 +1,189 @@
1facf9fc 21951+/*
027c5e7a 21952+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 21953+ *
21954+ * This program, aufs is free software; you can redistribute it and/or modify
21955+ * it under the terms of the GNU General Public License as published by
21956+ * the Free Software Foundation; either version 2 of the License, or
21957+ * (at your option) any later version.
dece6358
AM
21958+ *
21959+ * This program is distributed in the hope that it will be useful,
21960+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21961+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21962+ * GNU General Public License for more details.
21963+ *
21964+ * You should have received a copy of the GNU General Public License
21965+ * along with this program; if not, write to the Free Software
21966+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 21967+ */
21968+
21969+/*
21970+ * simple read-write semaphore wrappers
21971+ */
21972+
21973+#ifndef __AUFS_RWSEM_H__
21974+#define __AUFS_RWSEM_H__
21975+
21976+#ifdef __KERNEL__
21977+
dece6358 21978+#include <linux/rwsem.h>
4a4d8108 21979+#include "debug.h"
dece6358
AM
21980+
21981+struct au_rwsem {
21982+ struct rw_semaphore rwsem;
21983+#ifdef CONFIG_AUFS_DEBUG
21984+ /* just for debugging, not almighty counter */
21985+ atomic_t rcnt, wcnt;
21986+#endif
21987+};
21988+
/*
 * helpers maintaining the debug counters of struct au_rwsem;
 * all of them expand to nothing without CONFIG_AUFS_DEBUG.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuDbgCntInit(rw) \
	do { \
		atomic_set(&(rw)->rcnt, 0); \
		atomic_set(&(rw)->wcnt, 0); \
		smp_mb(); /* atomic set */ \
	} while (0)

#define AuDbgRcntInc(rw)	atomic_inc(&(rw)->rcnt)
#define AuDbgRcntDec(rw)	WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
#define AuDbgWcntInc(rw)	atomic_inc(&(rw)->wcnt)
#define AuDbgWcntDec(rw)	WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
#else
#define AuDbgCntInit(rw)	do {} while (0)
#define AuDbgRcntInc(rw)	do {} while (0)
#define AuDbgRcntDec(rw)	do {} while (0)
#define AuDbgWcntInc(rw)	do {} while (0)
#define AuDbgWcntDec(rw)	do {} while (0)
#endif /* CONFIG_AUFS_DEBUG */
22007+
/* to debug easier, do not make them inlined functions */
#define AuRwMustNoWaiters(rw) \
	AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
/* rwsem_is_locked() is unusable */
#define AuRwMustReadLock(rw) \
	AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
#define AuRwMustWriteLock(rw) \
	AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
#define AuRwMustAnyLock(rw) \
	AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
		  && atomic_read(&(rw)->wcnt) <= 0)
#define AuRwDestroy(rw) \
	AuDebugOn(atomic_read(&(rw)->rcnt) \
		  || atomic_read(&(rw)->wcnt))

/* assign a lockdep class to the wrapped rw_semaphore */
#define au_rw_class(rw, key)	lockdep_set_class(&(rw)->rwsem, key)
22019+
dece6358
AM
22020+static inline void au_rw_init(struct au_rwsem *rw)
22021+{
22022+ AuDbgCntInit(rw);
22023+ init_rwsem(&rw->rwsem);
22024+}
22025+
22026+static inline void au_rw_init_wlock(struct au_rwsem *rw)
22027+{
22028+ au_rw_init(rw);
22029+ down_write(&rw->rwsem);
22030+ AuDbgWcntInc(rw);
22031+}
22032+
22033+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
22034+ unsigned int lsc)
22035+{
22036+ au_rw_init(rw);
22037+ down_write_nested(&rw->rwsem, lsc);
22038+ AuDbgWcntInc(rw);
22039+}
22040+
22041+static inline void au_rw_read_lock(struct au_rwsem *rw)
22042+{
22043+ down_read(&rw->rwsem);
22044+ AuDbgRcntInc(rw);
22045+}
22046+
22047+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
22048+{
22049+ down_read_nested(&rw->rwsem, lsc);
22050+ AuDbgRcntInc(rw);
22051+}
22052+
22053+static inline void au_rw_read_unlock(struct au_rwsem *rw)
22054+{
22055+ AuRwMustReadLock(rw);
22056+ AuDbgRcntDec(rw);
22057+ up_read(&rw->rwsem);
22058+}
22059+
22060+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
22061+{
22062+ AuRwMustWriteLock(rw);
22063+ AuDbgRcntInc(rw);
22064+ AuDbgWcntDec(rw);
22065+ downgrade_write(&rw->rwsem);
22066+}
22067+
22068+static inline void au_rw_write_lock(struct au_rwsem *rw)
22069+{
22070+ down_write(&rw->rwsem);
22071+ AuDbgWcntInc(rw);
22072+}
22073+
22074+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
22075+ unsigned int lsc)
22076+{
22077+ down_write_nested(&rw->rwsem, lsc);
22078+ AuDbgWcntInc(rw);
22079+}
1facf9fc 22080+
dece6358
AM
22081+static inline void au_rw_write_unlock(struct au_rwsem *rw)
22082+{
22083+ AuRwMustWriteLock(rw);
22084+ AuDbgWcntDec(rw);
22085+ up_write(&rw->rwsem);
22086+}
22087+
22088+/* why is not _nested version defined */
22089+static inline int au_rw_read_trylock(struct au_rwsem *rw)
22090+{
22091+ int ret = down_read_trylock(&rw->rwsem);
22092+ if (ret)
22093+ AuDbgRcntInc(rw);
22094+ return ret;
22095+}
22096+
22097+static inline int au_rw_write_trylock(struct au_rwsem *rw)
22098+{
22099+ int ret = down_write_trylock(&rw->rwsem);
22100+ if (ret)
22101+ AuDbgWcntInc(rw);
22102+ return ret;
22103+}
22104+
/* the counter helpers are private to the wrappers above */
#undef AuDbgCntInit
#undef AuDbgRcntInc
#undef AuDbgRcntDec
#undef AuDbgWcntInc
#undef AuDbgWcntDec

/*
 * Generators for typed lock wrappers: @prefix names the generated
 * functions, @param is their parameter declaration and @rwsem is the
 * expression yielding the struct au_rwsem to operate on.
 */
#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_lock(param) \
{ \
	au_rw_read_lock(rwsem); \
} \
static inline void prefix##_write_lock(param) \
{ \
	au_rw_write_lock(rwsem); \
} \
static inline int prefix##_read_trylock(param) \
{ \
	return au_rw_read_trylock(rwsem); \
} \
static inline int prefix##_write_trylock(param) \
{ \
	return au_rw_write_trylock(rwsem); \
}
/* why is not _nested version defined */
/* static inline void prefix##_read_trylock_nested(param, lsc)
{ au_rw_read_trylock_nested(rwsem, lsc)); }
static inline void prefix##_write_trylock_nestd(param, lsc)
{ au_rw_write_trylock_nested(rwsem, lsc); } */

#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
static inline void prefix##_read_unlock(param) \
{ \
	au_rw_read_unlock(rwsem); \
} \
static inline void prefix##_write_unlock(param) \
{ \
	au_rw_write_unlock(rwsem); \
} \
static inline void prefix##_downgrade_lock(param) \
{ \
	au_rw_dgrade_lock(rwsem); \
}

/* both the lock and the unlock families at once */
#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
	AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
	AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
22137+
22138+#endif /* __KERNEL__ */
22139+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
22140diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
22141--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
53392da6 22142+++ linux/fs/aufs/sbinfo.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 22143@@ -0,0 +1,344 @@
1facf9fc 22144+/*
027c5e7a 22145+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22146+ *
22147+ * This program, aufs is free software; you can redistribute it and/or modify
22148+ * it under the terms of the GNU General Public License as published by
22149+ * the Free Software Foundation; either version 2 of the License, or
22150+ * (at your option) any later version.
dece6358
AM
22151+ *
22152+ * This program is distributed in the hope that it will be useful,
22153+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22154+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22155+ * GNU General Public License for more details.
22156+ *
22157+ * You should have received a copy of the GNU General Public License
22158+ * along with this program; if not, write to the Free Software
22159+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22160+ */
22161+
22162+/*
22163+ * superblock private data
22164+ */
22165+
e49829fe 22166+#include <linux/jiffies.h>
1facf9fc 22167+#include "aufs.h"
22168+
22169+/*
22170+ * they are necessary regardless sysfs is disabled.
22171+ */
22172+void au_si_free(struct kobject *kobj)
22173+{
22174+ struct au_sbinfo *sbinfo;
b752ccd1 22175+ char *locked __maybe_unused; /* debug only */
1facf9fc 22176+
22177+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
22178+ AuDebugOn(!list_empty(&sbinfo->si_plink.head));
e49829fe 22179+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 22180+
e49829fe 22181+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 22182+ au_br_free(sbinfo);
e49829fe 22183+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
22184+
22185+ AuDebugOn(radix_tree_gang_lookup
22186+ (&sbinfo->au_si_pid.tree, (void **)&locked,
22187+ /*first_index*/PID_MAX_DEFAULT - 1,
22188+ /*max_items*/sizeof(locked)/sizeof(*locked)));
22189+
1facf9fc 22190+ kfree(sbinfo->si_branch);
b752ccd1 22191+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 22192+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 22193+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 22194+
22195+ kfree(sbinfo);
22196+}
22197+
22198+int au_si_alloc(struct super_block *sb)
22199+{
22200+ int err;
22201+ struct au_sbinfo *sbinfo;
e49829fe 22202+ static struct lock_class_key aufs_si;
1facf9fc 22203+
22204+ err = -ENOMEM;
4a4d8108 22205+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 22206+ if (unlikely(!sbinfo))
22207+ goto out;
22208+
b752ccd1
AM
22209+ BUILD_BUG_ON(sizeof(unsigned long) !=
22210+ sizeof(*sbinfo->au_si_pid.bitmap));
22211+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
22212+ sizeof(*sbinfo->au_si_pid.bitmap),
22213+ GFP_NOFS);
22214+ if (unlikely(!sbinfo->au_si_pid.bitmap))
22215+ goto out_sbinfo;
22216+
1facf9fc 22217+ /* will be reallocated separately */
22218+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
22219+ if (unlikely(!sbinfo->si_branch))
b752ccd1 22220+ goto out_pidmap;
1facf9fc 22221+
1facf9fc 22222+ err = sysaufs_si_init(sbinfo);
22223+ if (unlikely(err))
22224+ goto out_br;
22225+
22226+ au_nwt_init(&sbinfo->si_nowait);
dece6358 22227+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 22228+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
22229+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
22230+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
22231+
7f207e10 22232+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
22233+ atomic_long_set(&sbinfo->si_nfiles, 0);
22234+
1facf9fc 22235+ sbinfo->si_bend = -1;
1facf9fc 22236+
22237+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
22238+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
22239+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
22240+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 22241+
e49829fe 22242+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 22243+
1facf9fc 22244+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 22245+ sbinfo->si_xino_brid = -1;
22246+ /* leave si_xib_last_pindex and si_xib_next_bit */
22247+
e49829fe 22248+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 22249+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
22250+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
22251+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
22252+
22253+ au_spl_init(&sbinfo->si_plink);
22254+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 22255+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 22256+
22257+ /* leave other members for sysaufs and si_mnt. */
22258+ sbinfo->si_sb = sb;
22259+ sb->s_fs_info = sbinfo;
b752ccd1 22260+ si_pid_set(sb);
1facf9fc 22261+ au_debug_sbinfo_init(sbinfo);
22262+ return 0; /* success */
22263+
4f0767ce 22264+out_br:
1facf9fc 22265+ kfree(sbinfo->si_branch);
4f0767ce 22266+out_pidmap:
b752ccd1 22267+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 22268+out_sbinfo:
1facf9fc 22269+ kfree(sbinfo);
4f0767ce 22270+out:
1facf9fc 22271+ return err;
22272+}
22273+
22274+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
22275+{
22276+ int err, sz;
22277+ struct au_branch **brp;
22278+
dece6358
AM
22279+ AuRwMustWriteLock(&sbinfo->si_rwsem);
22280+
1facf9fc 22281+ err = -ENOMEM;
22282+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
22283+ if (unlikely(!sz))
22284+ sz = sizeof(*brp);
22285+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
22286+ if (brp) {
22287+ sbinfo->si_branch = brp;
22288+ err = 0;
22289+ }
22290+
22291+ return err;
22292+}
22293+
22294+/* ---------------------------------------------------------------------- */
22295+
22296+unsigned int au_sigen_inc(struct super_block *sb)
22297+{
22298+ unsigned int gen;
22299+
dece6358
AM
22300+ SiMustWriteLock(sb);
22301+
1facf9fc 22302+ gen = ++au_sbi(sb)->si_generation;
22303+ au_update_digen(sb->s_root);
22304+ au_update_iigen(sb->s_root->d_inode);
22305+ sb->s_root->d_inode->i_version++;
22306+ return gen;
22307+}
22308+
22309+aufs_bindex_t au_new_br_id(struct super_block *sb)
22310+{
22311+ aufs_bindex_t br_id;
22312+ int i;
22313+ struct au_sbinfo *sbinfo;
22314+
dece6358
AM
22315+ SiMustWriteLock(sb);
22316+
1facf9fc 22317+ sbinfo = au_sbi(sb);
22318+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
22319+ br_id = ++sbinfo->si_last_br_id;
7f207e10 22320+ AuDebugOn(br_id < 0);
1facf9fc 22321+ if (br_id && au_br_index(sb, br_id) < 0)
22322+ return br_id;
22323+ }
22324+
22325+ return -1;
22326+}
22327+
22328+/* ---------------------------------------------------------------------- */
22329+
e49829fe
JR
22330+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
22331+int si_read_lock(struct super_block *sb, int flags)
22332+{
22333+ int err;
22334+
22335+ err = 0;
22336+ if (au_ftest_lock(flags, FLUSH))
22337+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22338+
22339+ si_noflush_read_lock(sb);
22340+ err = au_plink_maint(sb, flags);
22341+ if (unlikely(err))
22342+ si_read_unlock(sb);
22343+
22344+ return err;
22345+}
22346+
22347+int si_write_lock(struct super_block *sb, int flags)
22348+{
22349+ int err;
22350+
22351+ if (au_ftest_lock(flags, FLUSH))
22352+ au_nwt_flush(&au_sbi(sb)->si_nowait);
22353+
22354+ si_noflush_write_lock(sb);
22355+ err = au_plink_maint(sb, flags);
22356+ if (unlikely(err))
22357+ si_write_unlock(sb);
22358+
22359+ return err;
22360+}
22361+
1facf9fc 22362+/* dentry and super_block lock. call at entry point */
e49829fe 22363+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 22364+{
e49829fe 22365+ int err;
027c5e7a 22366+ struct super_block *sb;
e49829fe 22367+
027c5e7a
AM
22368+ sb = dentry->d_sb;
22369+ err = si_read_lock(sb, flags);
22370+ if (unlikely(err))
22371+ goto out;
22372+
22373+ if (au_ftest_lock(flags, DW))
22374+ di_write_lock_child(dentry);
22375+ else
22376+ di_read_lock_child(dentry, flags);
22377+
22378+ if (au_ftest_lock(flags, GEN)) {
22379+ err = au_digen_test(dentry, au_sigen(sb));
22380+ AuDebugOn(!err && au_dbrange_test(dentry));
22381+ if (unlikely(err))
22382+ aufs_read_unlock(dentry, flags);
e49829fe
JR
22383+ }
22384+
027c5e7a 22385+out:
e49829fe 22386+ return err;
1facf9fc 22387+}
22388+
22389+void aufs_read_unlock(struct dentry *dentry, int flags)
22390+{
22391+ if (au_ftest_lock(flags, DW))
22392+ di_write_unlock(dentry);
22393+ else
22394+ di_read_unlock(dentry, flags);
22395+ si_read_unlock(dentry->d_sb);
22396+}
22397+
22398+void aufs_write_lock(struct dentry *dentry)
22399+{
e49829fe 22400+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 22401+ di_write_lock_child(dentry);
22402+}
22403+
22404+void aufs_write_unlock(struct dentry *dentry)
22405+{
22406+ di_write_unlock(dentry);
22407+ si_write_unlock(dentry->d_sb);
22408+}
22409+
e49829fe 22410+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 22411+{
e49829fe 22412+ int err;
027c5e7a
AM
22413+ unsigned int sigen;
22414+ struct super_block *sb;
e49829fe 22415+
027c5e7a
AM
22416+ sb = d1->d_sb;
22417+ err = si_read_lock(sb, flags);
22418+ if (unlikely(err))
22419+ goto out;
22420+
22421+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
22422+
22423+ if (au_ftest_lock(flags, GEN)) {
22424+ sigen = au_sigen(sb);
22425+ err = au_digen_test(d1, sigen);
22426+ AuDebugOn(!err && au_dbrange_test(d1));
22427+ if (!err) {
22428+ err = au_digen_test(d2, sigen);
22429+ AuDebugOn(!err && au_dbrange_test(d2));
22430+ }
22431+ if (unlikely(err))
22432+ aufs_read_and_write_unlock2(d1, d2);
22433+ }
22434+
22435+out:
e49829fe 22436+ return err;
1facf9fc 22437+}
22438+
22439+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
22440+{
22441+ di_write_unlock2(d1, d2);
22442+ si_read_unlock(d1->d_sb);
22443+}
b752ccd1
AM
22444+
22445+/* ---------------------------------------------------------------------- */
22446+
22447+int si_pid_test_slow(struct super_block *sb)
22448+{
22449+ void *p;
22450+
22451+ rcu_read_lock();
22452+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
22453+ rcu_read_unlock();
22454+
027c5e7a 22455+ return (long)!!p;
b752ccd1
AM
22456+}
22457+
22458+void si_pid_set_slow(struct super_block *sb)
22459+{
22460+ int err;
22461+ struct au_sbinfo *sbinfo;
22462+
22463+ AuDebugOn(si_pid_test_slow(sb));
22464+
22465+ sbinfo = au_sbi(sb);
22466+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
22467+ AuDebugOn(err);
22468+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22469+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 22470+ /*any valid ptr*/sb);
b752ccd1
AM
22471+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
22472+ AuDebugOn(err);
22473+ radix_tree_preload_end();
22474+}
22475+
22476+void si_pid_clr_slow(struct super_block *sb)
22477+{
22478+ void *p;
22479+ struct au_sbinfo *sbinfo;
22480+
22481+ AuDebugOn(!si_pid_test_slow(sb));
22482+
22483+ sbinfo = au_sbi(sb);
22484+ spin_lock(&sbinfo->au_si_pid.tree_lock);
22485+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
22486+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 22487+}
7f207e10
AM
22488diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
22489--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
53392da6 22490+++ linux/fs/aufs/spl.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 22491@@ -0,0 +1,66 @@
1facf9fc 22492+/*
027c5e7a 22493+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22494+ *
22495+ * This program, aufs is free software; you can redistribute it and/or modify
22496+ * it under the terms of the GNU General Public License as published by
22497+ * the Free Software Foundation; either version 2 of the License, or
22498+ * (at your option) any later version.
dece6358
AM
22499+ *
22500+ * This program is distributed in the hope that it will be useful,
22501+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22502+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22503+ * GNU General Public License for more details.
22504+ *
22505+ * You should have received a copy of the GNU General Public License
22506+ * along with this program; if not, write to the Free Software
22507+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22508+ */
22509+
22510+/*
22511+ * simple list protected by a spinlock
22512+ */
22513+
22514+#ifndef __AUFS_SPL_H__
22515+#define __AUFS_SPL_H__
22516+
22517+#ifdef __KERNEL__
22518+
dece6358
AM
22519+#include <linux/spinlock.h>
22520+#include <linux/list.h>
4a4d8108 22521+#include <linux/rculist.h>
1facf9fc 22522+
22523+struct au_splhead {
22524+ spinlock_t spin;
22525+ struct list_head head;
22526+};
22527+
22528+static inline void au_spl_init(struct au_splhead *spl)
22529+{
22530+ spin_lock_init(&spl->spin);
22531+ INIT_LIST_HEAD(&spl->head);
22532+}
22533+
22534+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
22535+{
22536+ spin_lock(&spl->spin);
22537+ list_add(list, &spl->head);
22538+ spin_unlock(&spl->spin);
22539+}
22540+
22541+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
22542+{
22543+ spin_lock(&spl->spin);
22544+ list_del(list);
22545+ spin_unlock(&spl->spin);
22546+}
22547+
4a4d8108
AM
22548+static inline void au_spl_del_rcu(struct list_head *list,
22549+ struct au_splhead *spl)
22550+{
22551+ spin_lock(&spl->spin);
22552+ list_del_rcu(list);
22553+ spin_unlock(&spl->spin);
22554+}
22555+
1facf9fc 22556+#endif /* __KERNEL__ */
22557+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
22558diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
22559--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
53392da6 22560+++ linux/fs/aufs/super.c 2011-08-24 13:30:24.734646739 +0200
2cbb1c4b 22561@@ -0,0 +1,930 @@
1facf9fc 22562+/*
027c5e7a 22563+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 22564+ *
22565+ * This program, aufs is free software; you can redistribute it and/or modify
22566+ * it under the terms of the GNU General Public License as published by
22567+ * the Free Software Foundation; either version 2 of the License, or
22568+ * (at your option) any later version.
dece6358
AM
22569+ *
22570+ * This program is distributed in the hope that it will be useful,
22571+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22572+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22573+ * GNU General Public License for more details.
22574+ *
22575+ * You should have received a copy of the GNU General Public License
22576+ * along with this program; if not, write to the Free Software
22577+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22578+ */
22579+
22580+/*
22581+ * mount and super_block operations
22582+ */
22583+
22584+#include <linux/buffer_head.h>
e49829fe 22585+#include <linux/jiffies.h>
dece6358 22586+#include <linux/module.h>
1facf9fc 22587+#include <linux/seq_file.h>
22588+#include <linux/statfs.h>
7f207e10
AM
22589+#include <linux/vmalloc.h>
22590+#include <linux/writeback.h>
1facf9fc 22591+#include "aufs.h"
22592+
22593+/*
22594+ * super_operations
22595+ */
22596+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
22597+{
22598+ struct au_icntnr *c;
22599+
22600+ c = au_cache_alloc_icntnr();
22601+ if (c) {
027c5e7a 22602+ au_icntnr_init(c);
1facf9fc 22603+ c->vfs_inode.i_version = 1; /* sigen(sb); */
22604+ c->iinfo.ii_hinode = NULL;
22605+ return &c->vfs_inode;
22606+ }
22607+ return NULL;
22608+}
22609+
027c5e7a
AM
22610+static void aufs_destroy_inode_cb(struct rcu_head *head)
22611+{
22612+ struct inode *inode = container_of(head, struct inode, i_rcu);
22613+
22614+ INIT_LIST_HEAD(&inode->i_dentry);
22615+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
22616+}
22617+
1facf9fc 22618+static void aufs_destroy_inode(struct inode *inode)
22619+{
22620+ au_iinfo_fin(inode);
027c5e7a 22621+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 22622+}
22623+
22624+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
22625+{
22626+ struct inode *inode;
22627+ int err;
22628+
22629+ inode = iget_locked(sb, ino);
22630+ if (unlikely(!inode)) {
22631+ inode = ERR_PTR(-ENOMEM);
22632+ goto out;
22633+ }
22634+ if (!(inode->i_state & I_NEW))
22635+ goto out;
22636+
22637+ err = au_xigen_new(inode);
22638+ if (!err)
22639+ err = au_iinfo_init(inode);
22640+ if (!err)
22641+ inode->i_version++;
22642+ else {
22643+ iget_failed(inode);
22644+ inode = ERR_PTR(err);
22645+ }
22646+
4f0767ce 22647+out:
1facf9fc 22648+ /* never return NULL */
22649+ AuDebugOn(!inode);
22650+ AuTraceErrPtr(inode);
22651+ return inode;
22652+}
22653+
22654+/* lock free root dinfo */
22655+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
22656+{
22657+ int err;
22658+ aufs_bindex_t bindex, bend;
22659+ struct path path;
4a4d8108 22660+ struct au_hdentry *hdp;
1facf9fc 22661+ struct au_branch *br;
22662+
22663+ err = 0;
22664+ bend = au_sbend(sb);
4a4d8108 22665+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 22666+ for (bindex = 0; !err && bindex <= bend; bindex++) {
22667+ br = au_sbr(sb, bindex);
22668+ path.mnt = br->br_mnt;
4a4d8108 22669+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 22670+ err = au_seq_path(seq, &path);
22671+ if (err > 0)
22672+ err = seq_printf(seq, "=%s",
22673+ au_optstr_br_perm(br->br_perm));
22674+ if (!err && bindex != bend)
22675+ err = seq_putc(seq, ':');
22676+ }
22677+
22678+ return err;
22679+}
22680+
22681+static void au_show_wbr_create(struct seq_file *m, int v,
22682+ struct au_sbinfo *sbinfo)
22683+{
22684+ const char *pat;
22685+
dece6358
AM
22686+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22687+
1facf9fc 22688+ seq_printf(m, ",create=");
22689+ pat = au_optstr_wbr_create(v);
22690+ switch (v) {
22691+ case AuWbrCreate_TDP:
22692+ case AuWbrCreate_RR:
22693+ case AuWbrCreate_MFS:
22694+ case AuWbrCreate_PMFS:
22695+ seq_printf(m, pat);
22696+ break;
22697+ case AuWbrCreate_MFSV:
22698+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
22699+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22700+ / MSEC_PER_SEC);
1facf9fc 22701+ break;
22702+ case AuWbrCreate_PMFSV:
22703+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
22704+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22705+ / MSEC_PER_SEC);
1facf9fc 22706+ break;
22707+ case AuWbrCreate_MFSRR:
22708+ seq_printf(m, /*pat*/"mfsrr:%llu",
22709+ sbinfo->si_wbr_mfs.mfsrr_watermark);
22710+ break;
22711+ case AuWbrCreate_MFSRRV:
22712+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
22713+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
22714+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
22715+ / MSEC_PER_SEC);
1facf9fc 22716+ break;
22717+ }
22718+}
22719+
22720+static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
22721+{
22722+#ifdef CONFIG_SYSFS
22723+ return 0;
22724+#else
22725+ int err;
22726+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
22727+ aufs_bindex_t bindex, brid;
22728+ struct super_block *sb;
22729+ struct qstr *name;
22730+ struct file *f;
22731+ struct dentry *d, *h_root;
4a4d8108 22732+ struct au_hdentry *hdp;
1facf9fc 22733+
dece6358
AM
22734+ AuRwMustAnyLock(&sbinfo->si_rwsem);
22735+
1facf9fc 22736+ err = 0;
22737+ sb = mnt->mnt_sb;
22738+ f = au_sbi(sb)->si_xib;
22739+ if (!f)
22740+ goto out;
22741+
22742+ /* stop printing the default xino path on the first writable branch */
22743+ h_root = NULL;
22744+ brid = au_xino_brid(sb);
22745+ if (brid >= 0) {
22746+ bindex = au_br_index(sb, brid);
4a4d8108
AM
22747+ hdp = au_di(sb->s_root)->di_hdentry;
22748+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 22749+ }
22750+ d = f->f_dentry;
22751+ name = &d->d_name;
22752+ /* safe ->d_parent because the file is unlinked */
22753+ if (d->d_parent == h_root
22754+ && name->len == len
22755+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
22756+ goto out;
22757+
22758+ seq_puts(seq, ",xino=");
22759+ err = au_xino_path(seq, f);
22760+
4f0767ce 22761+out:
1facf9fc 22762+ return err;
22763+#endif
22764+}
22765+
22766+/* seq_file will re-call me in case of too long string */
22767+static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
22768+{
027c5e7a 22769+ int err;
1facf9fc 22770+ unsigned int mnt_flags, v;
22771+ struct super_block *sb;
22772+ struct au_sbinfo *sbinfo;
22773+
22774+#define AuBool(name, str) do { \
22775+ v = au_opt_test(mnt_flags, name); \
22776+ if (v != au_opt_test(AuOpt_Def, name)) \
22777+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
22778+} while (0)
22779+
22780+#define AuStr(name, str) do { \
22781+ v = mnt_flags & AuOptMask_##name; \
22782+ if (v != (AuOpt_Def & AuOptMask_##name)) \
22783+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
22784+} while (0)
22785+
22786+#define AuUInt(name, str, val) do { \
22787+ if (val != AUFS_##name##_DEF) \
22788+ seq_printf(m, "," #str "=%u", val); \
22789+} while (0)
22790+
22791+ /* lock free root dinfo */
22792+ sb = mnt->mnt_sb;
22793+ si_noflush_read_lock(sb);
22794+ sbinfo = au_sbi(sb);
22795+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
22796+
22797+ mnt_flags = au_mntflags(sb);
22798+ if (au_opt_test(mnt_flags, XINO)) {
22799+ err = au_show_xino(m, mnt);
22800+ if (unlikely(err))
22801+ goto out;
22802+ } else
22803+ seq_puts(m, ",noxino");
22804+
22805+ AuBool(TRUNC_XINO, trunc_xino);
22806+ AuStr(UDBA, udba);
dece6358 22807+ AuBool(SHWH, shwh);
1facf9fc 22808+ AuBool(PLINK, plink);
4a4d8108 22809+ AuBool(DIO, dio);
1facf9fc 22810+ /* AuBool(DIRPERM1, dirperm1); */
22811+ /* AuBool(REFROF, refrof); */
22812+
22813+ v = sbinfo->si_wbr_create;
22814+ if (v != AuWbrCreate_Def)
22815+ au_show_wbr_create(m, v, sbinfo);
22816+
22817+ v = sbinfo->si_wbr_copyup;
22818+ if (v != AuWbrCopyup_Def)
22819+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
22820+
22821+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
22822+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
22823+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
22824+
22825+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
22826+
027c5e7a
AM
22827+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
22828+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 22829+
22830+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
22831+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
22832+
22833+ AuBool(SUM, sum);
22834+ /* AuBool(SUM_W, wsum); */
22835+ AuBool(WARN_PERM, warn_perm);
22836+ AuBool(VERBOSE, verbose);
22837+
4f0767ce 22838+out:
1facf9fc 22839+ /* be sure to print "br:" last */
22840+ if (!sysaufs_brs) {
22841+ seq_puts(m, ",br:");
22842+ au_show_brs(m, sb);
22843+ }
22844+ si_read_unlock(sb);
22845+ return 0;
22846+
1facf9fc 22847+#undef AuBool
22848+#undef AuStr
4a4d8108 22849+#undef AuUInt
1facf9fc 22850+}
22851+
22852+/* ---------------------------------------------------------------------- */
22853+
22854+/* sum mode which returns the summation for statfs(2) */
22855+
/*
 * Saturating add: returns @a + @b, or ULLONG_MAX when the sum would wrap.
 *
 * The wrap test is "sum < a".  The previous "a < sum" test treated b == 0
 * as an overflow and returned ULLONG_MAX, so a branch reporting zero for a
 * statfs field pushed the running total to the maximum.
 */
static u64 au_add_till_max(u64 a, u64 b)
{
	u64 sum;

	sum = a + b;
	if (sum < a)
		return ULLONG_MAX;	/* wrapped around */
	return sum;
}
22866+
22867+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
22868+{
22869+ int err;
22870+ u64 blocks, bfree, bavail, files, ffree;
22871+ aufs_bindex_t bend, bindex, i;
22872+ unsigned char shared;
7f207e10 22873+ struct path h_path;
1facf9fc 22874+ struct super_block *h_sb;
22875+
22876+ blocks = 0;
22877+ bfree = 0;
22878+ bavail = 0;
22879+ files = 0;
22880+ ffree = 0;
22881+
22882+ err = 0;
22883+ bend = au_sbend(sb);
22884+ for (bindex = bend; bindex >= 0; bindex--) {
7f207e10
AM
22885+ h_path.mnt = au_sbr_mnt(sb, bindex);
22886+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 22887+ shared = 0;
22888+ for (i = bindex + 1; !shared && i <= bend; i++)
22889+ shared = (au_sbr_sb(sb, i) == h_sb);
22890+ if (shared)
22891+ continue;
22892+
22893+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
22894+ h_path.dentry = h_path.mnt->mnt_root;
22895+ err = vfs_statfs(&h_path, buf);
1facf9fc 22896+ if (unlikely(err))
22897+ goto out;
22898+
22899+ blocks = au_add_till_max(blocks, buf->f_blocks);
22900+ bfree = au_add_till_max(bfree, buf->f_bfree);
22901+ bavail = au_add_till_max(bavail, buf->f_bavail);
22902+ files = au_add_till_max(files, buf->f_files);
22903+ ffree = au_add_till_max(ffree, buf->f_ffree);
22904+ }
22905+
22906+ buf->f_blocks = blocks;
22907+ buf->f_bfree = bfree;
22908+ buf->f_bavail = bavail;
22909+ buf->f_files = files;
22910+ buf->f_ffree = ffree;
22911+
4f0767ce 22912+out:
1facf9fc 22913+ return err;
22914+}
22915+
22916+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
22917+{
22918+ int err;
7f207e10 22919+ struct path h_path;
1facf9fc 22920+ struct super_block *sb;
22921+
22922+ /* lock free root dinfo */
22923+ sb = dentry->d_sb;
22924+ si_noflush_read_lock(sb);
7f207e10 22925+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 22926+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
22927+ h_path.mnt = au_sbr_mnt(sb, 0);
22928+ h_path.dentry = h_path.mnt->mnt_root;
22929+ err = vfs_statfs(&h_path, buf);
22930+ } else
1facf9fc 22931+ err = au_statfs_sum(sb, buf);
22932+ si_read_unlock(sb);
22933+
22934+ if (!err) {
22935+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 22936+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 22937+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
22938+ }
22939+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
22940+
22941+ return err;
22942+}
22943+
22944+/* ---------------------------------------------------------------------- */
22945+
1facf9fc 22946+/* final actions when unmounting a file system */
22947+static void aufs_put_super(struct super_block *sb)
22948+{
22949+ struct au_sbinfo *sbinfo;
22950+
22951+ sbinfo = au_sbi(sb);
22952+ if (!sbinfo)
22953+ return;
22954+
1facf9fc 22955+ dbgaufs_si_fin(sbinfo);
22956+ kobject_put(&sbinfo->si_kobj);
22957+}
22958+
22959+/* ---------------------------------------------------------------------- */
22960+
7f207e10
AM
/*
 * Free an array obtained from au_array_alloc(), using vfree() or kfree()
 * according to is_vmalloc_addr().  NULL is accepted and ignored.
 */
void au_array_free(void *array)
{
	if (!array)
		return;

	if (is_vmalloc_addr(array))
		vfree(array);
	else
		kfree(array);
}
22970+
22971+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
22972+{
22973+ void *array;
22974+ unsigned long long n;
22975+
22976+ array = NULL;
22977+ n = 0;
22978+ if (!*hint)
22979+ goto out;
22980+
22981+ if (*hint > ULLONG_MAX / sizeof(array)) {
22982+ array = ERR_PTR(-EMFILE);
22983+ pr_err("hint %llu\n", *hint);
22984+ goto out;
22985+ }
22986+
22987+ array = kmalloc(sizeof(array) * *hint, GFP_NOFS);
22988+ if (unlikely(!array))
22989+ array = vmalloc(sizeof(array) * *hint);
22990+ if (unlikely(!array)) {
22991+ array = ERR_PTR(-ENOMEM);
22992+ goto out;
22993+ }
22994+
22995+ n = cb(array, *hint, arg);
22996+ AuDebugOn(n > *hint);
22997+
22998+out:
22999+ *hint = n;
23000+ return array;
23001+}
23002+
23003+static unsigned long long au_iarray_cb(void *a,
23004+ unsigned long long max __maybe_unused,
23005+ void *arg)
23006+{
23007+ unsigned long long n;
23008+ struct inode **p, *inode;
23009+ struct list_head *head;
23010+
23011+ n = 0;
23012+ p = a;
23013+ head = arg;
2cbb1c4b 23014+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
23015+ list_for_each_entry(inode, head, i_sb_list) {
23016+ if (!is_bad_inode(inode)
23017+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
23018+ spin_lock(&inode->i_lock);
23019+ if (atomic_read(&inode->i_count)) {
23020+ au_igrab(inode);
23021+ *p++ = inode;
23022+ n++;
23023+ AuDebugOn(n > max);
23024+ }
23025+ spin_unlock(&inode->i_lock);
7f207e10
AM
23026+ }
23027+ }
2cbb1c4b 23028+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
23029+
23030+ return n;
23031+}
23032+
23033+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
23034+{
23035+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
23036+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
23037+}
23038+
/* iput() the first @max inodes of @a, then free the array itself. */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max) {
		iput(a[idx]);
		idx++;
	}
	au_array_free(a);
}
23047+
23048+/* ---------------------------------------------------------------------- */
23049+
1facf9fc 23050+/*
23051+ * refresh dentry and inode at remount time.
23052+ */
027c5e7a
AM
23053+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
23054+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
23055+ struct dentry *parent)
1facf9fc 23056+{
23057+ int err;
1facf9fc 23058+
23059+ di_write_lock_child(dentry);
1facf9fc 23060+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
23061+ err = au_refresh_dentry(dentry, parent);
23062+ if (!err && dir_flags)
23063+ au_hn_reset(dentry->d_inode, dir_flags);
1facf9fc 23064+ di_read_unlock(parent, AuLock_IR);
1facf9fc 23065+ di_write_unlock(dentry);
23066+
23067+ return err;
23068+}
23069+
027c5e7a
AM
23070+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
23071+ struct au_sbinfo *sbinfo,
23072+ const unsigned int dir_flags)
1facf9fc 23073+{
027c5e7a
AM
23074+ int err;
23075+ struct dentry *parent;
23076+ struct inode *inode;
23077+
23078+ err = 0;
23079+ parent = dget_parent(dentry);
23080+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
23081+ inode = dentry->d_inode;
23082+ if (inode) {
23083+ if (!S_ISDIR(inode->i_mode))
23084+ err = au_do_refresh(dentry, /*dir_flags*/0,
23085+ parent);
23086+ else {
23087+ err = au_do_refresh(dentry, dir_flags, parent);
23088+ if (unlikely(err))
23089+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
23090+ }
23091+ } else
23092+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
23093+ AuDbgDentry(dentry);
23094+ }
23095+ dput(parent);
23096+
23097+ AuTraceErr(err);
23098+ return err;
1facf9fc 23099+}
23100+
027c5e7a 23101+static int au_refresh_d(struct super_block *sb)
1facf9fc 23102+{
23103+ int err, i, j, ndentry, e;
027c5e7a 23104+ unsigned int sigen;
1facf9fc 23105+ struct au_dcsub_pages dpages;
23106+ struct au_dpage *dpage;
027c5e7a
AM
23107+ struct dentry **dentries, *d;
23108+ struct au_sbinfo *sbinfo;
23109+ struct dentry *root = sb->s_root;
23110+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
1facf9fc 23111+
027c5e7a
AM
23112+ err = au_dpages_init(&dpages, GFP_NOFS);
23113+ if (unlikely(err))
1facf9fc 23114+ goto out;
027c5e7a
AM
23115+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
23116+ if (unlikely(err))
1facf9fc 23117+ goto out_dpages;
1facf9fc 23118+
027c5e7a
AM
23119+ sigen = au_sigen(sb);
23120+ sbinfo = au_sbi(sb);
23121+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 23122+ dpage = dpages.dpages + i;
23123+ dentries = dpage->dentries;
23124+ ndentry = dpage->ndentry;
027c5e7a 23125+ for (j = 0; j < ndentry; j++) {
1facf9fc 23126+ d = dentries[j];
027c5e7a
AM
23127+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags);
23128+ if (unlikely(e && !err))
23129+ err = e;
23130+ /* go on even err */
1facf9fc 23131+ }
23132+ }
23133+
4f0767ce 23134+out_dpages:
1facf9fc 23135+ au_dpages_free(&dpages);
4f0767ce 23136+out:
1facf9fc 23137+ return err;
23138+}
23139+
027c5e7a 23140+static int au_refresh_i(struct super_block *sb)
1facf9fc 23141+{
027c5e7a
AM
23142+ int err, e;
23143+ unsigned int sigen;
23144+ unsigned long long max, ull;
23145+ struct inode *inode, **array;
1facf9fc 23146+
027c5e7a
AM
23147+ array = au_iarray_alloc(sb, &max);
23148+ err = PTR_ERR(array);
23149+ if (IS_ERR(array))
23150+ goto out;
1facf9fc 23151+
23152+ err = 0;
027c5e7a
AM
23153+ sigen = au_sigen(sb);
23154+ for (ull = 0; ull < max; ull++) {
23155+ inode = array[ull];
23156+ if (au_iigen(inode) != sigen) {
1facf9fc 23157+ ii_write_lock_child(inode);
027c5e7a 23158+ e = au_refresh_hinode_self(inode);
1facf9fc 23159+ ii_write_unlock(inode);
23160+ if (unlikely(e)) {
027c5e7a 23161+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 23162+ if (!err)
23163+ err = e;
23164+ /* go on even if err */
23165+ }
23166+ }
1facf9fc 23167+ }
23168+
027c5e7a 23169+ au_iarray_free(array, max);
1facf9fc 23170+
4f0767ce 23171+out:
1facf9fc 23172+ return err;
23173+}
23174+
027c5e7a 23175+static void au_remount_refresh(struct super_block *sb)
1facf9fc 23176+{
027c5e7a
AM
23177+ int err, e;
23178+ unsigned int udba;
23179+ aufs_bindex_t bindex, bend;
1facf9fc 23180+ struct dentry *root;
23181+ struct inode *inode;
027c5e7a 23182+ struct au_branch *br;
1facf9fc 23183+
23184+ au_sigen_inc(sb);
027c5e7a 23185+ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR);
1facf9fc 23186+
23187+ root = sb->s_root;
23188+ DiMustNoWaiters(root);
23189+ inode = root->d_inode;
23190+ IiMustNoWaiters(inode);
1facf9fc 23191+
027c5e7a
AM
23192+ udba = au_opt_udba(sb);
23193+ bend = au_sbend(sb);
23194+ for (bindex = 0; bindex <= bend; bindex++) {
23195+ br = au_sbr(sb, bindex);
23196+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 23197+ if (unlikely(err))
027c5e7a
AM
23198+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
23199+ bindex, err);
23200+ /* go on even if err */
1facf9fc 23201+ }
027c5e7a 23202+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 23203+
027c5e7a
AM
23204+ di_write_unlock(root);
23205+ err = au_refresh_d(sb);
23206+ e = au_refresh_i(sb);
23207+ if (unlikely(e && !err))
23208+ err = e;
1facf9fc 23209+ /* aufs_write_lock() calls ..._child() */
23210+ di_write_lock_child(root);
027c5e7a
AM
23211+
23212+ au_cpup_attr_all(inode, /*force*/1);
23213+
23214+ if (unlikely(err))
23215+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 23216+}
23217+
23218+/* stop extra interpretation of errno in mount(8), and strange error messages */
/*
 * Map -ENOENT, -ENOTDIR, -EEXIST and -EIO to -EINVAL; every other value is
 * returned unchanged.
 */
static int cvt_err(int err)
{
	AuTraceErr(err);

	if (err == -ENOENT
	    || err == -ENOTDIR
	    || err == -EEXIST
	    || err == -EIO)
		return -EINVAL;

	return err;
}
23232+
23233+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
23234+{
4a4d8108
AM
23235+ int err, do_dx;
23236+ unsigned int mntflags;
1facf9fc 23237+ struct au_opts opts;
23238+ struct dentry *root;
23239+ struct inode *inode;
23240+ struct au_sbinfo *sbinfo;
23241+
23242+ err = 0;
23243+ root = sb->s_root;
23244+ if (!data || !*data) {
e49829fe
JR
23245+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23246+ if (!err) {
23247+ di_write_lock_child(root);
23248+ err = au_opts_verify(sb, *flags, /*pending*/0);
23249+ aufs_write_unlock(root);
23250+ }
1facf9fc 23251+ goto out;
23252+ }
23253+
23254+ err = -ENOMEM;
23255+ memset(&opts, 0, sizeof(opts));
23256+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23257+ if (unlikely(!opts.opt))
23258+ goto out;
23259+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23260+ opts.flags = AuOpts_REMOUNT;
23261+ opts.sb_flags = *flags;
23262+
23263+ /* parse it before aufs lock */
23264+ err = au_opts_parse(sb, data, &opts);
23265+ if (unlikely(err))
23266+ goto out_opts;
23267+
23268+ sbinfo = au_sbi(sb);
23269+ inode = root->d_inode;
23270+ mutex_lock(&inode->i_mutex);
e49829fe
JR
23271+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23272+ if (unlikely(err))
23273+ goto out_mtx;
23274+ di_write_lock_child(root);
1facf9fc 23275+
23276+ /* au_opts_remount() may return an error */
23277+ err = au_opts_remount(sb, &opts);
23278+ au_opts_free(&opts);
23279+
027c5e7a
AM
23280+ if (au_ftest_opts(opts.flags, REFRESH))
23281+ au_remount_refresh(sb);
1facf9fc 23282+
4a4d8108
AM
23283+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
23284+ mntflags = au_mntflags(sb);
23285+ do_dx = !!au_opt_test(mntflags, DIO);
23286+ au_dy_arefresh(do_dx);
23287+ }
23288+
1facf9fc 23289+ aufs_write_unlock(root);
953406b4 23290+
e49829fe
JR
23291+out_mtx:
23292+ mutex_unlock(&inode->i_mutex);
4f0767ce 23293+out_opts:
1facf9fc 23294+ free_page((unsigned long)opts.opt);
4f0767ce 23295+out:
1facf9fc 23296+ err = cvt_err(err);
23297+ AuTraceErr(err);
23298+ return err;
23299+}
23300+
4a4d8108 23301+static const struct super_operations aufs_sop = {
1facf9fc 23302+ .alloc_inode = aufs_alloc_inode,
23303+ .destroy_inode = aufs_destroy_inode,
b752ccd1 23304+ /* always deleting, no clearing */
1facf9fc 23305+ .drop_inode = generic_delete_inode,
23306+ .show_options = aufs_show_options,
23307+ .statfs = aufs_statfs,
23308+ .put_super = aufs_put_super,
23309+ .remount_fs = aufs_remount_fs
23310+};
23311+
23312+/* ---------------------------------------------------------------------- */
23313+
23314+static int alloc_root(struct super_block *sb)
23315+{
23316+ int err;
23317+ struct inode *inode;
23318+ struct dentry *root;
23319+
23320+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
23321+ err = PTR_ERR(inode);
23322+ if (IS_ERR(inode))
23323+ goto out;
23324+
23325+ inode->i_op = &aufs_dir_iop;
23326+ inode->i_fop = &aufs_dir_fop;
23327+ inode->i_mode = S_IFDIR;
23328+ inode->i_nlink = 2;
23329+ unlock_new_inode(inode);
23330+
23331+ err = -ENOMEM; /* d_alloc_root() may return NULL */
23332+ root = d_alloc_root(inode);
23333+ if (unlikely(!root))
23334+ goto out_iput;
23335+ err = PTR_ERR(root);
23336+ if (IS_ERR(root))
23337+ goto out_iput;
23338+
4a4d8108 23339+ err = au_di_init(root);
1facf9fc 23340+ if (!err) {
23341+ sb->s_root = root;
23342+ return 0; /* success */
23343+ }
23344+ dput(root);
23345+ goto out; /* do not iput */
23346+
4f0767ce 23347+out_iput:
1facf9fc 23348+ iget_failed(inode);
4f0767ce 23349+out:
1facf9fc 23350+ return err;
23351+
23352+}
23353+
23354+static int aufs_fill_super(struct super_block *sb, void *raw_data,
23355+ int silent __maybe_unused)
23356+{
23357+ int err;
23358+ struct au_opts opts;
23359+ struct dentry *root;
23360+ struct inode *inode;
23361+ char *arg = raw_data;
23362+
23363+ if (unlikely(!arg || !*arg)) {
23364+ err = -EINVAL;
4a4d8108 23365+ pr_err("no arg\n");
1facf9fc 23366+ goto out;
23367+ }
23368+
23369+ err = -ENOMEM;
23370+ memset(&opts, 0, sizeof(opts));
23371+ opts.opt = (void *)__get_free_page(GFP_NOFS);
23372+ if (unlikely(!opts.opt))
23373+ goto out;
23374+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
23375+ opts.sb_flags = sb->s_flags;
23376+
23377+ err = au_si_alloc(sb);
23378+ if (unlikely(err))
23379+ goto out_opts;
23380+
23381+ /* all timestamps always follow the ones on the branch */
23382+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23383+ sb->s_op = &aufs_sop;
027c5e7a 23384+ sb->s_d_op = &aufs_dop;
1facf9fc 23385+ sb->s_magic = AUFS_SUPER_MAGIC;
23386+ sb->s_maxbytes = 0;
23387+ au_export_init(sb);
23388+
23389+ err = alloc_root(sb);
23390+ if (unlikely(err)) {
23391+ si_write_unlock(sb);
23392+ goto out_info;
23393+ }
23394+ root = sb->s_root;
23395+ inode = root->d_inode;
23396+
23397+ /*
23398+ * actually we can parse options regardless of the aufs lock here.
23399+ * but at remount time, parsing must be done before aufs lock.
23400+ * so we follow the same rule.
23401+ */
23402+ ii_write_lock_parent(inode);
23403+ aufs_write_unlock(root);
23404+ err = au_opts_parse(sb, arg, &opts);
23405+ if (unlikely(err))
23406+ goto out_root;
23407+
23408+ /* lock vfs_inode first, then aufs. */
23409+ mutex_lock(&inode->i_mutex);
1facf9fc 23410+ aufs_write_lock(root);
23411+ err = au_opts_mount(sb, &opts);
23412+ au_opts_free(&opts);
1facf9fc 23413+ aufs_write_unlock(root);
23414+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
23415+ if (!err)
23416+ goto out_opts; /* success */
1facf9fc 23417+
4f0767ce 23418+out_root:
1facf9fc 23419+ dput(root);
23420+ sb->s_root = NULL;
4f0767ce 23421+out_info:
2cbb1c4b 23422+ dbgaufs_si_fin(au_sbi(sb));
1facf9fc 23423+ kobject_put(&au_sbi(sb)->si_kobj);
23424+ sb->s_fs_info = NULL;
4f0767ce 23425+out_opts:
1facf9fc 23426+ free_page((unsigned long)opts.opt);
4f0767ce 23427+out:
1facf9fc 23428+ AuTraceErr(err);
23429+ err = cvt_err(err);
23430+ AuTraceErr(err);
23431+ return err;
23432+}
23433+
23434+/* ---------------------------------------------------------------------- */
23435+
027c5e7a
AM
23436+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
23437+ const char *dev_name __maybe_unused,
23438+ void *raw_data)
1facf9fc 23439+{
027c5e7a 23440+ struct dentry *root;
1facf9fc 23441+ struct super_block *sb;
23442+
23443+ /* all timestamps always follow the ones on the branch */
23444+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
23445+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
23446+ if (IS_ERR(root))
23447+ goto out;
23448+
23449+ sb = root->d_sb;
23450+ si_write_lock(sb, !AuLock_FLUSH);
23451+ sysaufs_brs_add(sb, 0);
23452+ si_write_unlock(sb);
23453+ au_sbilist_add(sb);
23454+
23455+out:
23456+ return root;
1facf9fc 23457+}
23458+
e49829fe
JR
23459+static void aufs_kill_sb(struct super_block *sb)
23460+{
23461+ struct au_sbinfo *sbinfo;
23462+
23463+ sbinfo = au_sbi(sb);
23464+ if (sbinfo) {
23465+ au_sbilist_del(sb);
23466+ aufs_write_lock(sb->s_root);
23467+ if (sbinfo->si_wbr_create_ops->fin)
23468+ sbinfo->si_wbr_create_ops->fin(sb);
23469+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
23470+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
027c5e7a 23471+ au_remount_refresh(sb);
e49829fe
JR
23472+ }
23473+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
23474+ au_plink_put(sb, /*verbose*/1);
23475+ au_xino_clr(sb);
23476+ aufs_write_unlock(sb->s_root);
e49829fe
JR
23477+ au_nwt_flush(&sbinfo->si_nowait);
23478+ }
23479+ generic_shutdown_super(sb);
23480+}
23481+
1facf9fc 23482+struct file_system_type aufs_fs_type = {
23483+ .name = AUFS_FSTYPE,
23484+ .fs_flags =
23485+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */
23486+ | FS_REVAL_DOT, /* for NFS branch and udba */
027c5e7a 23487+ .mount = aufs_mount,
e49829fe 23488+ .kill_sb = aufs_kill_sb,
1facf9fc 23489+ /* no need to __module_get() and module_put(). */
23490+ .owner = THIS_MODULE,
23491+};
7f207e10
AM
23492diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
23493--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
23494+++ linux/fs/aufs/super.h 2011-08-24 13:30:24.734646739 +0200
23495@@ -0,0 +1,547 @@
1facf9fc 23496+/*
027c5e7a 23497+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 23498+ *
23499+ * This program, aufs is free software; you can redistribute it and/or modify
23500+ * it under the terms of the GNU General Public License as published by
23501+ * the Free Software Foundation; either version 2 of the License, or
23502+ * (at your option) any later version.
dece6358
AM
23503+ *
23504+ * This program is distributed in the hope that it will be useful,
23505+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23506+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23507+ * GNU General Public License for more details.
23508+ *
23509+ * You should have received a copy of the GNU General Public License
23510+ * along with this program; if not, write to the Free Software
23511+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 23512+ */
23513+
23514+/*
23515+ * super_block operations
23516+ */
23517+
23518+#ifndef __AUFS_SUPER_H__
23519+#define __AUFS_SUPER_H__
23520+
23521+#ifdef __KERNEL__
23522+
23523+#include <linux/fs.h>
1facf9fc 23524+#include <linux/aufs_type.h>
23525+#include "rwsem.h"
23526+#include "spl.h"
23527+#include "wkq.h"
23528+
23529+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
23530+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
23531+ loff_t *);
23532+
23533+/* policies to select one among multiple writable branches */
23534+struct au_wbr_copyup_operations {
23535+ int (*copyup)(struct dentry *dentry);
23536+};
23537+
23538+struct au_wbr_create_operations {
23539+ int (*create)(struct dentry *dentry, int isdir);
23540+ int (*init)(struct super_block *sb);
23541+ int (*fin)(struct super_block *sb);
23542+};
23543+
23544+struct au_wbr_mfs {
23545+ struct mutex mfs_lock; /* protect this structure */
23546+ unsigned long mfs_jiffy;
23547+ unsigned long mfs_expire;
23548+ aufs_bindex_t mfs_bindex;
23549+
23550+ unsigned long long mfsrr_bytes;
23551+ unsigned long long mfsrr_watermark;
23552+};
23553+
1facf9fc 23554+struct au_branch;
23555+struct au_sbinfo {
23556+ /* nowait tasks in the system-wide workqueue */
23557+ struct au_nowait_tasks si_nowait;
23558+
b752ccd1
AM
23559+ /*
23560+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
23561+ * rwsem for au_sbinfo is necessary.
23562+ */
dece6358 23563+ struct au_rwsem si_rwsem;
1facf9fc 23564+
b752ccd1
AM
23565+ /* prevent recursive locking in deleting inode */
23566+ struct {
23567+ unsigned long *bitmap;
23568+ spinlock_t tree_lock;
23569+ struct radix_tree_root tree;
23570+ } au_si_pid;
23571+
7f207e10
AM
23572+ /*
23573+ * dirty approach to protect sb->s_inodes and ->s_files from remount.
23574+ */
23575+ atomic_long_t si_ninodes, si_nfiles;
23576+
1facf9fc 23577+ /* branch management */
23578+ unsigned int si_generation;
23579+
23580+ /* see above flags */
23581+ unsigned char au_si_status;
23582+
23583+ aufs_bindex_t si_bend;
7f207e10
AM
23584+
23585+ /* dirty trick to keep br_id plus */
23586+ unsigned int si_last_br_id :
23587+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 23588+ struct au_branch **si_branch;
23589+
23590+ /* policy to select a writable branch */
23591+ unsigned char si_wbr_copyup;
23592+ unsigned char si_wbr_create;
23593+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
23594+ struct au_wbr_create_operations *si_wbr_create_ops;
23595+
23596+ /* round robin */
23597+ atomic_t si_wbr_rr_next;
23598+
23599+ /* most free space */
23600+ struct au_wbr_mfs si_wbr_mfs;
23601+
23602+ /* mount flags */
23603+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
23604+ unsigned int si_mntflags;
23605+
23606+ /* external inode number (bitmap and translation table) */
23607+ au_readf_t si_xread;
23608+ au_writef_t si_xwrite;
23609+ struct file *si_xib;
23610+ struct mutex si_xib_mtx; /* protect xib members */
23611+ unsigned long *si_xib_buf;
23612+ unsigned long si_xib_last_pindex;
23613+ int si_xib_next_bit;
23614+ aufs_bindex_t si_xino_brid;
23615+ /* reserved for future use */
23616+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
23617+
23618+#ifdef CONFIG_AUFS_EXPORT
23619+ /* i_generation */
23620+ struct file *si_xigen;
23621+ atomic_t si_xigen_next;
23622+#endif
23623+
23624+ /* vdir parameters */
e49829fe 23625+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 23626+ unsigned int si_rdblk; /* deblk size */
23627+ unsigned int si_rdhash; /* hash size */
23628+
23629+ /*
23630+ * If the number of whiteouts is larger than si_dirwh, leave all of
23631+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
23632+ * future fsck.aufs or kernel thread will remove them later.
23633+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
23634+ */
23635+ unsigned int si_dirwh;
23636+
23637+ /*
23638+ * rename(2) a directory with all children.
23639+ */
23640+ /* reserved for future use */
23641+ /* int si_rendir; */
23642+
23643+ /* pseudo_link list */
23644+ struct au_splhead si_plink;
23645+ wait_queue_head_t si_plink_wq;
4a4d8108 23646+ spinlock_t si_plink_maint_lock;
e49829fe 23647+ pid_t si_plink_maint_pid;
1facf9fc 23648+
23649+ /*
23650+ * sysfs and lifetime management.
23651+ * this is not a small structure and it may be a waste of memory in case
23652+ * sysfs is disabled, particularly when many aufs-es are mounted.
23653+ * but using sysfs is the majority case.
23654+ */
23655+ struct kobject si_kobj;
23656+#ifdef CONFIG_DEBUG_FS
23657+ struct dentry *si_dbgaufs, *si_dbgaufs_xib;
23658+#ifdef CONFIG_AUFS_EXPORT
23659+ struct dentry *si_dbgaufs_xigen;
23660+#endif
23661+#endif
23662+
e49829fe
JR
23663+#ifdef CONFIG_AUFS_SBILIST
23664+ struct list_head si_list;
23665+#endif
23666+
1facf9fc 23667+ /* dirty, necessary for unmounting, sysfs and sysrq */
23668+ struct super_block *si_sb;
23669+};
23670+
dece6358
AM
23671+/* sbinfo status flags */
23672+/*
23673+ * set true when refresh_dirs() failed at remount time.
23674+ * then try refreshing dirs at access time again.
23675+ * if it is false, refreshing dirs at access time is unnecessary
23676+ */
027c5e7a 23677+#define AuSi_FAILED_REFRESH_DIR 1
dece6358
AM
23678+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
23679+ unsigned int flag)
23680+{
23681+ AuRwMustAnyLock(&sbi->si_rwsem);
23682+ return sbi->au_si_status & flag;
23683+}
23684+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
23685+#define au_fset_si(sbinfo, name) do { \
23686+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23687+ (sbinfo)->au_si_status |= AuSi_##name; \
23688+} while (0)
23689+#define au_fclr_si(sbinfo, name) do { \
23690+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
23691+ (sbinfo)->au_si_status &= ~AuSi_##name; \
23692+} while (0)
23693+
1facf9fc 23694+/* ---------------------------------------------------------------------- */
23695+
23696+/* policy to select one among writable branches */
4a4d8108
AM
23697+#define AuWbrCopyup(sbinfo, ...) \
23698+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
23699+#define AuWbrCreate(sbinfo, ...) \
23700+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 23701+
23702+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
23703+#define AuLock_DW 1 /* write-lock dentry */
23704+#define AuLock_IR (1 << 1) /* read-lock inode */
23705+#define AuLock_IW (1 << 2) /* write-lock inode */
23706+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
23707+#define AuLock_DIR (1 << 4) /* target is a dir */
e49829fe
JR
23708+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
23709+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 23710+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 23711+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
23712+#define au_fset_lock(flags, name) \
23713+ do { (flags) |= AuLock_##name; } while (0)
23714+#define au_fclr_lock(flags, name) \
23715+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 23716+
23717+/* ---------------------------------------------------------------------- */
23718+
23719+/* super.c */
23720+extern struct file_system_type aufs_fs_type;
23721+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
23722+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
23723+ void *arg);
23724+void au_array_free(void *array);
23725+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
23726+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
23727+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 23728+
23729+/* sbinfo.c */
23730+void au_si_free(struct kobject *kobj);
23731+int au_si_alloc(struct super_block *sb);
23732+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
23733+
23734+unsigned int au_sigen_inc(struct super_block *sb);
23735+aufs_bindex_t au_new_br_id(struct super_block *sb);
23736+
e49829fe
JR
23737+int si_read_lock(struct super_block *sb, int flags);
23738+int si_write_lock(struct super_block *sb, int flags);
23739+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 23740+void aufs_read_unlock(struct dentry *dentry, int flags);
23741+void aufs_write_lock(struct dentry *dentry);
23742+void aufs_write_unlock(struct dentry *dentry);
e49829fe 23743+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 23744+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
23745+
b752ccd1
AM
23746+int si_pid_test_slow(struct super_block *sb);
23747+void si_pid_set_slow(struct super_block *sb);
23748+void si_pid_clr_slow(struct super_block *sb);
23749+
1facf9fc 23750+/* wbr_policy.c */
23751+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
23752+extern struct au_wbr_create_operations au_wbr_create_ops[];
23753+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
23754+
23755+/* ---------------------------------------------------------------------- */
23756+
23757+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
23758+{
23759+ return sb->s_fs_info;
23760+}
23761+
23762+/* ---------------------------------------------------------------------- */
23763+
23764+#ifdef CONFIG_AUFS_EXPORT
23765+void au_export_init(struct super_block *sb);
23766+
b752ccd1 23767+static inline int au_test_nfsd(void)
1facf9fc 23768+{
b752ccd1
AM
23769+ struct task_struct *tsk = current;
23770+
23771+ return (tsk->flags & PF_KTHREAD)
23772+ && !strcmp(tsk->comm, "nfsd");
1facf9fc 23773+}
23774+
b752ccd1 23775+void au_xigen_inc(struct inode *inode);
1facf9fc 23776+int au_xigen_new(struct inode *inode);
23777+int au_xigen_set(struct super_block *sb, struct file *base);
23778+void au_xigen_clr(struct super_block *sb);
23779+
23780+static inline int au_busy_or_stale(void)
23781+{
b752ccd1 23782+ if (!au_test_nfsd())
1facf9fc 23783+ return -EBUSY;
23784+ return -ESTALE;
23785+}
23786+#else
4a4d8108 23787+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1
AM
23788+AuStubInt0(au_test_nfsd, void)
23789+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
23790+AuStubInt0(au_xigen_new, struct inode *inode)
23791+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
23792+AuStubVoid(au_xigen_clr, struct super_block *sb)
1facf9fc 23793+static inline int au_busy_or_stale(void)
23794+{
23795+ return -EBUSY;
23796+}
23797+#endif /* CONFIG_AUFS_EXPORT */
23798+
23799+/* ---------------------------------------------------------------------- */
23800+
e49829fe
JR
23801+#ifdef CONFIG_AUFS_SBILIST
23802+/* module.c */
23803+extern struct au_splhead au_sbilist;
23804+
23805+static inline void au_sbilist_init(void)
23806+{
23807+ au_spl_init(&au_sbilist);
23808+}
23809+
23810+static inline void au_sbilist_add(struct super_block *sb)
23811+{
23812+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
23813+}
23814+
23815+static inline void au_sbilist_del(struct super_block *sb)
23816+{
23817+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
23818+}
53392da6
AM
23819+
23820+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
23821+static inline void au_sbilist_lock(void)
23822+{
23823+ spin_lock(&au_sbilist.spin);
23824+}
23825+
23826+static inline void au_sbilist_unlock(void)
23827+{
23828+ spin_unlock(&au_sbilist.spin);
23829+}
23830+#define AuGFP_SBILIST GFP_ATOMIC
23831+#else
23832+AuStubVoid(au_sbilist_lock, void)
23833+AuStubVoid(au_sbilist_unlock, void)
23834+#define AuGFP_SBILIST GFP_NOFS
23835+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
23836+#else
23837+AuStubVoid(au_sbilist_init, void)
23838+AuStubVoid(au_sbilist_add, struct super_block*)
23839+AuStubVoid(au_sbilist_del, struct super_block*)
53392da6
AM
23840+AuStubVoid(au_sbilist_lock, void)
23841+AuStubVoid(au_sbilist_unlock, void)
23842+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
23843+#endif
23844+
23845+/* ---------------------------------------------------------------------- */
23846+
1facf9fc 23847+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
23848+{
dece6358
AM
23849+ /*
23850+ * This function is a dynamic '__init' function actually,
23851+ * so the tiny check for si_rwsem is unnecessary.
23852+ */
23853+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 23854+#ifdef CONFIG_DEBUG_FS
23855+ sbinfo->si_dbgaufs = NULL;
23856+ sbinfo->si_dbgaufs_xib = NULL;
23857+#ifdef CONFIG_AUFS_EXPORT
23858+ sbinfo->si_dbgaufs_xigen = NULL;
23859+#endif
23860+#endif
23861+}
23862+
23863+/* ---------------------------------------------------------------------- */
23864+
b752ccd1
AM
23865+static inline pid_t si_pid_bit(void)
23866+{
23867+ /* the origin of pid is 1, but the bitmap's is 0 */
23868+ return current->pid - 1;
23869+}
23870+
23871+static inline int si_pid_test(struct super_block *sb)
23872+{
23873+ pid_t bit = si_pid_bit();
23874+ if (bit < PID_MAX_DEFAULT)
23875+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23876+ else
23877+ return si_pid_test_slow(sb);
23878+}
23879+
23880+static inline void si_pid_set(struct super_block *sb)
23881+{
23882+ pid_t bit = si_pid_bit();
23883+ if (bit < PID_MAX_DEFAULT) {
23884+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
23885+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23886+ /* smp_mb(); */
23887+ } else
23888+ si_pid_set_slow(sb);
23889+}
23890+
23891+static inline void si_pid_clr(struct super_block *sb)
23892+{
23893+ pid_t bit = si_pid_bit();
23894+ if (bit < PID_MAX_DEFAULT) {
23895+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
23896+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
23897+ /* smp_mb(); */
23898+ } else
23899+ si_pid_clr_slow(sb);
23900+}
23901+
23902+/* ---------------------------------------------------------------------- */
23903+
1facf9fc 23904+/* lock superblock. mainly for entry point functions */
23905+/*
b752ccd1
AM
23906+ * __si_read_lock, __si_write_lock,
23907+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 23908+ */
b752ccd1 23909+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 23910+
dece6358
AM
23911+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
23912+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
23913+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
23914+
b752ccd1
AM
23915+static inline void si_noflush_read_lock(struct super_block *sb)
23916+{
23917+ __si_read_lock(sb);
23918+ si_pid_set(sb);
23919+}
23920+
23921+static inline int si_noflush_read_trylock(struct super_block *sb)
23922+{
23923+ int locked = __si_read_trylock(sb);
23924+ if (locked)
23925+ si_pid_set(sb);
23926+ return locked;
23927+}
23928+
23929+static inline void si_noflush_write_lock(struct super_block *sb)
23930+{
23931+ __si_write_lock(sb);
23932+ si_pid_set(sb);
23933+}
23934+
23935+static inline int si_noflush_write_trylock(struct super_block *sb)
23936+{
23937+ int locked = __si_write_trylock(sb);
23938+ if (locked)
23939+ si_pid_set(sb);
23940+ return locked;
23941+}
23942+
e49829fe 23943+#if 0 /* unused */
1facf9fc 23944+static inline int si_read_trylock(struct super_block *sb, int flags)
23945+{
23946+ if (au_ftest_lock(flags, FLUSH))
23947+ au_nwt_flush(&au_sbi(sb)->si_nowait);
23948+ return si_noflush_read_trylock(sb);
23949+}
e49829fe 23950+#endif
1facf9fc 23951+
b752ccd1
AM
23952+static inline void si_read_unlock(struct super_block *sb)
23953+{
23954+ si_pid_clr(sb);
23955+ __si_read_unlock(sb);
23956+}
23957+
b752ccd1 23958+#if 0 /* unused */
1facf9fc 23959+static inline int si_write_trylock(struct super_block *sb, int flags)
23960+{
23961+ if (au_ftest_lock(flags, FLUSH))
23962+ au_nwt_flush(&au_sbi(sb)->si_nowait);
23963+ return si_noflush_write_trylock(sb);
23964+}
b752ccd1
AM
23965+#endif
23966+
23967+static inline void si_write_unlock(struct super_block *sb)
23968+{
23969+ si_pid_clr(sb);
23970+ __si_write_unlock(sb);
23971+}
23972+
23973+#if 0 /* unused */
23974+static inline void si_downgrade_lock(struct super_block *sb)
23975+{
23976+ __si_downgrade_lock(sb);
23977+}
23978+#endif
1facf9fc 23979+
23980+/* ---------------------------------------------------------------------- */
23981+
23982+static inline aufs_bindex_t au_sbend(struct super_block *sb)
23983+{
dece6358 23984+ SiMustAnyLock(sb);
1facf9fc 23985+ return au_sbi(sb)->si_bend;
23986+}
23987+
23988+static inline unsigned int au_mntflags(struct super_block *sb)
23989+{
dece6358 23990+ SiMustAnyLock(sb);
1facf9fc 23991+ return au_sbi(sb)->si_mntflags;
23992+}
23993+
23994+static inline unsigned int au_sigen(struct super_block *sb)
23995+{
dece6358 23996+ SiMustAnyLock(sb);
1facf9fc 23997+ return au_sbi(sb)->si_generation;
23998+}
23999+
7f207e10
AM
24000+static inline void au_ninodes_inc(struct super_block *sb)
24001+{
24002+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
24003+}
24004+
24005+static inline void au_ninodes_dec(struct super_block *sb)
24006+{
24007+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
24008+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
24009+}
24010+
24011+static inline void au_nfiles_inc(struct super_block *sb)
24012+{
24013+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
24014+}
24015+
24016+static inline void au_nfiles_dec(struct super_block *sb)
24017+{
24018+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
24019+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
24020+}
24021+
1facf9fc 24022+static inline struct au_branch *au_sbr(struct super_block *sb,
24023+ aufs_bindex_t bindex)
24024+{
dece6358 24025+ SiMustAnyLock(sb);
1facf9fc 24026+ return au_sbi(sb)->si_branch[0 + bindex];
24027+}
24028+
24029+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
24030+{
dece6358 24031+ SiMustWriteLock(sb);
1facf9fc 24032+ au_sbi(sb)->si_xino_brid = brid;
24033+}
24034+
24035+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
24036+{
dece6358 24037+ SiMustAnyLock(sb);
1facf9fc 24038+ return au_sbi(sb)->si_xino_brid;
24039+}
24040+
24041+#endif /* __KERNEL__ */
24042+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
24043diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
24044--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24045+++ linux/fs/aufs/sysaufs.c 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24046@@ -0,0 +1,107 @@
1facf9fc 24047+/*
027c5e7a 24048+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24049+ *
24050+ * This program, aufs is free software; you can redistribute it and/or modify
24051+ * it under the terms of the GNU General Public License as published by
24052+ * the Free Software Foundation; either version 2 of the License, or
24053+ * (at your option) any later version.
dece6358
AM
24054+ *
24055+ * This program is distributed in the hope that it will be useful,
24056+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24057+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24058+ * GNU General Public License for more details.
24059+ *
24060+ * You should have received a copy of the GNU General Public License
24061+ * along with this program; if not, write to the Free Software
24062+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24063+ */
24064+
24065+/*
24066+ * sysfs interface and lifetime management
24067+ * they are necessary even when sysfs is disabled.
24068+ */
24069+
24070+#include <linux/fs.h>
24071+#include <linux/random.h>
24072+#include <linux/sysfs.h>
24073+#include "aufs.h"
24074+
24075+unsigned long sysaufs_si_mask;
e49829fe 24076+struct kset *sysaufs_kset;
1facf9fc 24077+
24078+#define AuSiAttr(_name) { \
24079+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
24080+ .show = sysaufs_si_##_name, \
24081+}
24082+
24083+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
24084+struct attribute *sysaufs_si_attrs[] = {
24085+ &sysaufs_si_attr_xi_path.attr,
24086+ NULL,
24087+};
24088+
4a4d8108 24089+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 24090+ .show = sysaufs_si_show
24091+};
24092+
24093+static struct kobj_type au_sbi_ktype = {
24094+ .release = au_si_free,
24095+ .sysfs_ops = &au_sbi_ops,
24096+ .default_attrs = sysaufs_si_attrs
24097+};
24098+
24099+/* ---------------------------------------------------------------------- */
24100+
24101+int sysaufs_si_init(struct au_sbinfo *sbinfo)
24102+{
24103+ int err;
24104+
e49829fe 24105+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 24106+ /* cf. sysaufs_name() */
24107+ err = kobject_init_and_add
e49829fe 24108+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 24109+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
24110+
24111+ dbgaufs_si_null(sbinfo);
24112+ if (!err) {
24113+ err = dbgaufs_si_init(sbinfo);
24114+ if (unlikely(err))
24115+ kobject_put(&sbinfo->si_kobj);
24116+ }
24117+ return err;
24118+}
24119+
24120+void sysaufs_fin(void)
24121+{
24122+ dbgaufs_fin();
e49829fe
JR
24123+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
24124+ kset_unregister(sysaufs_kset);
1facf9fc 24125+}
24126+
24127+int __init sysaufs_init(void)
24128+{
24129+ int err;
24130+
24131+ do {
24132+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
24133+ } while (!sysaufs_si_mask);
24134+
4a4d8108 24135+ err = -EINVAL;
e49829fe
JR
24136+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
24137+ if (unlikely(!sysaufs_kset))
4a4d8108 24138+ goto out;
e49829fe
JR
24139+ err = PTR_ERR(sysaufs_kset);
24140+ if (IS_ERR(sysaufs_kset))
1facf9fc 24141+ goto out;
e49829fe 24142+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 24143+ if (unlikely(err)) {
e49829fe 24144+ kset_unregister(sysaufs_kset);
1facf9fc 24145+ goto out;
24146+ }
24147+
24148+ err = dbgaufs_init();
24149+ if (unlikely(err))
24150+ sysaufs_fin();
4f0767ce 24151+out:
1facf9fc 24152+ return err;
24153+}
7f207e10
AM
24154diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
24155--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
53392da6 24156+++ linux/fs/aufs/sysaufs.h 2011-08-24 13:30:24.734646739 +0200
4a4d8108 24157@@ -0,0 +1,105 @@
1facf9fc 24158+/*
027c5e7a 24159+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24160+ *
24161+ * This program, aufs is free software; you can redistribute it and/or modify
24162+ * it under the terms of the GNU General Public License as published by
24163+ * the Free Software Foundation; either version 2 of the License, or
24164+ * (at your option) any later version.
dece6358
AM
24165+ *
24166+ * This program is distributed in the hope that it will be useful,
24167+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24168+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24169+ * GNU General Public License for more details.
24170+ *
24171+ * You should have received a copy of the GNU General Public License
24172+ * along with this program; if not, write to the Free Software
24173+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24174+ */
24175+
24176+/*
24177+ * sysfs interface and mount lifetime management
24178+ */
24179+
24180+#ifndef __SYSAUFS_H__
24181+#define __SYSAUFS_H__
24182+
24183+#ifdef __KERNEL__
24184+
1facf9fc 24185+#include <linux/sysfs.h>
24186+#include <linux/aufs_type.h>
24187+#include "module.h"
24188+
dece6358
AM
24189+struct super_block;
24190+struct au_sbinfo;
24191+
1facf9fc 24192+struct sysaufs_si_attr {
24193+ struct attribute attr;
24194+ int (*show)(struct seq_file *seq, struct super_block *sb);
24195+};
24196+
24197+/* ---------------------------------------------------------------------- */
24198+
24199+/* sysaufs.c */
24200+extern unsigned long sysaufs_si_mask;
e49829fe 24201+extern struct kset *sysaufs_kset;
1facf9fc 24202+extern struct attribute *sysaufs_si_attrs[];
24203+int sysaufs_si_init(struct au_sbinfo *sbinfo);
24204+int __init sysaufs_init(void);
24205+void sysaufs_fin(void);
24206+
24207+/* ---------------------------------------------------------------------- */
24208+
24209+/* some people don't like to expose a kernel pointer */
24210+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
24211+{
24212+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
24213+}
24214+
24215+#define SysaufsSiNamePrefix "si_"
24216+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
24217+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
24218+{
24219+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
24220+ sysaufs_si_id(sbinfo));
24221+}
24222+
24223+struct au_branch;
24224+#ifdef CONFIG_SYSFS
24225+/* sysfs.c */
24226+extern struct attribute_group *sysaufs_attr_group;
24227+
24228+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
24229+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24230+ char *buf);
24231+
24232+void sysaufs_br_init(struct au_branch *br);
24233+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
24234+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
24235+
24236+#define sysaufs_brs_init() do {} while (0)
24237+
24238+#else
24239+#define sysaufs_attr_group NULL
24240+
4a4d8108 24241+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
1facf9fc 24242+
24243+static inline
24244+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
24245+ char *buf)
24246+{
24247+ return 0;
24248+}
24249+
4a4d8108
AM
24250+AuStubVoid(sysaufs_br_init, struct au_branch *br)
24251+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
24252+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 24253+
24254+static inline void sysaufs_brs_init(void)
24255+{
24256+ sysaufs_brs = 0;
24257+}
24258+
24259+#endif /* CONFIG_SYSFS */
24260+
24261+#endif /* __KERNEL__ */
24262+#endif /* __SYSAUFS_H__ */
7f207e10
AM
24263diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
24264--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24265+++ linux/fs/aufs/sysfs.c 2011-08-24 13:30:24.734646739 +0200
953406b4 24266@@ -0,0 +1,250 @@
1facf9fc 24267+/*
027c5e7a 24268+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24269+ *
24270+ * This program, aufs is free software; you can redistribute it and/or modify
24271+ * it under the terms of the GNU General Public License as published by
24272+ * the Free Software Foundation; either version 2 of the License, or
24273+ * (at your option) any later version.
dece6358
AM
24274+ *
24275+ * This program is distributed in the hope that it will be useful,
24276+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24277+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24278+ * GNU General Public License for more details.
24279+ *
24280+ * You should have received a copy of the GNU General Public License
24281+ * along with this program; if not, write to the Free Software
24282+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24283+ */
24284+
24285+/*
24286+ * sysfs interface
24287+ */
24288+
24289+#include <linux/fs.h>
dece6358 24290+#include <linux/module.h>
1facf9fc 24291+#include <linux/seq_file.h>
24292+#include <linux/sysfs.h>
24293+#include "aufs.h"
24294+
4a4d8108
AM
24295+#ifdef CONFIG_AUFS_FS_MODULE
24296+/* this entry violates the "one line per file" policy of sysfs */
24297+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
24298+ char *buf)
24299+{
24300+ ssize_t err;
24301+ static char *conf =
24302+/* this file is generated at compiling */
24303+#include "conf.str"
24304+ ;
24305+
24306+ err = snprintf(buf, PAGE_SIZE, conf);
24307+ if (unlikely(err >= PAGE_SIZE))
24308+ err = -EFBIG;
24309+ return err;
24310+}
24311+
24312+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
24313+#endif
24314+
1facf9fc 24315+static struct attribute *au_attr[] = {
4a4d8108
AM
24316+#ifdef CONFIG_AUFS_FS_MODULE
24317+ &au_config_attr.attr,
24318+#endif
1facf9fc 24319+ NULL, /* need to NULL terminate the list of attributes */
24320+};
24321+
24322+static struct attribute_group sysaufs_attr_group_body = {
24323+ .attrs = au_attr
24324+};
24325+
24326+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
24327+
24328+/* ---------------------------------------------------------------------- */
24329+
24330+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
24331+{
24332+ int err;
24333+
dece6358
AM
24334+ SiMustAnyLock(sb);
24335+
1facf9fc 24336+ err = 0;
24337+ if (au_opt_test(au_mntflags(sb), XINO)) {
24338+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
24339+ seq_putc(seq, '\n');
24340+ }
24341+ return err;
24342+}
24343+
24344+/*
24345+ * the lifetime of branch is independent from the entry under sysfs.
24346+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
24347+ * unlinked.
24348+ */
24349+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
24350+ aufs_bindex_t bindex)
24351+{
24352+ struct path path;
24353+ struct dentry *root;
24354+ struct au_branch *br;
24355+
24356+ AuDbg("b%d\n", bindex);
24357+
24358+ root = sb->s_root;
24359+ di_read_lock_parent(root, !AuLock_IR);
24360+ br = au_sbr(sb, bindex);
24361+ path.mnt = br->br_mnt;
24362+ path.dentry = au_h_dptr(root, bindex);
24363+ au_seq_path(seq, &path);
24364+ di_read_unlock(root, !AuLock_IR);
24365+ seq_printf(seq, "=%s\n", au_optstr_br_perm(br->br_perm));
24366+ return 0;
24367+}
24368+
24369+/* ---------------------------------------------------------------------- */
24370+
24371+static struct seq_file *au_seq(char *p, ssize_t len)
24372+{
24373+ struct seq_file *seq;
24374+
24375+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
24376+ if (seq) {
24377+ /* mutex_init(&seq.lock); */
24378+ seq->buf = p;
24379+ seq->size = len;
24380+ return seq; /* success */
24381+ }
24382+
24383+ seq = ERR_PTR(-ENOMEM);
24384+ return seq;
24385+}
24386+
24387+#define SysaufsBr_PREFIX "br"
24388+
24389+/* todo: file size may exceed PAGE_SIZE */
24390+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 24391+ char *buf)
1facf9fc 24392+{
24393+ ssize_t err;
24394+ long l;
24395+ aufs_bindex_t bend;
24396+ struct au_sbinfo *sbinfo;
24397+ struct super_block *sb;
24398+ struct seq_file *seq;
24399+ char *name;
24400+ struct attribute **cattr;
24401+
24402+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
24403+ sb = sbinfo->si_sb;
1308ab2a 24404+
24405+ /*
24406+ * prevent a race condition between sysfs and aufs.
24407+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
24408+ * prohibits maintaining the sysfs entries.
24409+ * here we acquire read lock after sysfs_get_active_two().
24410+ * on the other hand, the remount process may maintain the sysfs/aufs
24411+ * entries after acquiring write lock.
24412+ * it can cause a deadlock.
24413+ * simply we gave up processing read here.
24414+ */
24415+ err = -EBUSY;
24416+ if (unlikely(!si_noflush_read_trylock(sb)))
24417+ goto out;
1facf9fc 24418+
24419+ seq = au_seq(buf, PAGE_SIZE);
24420+ err = PTR_ERR(seq);
24421+ if (IS_ERR(seq))
1308ab2a 24422+ goto out_unlock;
1facf9fc 24423+
24424+ name = (void *)attr->name;
24425+ cattr = sysaufs_si_attrs;
24426+ while (*cattr) {
24427+ if (!strcmp(name, (*cattr)->name)) {
24428+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
24429+ ->show(seq, sb);
24430+ goto out_seq;
24431+ }
24432+ cattr++;
24433+ }
24434+
24435+ bend = au_sbend(sb);
24436+ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
24437+ name += sizeof(SysaufsBr_PREFIX) - 1;
24438+ err = strict_strtol(name, 10, &l);
24439+ if (!err) {
24440+ if (l <= bend)
24441+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
24442+ else
24443+ err = -ENOENT;
24444+ }
24445+ goto out_seq;
24446+ }
24447+ BUG();
24448+
4f0767ce 24449+out_seq:
1facf9fc 24450+ if (!err) {
24451+ err = seq->count;
24452+ /* sysfs limit */
24453+ if (unlikely(err == PAGE_SIZE))
24454+ err = -EFBIG;
24455+ }
24456+ kfree(seq);
4f0767ce 24457+out_unlock:
1facf9fc 24458+ si_read_unlock(sb);
4f0767ce 24459+out:
1facf9fc 24460+ return err;
24461+}
24462+
24463+/* ---------------------------------------------------------------------- */
24464+
24465+void sysaufs_br_init(struct au_branch *br)
24466+{
4a4d8108
AM
24467+ struct attribute *attr = &br->br_attr;
24468+
24469+ sysfs_attr_init(attr);
24470+ attr->name = br->br_name;
24471+ attr->mode = S_IRUGO;
1facf9fc 24472+}
24473+
24474+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
24475+{
24476+ struct au_branch *br;
24477+ struct kobject *kobj;
24478+ aufs_bindex_t bend;
24479+
24480+ dbgaufs_brs_del(sb, bindex);
24481+
24482+ if (!sysaufs_brs)
24483+ return;
24484+
24485+ kobj = &au_sbi(sb)->si_kobj;
24486+ bend = au_sbend(sb);
24487+ for (; bindex <= bend; bindex++) {
24488+ br = au_sbr(sb, bindex);
24489+ sysfs_remove_file(kobj, &br->br_attr);
24490+ }
24491+}
24492+
24493+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
24494+{
24495+ int err;
24496+ aufs_bindex_t bend;
24497+ struct kobject *kobj;
24498+ struct au_branch *br;
24499+
24500+ dbgaufs_brs_add(sb, bindex);
24501+
24502+ if (!sysaufs_brs)
24503+ return;
24504+
24505+ kobj = &au_sbi(sb)->si_kobj;
24506+ bend = au_sbend(sb);
24507+ for (; bindex <= bend; bindex++) {
24508+ br = au_sbr(sb, bindex);
24509+ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
24510+ "%d", bindex);
24511+ err = sysfs_create_file(kobj, &br->br_attr);
24512+ if (unlikely(err))
4a4d8108
AM
24513+ pr_warning("failed %s under sysfs(%d)\n",
24514+ br->br_name, err);
1facf9fc 24515+ }
24516+}
7f207e10
AM
24517diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
24518--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24519+++ linux/fs/aufs/sysrq.c 2011-08-24 13:30:24.734646739 +0200
2cbb1c4b 24520@@ -0,0 +1,151 @@
1facf9fc 24521+/*
027c5e7a 24522+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24523+ *
24524+ * This program, aufs is free software; you can redistribute it and/or modify
24525+ * it under the terms of the GNU General Public License as published by
24526+ * the Free Software Foundation; either version 2 of the License, or
24527+ * (at your option) any later version.
dece6358
AM
24528+ *
24529+ * This program is distributed in the hope that it will be useful,
24530+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24531+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24532+ * GNU General Public License for more details.
24533+ *
24534+ * You should have received a copy of the GNU General Public License
24535+ * along with this program; if not, write to the Free Software
24536+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24537+ */
24538+
24539+/*
24540+ * magic sysrq handler
24541+ */
24542+
24543+#include <linux/fs.h>
24544+#include <linux/module.h>
24545+#include <linux/moduleparam.h>
24546+/* #include <linux/sysrq.h> */
027c5e7a 24547+#include <linux/writeback.h>
1facf9fc 24548+#include "aufs.h"
24549+
24550+/* ---------------------------------------------------------------------- */
24551+
24552+static void sysrq_sb(struct super_block *sb)
24553+{
24554+ char *plevel;
24555+ struct au_sbinfo *sbinfo;
24556+ struct file *file;
24557+
24558+ plevel = au_plevel;
24559+ au_plevel = KERN_WARNING;
1facf9fc 24560+
24561+ sbinfo = au_sbi(sb);
4a4d8108
AM
24562+ /* since we define pr_fmt, call printk directly */
24563+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
24564+ printk(KERN_WARNING AUFS_NAME ": superblock\n");
1facf9fc 24565+ au_dpri_sb(sb);
027c5e7a
AM
24566+
24567+#if 0
4a4d8108 24568+ printk(KERN_WARNING AUFS_NAME ": root dentry\n");
1facf9fc 24569+ au_dpri_dentry(sb->s_root);
4a4d8108 24570+ printk(KERN_WARNING AUFS_NAME ": root inode\n");
1facf9fc 24571+ au_dpri_inode(sb->s_root->d_inode);
027c5e7a
AM
24572+#endif
24573+
1facf9fc 24574+#if 0
027c5e7a
AM
24575+ do {
24576+ int err, i, j, ndentry;
24577+ struct au_dcsub_pages dpages;
24578+ struct au_dpage *dpage;
24579+
24580+ err = au_dpages_init(&dpages, GFP_ATOMIC);
24581+ if (unlikely(err))
24582+ break;
24583+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
24584+ if (!err)
24585+ for (i = 0; i < dpages.ndpage; i++) {
24586+ dpage = dpages.dpages + i;
24587+ ndentry = dpage->ndentry;
24588+ for (j = 0; j < ndentry; j++)
24589+ au_dpri_dentry(dpage->dentries[j]);
24590+ }
24591+ au_dpages_free(&dpages);
24592+ } while (0);
24593+#endif
24594+
24595+#if 1
24596+ {
24597+ struct inode *i;
24598+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n");
2cbb1c4b
JR
24599+ spin_lock(&inode_sb_list_lock);
24600+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
24601+ spin_lock(&i->i_lock);
027c5e7a
AM
24602+ if (1 || list_empty(&i->i_dentry))
24603+ au_dpri_inode(i);
2cbb1c4b
JR
24604+ spin_unlock(&i->i_lock);
24605+ }
24606+ spin_unlock(&inode_sb_list_lock);
027c5e7a 24607+ }
1facf9fc 24608+#endif
4a4d8108 24609+ printk(KERN_WARNING AUFS_NAME ": files\n");
0c5527e5
AM
24610+ lg_global_lock(files_lglock);
24611+ do_file_list_for_each_entry(sb, file) {
4a4d8108
AM
24612+ umode_t mode;
24613+ mode = file->f_dentry->d_inode->i_mode;
24614+ if (!special_file(mode) || au_special_file(mode))
1facf9fc 24615+ au_dpri_file(file);
0c5527e5
AM
24616+ } while_file_list_for_each_entry;
24617+ lg_global_unlock(files_lglock);
e49829fe 24618+ printk(KERN_WARNING AUFS_NAME ": done\n");
1facf9fc 24619+
24620+ au_plevel = plevel;
1facf9fc 24621+}
24622+
24623+/* ---------------------------------------------------------------------- */
24624+
24625+/* module parameter */
24626+static char *aufs_sysrq_key = "a";
24627+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
24628+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
24629+
0c5527e5 24630+static void au_sysrq(int key __maybe_unused)
1facf9fc 24631+{
1facf9fc 24632+ struct au_sbinfo *sbinfo;
24633+
027c5e7a 24634+ lockdep_off();
53392da6 24635+ au_sbilist_lock();
e49829fe 24636+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 24637+ sysrq_sb(sbinfo->si_sb);
53392da6 24638+ au_sbilist_unlock();
027c5e7a 24639+ lockdep_on();
1facf9fc 24640+}
24641+
24642+static struct sysrq_key_op au_sysrq_op = {
24643+ .handler = au_sysrq,
24644+ .help_msg = "Aufs",
24645+ .action_msg = "Aufs",
24646+ .enable_mask = SYSRQ_ENABLE_DUMP
24647+};
24648+
24649+/* ---------------------------------------------------------------------- */
24650+
24651+int __init au_sysrq_init(void)
24652+{
24653+ int err;
24654+ char key;
24655+
24656+ err = -1;
24657+ key = *aufs_sysrq_key;
24658+ if ('a' <= key && key <= 'z')
24659+ err = register_sysrq_key(key, &au_sysrq_op);
24660+ if (unlikely(err))
4a4d8108 24661+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 24662+ return err;
24663+}
24664+
24665+void au_sysrq_fin(void)
24666+{
24667+ int err;
24668+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
24669+ if (unlikely(err))
4a4d8108 24670+ pr_err("err %d (ignored)\n", err);
1facf9fc 24671+}
7f207e10
AM
24672diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
24673--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
53392da6 24674+++ linux/fs/aufs/vdir.c 2011-08-24 13:30:24.734646739 +0200
7f207e10 24675@@ -0,0 +1,886 @@
1facf9fc 24676+/*
027c5e7a 24677+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 24678+ *
24679+ * This program, aufs is free software; you can redistribute it and/or modify
24680+ * it under the terms of the GNU General Public License as published by
24681+ * the Free Software Foundation; either version 2 of the License, or
24682+ * (at your option) any later version.
dece6358
AM
24683+ *
24684+ * This program is distributed in the hope that it will be useful,
24685+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24686+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24687+ * GNU General Public License for more details.
24688+ *
24689+ * You should have received a copy of the GNU General Public License
24690+ * along with this program; if not, write to the Free Software
24691+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24692+ */
24693+
24694+/*
24695+ * virtual or vertical directory
24696+ */
24697+
dece6358 24698+#include <linux/hash.h>
1facf9fc 24699+#include "aufs.h"
24700+
dece6358 24701+static unsigned int calc_size(int nlen)
1facf9fc 24702+{
dece6358 24703+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 24704+}
24705+
24706+static int set_deblk_end(union au_vdir_deblk_p *p,
24707+ union au_vdir_deblk_p *deblk_end)
24708+{
24709+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
24710+ p->de->de_str.len = 0;
24711+ /* smp_mb(); */
24712+ return 0;
24713+ }
24714+ return -1; /* error */
24715+}
24716+
24717+/* returns true or false */
24718+static int is_deblk_end(union au_vdir_deblk_p *p,
24719+ union au_vdir_deblk_p *deblk_end)
24720+{
24721+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
24722+ return !p->de->de_str.len;
24723+ return 1;
24724+}
24725+
24726+static unsigned char *last_deblk(struct au_vdir *vdir)
24727+{
24728+ return vdir->vd_deblk[vdir->vd_nblk - 1];
24729+}
24730+
24731+/* ---------------------------------------------------------------------- */
24732+
1308ab2a 24733+/* estimate the appropriate size for name hash table */
24734+unsigned int au_rdhash_est(loff_t sz)
24735+{
24736+ unsigned int n;
24737+
24738+ n = UINT_MAX;
24739+ sz >>= 10;
24740+ if (sz < n)
24741+ n = sz;
24742+ if (sz < AUFS_RDHASH_DEF)
24743+ n = AUFS_RDHASH_DEF;
4a4d8108 24744+ /* pr_info("n %u\n", n); */
1308ab2a 24745+ return n;
24746+}
24747+
1facf9fc 24748+/*
24749+ * the allocated memory has to be freed by
dece6358 24750+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 24751+ */
dece6358 24752+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 24753+{
1facf9fc 24754+ struct hlist_head *head;
dece6358 24755+ unsigned int u;
1facf9fc 24756+
dece6358
AM
24757+ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp);
24758+ if (head) {
24759+ nhash->nh_num = num_hash;
24760+ nhash->nh_head = head;
24761+ for (u = 0; u < num_hash; u++)
1facf9fc 24762+ INIT_HLIST_HEAD(head++);
dece6358 24763+ return 0; /* success */
1facf9fc 24764+ }
1facf9fc 24765+
dece6358 24766+ return -ENOMEM;
1facf9fc 24767+}
24768+
dece6358
AM
24769+static void nhash_count(struct hlist_head *head)
24770+{
24771+#if 0
24772+ unsigned long n;
24773+ struct hlist_node *pos;
24774+
24775+ n = 0;
24776+ hlist_for_each(pos, head)
24777+ n++;
4a4d8108 24778+ pr_info("%lu\n", n);
dece6358
AM
24779+#endif
24780+}
24781+
24782+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 24783+{
1facf9fc 24784+ struct au_vdir_wh *tpos;
24785+ struct hlist_node *pos, *node;
24786+
dece6358
AM
24787+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
24788+ /* hlist_del(pos); */
24789+ kfree(tpos);
1facf9fc 24790+ }
24791+}
24792+
dece6358 24793+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 24794+{
dece6358
AM
24795+ struct au_vdir_dehstr *tpos;
24796+ struct hlist_node *pos, *node;
1facf9fc 24797+
dece6358
AM
24798+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
24799+ /* hlist_del(pos); */
4a4d8108 24800+ au_cache_free_vdir_dehstr(tpos);
1facf9fc 24801+ }
1facf9fc 24802+}
24803+
dece6358
AM
24804+static void au_nhash_do_free(struct au_nhash *nhash,
24805+ void (*free)(struct hlist_head *head))
1facf9fc 24806+{
1308ab2a 24807+ unsigned int n;
1facf9fc 24808+ struct hlist_head *head;
1facf9fc 24809+
dece6358 24810+ n = nhash->nh_num;
1308ab2a 24811+ if (!n)
24812+ return;
24813+
dece6358 24814+ head = nhash->nh_head;
1308ab2a 24815+ while (n-- > 0) {
dece6358
AM
24816+ nhash_count(head);
24817+ free(head++);
1facf9fc 24818+ }
dece6358 24819+ kfree(nhash->nh_head);
1facf9fc 24820+}
24821+
dece6358 24822+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 24823+{
dece6358
AM
24824+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
24825+}
1facf9fc 24826+
dece6358
AM
24827+static void au_nhash_de_free(struct au_nhash *delist)
24828+{
24829+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 24830+}
24831+
24832+/* ---------------------------------------------------------------------- */
24833+
24834+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
24835+ int limit)
24836+{
24837+ int num;
24838+ unsigned int u, n;
24839+ struct hlist_head *head;
24840+ struct au_vdir_wh *tpos;
24841+ struct hlist_node *pos;
24842+
24843+ num = 0;
24844+ n = whlist->nh_num;
24845+ head = whlist->nh_head;
1308ab2a 24846+ for (u = 0; u < n; u++, head++)
1facf9fc 24847+ hlist_for_each_entry(tpos, pos, head, wh_hash)
24848+ if (tpos->wh_bindex == btgt && ++num > limit)
24849+ return 1;
1facf9fc 24850+ return 0;
24851+}
24852+
24853+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 24854+ unsigned char *name,
1facf9fc 24855+ unsigned int len)
24856+{
dece6358
AM
24857+ unsigned int v;
24858+ /* const unsigned int magic_bit = 12; */
24859+
1308ab2a 24860+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
24861+
dece6358
AM
24862+ v = 0;
24863+ while (len--)
24864+ v += *name++;
24865+ /* v = hash_long(v, magic_bit); */
24866+ v %= nhash->nh_num;
24867+ return nhash->nh_head + v;
24868+}
24869+
24870+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
24871+ int nlen)
24872+{
24873+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 24874+}
24875+
24876+/* returns found or not */
dece6358 24877+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 24878+{
24879+ struct hlist_head *head;
24880+ struct au_vdir_wh *tpos;
24881+ struct hlist_node *pos;
24882+ struct au_vdir_destr *str;
24883+
dece6358 24884+ head = au_name_hash(whlist, name, nlen);
1facf9fc 24885+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
24886+ str = &tpos->wh_str;
24887+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
24888+ if (au_nhash_test_name(str, name, nlen))
24889+ return 1;
24890+ }
24891+ return 0;
24892+}
24893+
24894+/* returns found(true) or not */
24895+static int test_known(struct au_nhash *delist, char *name, int nlen)
24896+{
24897+ struct hlist_head *head;
24898+ struct au_vdir_dehstr *tpos;
24899+ struct hlist_node *pos;
24900+ struct au_vdir_destr *str;
24901+
24902+ head = au_name_hash(delist, name, nlen);
24903+ hlist_for_each_entry(tpos, pos, head, hash) {
24904+ str = tpos->str;
24905+ AuDbg("%.*s\n", str->len, str->name);
24906+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 24907+ return 1;
24908+ }
24909+ return 0;
24910+}
24911+
dece6358
AM
24912+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
24913+ unsigned char d_type)
24914+{
24915+#ifdef CONFIG_AUFS_SHWH
24916+ wh->wh_ino = ino;
24917+ wh->wh_type = d_type;
24918+#endif
24919+}
24920+
24921+/* ---------------------------------------------------------------------- */
24922+
24923+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
24924+ unsigned int d_type, aufs_bindex_t bindex,
24925+ unsigned char shwh)
1facf9fc 24926+{
24927+ int err;
24928+ struct au_vdir_destr *str;
24929+ struct au_vdir_wh *wh;
24930+
dece6358 24931+ AuDbg("%.*s\n", nlen, name);
1308ab2a 24932+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
24933+
1facf9fc 24934+ err = -ENOMEM;
dece6358 24935+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 24936+ if (unlikely(!wh))
24937+ goto out;
24938+
24939+ err = 0;
24940+ wh->wh_bindex = bindex;
dece6358
AM
24941+ if (shwh)
24942+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 24943+ str = &wh->wh_str;
dece6358
AM
24944+ str->len = nlen;
24945+ memcpy(str->name, name, nlen);
24946+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 24947+ /* smp_mb(); */
24948+
4f0767ce 24949+out:
1facf9fc 24950+ return err;
24951+}
24952+
1facf9fc 24953+static int append_deblk(struct au_vdir *vdir)
24954+{
24955+ int err;
dece6358 24956+ unsigned long ul;
1facf9fc 24957+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
24958+ union au_vdir_deblk_p p, deblk_end;
24959+ unsigned char **o;
24960+
24961+ err = -ENOMEM;
dece6358
AM
24962+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
24963+ GFP_NOFS);
1facf9fc 24964+ if (unlikely(!o))
24965+ goto out;
24966+
24967+ vdir->vd_deblk = o;
24968+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
24969+ if (p.deblk) {
24970+ ul = vdir->vd_nblk++;
24971+ vdir->vd_deblk[ul] = p.deblk;
24972+ vdir->vd_last.ul = ul;
24973+ vdir->vd_last.p.deblk = p.deblk;
24974+ deblk_end.deblk = p.deblk + deblk_sz;
24975+ err = set_deblk_end(&p, &deblk_end);
24976+ }
24977+
4f0767ce 24978+out:
1facf9fc 24979+ return err;
24980+}
24981+
dece6358
AM
24982+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
24983+ unsigned int d_type, struct au_nhash *delist)
24984+{
24985+ int err;
24986+ unsigned int sz;
24987+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
24988+ union au_vdir_deblk_p p, *room, deblk_end;
24989+ struct au_vdir_dehstr *dehstr;
24990+
24991+ p.deblk = last_deblk(vdir);
24992+ deblk_end.deblk = p.deblk + deblk_sz;
24993+ room = &vdir->vd_last.p;
24994+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
24995+ || !is_deblk_end(room, &deblk_end));
24996+
24997+ sz = calc_size(nlen);
24998+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
24999+ err = append_deblk(vdir);
25000+ if (unlikely(err))
25001+ goto out;
25002+
25003+ p.deblk = last_deblk(vdir);
25004+ deblk_end.deblk = p.deblk + deblk_sz;
25005+ /* smp_mb(); */
25006+ AuDebugOn(room->deblk != p.deblk);
25007+ }
25008+
25009+ err = -ENOMEM;
4a4d8108 25010+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
25011+ if (unlikely(!dehstr))
25012+ goto out;
25013+
25014+ dehstr->str = &room->de->de_str;
25015+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
25016+ room->de->de_ino = ino;
25017+ room->de->de_type = d_type;
25018+ room->de->de_str.len = nlen;
25019+ memcpy(room->de->de_str.name, name, nlen);
25020+
25021+ err = 0;
25022+ room->deblk += sz;
25023+ if (unlikely(set_deblk_end(room, &deblk_end)))
25024+ err = append_deblk(vdir);
25025+ /* smp_mb(); */
25026+
4f0767ce 25027+out:
dece6358
AM
25028+ return err;
25029+}
25030+
25031+/* ---------------------------------------------------------------------- */
25032+
25033+void au_vdir_free(struct au_vdir *vdir)
25034+{
25035+ unsigned char **deblk;
25036+
25037+ deblk = vdir->vd_deblk;
25038+ while (vdir->vd_nblk--)
25039+ kfree(*deblk++);
25040+ kfree(vdir->vd_deblk);
25041+ au_cache_free_vdir(vdir);
25042+}
25043+
1308ab2a 25044+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 25045+{
25046+ struct au_vdir *vdir;
1308ab2a 25047+ struct super_block *sb;
1facf9fc 25048+ int err;
25049+
1308ab2a 25050+ sb = file->f_dentry->d_sb;
dece6358
AM
25051+ SiMustAnyLock(sb);
25052+
1facf9fc 25053+ err = -ENOMEM;
25054+ vdir = au_cache_alloc_vdir();
25055+ if (unlikely(!vdir))
25056+ goto out;
25057+
25058+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
25059+ if (unlikely(!vdir->vd_deblk))
25060+ goto out_free;
25061+
25062+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 25063+ if (!vdir->vd_deblk_sz) {
25064+ /* estimate the appropriate size for deblk */
25065+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 25066+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 25067+ }
1facf9fc 25068+ vdir->vd_nblk = 0;
25069+ vdir->vd_version = 0;
25070+ vdir->vd_jiffy = 0;
25071+ err = append_deblk(vdir);
25072+ if (!err)
25073+ return vdir; /* success */
25074+
25075+ kfree(vdir->vd_deblk);
25076+
4f0767ce 25077+out_free:
1facf9fc 25078+ au_cache_free_vdir(vdir);
4f0767ce 25079+out:
1facf9fc 25080+ vdir = ERR_PTR(err);
25081+ return vdir;
25082+}
25083+
25084+static int reinit_vdir(struct au_vdir *vdir)
25085+{
25086+ int err;
25087+ union au_vdir_deblk_p p, deblk_end;
25088+
25089+ while (vdir->vd_nblk > 1) {
25090+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
25091+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
25092+ vdir->vd_nblk--;
25093+ }
25094+ p.deblk = vdir->vd_deblk[0];
25095+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
25096+ err = set_deblk_end(&p, &deblk_end);
25097+ /* keep vd_deblk_sz */
25098+ vdir->vd_last.ul = 0;
25099+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
25100+ vdir->vd_version = 0;
25101+ vdir->vd_jiffy = 0;
25102+ /* smp_mb(); */
25103+ return err;
25104+}
25105+
25106+/* ---------------------------------------------------------------------- */
25107+
1facf9fc 25108+#define AuFillVdir_CALLED 1
25109+#define AuFillVdir_WHABLE (1 << 1)
dece6358 25110+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 25111+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
25112+#define au_fset_fillvdir(flags, name) \
25113+ do { (flags) |= AuFillVdir_##name; } while (0)
25114+#define au_fclr_fillvdir(flags, name) \
25115+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 25116+
dece6358
AM
25117+#ifndef CONFIG_AUFS_SHWH
25118+#undef AuFillVdir_SHWH
25119+#define AuFillVdir_SHWH 0
25120+#endif
25121+
1facf9fc 25122+struct fillvdir_arg {
25123+ struct file *file;
25124+ struct au_vdir *vdir;
dece6358
AM
25125+ struct au_nhash delist;
25126+ struct au_nhash whlist;
1facf9fc 25127+ aufs_bindex_t bindex;
25128+ unsigned int flags;
25129+ int err;
25130+};
25131+
dece6358 25132+static int fillvdir(void *__arg, const char *__name, int nlen,
1facf9fc 25133+ loff_t offset __maybe_unused, u64 h_ino,
25134+ unsigned int d_type)
25135+{
25136+ struct fillvdir_arg *arg = __arg;
25137+ char *name = (void *)__name;
25138+ struct super_block *sb;
1facf9fc 25139+ ino_t ino;
dece6358 25140+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 25141+
1facf9fc 25142+ arg->err = 0;
dece6358 25143+ sb = arg->file->f_dentry->d_sb;
1facf9fc 25144+ au_fset_fillvdir(arg->flags, CALLED);
25145+ /* smp_mb(); */
dece6358 25146+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 25147+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
25148+ if (test_known(&arg->delist, name, nlen)
25149+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
25150+ goto out; /* already exists or whiteouted */
1facf9fc 25151+
25152+ sb = arg->file->f_dentry->d_sb;
dece6358 25153+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
25154+ if (!arg->err) {
25155+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25156+ d_type = DT_UNKNOWN;
dece6358
AM
25157+ arg->err = append_de(arg->vdir, name, nlen, ino,
25158+ d_type, &arg->delist);
4a4d8108 25159+ }
1facf9fc 25160+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
25161+ name += AUFS_WH_PFX_LEN;
dece6358
AM
25162+ nlen -= AUFS_WH_PFX_LEN;
25163+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
25164+ goto out; /* already whiteouted */
1facf9fc 25165+
dece6358
AM
25166+ if (shwh)
25167+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
25168+ &ino);
4a4d8108
AM
25169+ if (!arg->err) {
25170+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
25171+ d_type = DT_UNKNOWN;
1facf9fc 25172+ arg->err = au_nhash_append_wh
dece6358
AM
25173+ (&arg->whlist, name, nlen, ino, d_type,
25174+ arg->bindex, shwh);
4a4d8108 25175+ }
1facf9fc 25176+ }
25177+
4f0767ce 25178+out:
1facf9fc 25179+ if (!arg->err)
25180+ arg->vdir->vd_jiffy = jiffies;
25181+ /* smp_mb(); */
25182+ AuTraceErr(arg->err);
25183+ return arg->err;
25184+}
25185+
dece6358
AM
/*
 * Re-add every whiteout recorded in @whlist to @vdir as an ordinary entry
 * whose name is AUFS_WH_PFX followed by the stored name, via append_de().
 * Without CONFIG_AUFS_SHWH this is a plain "return 0".
 * Returns 0 on success, -ENOMEM when the name buffer cannot be obtained, or
 * the first error returned by append_de().
 */
25186+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
25187+ struct au_nhash *whlist, struct au_nhash *delist)
25188+{
25189+#ifdef CONFIG_AUFS_SHWH
25190+ int err;
25191+ unsigned int nh, u;
25192+ struct hlist_head *head;
25193+ struct au_vdir_wh *tpos;
25194+ struct hlist_node *pos, *n;
25195+ char *p, *o;
25196+ struct au_vdir_destr *destr;
25197+
25198+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
25199+
25200+ err = -ENOMEM;
/* o keeps the buffer start for append_de() and __putname(); p advances */
4a4d8108 25201+ o = p = __getname_gfp(GFP_NOFS);
dece6358
AM
25202+ if (unlikely(!p))
25203+ goto out;
25204+
25205+ err = 0;
25206+ nh = whlist->nh_num;
/* the prefix is written once; each name is then copied right after it */
25207+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
25208+ p += AUFS_WH_PFX_LEN;
/*
 * The break below only leaves the inner hlist walk, so the bucket loop has
 * to test err as well; otherwise a later successful append_de() would
 * overwrite the failure and it would never be reported.
 */
25209+ for (u = 0; !err && u < nh; u++) {
25210+ head = whlist->nh_head + u;
25211+ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
25212+ destr = &tpos->wh_str;
25213+ memcpy(p, destr->name, destr->len);
25214+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
25215+ tpos->wh_ino, tpos->wh_type, delist);
25216+ if (unlikely(err))
25217+ break;
25218+ }
25219+ }
25220+
25221+ __putname(o);
25222+
4f0767ce 25223+out:
dece6358
AM
25224+ AuTraceErr(err);
25225+ return err;
25226+#else
25227+ return 0;
25228+#endif
25229+}
25230+
/*
 * Fill arg->vdir by running vfsub_readdir() with the fillvdir callback on
 * every lower directory file of arg->file, from au_fbstart() up to
 * au_fbend_dir().  arg->delist and arg->whlist are allocated here and freed
 * again before returning.
 * Returns 0 on success, otherwise the first error encountered.
 */
1facf9fc 25231+static int au_do_read_vdir(struct fillvdir_arg *arg)
25232+{
25233+ int err;
dece6358 25234+ unsigned int rdhash;
1facf9fc 25235+ loff_t offset;
dece6358
AM
25236+ aufs_bindex_t bend, bindex, bstart;
25237+ unsigned char shwh;
1facf9fc 25238+ struct file *hf, *file;
25239+ struct super_block *sb;
25240+
1facf9fc 25241+ file = arg->file;
25242+ sb = file->f_dentry->d_sb;
dece6358
AM
25243+ SiMustAnyLock(sb);
25244+
/* hash size: the superblock setting, or an estimate from the dir size */
25245+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 25246+ if (!rdhash)
25247+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
25248+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
25249+ if (unlikely(err))
1facf9fc 25250+ goto out;
dece6358
AM
25251+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
25252+ if (unlikely(err))
1facf9fc 25253+ goto out_delist;
25254+
25255+ err = 0;
25256+ arg->flags = 0;
dece6358
AM
25257+ shwh = 0;
25258+ if (au_opt_test(au_mntflags(sb), SHWH)) {
25259+ shwh = 1;
25260+ au_fset_fillvdir(arg->flags, SHWH);
25261+ }
25262+ bstart = au_fbstart(file);
4a4d8108 25263+ bend = au_fbend_dir(file);
dece6358 25264+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 25265+ hf = au_hf_dir(file, bindex);
1facf9fc 25266+ if (!hf)
25267+ continue;
25268+
/* rewind the lower file; any non-zero result ends the loop as an error */
25269+ offset = vfsub_llseek(hf, 0, SEEK_SET);
25270+ err = offset;
25271+ if (unlikely(offset))
25272+ break;
25273+
25274+ arg->bindex = bindex;
/* WHABLE: set with shwh, or for a non-last branch passing au_br_whable() */
25275+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
25276+ if (shwh
25277+ || (bindex != bend
25278+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 25279+ au_fset_fillvdir(arg->flags, WHABLE);
/* keep reading while the previous pass set CALLED and reported no error */
25280+ do {
25281+ arg->err = 0;
25282+ au_fclr_fillvdir(arg->flags, CALLED);
25283+ /* smp_mb(); */
25284+ err = vfsub_readdir(hf, fillvdir, arg);
25285+ if (err >= 0)
25286+ err = arg->err;
25287+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
25288+ }
dece6358
AM
25289+
25290+ if (!err && shwh)
25291+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
25292+
25293+ au_nhash_wh_free(&arg->whlist);
1facf9fc 25294+
4f0767ce 25295+out_delist:
dece6358 25296+ au_nhash_de_free(&arg->delist);
4f0767ce 25297+out:
1facf9fc 25298+ return err;
25299+}
25300+
/*
 * Make sure the inode of @file has an up to date vdir.
 * A vdir is allocated when the inode has none.  An existing one is rebuilt
 * only when @may_read is set and either its version differs from
 * inode->i_version or it is older than si_rdcache jiffies.
 * Returns 0 on success (including "nothing to do"), otherwise an error.
 */
25301+static int read_vdir(struct file *file, int may_read)
25302+{
25303+ int err;
25304+ unsigned long expire;
25305+ unsigned char do_read;
25306+ struct fillvdir_arg arg;
25307+ struct inode *inode;
25308+ struct au_vdir *vdir, *allocated;
25309+
25310+ err = 0;
25311+ inode = file->f_dentry->d_inode;
25312+ IMustLock(inode);
dece6358
AM
25313+ SiMustAnyLock(inode->i_sb);
25314+
1facf9fc 25315+ allocated = NULL;
25316+ do_read = 0;
25317+ expire = au_sbi(inode->i_sb)->si_rdcache;
25318+ vdir = au_ivdir(inode);
25319+ if (!vdir) {
25320+ do_read = 1;
1308ab2a 25321+ vdir = alloc_vdir(file);
1facf9fc 25322+ err = PTR_ERR(vdir);
25323+ if (IS_ERR(vdir))
25324+ goto out;
25325+ err = 0;
25326+ allocated = vdir;
25327+ } else if (may_read
25328+ && (inode->i_version != vdir->vd_version
25329+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
25330+ do_read = 1;
25331+ err = reinit_vdir(vdir);
25332+ if (unlikely(err))
25333+ goto out;
25334+ }
25335+
25336+ if (!do_read)
25337+ return 0; /* success */
25338+
25339+ arg.file = file;
25340+ arg.vdir = vdir;
25341+ err = au_do_read_vdir(&arg);
25342+ if (!err) {
25343+ /* file->f_pos = 0; */
/* stamp the version and put the cursor back at the first block */
25344+ vdir->vd_version = inode->i_version;
25345+ vdir->vd_last.ul = 0;
25346+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
/* a newly allocated vdir is attached to the inode only after success */
25347+ if (allocated)
25348+ au_set_ivdir(inode, allocated);
25349+ } else if (allocated)
25350+ au_vdir_free(allocated);
25351+
4f0767ce 25352+out:
1facf9fc 25353+ return err;
25354+}
25355+
/*
 * Copy the entry blocks, version, timestamp and cursor of @src into @tgt.
 * @tgt must hold exactly one block on entry (checked by AuDebugOn).
 * Returns 0 on success.  On allocation failure @tgt is reset with
 * reinit_vdir() and -ENOMEM is returned.
 */
25356+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
25357+{
25358+ int err, rerr;
25359+ unsigned long ul, n;
25360+ const unsigned int deblk_sz = src->vd_deblk_sz;
25361+
25362+ AuDebugOn(tgt->vd_nblk != 1);
25363+
25364+ err = -ENOMEM;
/* grow the block pointer array when @src has more blocks */
25365+ if (tgt->vd_nblk < src->vd_nblk) {
25366+ unsigned char **p;
25367+
dece6358
AM
25368+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
25369+ GFP_NOFS);
1facf9fc 25370+ if (unlikely(!p))
25371+ goto out;
25372+ tgt->vd_deblk = p;
25373+ }
25374+
/* resize the first block when the two vdirs use different block sizes */
1308ab2a 25375+ if (tgt->vd_deblk_sz != deblk_sz) {
25376+ unsigned char *p;
25377+
25378+ tgt->vd_deblk_sz = deblk_sz;
25379+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
25380+ if (unlikely(!p))
25381+ goto out;
25382+ tgt->vd_deblk[0] = p;
25383+ }
1facf9fc 25384+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 25385+ tgt->vd_version = src->vd_version;
25386+ tgt->vd_jiffy = src->vd_jiffy;
25387+
25388+ n = src->vd_nblk;
25389+ for (ul = 1; ul < n; ul++) {
dece6358
AM
25390+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
25391+ GFP_NOFS);
25392+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 25393+ goto out;
/* NOTE(review): presumably counted so reinit_vdir() sees them on failure */
1308ab2a 25394+ tgt->vd_nblk++;
1facf9fc 25395+ }
1308ab2a 25396+ tgt->vd_nblk = n;
/*
 * Take the cursor block index from @src.  The original assigned
 * tgt->vd_last.ul to itself, so the block index came from @tgt while the
 * byte offset added below came from @src.
 */
25397+ tgt->vd_last.ul = src->vd_last.ul;
25398+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
25399+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
25400+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 25401+ /* smp_mb(); */
25402+ return 0; /* success */
25403+
4f0767ce 25404+out:
1facf9fc 25405+ rerr = reinit_vdir(tgt);
25406+ BUG_ON(rerr);
25407+ return err;
25408+}
25409+
/*
 * Prepare the per-file vdir cache of @file.
 * read_vdir() first refreshes the inode vdir (re-reading is allowed only
 * when f_pos is 0).  The file cache is then allocated if missing, or reset
 * when f_pos is 0 and its version differs from file->f_version, and filled
 * from the inode vdir with copy_vdir().  In any other case nothing is copied.
 * Returns 0 on success, otherwise an error.
 */
25410+int au_vdir_init(struct file *file)
25411+{
25412+ int err;
25413+ struct inode *inode;
25414+ struct au_vdir *vdir_cache, *allocated;
25415+
25416+ err = read_vdir(file, !file->f_pos);
25417+ if (unlikely(err))
25418+ goto out;
25419+
25420+ allocated = NULL;
25421+ vdir_cache = au_fvdir_cache(file);
25422+ if (!vdir_cache) {
1308ab2a 25423+ vdir_cache = alloc_vdir(file);
1facf9fc 25424+ err = PTR_ERR(vdir_cache);
25425+ if (IS_ERR(vdir_cache))
25426+ goto out;
25427+ allocated = vdir_cache;
25428+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
25429+ err = reinit_vdir(vdir_cache);
25430+ if (unlikely(err))
25431+ goto out;
25432+ } else
25433+ return 0; /* success */
25434+
25435+ inode = file->f_dentry->d_inode;
25436+ err = copy_vdir(vdir_cache, au_ivdir(inode));
25437+ if (!err) {
25438+ file->f_version = inode->i_version;
/* a newly allocated cache is attached to the file only after success */
25439+ if (allocated)
25440+ au_set_fvdir_cache(file, allocated);
25441+ } else if (allocated)
25442+ au_vdir_free(allocated);
25443+
4f0767ce 25444+out:
1facf9fc 25445+ return err;
25446+}
25447+
/*
 * Convert the cursor of @vdir into a linear offset: the byte distance of
 * vd_last.p from the start of its block plus vd_deblk_sz times the block
 * index vd_last.ul.
 */
25448+static loff_t calc_offset(struct au_vdir *vdir)
25449+{
25450+ loff_t offset;
25451+ union au_vdir_deblk_p p;
25452+
25453+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
25454+ offset = vdir->vd_last.p.deblk - p.deblk;
25455+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
25456+ return offset;
25457+}
25458+
/*
 * Position the cursor of the file vdir cache at file->f_pos.
 * Nothing changes when the cursor already matches f_pos.  Otherwise the
 * cursor is rewound to the first block and, for a non-zero f_pos, entries
 * are walked from block f_pos / vd_deblk_sz onward until the running offset
 * reaches f_pos.  The result is 0 when that block index is out of range or
 * no entry is found before the end of the blocks.
 */
25459+/* returns true or false */
25460+static int seek_vdir(struct file *file)
25461+{
25462+ int valid;
25463+ unsigned int deblk_sz;
25464+ unsigned long ul, n;
25465+ loff_t offset;
25466+ union au_vdir_deblk_p p, deblk_end;
25467+ struct au_vdir *vdir_cache;
25468+
25469+ valid = 1;
25470+ vdir_cache = au_fvdir_cache(file);
25471+ offset = calc_offset(vdir_cache);
25472+ AuDbg("offset %lld\n", offset);
25473+ if (file->f_pos == offset)
25474+ goto out;
25475+
25476+ vdir_cache->vd_last.ul = 0;
25477+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
25478+ if (!file->f_pos)
25479+ goto out;
25480+
25481+ valid = 0;
25482+ deblk_sz = vdir_cache->vd_deblk_sz;
25483+ ul = div64_u64(file->f_pos, deblk_sz);
25484+ AuDbg("ul %lu\n", ul);
25485+ if (ul >= vdir_cache->vd_nblk)
25486+ goto out;
25487+
25488+ n = vdir_cache->vd_nblk;
25489+ for (; ul < n; ul++) {
25490+ p.deblk = vdir_cache->vd_deblk[ul];
25491+ deblk_end.deblk = p.deblk + deblk_sz;
25492+ offset = ul;
25493+ offset *= deblk_sz;
/* step entry by entry until the block ends or the offset reaches f_pos */
25494+ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
25495+ unsigned int l;
25496+
25497+ l = calc_size(p.de->de_str.len);
25498+ offset += l;
25499+ p.deblk += l;
25500+ }
25501+ if (!is_deblk_end(&p, &deblk_end)) {
25502+ valid = 1;
25503+ vdir_cache->vd_last.ul = ul;
25504+ vdir_cache->vd_last.p = p;
25505+ break;
25506+ }
25507+ }
25508+
4f0767ce 25509+out:
1facf9fc 25510+ /* smp_mb(); */
25511+ AuTraceErr(!valid);
25512+ return valid;
25513+}
25514+
/*
 * Feed the entries of the file vdir cache to @filldir, starting at the
 * position found by seek_vdir() and advancing file->f_pos and the cursor by
 * calc_size() of each emitted entry.
 * Always returns 0: an invalid seek ends the call early, and a non-zero
 * result from @filldir stops the walk without being reported to the caller.
 */
25515+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
25516+{
25517+ int err;
25518+ unsigned int l, deblk_sz;
25519+ union au_vdir_deblk_p deblk_end;
25520+ struct au_vdir *vdir_cache;
25521+ struct au_vdir_de *de;
25522+
25523+ vdir_cache = au_fvdir_cache(file);
25524+ if (!seek_vdir(file))
25525+ return 0;
25526+
25527+ deblk_sz = vdir_cache->vd_deblk_sz;
25528+ while (1) {
25529+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25530+ deblk_end.deblk += deblk_sz;
25531+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
25532+ de = vdir_cache->vd_last.p.de;
25533+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
25534+ de->de_str.len, de->de_str.name, file->f_pos,
25535+ (unsigned long)de->de_ino, de->de_type);
25536+ err = filldir(dirent, de->de_str.name, de->de_str.len,
25537+ file->f_pos, de->de_ino, de->de_type);
25538+ if (unlikely(err)) {
25539+ AuTraceErr(err);
25540+ /* todo: ignore the error caused by udba? */
25541+ /* return err; */
25542+ return 0;
25543+ }
25544+
25545+ l = calc_size(de->de_str.len);
25546+ vdir_cache->vd_last.p.deblk += l;
25547+ file->f_pos += l;
25548+ }
/* block exhausted: move to the next one and realign f_pos to its start */
25549+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
25550+ vdir_cache->vd_last.ul++;
25551+ vdir_cache->vd_last.p.deblk
25552+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
25553+ file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
25554+ continue;
25555+ }
25556+ break;
25557+ }
25558+
25559+ /* smp_mb(); */
25560+ return 0;
25561+}
7f207e10
AM
25562diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
25563--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
25564+++ linux/fs/aufs/vfsub.c 2011-08-24 13:30:24.734646739 +0200
25565@@ -0,0 +1,836 @@
1facf9fc 25566+/*
027c5e7a 25567+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 25568+ *
25569+ * This program, aufs is free software; you can redistribute it and/or modify
25570+ * it under the terms of the GNU General Public License as published by
25571+ * the Free Software Foundation; either version 2 of the License, or
25572+ * (at your option) any later version.
dece6358
AM
25573+ *
25574+ * This program is distributed in the hope that it will be useful,
25575+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25576+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25577+ * GNU General Public License for more details.
25578+ *
25579+ * You should have received a copy of the GNU General Public License
25580+ * along with this program; if not, write to the Free Software
25581+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 25582+ */
25583+
25584+/*
25585+ * sub-routines for VFS
25586+ */
25587+
4a4d8108 25588+#include <linux/file.h>
1308ab2a 25589+#include <linux/ima.h>
dece6358
AM
25590+#include <linux/namei.h>
25591+#include <linux/security.h>
25592+#include <linux/splice.h>
1facf9fc 25593+#include <linux/uaccess.h>
25594+#include "aufs.h"
25595+
/*
 * Refresh the attributes behind @h_path with vfs_getattr(), but only when
 * the fs is not remote and au_test_fs_refresh_iattr() is true for it.
 * @did: optional; receives non-zero when the refresh was attempted.
 * Returns the vfs_getattr() result, or 0 when nothing was attempted.
 */
25596+int vfsub_update_h_iattr(struct path *h_path, int *did)
25597+{
25598+ int err;
25599+ struct kstat st;
25600+ struct super_block *h_sb;
25601+
25602+ /* for remote fs, leave work for its getattr or d_revalidate */
25603+ /* for bad i_attr fs, handle them in aufs_getattr() */
25604+ /* still some fs may acquire i_mutex. we need to skip them */
25605+ err = 0;
/* with no @did, err doubles as the flag; it is overwritten below if set */
25606+ if (!did)
25607+ did = &err;
25608+ h_sb = h_path->dentry->d_sb;
25609+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
25610+ if (*did)
25611+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
25612+
25613+ return err;
25614+}
25615+
25616+/* ---------------------------------------------------------------------- */
25617+
/*
 * Open @path with dentry_open() using current_cred().
 * A reference is taken with path_get() before the call
 * (NOTE(review): presumably dentry_open() consumes it - confirm).
 * When the open succeeded and the mode is read without write,
 * i_readcount_inc() is applied to the inode.
 * Returns whatever dentry_open() returned.
 */
4a4d8108 25618+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 25619+{
25620+ struct file *file;
25621+
4a4d8108 25622+ path_get(path);
0c5527e5 25623+ file = dentry_open(path->dentry, path->mnt,
2cbb1c4b 25624+ flags /* | __FMODE_NONOTIFY */,
7f207e10 25625+ current_cred());
2cbb1c4b
JR
25626+ if (!IS_ERR_OR_NULL(file)
25627+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
25628+ i_readcount_inc(path->dentry->d_inode);
4a4d8108 25629+
1308ab2a 25630+ return file;
25631+}
25632+
/*
 * filp_open() wrapper: the call runs between lockdep_off() and lockdep_on(),
 * and on success the attributes of the opened path are refreshed with
 * vfsub_update_h_iattr(), whose result is ignored.
 * Returns the filp_open() result unchanged.
 */
1facf9fc 25633+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
25634+{
25635+ struct file *file;
25636+
2cbb1c4b 25637+ lockdep_off();
7f207e10 25638+ file = filp_open(path,
2cbb1c4b 25639+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 25640+ mode);
2cbb1c4b 25641+ lockdep_on();
1facf9fc 25642+ if (IS_ERR(file))
25643+ goto out;
25644+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
25645+
4f0767ce 25646+out:
1facf9fc 25647+ return file;
25648+}
25649+
/*
 * kern_path() wrapper.  When the lookup succeeds and the dentry has an
 * inode, its attributes are refreshed; the refresh result is ignored.
 * Returns the kern_path() result.
 */
25650+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
25651+{
25652+ int err;
25653+
1facf9fc 25654+ err = kern_path(name, flags, path);
1facf9fc 25655+ if (!err && path->dentry->d_inode)
25656+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
25657+ return err;
25658+}
25659+
/*
 * lookup_one_len() wrapper; the inode of @parent must be locked (IMustLock).
 * For a positive result the attributes are refreshed through a local path
 * whose mnt is NULL; the refresh result is ignored.
 * Returns the dentry or error pointer from lookup_one_len().
 */
25660+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
25661+ int len)
25662+{
25663+ struct path path = {
25664+ .mnt = NULL
25665+ };
25666+
1308ab2a 25667+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 25668+ IMustLock(parent->d_inode);
25669+
25670+ path.dentry = lookup_one_len(name, parent, len);
25671+ if (IS_ERR(path.dentry))
25672+ goto out;
25673+ if (path.dentry->d_inode)
25674+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25675+
4f0767ce 25676+out:
4a4d8108 25677+ AuTraceErrPtr(path.dentry);
1facf9fc 25678+ return path.dentry;
25679+}
25680+
/*
 * lookup_hash() wrapper; the inode of nd->path.dentry must be locked.
 * For a positive result the attributes are refreshed using nd->path.mnt;
 * the refresh result is ignored.
 * Returns the dentry or error pointer from lookup_hash().
 */
25681+struct dentry *vfsub_lookup_hash(struct nameidata *nd)
25682+{
25683+ struct path path = {
25684+ .mnt = nd->path.mnt
25685+ };
25686+
25687+ IMustLock(nd->path.dentry->d_inode);
25688+
25689+ path.dentry = lookup_hash(nd);
4a4d8108
AM
25690+ if (IS_ERR(path.dentry))
25691+ goto out;
25692+ if (path.dentry->d_inode)
1facf9fc 25693+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
25694+
4f0767ce 25695+out:
4a4d8108 25696+ AuTraceErrPtr(path.dentry);
1facf9fc 25697+ return path.dentry;
25698+}
25699+
2cbb1c4b
JR
25700+/*
25701+ * this is "VFS:__lookup_one_len()" which was removed and merged into
25702+ * VFS:lookup_one_len() by the commit.
25703+ * 6a96ba5 2011-03-14 kill __lookup_one_len()
25704+ * this function should always be equivalent to the corresponding part in
25705+ * VFS:lookup_one_len().
25706+ */
/*
 * Fill @this with @name, @len and the hash of the first @len bytes.
 * Returns -EACCES when @len is 0 or one of those bytes is a slash or NUL;
 * in that case this->name and this->len are set but this->hash is not.
 * Returns 0 otherwise.
 */
25707+int vfsub_name_hash(const char *name, struct qstr *this, int len)
25708+{
25709+ unsigned long hash;
25710+ unsigned int c;
25711+
25712+ this->name = name;
25713+ this->len = len;
25714+ if (!len)
25715+ return -EACCES;
25716+
25717+ hash = init_name_hash();
25718+ while (len--) {
25719+ c = *(const unsigned char *)name++;
25720+ if (c == '/' || c == '\0')
25721+ return -EACCES;
25722+ hash = partial_name_hash(c, hash);
25723+ }
25724+ this->hash = end_name_hash(hash);
25725+ return 0;
25726+}
25727+
1facf9fc 25728+/* ---------------------------------------------------------------------- */
25729+
/*
 * lock_rename(@d1, @d2) between lockdep_off() and lockdep_on(), followed by
 * au_hn_suspend() on @hdir1 and, when it is a different object, on @hdir2.
 * Returns the lock_rename() result.
 */
25730+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
25731+ struct dentry *d2, struct au_hinode *hdir2)
25732+{
25733+ struct dentry *d;
25734+
2cbb1c4b 25735+ lockdep_off();
1facf9fc 25736+ d = lock_rename(d1, d2);
2cbb1c4b 25737+ lockdep_on();
4a4d8108 25738+ au_hn_suspend(hdir1);
1facf9fc 25739+ if (hdir1 != hdir2)
4a4d8108 25740+ au_hn_suspend(hdir2);
1facf9fc 25741+
25742+ return d;
25743+}
25744+
/*
 * Counterpart of vfsub_lock_rename(): au_hn_resume() on @hdir1 and, when
 * different, on @hdir2, then unlock_rename(@d1, @d2) between lockdep_off()
 * and lockdep_on().
 */
25745+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
25746+ struct dentry *d2, struct au_hinode *hdir2)
25747+{
4a4d8108 25748+ au_hn_resume(hdir1);
1facf9fc 25749+ if (hdir1 != hdir2)
4a4d8108 25750+ au_hn_resume(hdir2);
2cbb1c4b 25751+ lockdep_off();
1facf9fc 25752+ unlock_rename(d1, d2);
2cbb1c4b 25753+ lockdep_on();
1facf9fc 25754+}
25755+
25756+/* ---------------------------------------------------------------------- */
25757+
/*
 * Create a regular file for path->dentry in @dir, which must be locked.
 * security_path_mknod() is asked first, with path->dentry temporarily
 * switched to the parent.  vfs_create() gets a NULL nameidata when
 * au_test_fs_null_nd() is true for the fs, otherwise a locally built
 * LOOKUP_CREATE nameidata.  On success the attributes of the new dentry
 * and, when that refresh ran, of its parent are refreshed; those results
 * are ignored.
 * Returns 0 or the error from the security hook or vfs_create().
 */
25758+int vfsub_create(struct inode *dir, struct path *path, int mode)
25759+{
25760+ int err;
25761+ struct dentry *d;
25762+
25763+ IMustLock(dir);
25764+
25765+ d = path->dentry;
25766+ path->dentry = d->d_parent;
b752ccd1 25767+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 25768+ path->dentry = d;
25769+ if (unlikely(err))
25770+ goto out;
25771+
25772+ if (au_test_fs_null_nd(dir->i_sb))
25773+ err = vfs_create(dir, path->dentry, mode, NULL);
25774+ else {
25775+ struct nameidata h_nd;
25776+
25777+ memset(&h_nd, 0, sizeof(h_nd));
25778+ h_nd.flags = LOOKUP_CREATE;
25779+ h_nd.intent.open.flags = O_CREAT
25780+ | vfsub_fmode_to_uint(FMODE_READ);
25781+ h_nd.intent.open.create_mode = mode;
25782+ h_nd.path.dentry = path->dentry->d_parent;
25783+ h_nd.path.mnt = path->mnt;
/* the temporary nameidata holds its own path reference during the call */
25784+ path_get(&h_nd.path);
25785+ err = vfs_create(dir, path->dentry, mode, &h_nd);
25786+ path_put(&h_nd.path);
25787+ }
25788+
25789+ if (!err) {
25790+ struct path tmp = *path;
25791+ int did;
25792+
25793+ vfsub_update_h_iattr(&tmp, &did);
25794+ if (did) {
25795+ tmp.dentry = path->dentry->d_parent;
25796+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25797+ }
25798+ /*ignore*/
25799+ }
25800+
4f0767ce 25801+out:
1facf9fc 25802+ return err;
25803+}
25804+
/*
 * Create a symlink to @symname for path->dentry in @dir (must be locked).
 * security_path_symlink() is asked first, with path->dentry temporarily
 * switched to the parent; then vfs_symlink() runs.  On success the
 * attributes of the new dentry and, when that refresh ran, of its parent
 * are refreshed; those results are ignored.
 * Returns 0 or the error from the security hook or vfs_symlink().
 */
25805+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
25806+{
25807+ int err;
25808+ struct dentry *d;
25809+
25810+ IMustLock(dir);
25811+
25812+ d = path->dentry;
25813+ path->dentry = d->d_parent;
b752ccd1 25814+ err = security_path_symlink(path, d, symname);
1facf9fc 25815+ path->dentry = d;
25816+ if (unlikely(err))
25817+ goto out;
25818+
25819+ err = vfs_symlink(dir, path->dentry, symname);
25820+ if (!err) {
25821+ struct path tmp = *path;
25822+ int did;
25823+
25824+ vfsub_update_h_iattr(&tmp, &did);
25825+ if (did) {
25826+ tmp.dentry = path->dentry->d_parent;
25827+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25828+ }
25829+ /*ignore*/
25830+ }
25831+
4f0767ce 25832+out:
1facf9fc 25833+ return err;
25834+}
25835+
/*
 * Create a node of @mode and @dev for path->dentry in @dir (must be locked).
 * security_path_mknod() is asked first with new_encode_dev(@dev) and with
 * path->dentry temporarily switched to the parent; then vfs_mknod() runs.
 * On success the attributes of the new dentry and, when that refresh ran,
 * of its parent are refreshed; those results are ignored.
 * Returns 0 or the error from the security hook or vfs_mknod().
 */
25836+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
25837+{
25838+ int err;
25839+ struct dentry *d;
25840+
25841+ IMustLock(dir);
25842+
25843+ d = path->dentry;
25844+ path->dentry = d->d_parent;
027c5e7a 25845+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 25846+ path->dentry = d;
25847+ if (unlikely(err))
25848+ goto out;
25849+
25850+ err = vfs_mknod(dir, path->dentry, mode, dev);
25851+ if (!err) {
25852+ struct path tmp = *path;
25853+ int did;
25854+
25855+ vfsub_update_h_iattr(&tmp, &did);
25856+ if (did) {
25857+ tmp.dentry = path->dentry->d_parent;
25858+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25859+ }
25860+ /*ignore*/
25861+ }
25862+
4f0767ce 25863+out:
1facf9fc 25864+ return err;
25865+}
25866+
/*
 * Link-count guard used before vfs_link().
 * Returns -EMLINK only when au_test_fs_no_limit_nlink() is true for the fs
 * of @inode and i_nlink has reached UINT_MAX >> 1; returns 0 otherwise.
 */
25867+static int au_test_nlink(struct inode *inode)
25868+{
25869+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
25870+
25871+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
25872+ || inode->i_nlink < link_max)
25873+ return 0;
25874+ return -EMLINK;
25875+}
25876+
/*
 * Hard-link @src_dentry as path->dentry in @dir (must be locked).
 * Order of checks: au_test_nlink() on the source inode, then
 * security_path_link() with path->dentry temporarily switched to the
 * parent, then vfs_link() between lockdep_off() and lockdep_on().
 * On success the attributes of the new dentry and, when that refresh ran,
 * of its parent and of @src_dentry are refreshed; results are ignored.
 * Returns 0 or the first error.
 */
25877+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
25878+{
25879+ int err;
25880+ struct dentry *d;
25881+
25882+ IMustLock(dir);
25883+
25884+ err = au_test_nlink(src_dentry->d_inode);
25885+ if (unlikely(err))
25886+ return err;
25887+
25888+ d = path->dentry;
25889+ path->dentry = d->d_parent;
b752ccd1 25890+ err = security_path_link(src_dentry, path, d);
1facf9fc 25891+ path->dentry = d;
25892+ if (unlikely(err))
25893+ goto out;
25894+
2cbb1c4b 25895+ lockdep_off();
1facf9fc 25896+ err = vfs_link(src_dentry, dir, path->dentry);
2cbb1c4b 25897+ lockdep_on();
1facf9fc 25898+ if (!err) {
25899+ struct path tmp = *path;
25900+ int did;
25901+
25902+ /* fuse has different memory inode for the same inumber */
25903+ vfsub_update_h_iattr(&tmp, &did);
25904+ if (did) {
25905+ tmp.dentry = path->dentry->d_parent;
25906+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25907+ tmp.dentry = src_dentry;
25908+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25909+ }
25910+ /*ignore*/
25911+ }
25912+
4f0767ce 25913+out:
1facf9fc 25914+ return err;
25915+}
25916+
/*
 * Rename @src_dentry (in @src_dir) to path->dentry (in @dir); both
 * directories must be locked.  security_path_rename() is asked first, using
 * the parents of both dentries on path->mnt, then vfs_rename() runs between
 * lockdep_off() and lockdep_on().  On success the attributes of the target
 * parent and, when that refresh ran, of @src_dentry and its parent are
 * refreshed; those results are ignored.
 * Returns 0 or the error from the security hook or vfs_rename().
 */
25917+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
25918+ struct inode *dir, struct path *path)
25919+{
25920+ int err;
25921+ struct path tmp = {
25922+ .mnt = path->mnt
25923+ };
25924+ struct dentry *d;
25925+
25926+ IMustLock(dir);
25927+ IMustLock(src_dir);
25928+
25929+ d = path->dentry;
25930+ path->dentry = d->d_parent;
25931+ tmp.dentry = src_dentry->d_parent;
b752ccd1 25932+ err = security_path_rename(&tmp, src_dentry, path, d);
1facf9fc 25933+ path->dentry = d;
25934+ if (unlikely(err))
25935+ goto out;
25936+
2cbb1c4b 25937+ lockdep_off();
1facf9fc 25938+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
2cbb1c4b 25939+ lockdep_on();
1facf9fc 25940+ if (!err) {
25941+ int did;
25942+
25943+ tmp.dentry = d->d_parent;
25944+ vfsub_update_h_iattr(&tmp, &did);
25945+ if (did) {
25946+ tmp.dentry = src_dentry;
25947+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25948+ tmp.dentry = src_dentry->d_parent;
25949+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25950+ }
25951+ /*ignore*/
25952+ }
25953+
4f0767ce 25954+out:
1facf9fc 25955+ return err;
25956+}
25957+
/*
 * Create a directory for path->dentry in @dir (must be locked).
 * security_path_mkdir() is asked first, with path->dentry temporarily
 * switched to the parent; then vfs_mkdir() runs.  On success the attributes
 * of the new dentry and, when that refresh ran, of its parent are
 * refreshed; those results are ignored.
 * Returns 0 or the error from the security hook or vfs_mkdir().
 */
25958+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
25959+{
25960+ int err;
25961+ struct dentry *d;
25962+
25963+ IMustLock(dir);
25964+
25965+ d = path->dentry;
25966+ path->dentry = d->d_parent;
b752ccd1 25967+ err = security_path_mkdir(path, d, mode);
1facf9fc 25968+ path->dentry = d;
25969+ if (unlikely(err))
25970+ goto out;
25971+
25972+ err = vfs_mkdir(dir, path->dentry, mode);
25973+ if (!err) {
25974+ struct path tmp = *path;
25975+ int did;
25976+
25977+ vfsub_update_h_iattr(&tmp, &did);
25978+ if (did) {
25979+ tmp.dentry = path->dentry->d_parent;
25980+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
25981+ }
25982+ /*ignore*/
25983+ }
25984+
4f0767ce 25985+out:
1facf9fc 25986+ return err;
25987+}
25988+
/*
 * Remove the directory path->dentry from @dir (must be locked).
 * security_path_rmdir() is asked first, with path->dentry temporarily
 * switched to the parent; then vfs_rmdir() runs between lockdep_off() and
 * lockdep_on().  On success the attributes of the parent are refreshed and
 * that result is ignored.
 * Returns 0 or the error from the security hook or vfs_rmdir().
 */
25989+int vfsub_rmdir(struct inode *dir, struct path *path)
25990+{
25991+ int err;
25992+ struct dentry *d;
25993+
25994+ IMustLock(dir);
25995+
25996+ d = path->dentry;
25997+ path->dentry = d->d_parent;
b752ccd1 25998+ err = security_path_rmdir(path, d);
1facf9fc 25999+ path->dentry = d;
26000+ if (unlikely(err))
26001+ goto out;
26002+
2cbb1c4b 26003+ lockdep_off();
1facf9fc 26004+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 26005+ lockdep_on();
1facf9fc 26006+ if (!err) {
26007+ struct path tmp = {
26008+ .dentry = path->dentry->d_parent,
26009+ .mnt = path->mnt
26010+ };
26011+
26012+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26013+ }
26014+
4f0767ce 26015+out:
1facf9fc 26016+ return err;
26017+}
26018+
26019+/* ---------------------------------------------------------------------- */
26020+
/*
 * vfs_read() into the user buffer @ubuf between lockdep_off() and
 * lockdep_on().  When the result is not negative the attributes of the file
 * path are refreshed and that result is ignored.
 * Returns the vfs_read() result.
 */
26021+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26022+ loff_t *ppos)
26023+{
26024+ ssize_t err;
26025+
2cbb1c4b 26026+ lockdep_off();
1facf9fc 26027+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 26028+ lockdep_on();
1facf9fc 26029+ if (err >= 0)
26030+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26031+ return err;
26032+}
26033+
/*
 * Kernel-buffer variant of vfsub_read_u(): the address limit is switched to
 * KERNEL_DS around the call and restored afterwards.  The union turns @kbuf
 * into a __user pointer without a cast
 * (NOTE(review): presumably to keep sparse quiet - confirm).
 * Returns the vfsub_read_u() result.
 */
26034+/* todo: kernel_read()? */
26035+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26036+ loff_t *ppos)
26037+{
26038+ ssize_t err;
26039+ mm_segment_t oldfs;
b752ccd1
AM
26040+ union {
26041+ void *k;
26042+ char __user *u;
26043+ } buf;
1facf9fc 26044+
b752ccd1 26045+ buf.k = kbuf;
1facf9fc 26046+ oldfs = get_fs();
26047+ set_fs(KERNEL_DS);
b752ccd1 26048+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 26049+ set_fs(oldfs);
26050+ return err;
26051+}
26052+
/*
 * vfs_write() from the user buffer @ubuf between lockdep_off() and
 * lockdep_on().  When the result is not negative the attributes of the file
 * path are refreshed and that result is ignored.
 * Returns the vfs_write() result.
 */
26053+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26054+ loff_t *ppos)
26055+{
26056+ ssize_t err;
26057+
2cbb1c4b 26058+ lockdep_off();
1facf9fc 26059+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 26060+ lockdep_on();
1facf9fc 26061+ if (err >= 0)
26062+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26063+ return err;
26064+}
26065+
/*
 * Kernel-buffer variant of vfsub_write_u(): the address limit is switched
 * to KERNEL_DS around the call and restored afterwards.  The union turns
 * @kbuf into a const __user pointer without a cast.
 * Returns the vfsub_write_u() result.
 */
26066+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
26067+{
26068+ ssize_t err;
26069+ mm_segment_t oldfs;
b752ccd1
AM
26070+ union {
26071+ void *k;
26072+ const char __user *u;
26073+ } buf;
1facf9fc 26074+
b752ccd1 26075+ buf.k = kbuf;
1facf9fc 26076+ oldfs = get_fs();
26077+ set_fs(KERNEL_DS);
b752ccd1 26078+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 26079+ set_fs(oldfs);
26080+ return err;
26081+}
26082+
4a4d8108
AM
/*
 * Call the flush operation of @file when it has one.  On a fs for which
 * au_test_nfs() is true the call is wrapped in lockdep_off()/lockdep_on().
 * After a successful flush the attributes of the file path are refreshed
 * and that result is ignored.
 * Returns the flush result, or 0 when there is no flush operation.
 */
26083+int vfsub_flush(struct file *file, fl_owner_t id)
26084+{
26085+ int err;
26086+
26087+ err = 0;
26088+ if (file->f_op && file->f_op->flush) {
2cbb1c4b
JR
26089+ if (!au_test_nfs(file->f_dentry->d_sb))
26090+ err = file->f_op->flush(file, id);
26091+ else {
26092+ lockdep_off();
26093+ err = file->f_op->flush(file, id);
26094+ lockdep_on();
26095+ }
4a4d8108
AM
26096+ if (!err)
26097+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
26098+ /*ignore*/
26099+ }
26100+ return err;
26101+}
26102+
/*
 * vfs_readdir() between lockdep_off() and lockdep_on().  When the result is
 * not negative the attributes of the file path are refreshed and that
 * result is ignored.
 * Returns the vfs_readdir() result.
 */
1facf9fc 26103+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
26104+{
26105+ int err;
26106+
2cbb1c4b 26107+ lockdep_off();
1facf9fc 26108+ err = vfs_readdir(file, filldir, arg);
2cbb1c4b 26109+ lockdep_on();
1facf9fc 26110+ if (err >= 0)
26111+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
26112+ return err;
26113+}
26114+
/*
 * do_splice_to() between lockdep_off() and lockdep_on().  file_accessed()
 * is called on @in whatever the result; when the result is not negative the
 * attributes of the file path are refreshed and that result is ignored.
 * Returns the do_splice_to() result.
 */
26115+long vfsub_splice_to(struct file *in, loff_t *ppos,
26116+ struct pipe_inode_info *pipe, size_t len,
26117+ unsigned int flags)
26118+{
26119+ long err;
26120+
2cbb1c4b 26121+ lockdep_off();
0fc653ad 26122+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 26123+ lockdep_on();
4a4d8108 26124+ file_accessed(in);
1facf9fc 26125+ if (err >= 0)
26126+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
26127+ return err;
26128+}
26129+
/*
 * do_splice_from() between lockdep_off() and lockdep_on().  When the result
 * is not negative the attributes of the path of @out are refreshed and that
 * result is ignored.
 * Returns the do_splice_from() result.
 */
26130+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26131+ loff_t *ppos, size_t len, unsigned int flags)
26132+{
26133+ long err;
26134+
2cbb1c4b 26135+ lockdep_off();
0fc653ad 26136+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 26137+ lockdep_on();
1facf9fc 26138+ if (err >= 0)
26139+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
26140+ return err;
26141+}
26142+
53392da6
AM
/*
 * vfs_fsync(@file, @datasync) between lockdep_off() and lockdep_on().
 * On success the attributes of @path are refreshed; when @path is NULL the
 * path of @file is used instead, so @file must then be non-NULL
 * (AuDebugOn).  The refresh result is ignored.
 * Returns the vfs_fsync() result.
 */
26143+int vfsub_fsync(struct file *file, struct path *path, int datasync)
26144+{
26145+ int err;
26146+
26147+ /* file can be NULL */
26148+ lockdep_off();
26149+ err = vfs_fsync(file, datasync);
26150+ lockdep_on();
26151+ if (!err) {
26152+ if (!path) {
26153+ AuDebugOn(!file);
26154+ path = &file->f_path;
26155+ }
26156+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
26157+ }
26158+ return err;
26159+}
26160+
/*
 * Truncate the inode behind @h_path to @length.
 * Without @h_file the function itself takes mnt_want_write(), checks
 * inode_permission(MAY_WRITE), takes get_write_access() and calls
 * break_lease(), and releases what it took on the way out.  With @h_file
 * all of that is skipped.  locks_verify_truncate() and
 * security_path_truncate() are then checked before do_truncate(), which
 * runs between lockdep_off() and lockdep_on().
 * Returns 0 or the first error.
 */
1facf9fc 26161+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
26162+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26163+ struct file *h_file)
26164+{
26165+ int err;
26166+ struct inode *h_inode;
26167+
26168+ h_inode = h_path->dentry->d_inode;
26169+ if (!h_file) {
26170+ err = mnt_want_write(h_path->mnt);
26171+ if (err)
26172+ goto out;
26173+ err = inode_permission(h_inode, MAY_WRITE);
26174+ if (err)
26175+ goto out_mnt;
26176+ err = get_write_access(h_inode);
26177+ if (err)
26178+ goto out_mnt;
4a4d8108 26179+ err = break_lease(h_inode, O_WRONLY);
1facf9fc 26180+ if (err)
26181+ goto out_inode;
26182+ }
26183+
26184+ err = locks_verify_truncate(h_inode, h_file, length);
26185+ if (!err)
953406b4 26186+ err = security_path_truncate(h_path);
2cbb1c4b
JR
26187+ if (!err) {
26188+ lockdep_off();
1facf9fc 26189+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
26190+ lockdep_on();
26191+ }
1facf9fc 26192+
/* the labels undo, in reverse order, only what the !h_file branch took */
4f0767ce 26193+out_inode:
1facf9fc 26194+ if (!h_file)
26195+ put_write_access(h_inode);
4f0767ce 26196+out_mnt:
1facf9fc 26197+ if (!h_file)
26198+ mnt_drop_write(h_path->mnt);
4f0767ce 26199+out:
1facf9fc 26200+ return err;
26201+}
26202+
26203+/* ---------------------------------------------------------------------- */
26204+
/* argument bundle for au_call_vfsub_mkdir(); the result goes to *errp */
26205+struct au_vfsub_mkdir_args {
26206+ int *errp;
26207+ struct inode *dir;
26208+ struct path *path;
26209+ int mode;
26210+};
26211+
/* callback handed to au_wkq_wait(): unpack @args and run vfsub_mkdir() */
26212+static void au_call_vfsub_mkdir(void *args)
26213+{
26214+ struct au_vfsub_mkdir_args *a = args;
26215+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
26216+}
26217+
/*
 * vfsub_mkdir(), run directly when au_test_h_perm_sio(@dir,
 * MAY_EXEC | MAY_WRITE) is zero and through au_wkq_wait() otherwise
 * (NOTE(review): presumably the workqueue runs with other credentials -
 * confirm).
 * Returns the vfsub_mkdir() result, or the au_wkq_wait() error when the
 * hand-over itself failed.
 */
26218+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
26219+{
26220+ int err, do_sio, wkq_err;
26221+
26222+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26223+ if (!do_sio)
26224+ err = vfsub_mkdir(dir, path, mode);
26225+ else {
26226+ struct au_vfsub_mkdir_args args = {
26227+ .errp = &err,
26228+ .dir = dir,
26229+ .path = path,
26230+ .mode = mode
26231+ };
26232+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
26233+ if (unlikely(wkq_err))
26234+ err = wkq_err;
26235+ }
26236+
26237+ return err;
26238+}
26239+
/* argument bundle for au_call_vfsub_rmdir(); the result goes to *errp */
26240+struct au_vfsub_rmdir_args {
26241+ int *errp;
26242+ struct inode *dir;
26243+ struct path *path;
26244+};
26245+
/* callback handed to au_wkq_wait(): unpack @args and run vfsub_rmdir() */
26246+static void au_call_vfsub_rmdir(void *args)
26247+{
26248+ struct au_vfsub_rmdir_args *a = args;
26249+ *a->errp = vfsub_rmdir(a->dir, a->path);
26250+}
26251+
/*
 * vfsub_rmdir(), run directly when au_test_h_perm_sio(@dir,
 * MAY_EXEC | MAY_WRITE) is zero and through au_wkq_wait() otherwise.
 * Returns the vfsub_rmdir() result, or the au_wkq_wait() error when the
 * hand-over itself failed.
 */
26252+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
26253+{
26254+ int err, do_sio, wkq_err;
26255+
26256+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
26257+ if (!do_sio)
26258+ err = vfsub_rmdir(dir, path);
26259+ else {
26260+ struct au_vfsub_rmdir_args args = {
26261+ .errp = &err,
26262+ .dir = dir,
26263+ .path = path
26264+ };
26265+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
26266+ if (unlikely(wkq_err))
26267+ err = wkq_err;
26268+ }
26269+
26270+ return err;
26271+}
26272+
26273+/* ---------------------------------------------------------------------- */
26274+
/* argument bundle for call_notify_change(); the result goes to *errp */
26275+struct notify_change_args {
26276+ int *errp;
26277+ struct path *path;
26278+ struct iattr *ia;
26279+};
26280+
/*
 * Apply a->ia to a->path->dentry with notify_change(); the inode must be
 * locked.  *a->errp is -EPERM when the inode is immutable or append-only,
 * otherwise the notify_change() result.  After success the attributes of
 * the path are refreshed and that result is ignored.
 */
26281+static void call_notify_change(void *args)
26282+{
26283+ struct notify_change_args *a = args;
26284+ struct inode *h_inode;
26285+
26286+ h_inode = a->path->dentry->d_inode;
26287+ IMustLock(h_inode);
26288+
26289+ *a->errp = -EPERM;
26290+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
1facf9fc 26291+ *a->errp = notify_change(a->path->dentry, a->ia);
1facf9fc 26292+ if (!*a->errp)
26293+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
26294+ }
26295+ AuTraceErr(*a->errp);
26296+}
26297+
/*
 * Run call_notify_change() directly in the calling context for @path and
 * @ia, and return the value it stored.
 */
26298+int vfsub_notify_change(struct path *path, struct iattr *ia)
26299+{
26300+ int err;
26301+ struct notify_change_args args = {
26302+ .errp = &err,
26303+ .path = path,
26304+ .ia = ia
26305+ };
26306+
26307+ call_notify_change(&args);
26308+
26309+ return err;
26310+}
26311+
/*
 * Run call_notify_change() through au_wkq_wait() for @path and @ia.
 * Returns the value stored by the callback, or the au_wkq_wait() error when
 * the hand-over itself failed.
 */
26312+int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
26313+{
26314+ int err, wkq_err;
26315+ struct notify_change_args args = {
26316+ .errp = &err,
26317+ .path = path,
26318+ .ia = ia
26319+ };
26320+
26321+ wkq_err = au_wkq_wait(call_notify_change, &args);
26322+ if (unlikely(wkq_err))
26323+ err = wkq_err;
26324+
26325+ return err;
26326+}
26327+
26328+/* ---------------------------------------------------------------------- */
26329+
/* argument bundle for call_unlink(); the result goes to *errp */
26330+struct unlink_args {
26331+ int *errp;
26332+ struct inode *dir;
26333+ struct path *path;
26334+};
26335+
/*
 * Unlink a->path->dentry from a->dir, which must be locked.
 * security_path_unlink() is asked first, with a->path->dentry temporarily
 * switched to the parent.  An extra dentry reference is held across
 * vfs_unlink() unless the fs is NFS and d_count is 1
 * (NOTE(review): presumably holding it would make NFS silly-rename the
 * file - confirm).  The inode, if any, is held across the call as well.
 * After success the attributes of the parent are refreshed and that result
 * is ignored.
 */
26336+static void call_unlink(void *args)
26337+{
26338+ struct unlink_args *a = args;
26339+ struct dentry *d = a->path->dentry;
26340+ struct inode *h_inode;
26341+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
027c5e7a 26342+ && d->d_count == 1);
1facf9fc 26343+
26344+ IMustLock(a->dir);
26345+
26346+ a->path->dentry = d->d_parent;
26347+ *a->errp = security_path_unlink(a->path, d);
26348+ a->path->dentry = d;
26349+ if (unlikely(*a->errp))
26350+ return;
26351+
26352+ if (!stop_sillyrename)
26353+ dget(d);
26354+ h_inode = d->d_inode;
26355+ if (h_inode)
027c5e7a 26356+ ihold(h_inode);
1facf9fc 26357+
2cbb1c4b 26358+ lockdep_off();
1facf9fc 26359+ *a->errp = vfs_unlink(a->dir, d);
2cbb1c4b 26360+ lockdep_on();
1facf9fc 26361+ if (!*a->errp) {
26362+ struct path tmp = {
26363+ .dentry = d->d_parent,
26364+ .mnt = a->path->mnt
26365+ };
26366+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
26367+ }
26368+
26369+ if (!stop_sillyrename)
26370+ dput(d);
26371+ if (h_inode)
26372+ iput(h_inode);
26373+
26374+ AuTraceErr(*a->errp);
26375+}
26376+
26377+/*
26378+ * @dir: must be locked.
26379+ * @path: target to unlink (its dentry and mount).
26380+ */
26381+int vfsub_unlink(struct inode *dir, struct path *path, int force)
26382+{
26383+ int err;
26384+ struct unlink_args args = {
26385+ .errp = &err,
26386+ .dir = dir,
26387+ .path = path
26388+ };
26389+
26390+ if (!force)
26391+ call_unlink(&args);
26392+ else {
26393+ int wkq_err;
26394+
26395+ wkq_err = au_wkq_wait(call_unlink, &args);
26396+ if (unlikely(wkq_err))
26397+ err = wkq_err;
26398+ }
26399+
26400+ return err;
26401+}
7f207e10
AM
26402diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
26403--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
26404+++ linux/fs/aufs/vfsub.h 2011-08-24 13:30:24.734646739 +0200
26405@@ -0,0 +1,232 @@
1facf9fc 26406+/*
027c5e7a 26407+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26408+ *
26409+ * This program, aufs is free software; you can redistribute it and/or modify
26410+ * it under the terms of the GNU General Public License as published by
26411+ * the Free Software Foundation; either version 2 of the License, or
26412+ * (at your option) any later version.
dece6358
AM
26413+ *
26414+ * This program is distributed in the hope that it will be useful,
26415+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26416+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26417+ * GNU General Public License for more details.
26418+ *
26419+ * You should have received a copy of the GNU General Public License
26420+ * along with this program; if not, write to the Free Software
26421+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26422+ */
26423+
26424+/*
26425+ * sub-routines for VFS
26426+ */
26427+
26428+#ifndef __AUFS_VFSUB_H__
26429+#define __AUFS_VFSUB_H__
26430+
26431+#ifdef __KERNEL__
26432+
26433+#include <linux/fs.h>
0c5527e5 26434+#include <linux/lglock.h>
7f207e10 26435+#include "debug.h"
1facf9fc 26436+
7f207e10 26437+/* copied from linux/fs/internal.h */
2cbb1c4b 26438+/* todo: BAD approach!! */
0c5527e5 26439+DECLARE_BRLOCK(vfsmount_lock);
0c5527e5 26440+extern void file_sb_list_del(struct file *f);
2cbb1c4b 26441+extern spinlock_t inode_sb_list_lock;
0c5527e5 26442+
7f207e10
AM
26443+/* copied from linux/fs/file_table.c */
26444+DECLARE_LGLOCK(files_lglock);
0c5527e5
AM
26445+#ifdef CONFIG_SMP
26446+/*
26447+ * These macros iterate all files on all CPUs for a given superblock.
26448+ * files_lglock must be held globally.
26449+ */
26450+#define do_file_list_for_each_entry(__sb, __file) \
26451+{ \
26452+ int i; \
26453+ for_each_possible_cpu(i) { \
26454+ struct list_head *list; \
26455+ list = per_cpu_ptr((__sb)->s_files, i); \
26456+ list_for_each_entry((__file), list, f_u.fu_list)
26457+
26458+#define while_file_list_for_each_entry \
26459+ } \
26460+}
26461+
26462+#else
26463+
26464+#define do_file_list_for_each_entry(__sb, __file) \
26465+{ \
26466+ struct list_head *list; \
26467+ list = &(__sb)->s_files; /* use the macro argument, not a caller's 'sb' */ \
26468+ list_for_each_entry((__file), list, f_u.fu_list)
26469+
26470+#define while_file_list_for_each_entry \
26471+}
7f207e10
AM
26472+#endif
26473+
26474+/* ---------------------------------------------------------------------- */
1facf9fc 26475+
26476+/* lock subclass for lower inode */
26477+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
26478+/* reduce? gave up. */
26479+enum {
26480+ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
26481+ AuLsc_I_PARENT, /* lower inode, parent first */
26482+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 26483+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 26484+ AuLsc_I_CHILD,
26485+ AuLsc_I_CHILD2,
26486+ AuLsc_I_End
26487+};
26488+
26489+/* to debug easier, do not make them inlined functions */
26490+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
26491+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
26492+
26493+/* ---------------------------------------------------------------------- */
26494+
7f207e10
AM
26495+static inline void vfsub_drop_nlink(struct inode *inode)
26496+{
26497+ AuDebugOn(!inode->i_nlink);
26498+ drop_nlink(inode);
26499+}
26500+
027c5e7a
AM
26501+static inline void vfsub_dead_dir(struct inode *inode)
26502+{
26503+ AuDebugOn(!S_ISDIR(inode->i_mode));
26504+ inode->i_flags |= S_DEAD;
26505+ clear_nlink(inode);
26506+}
26507+
7f207e10
AM
26508+/* ---------------------------------------------------------------------- */
26509+
26510+int vfsub_update_h_iattr(struct path *h_path, int *did);
26511+struct file *vfsub_dentry_open(struct path *path, int flags);
26512+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
1facf9fc 26513+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
26514+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
26515+ int len);
26516+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
2cbb1c4b 26517+int vfsub_name_hash(const char *name, struct qstr *this, int len);
1facf9fc 26518+
26519+/* ---------------------------------------------------------------------- */
26520+
26521+struct au_hinode;
26522+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
26523+ struct dentry *d2, struct au_hinode *hdir2);
26524+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
26525+ struct dentry *d2, struct au_hinode *hdir2);
26526+
26527+int vfsub_create(struct inode *dir, struct path *path, int mode);
26528+int vfsub_symlink(struct inode *dir, struct path *path,
26529+ const char *symname);
26530+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
26531+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
26532+ struct path *path);
26533+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
26534+ struct inode *hdir, struct path *path);
26535+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
26536+int vfsub_rmdir(struct inode *dir, struct path *path);
26537+
26538+/* ---------------------------------------------------------------------- */
26539+
26540+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
26541+ loff_t *ppos);
26542+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
26543+ loff_t *ppos);
26544+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
26545+ loff_t *ppos);
26546+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
26547+ loff_t *ppos);
4a4d8108 26548+int vfsub_flush(struct file *file, fl_owner_t id);
1facf9fc 26549+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
26550+
4a4d8108
AM
26551+static inline unsigned int vfsub_file_flags(struct file *file)
26552+{
26553+ unsigned int flags;
26554+
26555+ spin_lock(&file->f_lock);
26556+ flags = file->f_flags;
26557+ spin_unlock(&file->f_lock);
26558+
26559+ return flags;
26560+}
1308ab2a 26561+
1facf9fc 26562+static inline void vfsub_file_accessed(struct file *h_file)
26563+{
26564+ file_accessed(h_file);
26565+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
26566+}
26567+
26568+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
26569+ struct dentry *h_dentry)
26570+{
26571+ struct path h_path = {
26572+ .dentry = h_dentry,
26573+ .mnt = h_mnt
26574+ };
26575+ touch_atime(h_mnt, h_dentry);
26576+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
26577+}
26578+
4a4d8108
AM
26579+long vfsub_splice_to(struct file *in, loff_t *ppos,
26580+ struct pipe_inode_info *pipe, size_t len,
26581+ unsigned int flags);
26582+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
26583+ loff_t *ppos, size_t len, unsigned int flags);
26584+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
26585+ struct file *h_file);
53392da6 26586+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 26587+
1facf9fc 26588+/* ---------------------------------------------------------------------- */
26589+
26590+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
26591+{
26592+ loff_t err;
26593+
2cbb1c4b 26594+ lockdep_off();
1facf9fc 26595+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 26596+ lockdep_on();
1facf9fc 26597+ return err;
26598+}
26599+
26600+/* ---------------------------------------------------------------------- */
26601+
26602+/* dirty workaround for strict type of fmode_t */
26603+union vfsub_fmu {
26604+ fmode_t fm;
26605+ unsigned int ui;
26606+};
26607+
26608+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
26609+{
26610+ union vfsub_fmu u = {
26611+ .fm = fm
26612+ };
26613+
26614+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui));
26615+
26616+ return u.ui;
26617+}
26618+
26619+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
26620+{
26621+ union vfsub_fmu u = {
26622+ .ui = ui
26623+ };
26624+
26625+ return u.fm;
26626+}
26627+
4a4d8108
AM
26628+/* ---------------------------------------------------------------------- */
26629+
26630+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
26631+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
26632+int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
26633+int vfsub_notify_change(struct path *path, struct iattr *ia);
26634+int vfsub_unlink(struct inode *dir, struct path *path, int force);
26635+
1facf9fc 26636+#endif /* __KERNEL__ */
26637+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
26638diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
26639--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
53392da6 26640+++ linux/fs/aufs/wbr_policy.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 26641@@ -0,0 +1,700 @@
1facf9fc 26642+/*
027c5e7a 26643+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 26644+ *
26645+ * This program, aufs is free software; you can redistribute it and/or modify
26646+ * it under the terms of the GNU General Public License as published by
26647+ * the Free Software Foundation; either version 2 of the License, or
26648+ * (at your option) any later version.
dece6358
AM
26649+ *
26650+ * This program is distributed in the hope that it will be useful,
26651+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26652+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26653+ * GNU General Public License for more details.
26654+ *
26655+ * You should have received a copy of the GNU General Public License
26656+ * along with this program; if not, write to the Free Software
26657+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26658+ */
26659+
26660+/*
26661+ * policies for selecting one among multiple writable branches
26662+ */
26663+
26664+#include <linux/statfs.h>
26665+#include "aufs.h"
26666+
26667+/* subset of cpup_attr() */
26668+static noinline_for_stack
26669+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
26670+{
26671+ int err, sbits;
26672+ struct iattr ia;
26673+ struct inode *h_isrc;
26674+
26675+ h_isrc = h_src->d_inode;
26676+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
26677+ ia.ia_mode = h_isrc->i_mode;
26678+ ia.ia_uid = h_isrc->i_uid;
26679+ ia.ia_gid = h_isrc->i_gid;
26680+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
26681+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
26682+ err = vfsub_sio_notify_change(h_path, &ia);
26683+
26684+ /* is this nfs only? */
26685+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
26686+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
26687+ ia.ia_mode = h_isrc->i_mode;
26688+ err = vfsub_sio_notify_change(h_path, &ia);
26689+ }
26690+
26691+ return err;
26692+}
26693+
26694+#define AuCpdown_PARENT_OPQ 1
26695+#define AuCpdown_WHED (1 << 1)
26696+#define AuCpdown_MADE_DIR (1 << 2)
26697+#define AuCpdown_DIROPQ (1 << 3)
26698+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
26699+#define au_fset_cpdown(flags, name) \
26700+ do { (flags) |= AuCpdown_##name; } while (0)
26701+#define au_fclr_cpdown(flags, name) \
26702+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 26703+
26704+struct au_cpdown_dir_args {
26705+ struct dentry *parent;
26706+ unsigned int flags;
26707+};
26708+
26709+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
26710+ struct au_cpdown_dir_args *a)
26711+{
26712+ int err;
26713+ struct dentry *opq_dentry;
26714+
26715+ opq_dentry = au_diropq_create(dentry, bdst);
26716+ err = PTR_ERR(opq_dentry);
26717+ if (IS_ERR(opq_dentry))
26718+ goto out;
26719+ dput(opq_dentry);
26720+ au_fset_cpdown(a->flags, DIROPQ);
26721+
4f0767ce 26722+out:
1facf9fc 26723+ return err;
26724+}
26725+
26726+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
26727+ struct inode *dir, aufs_bindex_t bdst)
26728+{
26729+ int err;
26730+ struct path h_path;
26731+ struct au_branch *br;
26732+
26733+ br = au_sbr(dentry->d_sb, bdst);
26734+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
26735+ err = PTR_ERR(h_path.dentry);
26736+ if (IS_ERR(h_path.dentry))
26737+ goto out;
26738+
26739+ err = 0;
26740+ if (h_path.dentry->d_inode) {
26741+ h_path.mnt = br->br_mnt;
26742+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
26743+ dentry);
26744+ }
26745+ dput(h_path.dentry);
26746+
4f0767ce 26747+out:
1facf9fc 26748+ return err;
26749+}
26750+
26751+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
26752+ struct dentry *h_parent, void *arg)
26753+{
26754+ int err, rerr;
4a4d8108 26755+ aufs_bindex_t bopq, bstart;
1facf9fc 26756+ struct path h_path;
26757+ struct dentry *parent;
26758+ struct inode *h_dir, *h_inode, *inode, *dir;
26759+ struct au_cpdown_dir_args *args = arg;
26760+
26761+ bstart = au_dbstart(dentry);
26762+ /* dentry is di-locked */
26763+ parent = dget_parent(dentry);
26764+ dir = parent->d_inode;
26765+ h_dir = h_parent->d_inode;
26766+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
26767+ IMustLock(h_dir);
26768+
26769+ err = au_lkup_neg(dentry, bdst);
26770+ if (unlikely(err < 0))
26771+ goto out;
26772+ h_path.dentry = au_h_dptr(dentry, bdst);
26773+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
26774+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
26775+ S_IRWXU | S_IRUGO | S_IXUGO);
26776+ if (unlikely(err))
26777+ goto out_put;
26778+ au_fset_cpdown(args->flags, MADE_DIR);
26779+
1facf9fc 26780+ bopq = au_dbdiropq(dentry);
26781+ au_fclr_cpdown(args->flags, WHED);
26782+ au_fclr_cpdown(args->flags, DIROPQ);
26783+ if (au_dbwh(dentry) == bdst)
26784+ au_fset_cpdown(args->flags, WHED);
26785+ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
26786+ au_fset_cpdown(args->flags, PARENT_OPQ);
1facf9fc 26787+ h_inode = h_path.dentry->d_inode;
26788+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26789+ if (au_ftest_cpdown(args->flags, WHED)) {
26790+ err = au_cpdown_dir_opq(dentry, bdst, args);
26791+ if (unlikely(err)) {
26792+ mutex_unlock(&h_inode->i_mutex);
26793+ goto out_dir;
26794+ }
26795+ }
26796+
26797+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
26798+ mutex_unlock(&h_inode->i_mutex);
26799+ if (unlikely(err))
26800+ goto out_opq;
26801+
26802+ if (au_ftest_cpdown(args->flags, WHED)) {
26803+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
26804+ if (unlikely(err))
26805+ goto out_opq;
26806+ }
26807+
26808+ inode = dentry->d_inode;
26809+ if (au_ibend(inode) < bdst)
26810+ au_set_ibend(inode, bdst);
26811+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
26812+ au_hi_flags(inode, /*isdir*/1));
26813+ goto out; /* success */
26814+
26815+ /* revert */
4f0767ce 26816+out_opq:
1facf9fc 26817+ if (au_ftest_cpdown(args->flags, DIROPQ)) {
26818+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
26819+ rerr = au_diropq_remove(dentry, bdst);
26820+ mutex_unlock(&h_inode->i_mutex);
26821+ if (unlikely(rerr)) {
26822+ AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
26823+ AuDLNPair(dentry), bdst, rerr);
26824+ err = -EIO;
26825+ goto out;
26826+ }
26827+ }
4f0767ce 26828+out_dir:
1facf9fc 26829+ if (au_ftest_cpdown(args->flags, MADE_DIR)) {
26830+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
26831+ if (unlikely(rerr)) {
26832+ AuIOErr("failed removing %.*s b%d (%d)\n",
26833+ AuDLNPair(dentry), bdst, rerr);
26834+ err = -EIO;
26835+ }
26836+ }
4f0767ce 26837+out_put:
1facf9fc 26838+ au_set_h_dptr(dentry, bdst, NULL);
26839+ if (au_dbend(dentry) == bdst)
26840+ au_update_dbend(dentry);
4f0767ce 26841+out:
1facf9fc 26842+ dput(parent);
26843+ return err;
26844+}
26845+
26846+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
26847+{
26848+ int err;
26849+ struct au_cpdown_dir_args args = {
26850+ .parent = dget_parent(dentry),
26851+ .flags = 0
26852+ };
26853+
26854+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
26855+ dput(args.parent);
26856+
26857+ return err;
26858+}
26859+
26860+/* ---------------------------------------------------------------------- */
26861+
26862+/* policies for create */
26863+
4a4d8108
AM
26864+static int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
26865+{
26866+ int err, i, j, ndentry;
26867+ aufs_bindex_t bopq;
26868+ struct au_dcsub_pages dpages;
26869+ struct au_dpage *dpage;
26870+ struct dentry **dentries, *parent, *d;
26871+
26872+ err = au_dpages_init(&dpages, GFP_NOFS);
26873+ if (unlikely(err))
26874+ goto out;
26875+ parent = dget_parent(dentry);
027c5e7a 26876+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
26877+ if (unlikely(err))
26878+ goto out_free;
26879+
26880+ err = bindex;
26881+ for (i = 0; i < dpages.ndpage; i++) {
26882+ dpage = dpages.dpages + i;
26883+ dentries = dpage->dentries;
26884+ ndentry = dpage->ndentry;
26885+ for (j = 0; j < ndentry; j++) {
26886+ d = dentries[j];
26887+ di_read_lock_parent2(d, !AuLock_IR);
26888+ bopq = au_dbdiropq(d);
26889+ di_read_unlock(d, !AuLock_IR);
26890+ if (bopq >= 0 && bopq < err)
26891+ err = bopq;
26892+ }
26893+ }
26894+
26895+out_free:
26896+ dput(parent);
26897+ au_dpages_free(&dpages);
26898+out:
26899+ return err;
26900+}
26901+
1facf9fc 26902+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
26903+{
26904+ for (; bindex >= 0; bindex--)
26905+ if (!au_br_rdonly(au_sbr(sb, bindex)))
26906+ return bindex;
26907+ return -EROFS;
26908+}
26909+
26910+/* top down parent */
26911+static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
26912+{
26913+ int err;
26914+ aufs_bindex_t bstart, bindex;
26915+ struct super_block *sb;
26916+ struct dentry *parent, *h_parent;
26917+
26918+ sb = dentry->d_sb;
26919+ bstart = au_dbstart(dentry);
26920+ err = bstart;
26921+ if (!au_br_rdonly(au_sbr(sb, bstart)))
26922+ goto out;
26923+
26924+ err = -EROFS;
26925+ parent = dget_parent(dentry);
26926+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
26927+ h_parent = au_h_dptr(parent, bindex);
26928+ if (!h_parent || !h_parent->d_inode)
26929+ continue;
26930+
26931+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
26932+ err = bindex;
26933+ break;
26934+ }
26935+ }
26936+ dput(parent);
26937+
26938+ /* bottom up here */
4a4d8108 26939+ if (unlikely(err < 0)) {
1facf9fc 26940+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
26941+ if (err >= 0)
26942+ err = au_wbr_nonopq(dentry, err);
26943+ }
1facf9fc 26944+
4f0767ce 26945+out:
1facf9fc 26946+ AuDbg("b%d\n", err);
26947+ return err;
26948+}
26949+
26950+/* ---------------------------------------------------------------------- */
26951+
26952+/* an exception for the policy other than tdp */
26953+static int au_wbr_create_exp(struct dentry *dentry)
26954+{
26955+ int err;
26956+ aufs_bindex_t bwh, bdiropq;
26957+ struct dentry *parent;
26958+
26959+ err = -1;
26960+ bwh = au_dbwh(dentry);
26961+ parent = dget_parent(dentry);
26962+ bdiropq = au_dbdiropq(parent);
26963+ if (bwh >= 0) {
26964+ if (bdiropq >= 0)
26965+ err = min(bdiropq, bwh);
26966+ else
26967+ err = bwh;
26968+ AuDbg("%d\n", err);
26969+ } else if (bdiropq >= 0) {
26970+ err = bdiropq;
26971+ AuDbg("%d\n", err);
26972+ }
26973+ dput(parent);
26974+
4a4d8108
AM
26975+ if (err >= 0)
26976+ err = au_wbr_nonopq(dentry, err);
26977+
1facf9fc 26978+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
26979+ err = -1;
26980+
26981+ AuDbg("%d\n", err);
26982+ return err;
26983+}
26984+
26985+/* ---------------------------------------------------------------------- */
26986+
26987+/* round robin */
26988+static int au_wbr_create_init_rr(struct super_block *sb)
26989+{
26990+ int err;
26991+
26992+ err = au_wbr_bu(sb, au_sbend(sb));
26993+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 26994+ /* smp_mb(); */
1facf9fc 26995+
26996+ AuDbg("b%d\n", err);
26997+ return err;
26998+}
26999+
27000+static int au_wbr_create_rr(struct dentry *dentry, int isdir)
27001+{
27002+ int err, nbr;
27003+ unsigned int u;
27004+ aufs_bindex_t bindex, bend;
27005+ struct super_block *sb;
27006+ atomic_t *next;
27007+
27008+ err = au_wbr_create_exp(dentry);
27009+ if (err >= 0)
27010+ goto out;
27011+
27012+ sb = dentry->d_sb;
27013+ next = &au_sbi(sb)->si_wbr_rr_next;
27014+ bend = au_sbend(sb);
27015+ nbr = bend + 1;
27016+ for (bindex = 0; bindex <= bend; bindex++) {
27017+ if (!isdir) {
27018+ err = atomic_dec_return(next) + 1;
27019+ /* modulo for 0 is meaningless */
27020+ if (unlikely(!err))
27021+ err = atomic_dec_return(next) + 1;
27022+ } else
27023+ err = atomic_read(next);
27024+ AuDbg("%d\n", err);
27025+ u = err;
27026+ err = u % nbr;
27027+ AuDbg("%d\n", err);
27028+ if (!au_br_rdonly(au_sbr(sb, err)))
27029+ break;
27030+ err = -EROFS;
27031+ }
27032+
4a4d8108
AM
27033+ if (err >= 0)
27034+ err = au_wbr_nonopq(dentry, err);
27035+
4f0767ce 27036+out:
1facf9fc 27037+ AuDbg("%d\n", err);
27038+ return err;
27039+}
27040+
27041+/* ---------------------------------------------------------------------- */
27042+
27043+/* most free space */
27044+static void au_mfs(struct dentry *dentry)
27045+{
27046+ struct super_block *sb;
27047+ struct au_branch *br;
27048+ struct au_wbr_mfs *mfs;
27049+ aufs_bindex_t bindex, bend;
27050+ int err;
27051+ unsigned long long b, bavail;
7f207e10 27052+ struct path h_path;
1facf9fc 27053+ /* reduce the stack usage */
27054+ struct kstatfs *st;
27055+
27056+ st = kmalloc(sizeof(*st), GFP_NOFS);
27057+ if (unlikely(!st)) {
27058+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
27059+ return;
27060+ }
27061+
27062+ bavail = 0;
27063+ sb = dentry->d_sb;
27064+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 27065+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 27066+ mfs->mfs_bindex = -EROFS;
27067+ mfs->mfsrr_bytes = 0;
27068+ bend = au_sbend(sb);
27069+ for (bindex = 0; bindex <= bend; bindex++) {
27070+ br = au_sbr(sb, bindex);
27071+ if (au_br_rdonly(br))
27072+ continue;
27073+
27074+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27075+ h_path.mnt = br->br_mnt;
27076+ h_path.dentry = h_path.mnt->mnt_root;
27077+ err = vfs_statfs(&h_path, st);
1facf9fc 27078+ if (unlikely(err)) {
27079+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
27080+ continue;
27081+ }
27082+
27083+ /* when the available size is equal, select the lower one */
27084+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
27085+ || sizeof(b) < sizeof(st->f_bsize));
27086+ b = st->f_bavail * st->f_bsize;
27087+ br->br_wbr->wbr_bytes = b;
27088+ if (b >= bavail) {
27089+ bavail = b;
27090+ mfs->mfs_bindex = bindex;
27091+ mfs->mfs_jiffy = jiffies;
27092+ }
27093+ }
27094+
27095+ mfs->mfsrr_bytes = bavail;
27096+ AuDbg("b%d\n", mfs->mfs_bindex);
27097+ kfree(st);
27098+}
27099+
27100+static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
27101+{
27102+ int err;
27103+ struct super_block *sb;
27104+ struct au_wbr_mfs *mfs;
27105+
27106+ err = au_wbr_create_exp(dentry);
27107+ if (err >= 0)
27108+ goto out;
27109+
27110+ sb = dentry->d_sb;
27111+ mfs = &au_sbi(sb)->si_wbr_mfs;
27112+ mutex_lock(&mfs->mfs_lock);
27113+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
27114+ || mfs->mfs_bindex < 0
27115+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
27116+ au_mfs(dentry);
27117+ err = mfs->mfs_bindex; /* snapshot under mfs_lock; au_mfs() rewrites it */
27118+ mutex_unlock(&mfs->mfs_lock);
27119+
4a4d8108
AM
27120+ if (err >= 0)
27121+ err = au_wbr_nonopq(dentry, err);
27122+
4f0767ce 27123+out:
1facf9fc 27124+ AuDbg("b%d\n", err);
27125+ return err;
27126+}
27127+
27128+static int au_wbr_create_init_mfs(struct super_block *sb)
27129+{
27130+ struct au_wbr_mfs *mfs;
27131+
27132+ mfs = &au_sbi(sb)->si_wbr_mfs;
27133+ mutex_init(&mfs->mfs_lock);
27134+ mfs->mfs_jiffy = 0;
27135+ mfs->mfs_bindex = -EROFS;
27136+
27137+ return 0;
27138+}
27139+
27140+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
27141+{
27142+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
27143+ return 0;
27144+}
27145+
27146+/* ---------------------------------------------------------------------- */
27147+
27148+/* most free space and then round robin */
27149+static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
27150+{
27151+ int err;
27152+ struct au_wbr_mfs *mfs;
27153+
27154+ err = au_wbr_create_mfs(dentry, isdir);
27155+ if (err >= 0) {
27156+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 27157+ mutex_lock(&mfs->mfs_lock);
1facf9fc 27158+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
27159+ err = au_wbr_create_rr(dentry, isdir);
dece6358 27160+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 27161+ }
27162+
27163+ AuDbg("b%d\n", err);
27164+ return err;
27165+}
27166+
27167+static int au_wbr_create_init_mfsrr(struct super_block *sb)
27168+{
27169+ int err;
27170+
27171+ au_wbr_create_init_mfs(sb); /* ignore */
27172+ err = au_wbr_create_init_rr(sb);
27173+
27174+ return err;
27175+}
27176+
27177+/* ---------------------------------------------------------------------- */
27178+
27179+/* top down parent and most free space */
27180+static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
27181+{
27182+ int err, e2;
27183+ unsigned long long b;
27184+ aufs_bindex_t bindex, bstart, bend;
27185+ struct super_block *sb;
27186+ struct dentry *parent, *h_parent;
27187+ struct au_branch *br;
27188+
27189+ err = au_wbr_create_tdp(dentry, isdir);
27190+ if (unlikely(err < 0))
27191+ goto out;
27192+ parent = dget_parent(dentry);
27193+ bstart = au_dbstart(parent);
27194+ bend = au_dbtaildir(parent);
27195+ if (bstart == bend)
27196+ goto out_parent; /* success */
27197+
27198+ e2 = au_wbr_create_mfs(dentry, isdir);
27199+ if (e2 < 0)
27200+ goto out_parent; /* success */
27201+
27202+ /* when the available size is equal, select upper one */
27203+ sb = dentry->d_sb;
27204+ br = au_sbr(sb, err);
27205+ b = br->br_wbr->wbr_bytes;
27206+ AuDbg("b%d, %llu\n", err, b);
27207+
27208+ for (bindex = bstart; bindex <= bend; bindex++) {
27209+ h_parent = au_h_dptr(parent, bindex);
27210+ if (!h_parent || !h_parent->d_inode)
27211+ continue;
27212+
27213+ br = au_sbr(sb, bindex);
27214+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
27215+ b = br->br_wbr->wbr_bytes;
27216+ err = bindex;
27217+ AuDbg("b%d, %llu\n", err, b);
27218+ }
27219+ }
27220+
4a4d8108
AM
27221+ if (err >= 0)
27222+ err = au_wbr_nonopq(dentry, err);
27223+
4f0767ce 27224+out_parent:
1facf9fc 27225+ dput(parent);
4f0767ce 27226+out:
1facf9fc 27227+ AuDbg("b%d\n", err);
27228+ return err;
27229+}
27230+
27231+/* ---------------------------------------------------------------------- */
27232+
27233+/* policies for copyup */
27234+
27235+/* top down parent */
27236+static int au_wbr_copyup_tdp(struct dentry *dentry)
27237+{
27238+ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
27239+}
27240+
27241+/* bottom up parent */
27242+static int au_wbr_copyup_bup(struct dentry *dentry)
27243+{
27244+ int err;
27245+ aufs_bindex_t bindex, bstart;
27246+ struct dentry *parent, *h_parent;
27247+ struct super_block *sb;
27248+
27249+ err = -EROFS;
27250+ sb = dentry->d_sb;
27251+ parent = dget_parent(dentry);
27252+ bstart = au_dbstart(parent);
27253+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
27254+ h_parent = au_h_dptr(parent, bindex);
27255+ if (!h_parent || !h_parent->d_inode)
27256+ continue;
27257+
27258+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
27259+ err = bindex;
27260+ break;
27261+ }
27262+ }
27263+ dput(parent);
27264+
27265+ /* bottom up here */
27266+ if (unlikely(err < 0))
27267+ err = au_wbr_bu(sb, bstart - 1);
27268+
27269+ AuDbg("b%d\n", err);
27270+ return err;
27271+}
27272+
27273+/* bottom up */
27274+static int au_wbr_copyup_bu(struct dentry *dentry)
27275+{
27276+ int err;
4a4d8108 27277+ aufs_bindex_t bstart;
1facf9fc 27278+
4a4d8108
AM
27279+ bstart = au_dbstart(dentry);
27280+ err = au_wbr_bu(dentry->d_sb, bstart);
27281+ AuDbg("b%d\n", err);
27282+ if (err > bstart)
27283+ err = au_wbr_nonopq(dentry, err);
1facf9fc 27284+
27285+ AuDbg("b%d\n", err);
27286+ return err;
27287+}
27288+
27289+/* ---------------------------------------------------------------------- */
27290+
27291+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
27292+ [AuWbrCopyup_TDP] = {
27293+ .copyup = au_wbr_copyup_tdp
27294+ },
27295+ [AuWbrCopyup_BUP] = {
27296+ .copyup = au_wbr_copyup_bup
27297+ },
27298+ [AuWbrCopyup_BU] = {
27299+ .copyup = au_wbr_copyup_bu
27300+ }
27301+};
27302+
27303+struct au_wbr_create_operations au_wbr_create_ops[] = {
27304+ [AuWbrCreate_TDP] = {
27305+ .create = au_wbr_create_tdp
27306+ },
27307+ [AuWbrCreate_RR] = {
27308+ .create = au_wbr_create_rr,
27309+ .init = au_wbr_create_init_rr
27310+ },
27311+ [AuWbrCreate_MFS] = {
27312+ .create = au_wbr_create_mfs,
27313+ .init = au_wbr_create_init_mfs,
27314+ .fin = au_wbr_create_fin_mfs
27315+ },
27316+ [AuWbrCreate_MFSV] = {
27317+ .create = au_wbr_create_mfs,
27318+ .init = au_wbr_create_init_mfs,
27319+ .fin = au_wbr_create_fin_mfs
27320+ },
27321+ [AuWbrCreate_MFSRR] = {
27322+ .create = au_wbr_create_mfsrr,
27323+ .init = au_wbr_create_init_mfsrr,
27324+ .fin = au_wbr_create_fin_mfs
27325+ },
27326+ [AuWbrCreate_MFSRRV] = {
27327+ .create = au_wbr_create_mfsrr,
27328+ .init = au_wbr_create_init_mfsrr,
27329+ .fin = au_wbr_create_fin_mfs
27330+ },
27331+ [AuWbrCreate_PMFS] = {
27332+ .create = au_wbr_create_pmfs,
27333+ .init = au_wbr_create_init_mfs,
27334+ .fin = au_wbr_create_fin_mfs
27335+ },
27336+ [AuWbrCreate_PMFSV] = {
27337+ .create = au_wbr_create_pmfs,
27338+ .init = au_wbr_create_init_mfs,
27339+ .fin = au_wbr_create_fin_mfs
27340+ }
27341+};
7f207e10
AM
27342diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
27343--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
53392da6 27344+++ linux/fs/aufs/whout.c 2011-08-24 13:30:24.734646739 +0200
027c5e7a 27345@@ -0,0 +1,1062 @@
1facf9fc 27346+/*
027c5e7a 27347+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 27348+ *
27349+ * This program, aufs is free software; you can redistribute it and/or modify
27350+ * it under the terms of the GNU General Public License as published by
27351+ * the Free Software Foundation; either version 2 of the License, or
27352+ * (at your option) any later version.
dece6358
AM
27353+ *
27354+ * This program is distributed in the hope that it will be useful,
27355+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27356+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27357+ * GNU General Public License for more details.
27358+ *
27359+ * You should have received a copy of the GNU General Public License
27360+ * along with this program; if not, write to the Free Software
27361+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 27362+ */
27363+
27364+/*
27365+ * whiteout for logical deletion and opaque directory
27366+ */
27367+
27368+#include <linux/fs.h>
27369+#include "aufs.h"
27370+
27371+#define WH_MASK S_IRUGO
27372+
27373+/*
27374+ * If a directory contains this file, then it is opaque. We start with the
27375+ * .wh. flag so that it is blocked by lookup.
27376+ */
27377+static struct qstr diropq_name = {
27378+ .name = AUFS_WH_DIROPQ,
27379+ .len = sizeof(AUFS_WH_DIROPQ) - 1
27380+};
27381+
27382+/*
27383+ * generate whiteout name, which is NOT terminated by NULL.
27384+ * @name: original d_name.name
27385+ * @len: original d_name.len
27386+ * @wh: whiteout qstr
27387+ * returns zero when succeeds, otherwise error.
27388+ * succeeded value as wh->name should be freed by kfree().
27389+ */
27390+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
27391+{
27392+ char *p;
27393+
27394+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
27395+ return -ENAMETOOLONG;
27396+
27397+ wh->len = name->len + AUFS_WH_PFX_LEN;
27398+ p = kmalloc(wh->len, GFP_NOFS);
27399+ wh->name = p;
27400+ if (p) {
27401+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
27402+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
27403+ /* smp_mb(); */
27404+ return 0;
27405+ }
27406+ return -ENOMEM;
27407+}
27408+
27409+/* ---------------------------------------------------------------------- */
27410+
27411+/*
27412+ * test if the @wh_name exists under @h_parent.
27413+ * @try_sio specifies the necessary of super-io.
27414+ */
27415+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
27416+ struct au_branch *br, int try_sio)
27417+{
27418+ int err;
27419+ struct dentry *wh_dentry;
1facf9fc 27420+
1facf9fc 27421+ if (!try_sio)
27422+ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
27423+ else
27424+ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
27425+ err = PTR_ERR(wh_dentry);
27426+ if (IS_ERR(wh_dentry))
27427+ goto out;
27428+
27429+ err = 0;
27430+ if (!wh_dentry->d_inode)
27431+ goto out_wh; /* success */
27432+
27433+ err = 1;
27434+ if (S_ISREG(wh_dentry->d_inode->i_mode))
27435+ goto out_wh; /* success */
27436+
27437+ err = -EIO;
27438+ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
27439+ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
27440+
4f0767ce 27441+out_wh:
1facf9fc 27442+ dput(wh_dentry);
4f0767ce 27443+out:
1facf9fc 27444+ return err;
27445+}
27446+
27447+/*
27448+ * test if the @h_dentry sets opaque or not.
27449+ */
27450+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
27451+{
27452+ int err;
27453+ struct inode *h_dir;
27454+
27455+ h_dir = h_dentry->d_inode;
27456+ err = au_wh_test(h_dentry, &diropq_name, br,
27457+ au_test_h_perm_sio(h_dir, MAY_EXEC));
27458+ return err;
27459+}
27460+
27461+/*
27462+ * returns a negative dentry whose name is unique and temporary.
27463+ */
27464+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
27465+ struct qstr *prefix)
27466+{
1facf9fc 27467+ struct dentry *dentry;
27468+ int i;
027c5e7a 27469+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 27470+ *name, *p;
027c5e7a 27471+ /* strict atomic_t is unnecessary here */
1facf9fc 27472+ static unsigned short cnt;
27473+ struct qstr qs;
27474+
4a4d8108
AM
27475+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
27476+
1facf9fc 27477+ name = defname;
027c5e7a
AM
27478+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
27479+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 27480+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 27481+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 27482+ goto out;
27483+ dentry = ERR_PTR(-ENOMEM);
27484+ name = kmalloc(qs.len + 1, GFP_NOFS);
27485+ if (unlikely(!name))
27486+ goto out;
27487+ }
27488+
27489+ /* doubly whiteout-ed */
27490+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
27491+ p = name + AUFS_WH_PFX_LEN * 2;
27492+ memcpy(p, prefix->name, prefix->len);
27493+ p += prefix->len;
27494+ *p++ = '.';
4a4d8108 27495+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 27496+
27497+ qs.name = name;
27498+ for (i = 0; i < 3; i++) {
b752ccd1 27499+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
1facf9fc 27500+ dentry = au_sio_lkup_one(&qs, h_parent, br);
27501+ if (IS_ERR(dentry) || !dentry->d_inode)
27502+ goto out_name;
27503+ dput(dentry);
27504+ }
4a4d8108 27505+ /* pr_warning("could not get random name\n"); */
1facf9fc 27506+ dentry = ERR_PTR(-EEXIST);
27507+ AuDbg("%.*s\n", AuLNPair(&qs));
27508+ BUG();
27509+
4f0767ce 27510+out_name:
1facf9fc 27511+ if (name != defname)
27512+ kfree(name);
4f0767ce 27513+out:
4a4d8108 27514+ AuTraceErrPtr(dentry);
1facf9fc 27515+ return dentry;
1facf9fc 27516+}
27517+
27518+/*
27519+ * rename the @h_dentry on @br to the whiteouted temporary name.
27520+ */
27521+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
27522+{
27523+ int err;
27524+ struct path h_path = {
27525+ .mnt = br->br_mnt
27526+ };
27527+ struct inode *h_dir;
27528+ struct dentry *h_parent;
27529+
27530+ h_parent = h_dentry->d_parent; /* dir inode is locked */
27531+ h_dir = h_parent->d_inode;
27532+ IMustLock(h_dir);
27533+
27534+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
27535+ err = PTR_ERR(h_path.dentry);
27536+ if (IS_ERR(h_path.dentry))
27537+ goto out;
27538+
27539+ /* under the same dir, no need to lock_rename() */
27540+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
27541+ AuTraceErr(err);
27542+ dput(h_path.dentry);
27543+
4f0767ce 27544+out:
4a4d8108 27545+ AuTraceErr(err);
1facf9fc 27546+ return err;
27547+}
27548+
27549+/* ---------------------------------------------------------------------- */
27550+/*
27551+ * functions for removing a whiteout
27552+ */
27553+
27554+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
27555+{
27556+ int force;
27557+
27558+ /*
27559+ * forces superio when the dir has a sticky bit.
27560+ * this may be a violation of unix fs semantics.
27561+ */
27562+ force = (h_dir->i_mode & S_ISVTX)
27563+ && h_path->dentry->d_inode->i_uid != current_fsuid();
27564+ return vfsub_unlink(h_dir, h_path, force);
27565+}
27566+
27567+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
27568+ struct dentry *dentry)
27569+{
27570+ int err;
27571+
27572+ err = do_unlink_wh(h_dir, h_path);
27573+ if (!err && dentry)
27574+ au_set_dbwh(dentry, -1);
27575+
27576+ return err;
27577+}
27578+
27579+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
27580+ struct au_branch *br)
27581+{
27582+ int err;
27583+ struct path h_path = {
27584+ .mnt = br->br_mnt
27585+ };
27586+
27587+ err = 0;
27588+ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
27589+ if (IS_ERR(h_path.dentry))
27590+ err = PTR_ERR(h_path.dentry);
27591+ else {
27592+ if (h_path.dentry->d_inode
27593+ && S_ISREG(h_path.dentry->d_inode->i_mode))
27594+ err = do_unlink_wh(h_parent->d_inode, &h_path);
27595+ dput(h_path.dentry);
27596+ }
27597+
27598+ return err;
27599+}
27600+
27601+/* ---------------------------------------------------------------------- */
27602+/*
27603+ * initialize/clean whiteout for a branch
27604+ */
27605+
27606+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
27607+ const int isdir)
27608+{
27609+ int err;
27610+
27611+ if (!whpath->dentry->d_inode)
27612+ return;
27613+
27614+ err = mnt_want_write(whpath->mnt);
27615+ if (!err) {
27616+ if (isdir)
27617+ err = vfsub_rmdir(h_dir, whpath);
27618+ else
27619+ err = vfsub_unlink(h_dir, whpath, /*force*/0);
27620+ mnt_drop_write(whpath->mnt);
27621+ }
27622+ if (unlikely(err))
4a4d8108
AM
27623+ pr_warning("failed removing %.*s (%d), ignored.\n",
27624+ AuDLNPair(whpath->dentry), err);
1facf9fc 27625+}
27626+
27627+static int test_linkable(struct dentry *h_root)
27628+{
27629+ struct inode *h_dir = h_root->d_inode;
27630+
27631+ if (h_dir->i_op->link)
27632+ return 0;
27633+
4a4d8108
AM
27634+ pr_err("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
27635+ AuDLNPair(h_root), au_sbtype(h_root->d_sb));
1facf9fc 27636+ return -ENOSYS;
27637+}
27638+
27639+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
27640+static int au_whdir(struct inode *h_dir, struct path *path)
27641+{
27642+ int err;
27643+
27644+ err = -EEXIST;
27645+ if (!path->dentry->d_inode) {
27646+ int mode = S_IRWXU;
27647+
27648+ if (au_test_nfs(path->dentry->d_sb))
27649+ mode |= S_IXUGO;
27650+ err = mnt_want_write(path->mnt);
27651+ if (!err) {
27652+ err = vfsub_mkdir(h_dir, path, mode);
27653+ mnt_drop_write(path->mnt);
27654+ }
27655+ } else if (S_ISDIR(path->dentry->d_inode->i_mode))
27656+ err = 0;
27657+ else
4a4d8108 27658+ pr_err("unknown %.*s exists\n", AuDLNPair(path->dentry));
1facf9fc 27659+
27660+ return err;
27661+}
27662+
27663+struct au_wh_base {
27664+ const struct qstr *name;
27665+ struct dentry *dentry;
27666+};
27667+
27668+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
27669+ struct path *h_path)
27670+{
27671+ h_path->dentry = base[AuBrWh_BASE].dentry;
27672+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27673+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27674+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27675+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27676+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27677+}
27678+
27679+/*
27680+ * returns tri-state,
27681+ * minus: error, caller should print the mesage
27682+ * zero: succuess
27683+ * plus: error, caller should NOT print the mesage
27684+ */
27685+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
27686+ int do_plink, struct au_wh_base base[],
27687+ struct path *h_path)
27688+{
27689+ int err;
27690+ struct inode *h_dir;
27691+
27692+ h_dir = h_root->d_inode;
27693+ h_path->dentry = base[AuBrWh_BASE].dentry;
27694+ au_wh_clean(h_dir, h_path, /*isdir*/0);
27695+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27696+ if (do_plink) {
27697+ err = test_linkable(h_root);
27698+ if (unlikely(err)) {
27699+ err = 1;
27700+ goto out;
27701+ }
27702+
27703+ err = au_whdir(h_dir, h_path);
27704+ if (unlikely(err))
27705+ goto out;
27706+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27707+ } else
27708+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27709+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27710+ err = au_whdir(h_dir, h_path);
27711+ if (unlikely(err))
27712+ goto out;
27713+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27714+
4f0767ce 27715+out:
1facf9fc 27716+ return err;
27717+}
27718+
27719+/*
27720+ * for the moment, aufs supports the branch filesystem which does not support
27721+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
27722+ * copyup failed. finally, such filesystem will not be used as the writable
27723+ * branch.
27724+ *
27725+ * returns tri-state, see above.
27726+ */
27727+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
27728+ int do_plink, struct au_wh_base base[],
27729+ struct path *h_path)
27730+{
27731+ int err;
27732+ struct inode *h_dir;
27733+
1308ab2a 27734+ WbrWhMustWriteLock(wbr);
27735+
1facf9fc 27736+ err = test_linkable(h_root);
27737+ if (unlikely(err)) {
27738+ err = 1;
27739+ goto out;
27740+ }
27741+
27742+ /*
27743+ * todo: should this create be done in /sbin/mount.aufs helper?
27744+ */
27745+ err = -EEXIST;
27746+ h_dir = h_root->d_inode;
27747+ if (!base[AuBrWh_BASE].dentry->d_inode) {
27748+ err = mnt_want_write(h_path->mnt);
27749+ if (!err) {
27750+ h_path->dentry = base[AuBrWh_BASE].dentry;
27751+ err = vfsub_create(h_dir, h_path, WH_MASK);
27752+ mnt_drop_write(h_path->mnt);
27753+ }
27754+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
27755+ err = 0;
27756+ else
4a4d8108
AM
27757+ pr_err("unknown %.*s/%.*s exists\n",
27758+ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
1facf9fc 27759+ if (unlikely(err))
27760+ goto out;
27761+
27762+ h_path->dentry = base[AuBrWh_PLINK].dentry;
27763+ if (do_plink) {
27764+ err = au_whdir(h_dir, h_path);
27765+ if (unlikely(err))
27766+ goto out;
27767+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
27768+ } else
27769+ au_wh_clean(h_dir, h_path, /*isdir*/1);
27770+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
27771+
27772+ h_path->dentry = base[AuBrWh_ORPH].dentry;
27773+ err = au_whdir(h_dir, h_path);
27774+ if (unlikely(err))
27775+ goto out;
27776+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
27777+
4f0767ce 27778+out:
1facf9fc 27779+ return err;
27780+}
27781+
27782+/*
27783+ * initialize the whiteout base file/dir for @br.
27784+ */
27785+int au_wh_init(struct dentry *h_root, struct au_branch *br,
27786+ struct super_block *sb)
27787+{
27788+ int err, i;
27789+ const unsigned char do_plink
27790+ = !!au_opt_test(au_mntflags(sb), PLINK);
27791+ struct path path = {
27792+ .mnt = br->br_mnt
27793+ };
27794+ struct inode *h_dir;
27795+ struct au_wbr *wbr = br->br_wbr;
27796+ static const struct qstr base_name[] = {
27797+ [AuBrWh_BASE] = {
27798+ .name = AUFS_BASE_NAME,
27799+ .len = sizeof(AUFS_BASE_NAME) - 1
27800+ },
27801+ [AuBrWh_PLINK] = {
27802+ .name = AUFS_PLINKDIR_NAME,
27803+ .len = sizeof(AUFS_PLINKDIR_NAME) - 1
27804+ },
27805+ [AuBrWh_ORPH] = {
27806+ .name = AUFS_ORPHDIR_NAME,
27807+ .len = sizeof(AUFS_ORPHDIR_NAME) - 1
27808+ }
27809+ };
27810+ struct au_wh_base base[] = {
27811+ [AuBrWh_BASE] = {
27812+ .name = base_name + AuBrWh_BASE,
27813+ .dentry = NULL
27814+ },
27815+ [AuBrWh_PLINK] = {
27816+ .name = base_name + AuBrWh_PLINK,
27817+ .dentry = NULL
27818+ },
27819+ [AuBrWh_ORPH] = {
27820+ .name = base_name + AuBrWh_ORPH,
27821+ .dentry = NULL
27822+ }
27823+ };
27824+
1308ab2a 27825+ if (wbr)
27826+ WbrWhMustWriteLock(wbr);
1facf9fc 27827+
1facf9fc 27828+ for (i = 0; i < AuBrWh_Last; i++) {
27829+ /* doubly whiteouted */
27830+ struct dentry *d;
27831+
27832+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
27833+ err = PTR_ERR(d);
27834+ if (IS_ERR(d))
27835+ goto out;
27836+
27837+ base[i].dentry = d;
27838+ AuDebugOn(wbr
27839+ && wbr->wbr_wh[i]
27840+ && wbr->wbr_wh[i] != base[i].dentry);
27841+ }
27842+
27843+ if (wbr)
27844+ for (i = 0; i < AuBrWh_Last; i++) {
27845+ dput(wbr->wbr_wh[i]);
27846+ wbr->wbr_wh[i] = NULL;
27847+ }
27848+
27849+ err = 0;
1facf9fc 27850+ switch (br->br_perm) {
27851+ case AuBrPerm_RO:
27852+ case AuBrPerm_ROWH:
27853+ case AuBrPerm_RR:
27854+ case AuBrPerm_RRWH:
4a4d8108 27855+ h_dir = h_root->d_inode;
1facf9fc 27856+ au_wh_init_ro(h_dir, base, &path);
27857+ break;
27858+
27859+ case AuBrPerm_RWNoLinkWH:
27860+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
27861+ if (err > 0)
27862+ goto out;
27863+ else if (err)
27864+ goto out_err;
27865+ break;
27866+
27867+ case AuBrPerm_RW:
27868+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
27869+ if (err > 0)
27870+ goto out;
27871+ else if (err)
27872+ goto out_err;
27873+ break;
27874+
27875+ default:
27876+ BUG();
27877+ }
27878+ goto out; /* success */
27879+
4f0767ce 27880+out_err:
4a4d8108
AM
27881+ pr_err("an error(%d) on the writable branch %.*s(%s)\n",
27882+ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
4f0767ce 27883+out:
1facf9fc 27884+ for (i = 0; i < AuBrWh_Last; i++)
27885+ dput(base[i].dentry);
27886+ return err;
27887+}
27888+
27889+/* ---------------------------------------------------------------------- */
27890+/*
27891+ * whiteouts are all hard-linked usually.
27892+ * when its link count reaches a ceiling, we create a new whiteout base
27893+ * asynchronously.
27894+ */
27895+
27896+struct reinit_br_wh {
27897+ struct super_block *sb;
27898+ struct au_branch *br;
27899+};
27900+
27901+static void reinit_br_wh(void *arg)
27902+{
27903+ int err;
27904+ aufs_bindex_t bindex;
27905+ struct path h_path;
27906+ struct reinit_br_wh *a = arg;
27907+ struct au_wbr *wbr;
27908+ struct inode *dir;
27909+ struct dentry *h_root;
27910+ struct au_hinode *hdir;
27911+
27912+ err = 0;
27913+ wbr = a->br->br_wbr;
27914+ /* big aufs lock */
27915+ si_noflush_write_lock(a->sb);
27916+ if (!au_br_writable(a->br->br_perm))
27917+ goto out;
27918+ bindex = au_br_index(a->sb, a->br->br_id);
27919+ if (unlikely(bindex < 0))
27920+ goto out;
27921+
1308ab2a 27922+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 27923+ dir = a->sb->s_root->d_inode;
1facf9fc 27924+ hdir = au_hi(dir, bindex);
27925+ h_root = au_h_dptr(a->sb->s_root, bindex);
27926+
4a4d8108 27927+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 27928+ wbr_wh_write_lock(wbr);
27929+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
27930+ h_root, a->br);
27931+ if (!err) {
27932+ err = mnt_want_write(a->br->br_mnt);
27933+ if (!err) {
27934+ h_path.dentry = wbr->wbr_whbase;
27935+ h_path.mnt = a->br->br_mnt;
27936+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
27937+ mnt_drop_write(a->br->br_mnt);
27938+ }
27939+ } else {
4a4d8108
AM
27940+ pr_warning("%.*s is moved, ignored\n",
27941+ AuDLNPair(wbr->wbr_whbase));
1facf9fc 27942+ err = 0;
27943+ }
27944+ dput(wbr->wbr_whbase);
27945+ wbr->wbr_whbase = NULL;
27946+ if (!err)
27947+ err = au_wh_init(h_root, a->br, a->sb);
27948+ wbr_wh_write_unlock(wbr);
4a4d8108 27949+ au_hn_imtx_unlock(hdir);
1308ab2a 27950+ di_read_unlock(a->sb->s_root, AuLock_IR);
1facf9fc 27951+
4f0767ce 27952+out:
1facf9fc 27953+ if (wbr)
27954+ atomic_dec(&wbr->wbr_wh_running);
27955+ atomic_dec(&a->br->br_count);
1facf9fc 27956+ si_write_unlock(a->sb);
027c5e7a 27957+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 27958+ kfree(arg);
27959+ if (unlikely(err))
27960+ AuIOErr("err %d\n", err);
27961+}
27962+
27963+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
27964+{
27965+ int do_dec, wkq_err;
27966+ struct reinit_br_wh *arg;
27967+
27968+ do_dec = 1;
27969+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
27970+ goto out;
27971+
27972+ /* ignore ENOMEM */
27973+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
27974+ if (arg) {
27975+ /*
27976+ * dec(wh_running), kfree(arg) and dec(br_count)
27977+ * in reinit function
27978+ */
27979+ arg->sb = sb;
27980+ arg->br = br;
27981+ atomic_inc(&br->br_count);
53392da6 27982+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 27983+ if (unlikely(wkq_err)) {
27984+ atomic_dec(&br->br_wbr->wbr_wh_running);
27985+ atomic_dec(&br->br_count);
27986+ kfree(arg);
27987+ }
27988+ do_dec = 0;
27989+ }
27990+
4f0767ce 27991+out:
1facf9fc 27992+ if (do_dec)
27993+ atomic_dec(&br->br_wbr->wbr_wh_running);
27994+}
27995+
27996+/* ---------------------------------------------------------------------- */
27997+
27998+/*
27999+ * create the whiteout @wh.
28000+ */
28001+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
28002+ struct dentry *wh)
28003+{
28004+ int err;
28005+ struct path h_path = {
28006+ .dentry = wh
28007+ };
28008+ struct au_branch *br;
28009+ struct au_wbr *wbr;
28010+ struct dentry *h_parent;
28011+ struct inode *h_dir;
28012+
28013+ h_parent = wh->d_parent; /* dir inode is locked */
28014+ h_dir = h_parent->d_inode;
28015+ IMustLock(h_dir);
28016+
28017+ br = au_sbr(sb, bindex);
28018+ h_path.mnt = br->br_mnt;
28019+ wbr = br->br_wbr;
28020+ wbr_wh_read_lock(wbr);
28021+ if (wbr->wbr_whbase) {
28022+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
28023+ if (!err || err != -EMLINK)
28024+ goto out;
28025+
28026+ /* link count full. re-initialize br_whbase. */
28027+ kick_reinit_br_wh(sb, br);
28028+ }
28029+
28030+ /* return this error in this context */
28031+ err = vfsub_create(h_dir, &h_path, WH_MASK);
28032+
4f0767ce 28033+out:
1facf9fc 28034+ wbr_wh_read_unlock(wbr);
28035+ return err;
28036+}
28037+
28038+/* ---------------------------------------------------------------------- */
28039+
28040+/*
28041+ * create or remove the diropq.
28042+ */
28043+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
28044+ unsigned int flags)
28045+{
28046+ struct dentry *opq_dentry, *h_dentry;
28047+ struct super_block *sb;
28048+ struct au_branch *br;
28049+ int err;
28050+
28051+ sb = dentry->d_sb;
28052+ br = au_sbr(sb, bindex);
28053+ h_dentry = au_h_dptr(dentry, bindex);
28054+ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
28055+ if (IS_ERR(opq_dentry))
28056+ goto out;
28057+
28058+ if (au_ftest_diropq(flags, CREATE)) {
28059+ err = link_or_create_wh(sb, bindex, opq_dentry);
28060+ if (!err) {
28061+ au_set_dbdiropq(dentry, bindex);
28062+ goto out; /* success */
28063+ }
28064+ } else {
28065+ struct path tmp = {
28066+ .dentry = opq_dentry,
28067+ .mnt = br->br_mnt
28068+ };
28069+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
28070+ if (!err)
28071+ au_set_dbdiropq(dentry, -1);
28072+ }
28073+ dput(opq_dentry);
28074+ opq_dentry = ERR_PTR(err);
28075+
4f0767ce 28076+out:
1facf9fc 28077+ return opq_dentry;
28078+}
28079+
28080+struct do_diropq_args {
28081+ struct dentry **errp;
28082+ struct dentry *dentry;
28083+ aufs_bindex_t bindex;
28084+ unsigned int flags;
28085+};
28086+
28087+static void call_do_diropq(void *args)
28088+{
28089+ struct do_diropq_args *a = args;
28090+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
28091+}
28092+
28093+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28094+ unsigned int flags)
28095+{
28096+ struct dentry *diropq, *h_dentry;
28097+
28098+ h_dentry = au_h_dptr(dentry, bindex);
28099+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
28100+ diropq = do_diropq(dentry, bindex, flags);
28101+ else {
28102+ int wkq_err;
28103+ struct do_diropq_args args = {
28104+ .errp = &diropq,
28105+ .dentry = dentry,
28106+ .bindex = bindex,
28107+ .flags = flags
28108+ };
28109+
28110+ wkq_err = au_wkq_wait(call_do_diropq, &args);
28111+ if (unlikely(wkq_err))
28112+ diropq = ERR_PTR(wkq_err);
28113+ }
28114+
28115+ return diropq;
28116+}
28117+
28118+/* ---------------------------------------------------------------------- */
28119+
28120+/*
28121+ * lookup whiteout dentry.
28122+ * @h_parent: lower parent dentry which must exist and be locked
28123+ * @base_name: name of dentry which will be whiteouted
28124+ * returns dentry for whiteout.
28125+ */
28126+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28127+ struct au_branch *br)
28128+{
28129+ int err;
28130+ struct qstr wh_name;
28131+ struct dentry *wh_dentry;
28132+
28133+ err = au_wh_name_alloc(&wh_name, base_name);
28134+ wh_dentry = ERR_PTR(err);
28135+ if (!err) {
28136+ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
28137+ kfree(wh_name.name);
28138+ }
28139+ return wh_dentry;
28140+}
28141+
28142+/*
28143+ * link/create a whiteout for @dentry on @bindex.
28144+ */
28145+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28146+ struct dentry *h_parent)
28147+{
28148+ struct dentry *wh_dentry;
28149+ struct super_block *sb;
28150+ int err;
28151+
28152+ sb = dentry->d_sb;
28153+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
28154+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
28155+ err = link_or_create_wh(sb, bindex, wh_dentry);
28156+ if (!err)
28157+ au_set_dbwh(dentry, bindex);
28158+ else {
28159+ dput(wh_dentry);
28160+ wh_dentry = ERR_PTR(err);
28161+ }
28162+ }
28163+
28164+ return wh_dentry;
28165+}
28166+
28167+/* ---------------------------------------------------------------------- */
28168+
28169+/* Delete all whiteouts in this directory on branch bindex. */
28170+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
28171+ aufs_bindex_t bindex, struct au_branch *br)
28172+{
28173+ int err;
28174+ unsigned long ul, n;
28175+ struct qstr wh_name;
28176+ char *p;
28177+ struct hlist_head *head;
28178+ struct au_vdir_wh *tpos;
28179+ struct hlist_node *pos;
28180+ struct au_vdir_destr *str;
28181+
28182+ err = -ENOMEM;
4a4d8108 28183+ p = __getname_gfp(GFP_NOFS);
1facf9fc 28184+ wh_name.name = p;
28185+ if (unlikely(!wh_name.name))
28186+ goto out;
28187+
28188+ err = 0;
28189+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
28190+ p += AUFS_WH_PFX_LEN;
28191+ n = whlist->nh_num;
28192+ head = whlist->nh_head;
28193+ for (ul = 0; !err && ul < n; ul++, head++) {
28194+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
28195+ if (tpos->wh_bindex != bindex)
28196+ continue;
28197+
28198+ str = &tpos->wh_str;
28199+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
28200+ memcpy(p, str->name, str->len);
28201+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
28202+ err = unlink_wh_name(h_dentry, &wh_name, br);
28203+ if (!err)
28204+ continue;
28205+ break;
28206+ }
28207+ AuIOErr("whiteout name too long %.*s\n",
28208+ str->len, str->name);
28209+ err = -EIO;
28210+ break;
28211+ }
28212+ }
28213+ __putname(wh_name.name);
28214+
4f0767ce 28215+out:
1facf9fc 28216+ return err;
28217+}
28218+
28219+struct del_wh_children_args {
28220+ int *errp;
28221+ struct dentry *h_dentry;
1308ab2a 28222+ struct au_nhash *whlist;
1facf9fc 28223+ aufs_bindex_t bindex;
28224+ struct au_branch *br;
28225+};
28226+
28227+static void call_del_wh_children(void *args)
28228+{
28229+ struct del_wh_children_args *a = args;
1308ab2a 28230+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 28231+}
28232+
28233+/* ---------------------------------------------------------------------- */
28234+
28235+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
28236+{
28237+ struct au_whtmp_rmdir *whtmp;
dece6358 28238+ int err;
1308ab2a 28239+ unsigned int rdhash;
dece6358
AM
28240+
28241+ SiMustAnyLock(sb);
1facf9fc 28242+
28243+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
28244+ if (unlikely(!whtmp)) {
28245+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 28246+ goto out;
dece6358 28247+ }
1facf9fc 28248+
28249+ whtmp->dir = NULL;
027c5e7a 28250+ whtmp->br = NULL;
1facf9fc 28251+ whtmp->wh_dentry = NULL;
1308ab2a 28252+ /* no estimation for dir size */
28253+ rdhash = au_sbi(sb)->si_rdhash;
28254+ if (!rdhash)
28255+ rdhash = AUFS_RDHASH_DEF;
28256+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
28257+ if (unlikely(err)) {
28258+ kfree(whtmp);
28259+ whtmp = ERR_PTR(err);
28260+ }
dece6358 28261+
4f0767ce 28262+out:
dece6358 28263+ return whtmp;
1facf9fc 28264+}
28265+
28266+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
28267+{
027c5e7a
AM
28268+ if (whtmp->br)
28269+ atomic_dec(&whtmp->br->br_count);
1facf9fc 28270+ dput(whtmp->wh_dentry);
28271+ iput(whtmp->dir);
dece6358 28272+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 28273+ kfree(whtmp);
28274+}
28275+
28276+/*
28277+ * rmdir the whiteouted temporary named dir @h_dentry.
28278+ * @whlist: whiteouted children.
28279+ */
28280+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28281+ struct dentry *wh_dentry, struct au_nhash *whlist)
28282+{
28283+ int err;
28284+ struct path h_tmp;
28285+ struct inode *wh_inode, *h_dir;
28286+ struct au_branch *br;
28287+
28288+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
28289+ IMustLock(h_dir);
28290+
28291+ br = au_sbr(dir->i_sb, bindex);
28292+ wh_inode = wh_dentry->d_inode;
28293+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
28294+
28295+ /*
28296+ * someone else might change some whiteouts while we were sleeping.
28297+ * it means this whlist may have an obsoleted entry.
28298+ */
28299+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
28300+ err = del_wh_children(wh_dentry, whlist, bindex, br);
28301+ else {
28302+ int wkq_err;
28303+ struct del_wh_children_args args = {
28304+ .errp = &err,
28305+ .h_dentry = wh_dentry,
1308ab2a 28306+ .whlist = whlist,
1facf9fc 28307+ .bindex = bindex,
28308+ .br = br
28309+ };
28310+
28311+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
28312+ if (unlikely(wkq_err))
28313+ err = wkq_err;
28314+ }
28315+ mutex_unlock(&wh_inode->i_mutex);
28316+
28317+ if (!err) {
28318+ h_tmp.dentry = wh_dentry;
28319+ h_tmp.mnt = br->br_mnt;
28320+ err = vfsub_rmdir(h_dir, &h_tmp);
1facf9fc 28321+ }
28322+
28323+ if (!err) {
28324+ if (au_ibstart(dir) == bindex) {
7f207e10 28325+ /* todo: dir->i_mutex is necessary */
1facf9fc 28326+ au_cpup_attr_timesizes(dir);
7f207e10 28327+ vfsub_drop_nlink(dir);
1facf9fc 28328+ }
28329+ return 0; /* success */
28330+ }
28331+
4a4d8108
AM
28332+ pr_warning("failed removing %.*s(%d), ignored\n",
28333+ AuDLNPair(wh_dentry), err);
1facf9fc 28334+ return err;
28335+}
28336+
28337+static void call_rmdir_whtmp(void *args)
28338+{
28339+ int err;
e49829fe 28340+ aufs_bindex_t bindex;
1facf9fc 28341+ struct au_whtmp_rmdir *a = args;
28342+ struct super_block *sb;
28343+ struct dentry *h_parent;
28344+ struct inode *h_dir;
1facf9fc 28345+ struct au_hinode *hdir;
28346+
28347+ /* rmdir by nfsd may cause deadlock with this i_mutex */
28348+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 28349+ err = -EROFS;
1facf9fc 28350+ sb = a->dir->i_sb;
e49829fe
JR
28351+ si_read_lock(sb, !AuLock_FLUSH);
28352+ if (!au_br_writable(a->br->br_perm))
28353+ goto out;
28354+ bindex = au_br_index(sb, a->br->br_id);
28355+ if (unlikely(bindex < 0))
1facf9fc 28356+ goto out;
28357+
28358+ err = -EIO;
1facf9fc 28359+ ii_write_lock_parent(a->dir);
28360+ h_parent = dget_parent(a->wh_dentry);
28361+ h_dir = h_parent->d_inode;
e49829fe 28362+ hdir = au_hi(a->dir, bindex);
4a4d8108 28363+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
28364+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
28365+ a->br);
1facf9fc 28366+ if (!err) {
e49829fe 28367+ err = mnt_want_write(a->br->br_mnt);
1facf9fc 28368+ if (!err) {
e49829fe 28369+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry,
dece6358 28370+ &a->whlist);
e49829fe 28371+ mnt_drop_write(a->br->br_mnt);
1facf9fc 28372+ }
28373+ }
4a4d8108 28374+ au_hn_imtx_unlock(hdir);
1facf9fc 28375+ dput(h_parent);
28376+ ii_write_unlock(a->dir);
28377+
4f0767ce 28378+out:
1facf9fc 28379+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 28380+ au_whtmp_rmdir_free(a);
027c5e7a
AM
28381+ si_read_unlock(sb);
28382+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28383+ if (unlikely(err))
28384+ AuIOErr("err %d\n", err);
28385+}
28386+
28387+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28388+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
28389+{
28390+ int wkq_err;
e49829fe 28391+ struct super_block *sb;
1facf9fc 28392+
28393+ IMustLock(dir);
28394+
28395+ /* all post-process will be done in do_rmdir_whtmp(). */
e49829fe 28396+ sb = dir->i_sb;
1facf9fc 28397+ args->dir = au_igrab(dir);
e49829fe
JR
28398+ args->br = au_sbr(sb, bindex);
28399+ atomic_inc(&args->br->br_count);
1facf9fc 28400+ args->wh_dentry = dget(wh_dentry);
53392da6 28401+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 28402+ if (unlikely(wkq_err)) {
4a4d8108
AM
28403+ pr_warning("rmdir error %.*s (%d), ignored\n",
28404+ AuDLNPair(wh_dentry), wkq_err);
1facf9fc 28405+ au_whtmp_rmdir_free(args);
28406+ }
28407+}
7f207e10
AM
28408diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
28409--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
53392da6 28410+++ linux/fs/aufs/whout.h 2011-08-24 13:30:24.734646739 +0200
7f207e10 28411@@ -0,0 +1,89 @@
1facf9fc 28412+/*
027c5e7a 28413+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28414+ *
28415+ * This program, aufs is free software; you can redistribute it and/or modify
28416+ * it under the terms of the GNU General Public License as published by
28417+ * the Free Software Foundation; either version 2 of the License, or
28418+ * (at your option) any later version.
dece6358
AM
28419+ *
28420+ * This program is distributed in the hope that it will be useful,
28421+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28422+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28423+ * GNU General Public License for more details.
28424+ *
28425+ * You should have received a copy of the GNU General Public License
28426+ * along with this program; if not, write to the Free Software
28427+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28428+ */
28429+
28430+/*
28431+ * whiteout for logical deletion and opaque directory
28432+ */
28433+
28434+#ifndef __AUFS_WHOUT_H__
28435+#define __AUFS_WHOUT_H__
28436+
28437+#ifdef __KERNEL__
28438+
1facf9fc 28439+#include <linux/aufs_type.h>
28440+#include "dir.h"
28441+
28442+/* whout.c */
28443+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
28444+struct au_branch;
28445+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
28446+ struct au_branch *br, int try_sio);
28447+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
28448+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
28449+ struct qstr *prefix);
28450+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
28451+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
28452+ struct dentry *dentry);
28453+int au_wh_init(struct dentry *h_parent, struct au_branch *br,
28454+ struct super_block *sb);
28455+
28456+/* diropq flags */
28457+#define AuDiropq_CREATE 1
28458+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
28459+#define au_fset_diropq(flags, name) \
28460+ do { (flags) |= AuDiropq_##name; } while (0)
28461+#define au_fclr_diropq(flags, name) \
28462+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 28463+
28464+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
28465+ unsigned int flags);
28466+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
28467+ struct au_branch *br);
28468+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
28469+ struct dentry *h_parent);
28470+
28471+/* real rmdir for the whiteout-ed dir */
28472+struct au_whtmp_rmdir {
28473+ struct inode *dir;
e49829fe 28474+ struct au_branch *br;
1facf9fc 28475+ struct dentry *wh_dentry;
dece6358 28476+ struct au_nhash whlist;
1facf9fc 28477+};
28478+
28479+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
28480+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
28481+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
28482+ struct dentry *wh_dentry, struct au_nhash *whlist);
28483+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
28484+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
28485+
28486+/* ---------------------------------------------------------------------- */
28487+
28488+static inline struct dentry *au_diropq_create(struct dentry *dentry,
28489+ aufs_bindex_t bindex)
28490+{
28491+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
28492+}
28493+
28494+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
28495+{
28496+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
28497+}
28498+
28499+#endif /* __KERNEL__ */
28500+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
28501diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
28502--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28503+++ linux/fs/aufs/wkq.c 2011-08-24 13:30:24.734646739 +0200
28504@@ -0,0 +1,244 @@
1facf9fc 28505+/*
027c5e7a 28506+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28507+ *
28508+ * This program, aufs is free software; you can redistribute it and/or modify
28509+ * it under the terms of the GNU General Public License as published by
28510+ * the Free Software Foundation; either version 2 of the License, or
28511+ * (at your option) any later version.
dece6358
AM
28512+ *
28513+ * This program is distributed in the hope that it will be useful,
28514+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28515+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28516+ * GNU General Public License for more details.
28517+ *
28518+ * You should have received a copy of the GNU General Public License
28519+ * along with this program; if not, write to the Free Software
28520+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28521+ */
28522+
28523+/*
28524+ * workqueue for asynchronous/super-io operations
28525+ * todo: try new credential scheme
28526+ */
28527+
dece6358 28528+#include <linux/module.h>
1facf9fc 28529+#include "aufs.h"
28530+
b752ccd1
AM
28531+/* internal workqueue named AUFS_WKQ_NAME and AUFS_WKQ_PRE_NAME */
28532+enum {
28533+ AuWkq_INORMAL,
28534+ AuWkq_IPRE
28535+};
28536+
28537+static struct {
28538+ char *name;
28539+ struct workqueue_struct *wkq;
28540+} au_wkq[] = {
28541+ [AuWkq_INORMAL] = {
28542+ .name = AUFS_WKQ_NAME
28543+ },
28544+ [AuWkq_IPRE] = {
28545+ .name = AUFS_WKQ_PRE_NAME
28546+ }
28547+};
1facf9fc 28548+
28549+struct au_wkinfo {
28550+ struct work_struct wk;
7f207e10 28551+ struct kobject *kobj;
1facf9fc 28552+
28553+ unsigned int flags; /* see wkq.h */
28554+
28555+ au_wkq_func_t func;
28556+ void *args;
28557+
1facf9fc 28558+ struct completion *comp;
28559+};
28560+
28561+/* ---------------------------------------------------------------------- */
28562+
1facf9fc 28563+static void wkq_func(struct work_struct *wk)
28564+{
28565+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
28566+
7f207e10
AM
28567+ AuDebugOn(current_fsuid());
28568+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
28569+
1facf9fc 28570+ wkinfo->func(wkinfo->args);
1facf9fc 28571+ if (au_ftest_wkq(wkinfo->flags, WAIT))
28572+ complete(wkinfo->comp);
28573+ else {
7f207e10 28574+ kobject_put(wkinfo->kobj);
1facf9fc 28575+ module_put(THIS_MODULE);
28576+ kfree(wkinfo);
28577+ }
28578+}
28579+
28580+/*
28581+ * Since struct completion is large, try allocating it dynamically.
28582+ */
28583+#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
28584+#define AuWkqCompDeclare(name) struct completion *comp = NULL
28585+
28586+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28587+{
28588+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
28589+ if (*comp) {
28590+ init_completion(*comp);
28591+ wkinfo->comp = *comp;
28592+ return 0;
28593+ }
28594+ return -ENOMEM;
28595+}
28596+
28597+static void au_wkq_comp_free(struct completion *comp)
28598+{
28599+ kfree(comp);
28600+}
28601+
28602+#else
28603+
28604+/* no braces */
28605+#define AuWkqCompDeclare(name) \
28606+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
28607+ struct completion *comp = &_ ## name
28608+
28609+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
28610+{
28611+ wkinfo->comp = *comp;
28612+ return 0;
28613+}
28614+
28615+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
28616+{
28617+ /* empty */
28618+}
28619+#endif /* 4KSTACKS */
28620+
53392da6 28621+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 28622+{
b752ccd1
AM
28623+ struct workqueue_struct *wkq;
28624+
53392da6
AM
28625+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
28626+ if (au_wkq_test()) {
28627+ AuWarn1("wkq from wkq, due to a dead dir by UDBA?\n");
28628+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
28629+ }
28630+ } else
28631+ au_dbg_verify_kthread();
28632+
28633+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 28634+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
b752ccd1 28635+ wkq = au_wkq[AuWkq_INORMAL].wkq;
53392da6 28636+ if (au_ftest_wkq(wkinfo->flags, PRE))
b752ccd1
AM
28637+ wkq = au_wkq[AuWkq_IPRE].wkq;
28638+ queue_work(wkq, &wkinfo->wk);
4a4d8108
AM
28639+ } else {
28640+ INIT_WORK(&wkinfo->wk, wkq_func);
28641+ schedule_work(&wkinfo->wk);
28642+ }
1facf9fc 28643+}
28644+
7f207e10
AM
28645+/*
28646+ * Be careful. It is easy to make deadlock happen.
28647+ * processA: lock, wkq and wait
28648+ * processB: wkq and wait, lock in wkq
28649+ * --> deadlock
28650+ */
b752ccd1 28651+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 28652+{
28653+ int err;
28654+ AuWkqCompDeclare(comp);
28655+ struct au_wkinfo wkinfo = {
b752ccd1 28656+ .flags = flags,
1facf9fc 28657+ .func = func,
28658+ .args = args
28659+ };
28660+
28661+ err = au_wkq_comp_alloc(&wkinfo, &comp);
28662+ if (!err) {
53392da6 28663+ au_wkq_run(&wkinfo);
1facf9fc 28664+ /* no timeout, no interrupt */
28665+ wait_for_completion(wkinfo.comp);
28666+ au_wkq_comp_free(comp);
4a4d8108 28667+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 28668+ }
28669+
28670+ return err;
28671+
28672+}
28673+
027c5e7a
AM
28674+/*
28675+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
28676+ * problem in a concurrent umounting.
28677+ */
53392da6
AM
28678+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28679+ unsigned int flags)
1facf9fc 28680+{
28681+ int err;
28682+ struct au_wkinfo *wkinfo;
28683+
28684+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
28685+
28686+ /*
28687+ * wkq_func() must free this wkinfo.
28688+ * it highly depends upon the implementation of workqueue.
28689+ */
28690+ err = 0;
28691+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
28692+ if (wkinfo) {
7f207e10 28693+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 28694+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 28695+ wkinfo->func = func;
28696+ wkinfo->args = args;
28697+ wkinfo->comp = NULL;
7f207e10 28698+ kobject_get(wkinfo->kobj);
1facf9fc 28699+ __module_get(THIS_MODULE);
28700+
53392da6 28701+ au_wkq_run(wkinfo);
1facf9fc 28702+ } else {
28703+ err = -ENOMEM;
e49829fe 28704+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 28705+ }
28706+
28707+ return err;
28708+}
28709+
28710+/* ---------------------------------------------------------------------- */
28711+
28712+void au_nwt_init(struct au_nowait_tasks *nwt)
28713+{
28714+ atomic_set(&nwt->nw_len, 0);
4a4d8108 28715+ /* smp_mb(); */ /* atomic_set */
1facf9fc 28716+ init_waitqueue_head(&nwt->nw_wq);
28717+}
28718+
28719+void au_wkq_fin(void)
28720+{
b752ccd1
AM
28721+ int i;
28722+
28723+ for (i = 0; i < ARRAY_SIZE(au_wkq); i++)
28724+ if (au_wkq[i].wkq)
28725+ destroy_workqueue(au_wkq[i].wkq);
1facf9fc 28726+}
28727+
28728+int __init au_wkq_init(void)
28729+{
b752ccd1
AM
28730+ int err, i;
28731+
28732+ err = 0;
28733+ for (i = 0; !err && i < ARRAY_SIZE(au_wkq); i++) {
7f207e10
AM
28734+ BUILD_BUG_ON(!WQ_RESCUER);
28735+ au_wkq[i].wkq = alloc_workqueue(au_wkq[i].name, !WQ_RESCUER,
28736+ WQ_DFL_ACTIVE);
b752ccd1
AM
28737+ if (IS_ERR(au_wkq[i].wkq))
28738+ err = PTR_ERR(au_wkq[i].wkq);
28739+ else if (!au_wkq[i].wkq)
28740+ err = -ENOMEM;
28741+ if (unlikely(err))
28742+ au_wkq[i].wkq = NULL;
28743+ }
7f207e10 28744+ if (unlikely(err))
b752ccd1
AM
28745+ au_wkq_fin();
28746+
28747+ return err;
1facf9fc 28748+}
7f207e10
AM
28749diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
28750--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
28751+++ linux/fs/aufs/wkq.h 2011-08-24 13:30:24.737979976 +0200
28752@@ -0,0 +1,101 @@
1facf9fc 28753+/*
027c5e7a 28754+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28755+ *
28756+ * This program, aufs is free software; you can redistribute it and/or modify
28757+ * it under the terms of the GNU General Public License as published by
28758+ * the Free Software Foundation; either version 2 of the License, or
28759+ * (at your option) any later version.
dece6358
AM
28760+ *
28761+ * This program is distributed in the hope that it will be useful,
28762+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28763+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28764+ * GNU General Public License for more details.
28765+ *
28766+ * You should have received a copy of the GNU General Public License
28767+ * along with this program; if not, write to the Free Software
28768+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28769+ */
28770+
28771+/*
28772+ * workqueue for asynchronous/super-io operations
28773+ * todo: try new credentials management scheme
28774+ */
28775+
28776+#ifndef __AUFS_WKQ_H__
28777+#define __AUFS_WKQ_H__
28778+
28779+#ifdef __KERNEL__
28780+
1facf9fc 28781+#include <linux/sched.h>
dece6358 28782+#include <linux/wait.h>
1facf9fc 28783+#include <linux/aufs_type.h>
28784+
dece6358
AM
28785+struct super_block;
28786+
1facf9fc 28787+/* ---------------------------------------------------------------------- */
28788+
28789+/*
28790+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
28791+ */
28792+struct au_nowait_tasks {
28793+ atomic_t nw_len;
28794+ wait_queue_head_t nw_wq;
28795+};
28796+
28797+/* ---------------------------------------------------------------------- */
28798+
28799+typedef void (*au_wkq_func_t)(void *args);
28800+
28801+/* wkq flags */
28802+#define AuWkq_WAIT 1
b752ccd1 28803+#define AuWkq_PRE (1 << 1)
53392da6
AM
28804+#ifdef CONFIG_AUFS_HNOTIFY
28805+#define AuWkq_NEST (1 << 2)
28806+#else
28807+#define AuWkq_NEST 0
28808+#endif
1facf9fc 28809+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
28810+#define au_fset_wkq(flags, name) \
28811+ do { (flags) |= AuWkq_##name; } while (0)
28812+#define au_fclr_wkq(flags, name) \
28813+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 28814+
28815+/* wkq.c */
b752ccd1 28816+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
28817+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
28818+ unsigned int flags);
1facf9fc 28819+void au_nwt_init(struct au_nowait_tasks *nwt);
28820+int __init au_wkq_init(void);
28821+void au_wkq_fin(void);
28822+
28823+/* ---------------------------------------------------------------------- */
28824+
53392da6
AM
28825+static inline int au_wkq_test(void)
28826+{
28827+ return current->flags & PF_WQ_WORKER;
28828+}
28829+
b752ccd1
AM
28830+static inline int au_wkq_wait_pre(au_wkq_func_t func, void *args)
28831+{
28832+ return au_wkq_do_wait(AuWkq_WAIT | AuWkq_PRE, func, args);
28833+}
28834+
28835+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 28836+{
b752ccd1 28837+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 28838+}
28839+
28840+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
28841+{
e49829fe 28842+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 28843+ wake_up_all(&nwt->nw_wq);
28844+}
28845+
28846+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
28847+{
28848+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
28849+ return 0;
28850+}
28851+
28852+#endif /* __KERNEL__ */
28853+#endif /* __AUFS_WKQ_H__ */
7f207e10
AM
28854diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
28855--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
53392da6 28856+++ linux/fs/aufs/xino.c 2011-08-24 13:30:24.737979976 +0200
7f207e10 28857@@ -0,0 +1,1265 @@
1facf9fc 28858+/*
027c5e7a 28859+ * Copyright (C) 2005-2011 Junjiro R. Okajima
1facf9fc 28860+ *
28861+ * This program, aufs is free software; you can redistribute it and/or modify
28862+ * it under the terms of the GNU General Public License as published by
28863+ * the Free Software Foundation; either version 2 of the License, or
28864+ * (at your option) any later version.
dece6358
AM
28865+ *
28866+ * This program is distributed in the hope that it will be useful,
28867+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28868+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28869+ * GNU General Public License for more details.
28870+ *
28871+ * You should have received a copy of the GNU General Public License
28872+ * along with this program; if not, write to the Free Software
28873+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 28874+ */
28875+
28876+/*
28877+ * external inode number translation table and bitmap
28878+ */
28879+
dece6358 28880+#include <linux/file.h>
1facf9fc 28881+#include <linux/seq_file.h>
28882+#include <linux/uaccess.h>
28883+#include "aufs.h"
28884+
b752ccd1 28885+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 28886+ loff_t *pos)
28887+{
28888+ ssize_t err;
28889+ mm_segment_t oldfs;
b752ccd1
AM
28890+ union {
28891+ void *k;
28892+ char __user *u;
28893+ } buf;
1facf9fc 28894+
b752ccd1 28895+ buf.k = kbuf;
1facf9fc 28896+ oldfs = get_fs();
28897+ set_fs(KERNEL_DS);
28898+ do {
28899+ /* todo: signal_pending? */
b752ccd1 28900+ err = func(file, buf.u, size, pos);
1facf9fc 28901+ } while (err == -EAGAIN || err == -EINTR);
28902+ set_fs(oldfs);
28903+
28904+#if 0 /* reserved for future use */
28905+ if (err > 0)
28906+ fsnotify_access(file->f_dentry);
28907+#endif
28908+
28909+ return err;
28910+}
28911+
28912+/* ---------------------------------------------------------------------- */
28913+
b752ccd1 28914+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf,
1facf9fc 28915+ size_t size, loff_t *pos)
28916+{
28917+ ssize_t err;
28918+ mm_segment_t oldfs;
b752ccd1
AM
28919+ union {
28920+ void *k;
28921+ const char __user *u;
28922+ } buf;
1facf9fc 28923+
b752ccd1 28924+ buf.k = kbuf;
1facf9fc 28925+ oldfs = get_fs();
28926+ set_fs(KERNEL_DS);
1facf9fc 28927+ do {
28928+ /* todo: signal_pending? */
b752ccd1 28929+ err = func(file, buf.u, size, pos);
1facf9fc 28930+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 28931+ set_fs(oldfs);
28932+
28933+#if 0 /* reserved for future use */
28934+ if (err > 0)
28935+ fsnotify_modify(file->f_dentry);
28936+#endif
28937+
28938+ return err;
28939+}
28940+
28941+struct do_xino_fwrite_args {
28942+ ssize_t *errp;
28943+ au_writef_t func;
28944+ struct file *file;
28945+ void *buf;
28946+ size_t size;
28947+ loff_t *pos;
28948+};
28949+
28950+static void call_do_xino_fwrite(void *args)
28951+{
28952+ struct do_xino_fwrite_args *a = args;
28953+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
28954+}
28955+
28956+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
28957+ loff_t *pos)
28958+{
28959+ ssize_t err;
28960+
28961+ /* todo: signal block and no wkq? */
b752ccd1
AM
28962+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
28963+ lockdep_off();
28964+ err = do_xino_fwrite(func, file, buf, size, pos);
28965+ lockdep_on();
28966+ } else {
28967+ /*
28968+ * it breaks RLIMIT_FSIZE and normal user's limit,
28969+ * users should care about quota and real 'filesystem full.'
28970+ */
1facf9fc 28971+ int wkq_err;
28972+ struct do_xino_fwrite_args args = {
28973+ .errp = &err,
28974+ .func = func,
28975+ .file = file,
28976+ .buf = buf,
28977+ .size = size,
28978+ .pos = pos
28979+ };
28980+
28981+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
28982+ if (unlikely(wkq_err))
28983+ err = wkq_err;
b752ccd1 28984+ }
1facf9fc 28985+
28986+ return err;
28987+}
28988+
28989+/* ---------------------------------------------------------------------- */
28990+
28991+/*
28992+ * create a new xinofile at the same place/path as @base_file.
28993+ */
28994+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
28995+{
28996+ struct file *file;
4a4d8108 28997+ struct dentry *base, *parent;
1facf9fc 28998+ struct inode *dir;
28999+ struct qstr *name;
1308ab2a 29000+ struct path path;
4a4d8108 29001+ int err;
1facf9fc 29002+
29003+ base = base_file->f_dentry;
29004+ parent = base->d_parent; /* dir inode is locked */
29005+ dir = parent->d_inode;
29006+ IMustLock(dir);
29007+
29008+ file = ERR_PTR(-EINVAL);
29009+ name = &base->d_name;
4a4d8108
AM
29010+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
29011+ if (IS_ERR(path.dentry)) {
29012+ file = (void *)path.dentry;
29013+ pr_err("%.*s lookup err %ld\n",
29014+ AuLNPair(name), PTR_ERR(path.dentry));
1facf9fc 29015+ goto out;
29016+ }
29017+
29018+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 29019+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 29020+ if (unlikely(err)) {
29021+ file = ERR_PTR(err);
4a4d8108 29022+ pr_err("%.*s create err %d\n", AuLNPair(name), err);
1facf9fc 29023+ goto out_dput;
29024+ }
29025+
1308ab2a 29026+ path.mnt = base_file->f_vfsmnt;
4a4d8108 29027+ file = vfsub_dentry_open(&path,
7f207e10 29028+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29029+ /* | __FMODE_NONOTIFY */);
1facf9fc 29030+ if (IS_ERR(file)) {
4a4d8108 29031+ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
1facf9fc 29032+ goto out_dput;
29033+ }
29034+
29035+ err = vfsub_unlink(dir, &file->f_path, /*force*/0);
29036+ if (unlikely(err)) {
4a4d8108 29037+ pr_err("%.*s unlink err %d\n", AuLNPair(name), err);
1facf9fc 29038+ goto out_fput;
29039+ }
29040+
29041+ if (copy_src) {
29042+ /* no one can touch copy_src xino */
29043+ err = au_copy_file(file, copy_src,
29044+ i_size_read(copy_src->f_dentry->d_inode));
29045+ if (unlikely(err)) {
4a4d8108 29046+ pr_err("%.*s copy err %d\n", AuLNPair(name), err);
1facf9fc 29047+ goto out_fput;
29048+ }
29049+ }
29050+ goto out_dput; /* success */
29051+
4f0767ce 29052+out_fput:
1facf9fc 29053+ fput(file);
29054+ file = ERR_PTR(err);
4f0767ce 29055+out_dput:
4a4d8108 29056+ dput(path.dentry);
4f0767ce 29057+out:
1facf9fc 29058+ return file;
29059+}
29060+
29061+struct au_xino_lock_dir {
29062+ struct au_hinode *hdir;
29063+ struct dentry *parent;
29064+ struct mutex *mtx;
29065+};
29066+
29067+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
29068+ struct au_xino_lock_dir *ldir)
29069+{
29070+ aufs_bindex_t brid, bindex;
29071+
29072+ ldir->hdir = NULL;
29073+ bindex = -1;
29074+ brid = au_xino_brid(sb);
29075+ if (brid >= 0)
29076+ bindex = au_br_index(sb, brid);
29077+ if (bindex >= 0) {
29078+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 29079+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 29080+ } else {
29081+ ldir->parent = dget_parent(xino->f_dentry);
29082+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
29083+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
29084+ }
29085+}
29086+
29087+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
29088+{
29089+ if (ldir->hdir)
4a4d8108 29090+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 29091+ else {
29092+ mutex_unlock(ldir->mtx);
29093+ dput(ldir->parent);
29094+ }
29095+}
29096+
29097+/* ---------------------------------------------------------------------- */
29098+
29099+/* truncate xino files asynchronously */
29100+
29101+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
29102+{
29103+ int err;
29104+ aufs_bindex_t bi, bend;
29105+ struct au_branch *br;
29106+ struct file *new_xino, *file;
29107+ struct super_block *h_sb;
29108+ struct au_xino_lock_dir ldir;
29109+
29110+ err = -EINVAL;
29111+ bend = au_sbend(sb);
29112+ if (unlikely(bindex < 0 || bend < bindex))
29113+ goto out;
29114+ br = au_sbr(sb, bindex);
29115+ file = br->br_xino.xi_file;
29116+ if (!file)
29117+ goto out;
29118+
29119+ au_xino_lock_dir(sb, file, &ldir);
29120+ /* mnt_want_write() is unnecessary here */
29121+ new_xino = au_xino_create2(file, file);
29122+ au_xino_unlock_dir(&ldir);
29123+ err = PTR_ERR(new_xino);
29124+ if (IS_ERR(new_xino))
29125+ goto out;
29126+ err = 0;
29127+ fput(file);
29128+ br->br_xino.xi_file = new_xino;
29129+
29130+ h_sb = br->br_mnt->mnt_sb;
29131+ for (bi = 0; bi <= bend; bi++) {
29132+ if (unlikely(bi == bindex))
29133+ continue;
29134+ br = au_sbr(sb, bi);
29135+ if (br->br_mnt->mnt_sb != h_sb)
29136+ continue;
29137+
29138+ fput(br->br_xino.xi_file);
29139+ br->br_xino.xi_file = new_xino;
29140+ get_file(new_xino);
29141+ }
29142+
4f0767ce 29143+out:
1facf9fc 29144+ return err;
29145+}
29146+
29147+struct xino_do_trunc_args {
29148+ struct super_block *sb;
29149+ struct au_branch *br;
29150+};
29151+
29152+static void xino_do_trunc(void *_args)
29153+{
29154+ struct xino_do_trunc_args *args = _args;
29155+ struct super_block *sb;
29156+ struct au_branch *br;
29157+ struct inode *dir;
29158+ int err;
29159+ aufs_bindex_t bindex;
29160+
29161+ err = 0;
29162+ sb = args->sb;
29163+ dir = sb->s_root->d_inode;
29164+ br = args->br;
29165+
29166+ si_noflush_write_lock(sb);
29167+ ii_read_lock_parent(dir);
29168+ bindex = au_br_index(sb, br->br_id);
29169+ err = au_xino_trunc(sb, bindex);
dece6358
AM
29170+ if (!err
29171+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
1facf9fc 29172+ >= br->br_xino_upper)
29173+ br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
29174+
1facf9fc 29175+ ii_read_unlock(dir);
29176+ if (unlikely(err))
4a4d8108 29177+ pr_warning("err b%d, (%d)\n", bindex, err);
1facf9fc 29178+ atomic_dec(&br->br_xino_running);
29179+ atomic_dec(&br->br_count);
1facf9fc 29180+ si_write_unlock(sb);
027c5e7a 29181+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 29182+ kfree(args);
29183+}
29184+
29185+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
29186+{
29187+ struct xino_do_trunc_args *args;
29188+ int wkq_err;
29189+
29190+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
29191+ < br->br_xino_upper)
29192+ return;
29193+
29194+ if (atomic_inc_return(&br->br_xino_running) > 1)
29195+ goto out;
29196+
29197+ /* lock and kfree() will be called in xino_do_trunc() */
29198+ args = kmalloc(sizeof(*args), GFP_NOFS);
29199+ if (unlikely(!args)) {
29200+ AuErr1("no memory\n");
29201+ goto out_args;
29202+ }
29203+
e49829fe 29204+ atomic_inc(&br->br_count);
1facf9fc 29205+ args->sb = sb;
29206+ args->br = br;
53392da6 29207+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 29208+ if (!wkq_err)
29209+ return; /* success */
29210+
4a4d8108 29211+ pr_err("wkq %d\n", wkq_err);
e49829fe 29212+ atomic_dec(&br->br_count);
1facf9fc 29213+
4f0767ce 29214+out_args:
1facf9fc 29215+ kfree(args);
4f0767ce 29216+out:
e49829fe 29217+ atomic_dec(&br->br_xino_running);
1facf9fc 29218+}
29219+
29220+/* ---------------------------------------------------------------------- */
29221+
29222+static int au_xino_do_write(au_writef_t write, struct file *file,
29223+ ino_t h_ino, ino_t ino)
29224+{
29225+ loff_t pos;
29226+ ssize_t sz;
29227+
29228+ pos = h_ino;
29229+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
29230+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29231+ return -EFBIG;
29232+ }
29233+ pos *= sizeof(ino);
29234+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
29235+ if (sz == sizeof(ino))
29236+ return 0; /* success */
29237+
29238+ AuIOErr("write failed (%zd)\n", sz);
29239+ return -EIO;
29240+}
29241+
29242+/*
29243+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
29244+ * at the position of @h_ino.
29245+ * even if @ino is zero, it is written to the xinofile and means no entry.
29246+ * if the size of the xino file on a specific filesystem exceeds the watermark,
29247+ * try truncating it.
29248+ */
29249+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29250+ ino_t ino)
29251+{
29252+ int err;
29253+ unsigned int mnt_flags;
29254+ struct au_branch *br;
29255+
29256+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
29257+ || ((loff_t)-1) > 0);
dece6358 29258+ SiMustAnyLock(sb);
1facf9fc 29259+
29260+ mnt_flags = au_mntflags(sb);
29261+ if (!au_opt_test(mnt_flags, XINO))
29262+ return 0;
29263+
29264+ br = au_sbr(sb, bindex);
29265+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29266+ h_ino, ino);
29267+ if (!err) {
29268+ if (au_opt_test(mnt_flags, TRUNC_XINO)
29269+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29270+ xino_try_trunc(sb, br);
29271+ return 0; /* success */
29272+ }
29273+
29274+ AuIOErr("write failed (%d)\n", err);
29275+ return -EIO;
29276+}
29277+
29278+/* ---------------------------------------------------------------------- */
29279+
29280+/* aufs inode number bitmap */
29281+
29282+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
29283+static ino_t xib_calc_ino(unsigned long pindex, int bit)
29284+{
29285+ ino_t ino;
29286+
29287+ AuDebugOn(bit < 0 || page_bits <= bit);
29288+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
29289+ return ino;
29290+}
29291+
29292+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
29293+{
29294+ AuDebugOn(ino < AUFS_FIRST_INO);
29295+ ino -= AUFS_FIRST_INO;
29296+ *pindex = ino / page_bits;
29297+ *bit = ino % page_bits;
29298+}
29299+
29300+static int xib_pindex(struct super_block *sb, unsigned long pindex)
29301+{
29302+ int err;
29303+ loff_t pos;
29304+ ssize_t sz;
29305+ struct au_sbinfo *sbinfo;
29306+ struct file *xib;
29307+ unsigned long *p;
29308+
29309+ sbinfo = au_sbi(sb);
29310+ MtxMustLock(&sbinfo->si_xib_mtx);
29311+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
29312+ || !au_opt_test(sbinfo->si_mntflags, XINO));
29313+
29314+ if (pindex == sbinfo->si_xib_last_pindex)
29315+ return 0;
29316+
29317+ xib = sbinfo->si_xib;
29318+ p = sbinfo->si_xib_buf;
29319+ pos = sbinfo->si_xib_last_pindex;
29320+ pos *= PAGE_SIZE;
29321+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29322+ if (unlikely(sz != PAGE_SIZE))
29323+ goto out;
29324+
29325+ pos = pindex;
29326+ pos *= PAGE_SIZE;
29327+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
29328+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
29329+ else {
29330+ memset(p, 0, PAGE_SIZE);
29331+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
29332+ }
29333+ if (sz == PAGE_SIZE) {
29334+ sbinfo->si_xib_last_pindex = pindex;
29335+ return 0; /* success */
29336+ }
29337+
4f0767ce 29338+out:
b752ccd1
AM
29339+ AuIOErr1("write failed (%zd)\n", sz);
29340+ err = sz;
29341+ if (sz >= 0)
29342+ err = -EIO;
29343+ return err;
29344+}
29345+
29346+/* ---------------------------------------------------------------------- */
29347+
29348+static void au_xib_clear_bit(struct inode *inode)
29349+{
29350+ int err, bit;
29351+ unsigned long pindex;
29352+ struct super_block *sb;
29353+ struct au_sbinfo *sbinfo;
29354+
29355+ AuDebugOn(inode->i_nlink);
29356+
29357+ sb = inode->i_sb;
29358+ xib_calc_bit(inode->i_ino, &pindex, &bit);
29359+ AuDebugOn(page_bits <= bit);
29360+ sbinfo = au_sbi(sb);
29361+ mutex_lock(&sbinfo->si_xib_mtx);
29362+ err = xib_pindex(sb, pindex);
29363+ if (!err) {
29364+ clear_bit(bit, sbinfo->si_xib_buf);
29365+ sbinfo->si_xib_next_bit = bit;
29366+ }
29367+ mutex_unlock(&sbinfo->si_xib_mtx);
29368+}
29369+
29370+/* for s_op->delete_inode() */
29371+void au_xino_delete_inode(struct inode *inode, const int unlinked)
29372+{
29373+ int err;
29374+ unsigned int mnt_flags;
29375+ aufs_bindex_t bindex, bend, bi;
29376+ unsigned char try_trunc;
29377+ struct au_iinfo *iinfo;
29378+ struct super_block *sb;
29379+ struct au_hinode *hi;
29380+ struct inode *h_inode;
29381+ struct au_branch *br;
29382+ au_writef_t xwrite;
29383+
29384+ sb = inode->i_sb;
29385+ mnt_flags = au_mntflags(sb);
29386+ if (!au_opt_test(mnt_flags, XINO)
29387+ || inode->i_ino == AUFS_ROOT_INO)
29388+ return;
29389+
29390+ if (unlinked) {
29391+ au_xigen_inc(inode);
29392+ au_xib_clear_bit(inode);
29393+ }
29394+
29395+ iinfo = au_ii(inode);
29396+ if (!iinfo)
29397+ return;
1facf9fc 29398+
b752ccd1
AM
29399+ bindex = iinfo->ii_bstart;
29400+ if (bindex < 0)
29401+ return;
1facf9fc 29402+
b752ccd1
AM
29403+ xwrite = au_sbi(sb)->si_xwrite;
29404+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
29405+ hi = iinfo->ii_hinode + bindex;
29406+ bend = iinfo->ii_bend;
29407+ for (; bindex <= bend; bindex++, hi++) {
29408+ h_inode = hi->hi_inode;
29409+ if (!h_inode
29410+ || (!unlinked && h_inode->i_nlink))
29411+ continue;
1facf9fc 29412+
b752ccd1
AM
29413+ /* inode may not be revalidated */
29414+ bi = au_br_index(sb, hi->hi_id);
29415+ if (bi < 0)
29416+ continue;
1facf9fc 29417+
b752ccd1
AM
29418+ br = au_sbr(sb, bi);
29419+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
29420+ h_inode->i_ino, /*ino*/0);
29421+ if (!err && try_trunc
29422+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
29423+ xino_try_trunc(sb, br);
1facf9fc 29424+ }
1facf9fc 29425+}
29426+
29427+/* get an unused inode number from bitmap */
29428+ino_t au_xino_new_ino(struct super_block *sb)
29429+{
29430+ ino_t ino;
29431+ unsigned long *p, pindex, ul, pend;
29432+ struct au_sbinfo *sbinfo;
29433+ struct file *file;
29434+ int free_bit, err;
29435+
29436+ if (!au_opt_test(au_mntflags(sb), XINO))
29437+ return iunique(sb, AUFS_FIRST_INO);
29438+
29439+ sbinfo = au_sbi(sb);
29440+ mutex_lock(&sbinfo->si_xib_mtx);
29441+ p = sbinfo->si_xib_buf;
29442+ free_bit = sbinfo->si_xib_next_bit;
29443+ if (free_bit < page_bits && !test_bit(free_bit, p))
29444+ goto out; /* success */
29445+ free_bit = find_first_zero_bit(p, page_bits);
29446+ if (free_bit < page_bits)
29447+ goto out; /* success */
29448+
29449+ pindex = sbinfo->si_xib_last_pindex;
29450+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
29451+ err = xib_pindex(sb, ul);
29452+ if (unlikely(err))
29453+ goto out_err;
29454+ free_bit = find_first_zero_bit(p, page_bits);
29455+ if (free_bit < page_bits)
29456+ goto out; /* success */
29457+ }
29458+
29459+ file = sbinfo->si_xib;
29460+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
29461+ for (ul = pindex + 1; ul <= pend; ul++) {
29462+ err = xib_pindex(sb, ul);
29463+ if (unlikely(err))
29464+ goto out_err;
29465+ free_bit = find_first_zero_bit(p, page_bits);
29466+ if (free_bit < page_bits)
29467+ goto out; /* success */
29468+ }
29469+ BUG();
29470+
4f0767ce 29471+out:
1facf9fc 29472+ set_bit(free_bit, p);
7f207e10 29473+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 29474+ pindex = sbinfo->si_xib_last_pindex;
29475+ mutex_unlock(&sbinfo->si_xib_mtx);
29476+ ino = xib_calc_ino(pindex, free_bit);
29477+ AuDbg("i%lu\n", (unsigned long)ino);
29478+ return ino;
4f0767ce 29479+out_err:
1facf9fc 29480+ mutex_unlock(&sbinfo->si_xib_mtx);
29481+ AuDbg("i0\n");
29482+ return 0;
29483+}
29484+
29485+/*
29486+ * read @ino from xinofile for the specified branch{@sb, @bindex}
29487+ * at the position of @h_ino.
29488+ * if @ino does not exist, *@ino is left as zero and zero is returned.
29489+ */
29490+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
29491+ ino_t *ino)
29492+{
29493+ int err;
29494+ ssize_t sz;
29495+ loff_t pos;
29496+ struct file *file;
29497+ struct au_sbinfo *sbinfo;
29498+
29499+ *ino = 0;
29500+ if (!au_opt_test(au_mntflags(sb), XINO))
29501+ return 0; /* no xino */
29502+
29503+ err = 0;
29504+ sbinfo = au_sbi(sb);
29505+ pos = h_ino;
29506+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
29507+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
29508+ return -EFBIG;
29509+ }
29510+ pos *= sizeof(*ino);
29511+
29512+ file = au_sbr(sb, bindex)->br_xino.xi_file;
29513+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
29514+ return 0; /* no ino */
29515+
29516+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
29517+ if (sz == sizeof(*ino))
29518+ return 0; /* success */
29519+
29520+ err = sz;
29521+ if (unlikely(sz >= 0)) {
29522+ err = -EIO;
29523+ AuIOErr("xino read error (%zd)\n", sz);
29524+ }
29525+
29526+ return err;
29527+}
29528+
29529+/* ---------------------------------------------------------------------- */
29530+
29531+/* create and set a new xino file */
29532+
29533+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
29534+{
29535+ struct file *file;
29536+ struct dentry *h_parent, *d;
29537+ struct inode *h_dir;
29538+ int err;
29539+
29540+ /*
29541+ * at mount-time, and the xino file is the default path,
4a4d8108 29542+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 29543+ * when a user specified the xino, we cannot get au_hdir to be ignored.
29544+ */
7f207e10 29545+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 29546+ /* | __FMODE_NONOTIFY */,
1facf9fc 29547+ S_IRUGO | S_IWUGO);
29548+ if (IS_ERR(file)) {
29549+ if (!silent)
4a4d8108 29550+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 29551+ return file;
29552+ }
29553+
29554+ /* keep file count */
29555+ h_parent = dget_parent(file->f_dentry);
29556+ h_dir = h_parent->d_inode;
29557+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
29558+ /* mnt_want_write() is unnecessary here */
29559+ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
29560+ mutex_unlock(&h_dir->i_mutex);
29561+ dput(h_parent);
29562+ if (unlikely(err)) {
29563+ if (!silent)
4a4d8108 29564+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 29565+ goto out;
29566+ }
29567+
29568+ err = -EINVAL;
29569+ d = file->f_dentry;
29570+ if (unlikely(sb == d->d_sb)) {
29571+ if (!silent)
4a4d8108 29572+ pr_err("%s must be outside\n", fname);
1facf9fc 29573+ goto out;
29574+ }
29575+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
29576+ if (!silent)
4a4d8108
AM
29577+ pr_err("xino doesn't support %s(%s)\n",
29578+ fname, au_sbtype(d->d_sb));
1facf9fc 29579+ goto out;
29580+ }
29581+ return file; /* success */
29582+
4f0767ce 29583+out:
1facf9fc 29584+ fput(file);
29585+ file = ERR_PTR(err);
29586+ return file;
29587+}
29588+
29589+/*
29590+ * find another branch which is on the same filesystem as the specified
29591+ * branch{@btgt}. search until @bend.
29592+ */
29593+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
29594+ aufs_bindex_t bend)
29595+{
29596+ aufs_bindex_t bindex;
29597+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
29598+
29599+ for (bindex = 0; bindex < btgt; bindex++)
29600+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29601+ return bindex;
29602+ for (bindex++; bindex <= bend; bindex++)
29603+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
29604+ return bindex;
29605+ return -1;
29606+}
29607+
29608+/* ---------------------------------------------------------------------- */
29609+
29610+/*
29611+ * initialize the xinofile for the specified branch @br
29612+ * at the place/path where @base_file indicates.
29613+ * test whether another branch is on the same filesystem or not,
29614+ * if @do_test is true.
29615+ */
29616+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
29617+ struct file *base_file, int do_test)
29618+{
29619+ int err;
29620+ ino_t ino;
29621+ aufs_bindex_t bend, bindex;
29622+ struct au_branch *shared_br, *b;
29623+ struct file *file;
29624+ struct super_block *tgt_sb;
29625+
29626+ shared_br = NULL;
29627+ bend = au_sbend(sb);
29628+ if (do_test) {
29629+ tgt_sb = br->br_mnt->mnt_sb;
29630+ for (bindex = 0; bindex <= bend; bindex++) {
29631+ b = au_sbr(sb, bindex);
29632+ if (tgt_sb == b->br_mnt->mnt_sb) {
29633+ shared_br = b;
29634+ break;
29635+ }
29636+ }
29637+ }
29638+
29639+ if (!shared_br || !shared_br->br_xino.xi_file) {
29640+ struct au_xino_lock_dir ldir;
29641+
29642+ au_xino_lock_dir(sb, base_file, &ldir);
29643+ /* mnt_want_write() is unnecessary here */
29644+ file = au_xino_create2(base_file, NULL);
29645+ au_xino_unlock_dir(&ldir);
29646+ err = PTR_ERR(file);
29647+ if (IS_ERR(file))
29648+ goto out;
29649+ br->br_xino.xi_file = file;
29650+ } else {
29651+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
29652+ get_file(br->br_xino.xi_file);
29653+ }
29654+
29655+ ino = AUFS_ROOT_INO;
29656+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
29657+ h_ino, ino);
b752ccd1
AM
29658+ if (unlikely(err)) {
29659+ fput(br->br_xino.xi_file);
29660+ br->br_xino.xi_file = NULL;
29661+ }
1facf9fc 29662+
4f0767ce 29663+out:
1facf9fc 29664+ return err;
29665+}
29666+
29667+/* ---------------------------------------------------------------------- */
29668+
29669+/* truncate a xino bitmap file */
29670+
29671+/* todo: slow */
29672+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
29673+{
29674+ int err, bit;
29675+ ssize_t sz;
29676+ unsigned long pindex;
29677+ loff_t pos, pend;
29678+ struct au_sbinfo *sbinfo;
29679+ au_readf_t func;
29680+ ino_t *ino;
29681+ unsigned long *p;
29682+
29683+ err = 0;
29684+ sbinfo = au_sbi(sb);
dece6358 29685+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 29686+ p = sbinfo->si_xib_buf;
29687+ func = sbinfo->si_xread;
29688+ pend = i_size_read(file->f_dentry->d_inode);
29689+ pos = 0;
29690+ while (pos < pend) {
29691+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
29692+ err = sz;
29693+ if (unlikely(sz <= 0))
29694+ goto out;
29695+
29696+ err = 0;
29697+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
29698+ if (unlikely(*ino < AUFS_FIRST_INO))
29699+ continue;
29700+
29701+ xib_calc_bit(*ino, &pindex, &bit);
29702+ AuDebugOn(page_bits <= bit);
29703+ err = xib_pindex(sb, pindex);
29704+ if (!err)
29705+ set_bit(bit, p);
29706+ else
29707+ goto out;
29708+ }
29709+ }
29710+
4f0767ce 29711+out:
1facf9fc 29712+ return err;
29713+}
29714+
29715+static int xib_restore(struct super_block *sb)
29716+{
29717+ int err;
29718+ aufs_bindex_t bindex, bend;
29719+ void *page;
29720+
29721+ err = -ENOMEM;
29722+ page = (void *)__get_free_page(GFP_NOFS);
29723+ if (unlikely(!page))
29724+ goto out;
29725+
29726+ err = 0;
29727+ bend = au_sbend(sb);
29728+ for (bindex = 0; !err && bindex <= bend; bindex++)
29729+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
29730+ err = do_xib_restore
29731+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
29732+ else
29733+ AuDbg("b%d\n", bindex);
29734+ free_page((unsigned long)page);
29735+
4f0767ce 29736+out:
1facf9fc 29737+ return err;
29738+}
29739+
29740+int au_xib_trunc(struct super_block *sb)
29741+{
29742+ int err;
29743+ ssize_t sz;
29744+ loff_t pos;
29745+ struct au_xino_lock_dir ldir;
29746+ struct au_sbinfo *sbinfo;
29747+ unsigned long *p;
29748+ struct file *file;
29749+
dece6358
AM
29750+ SiMustWriteLock(sb);
29751+
1facf9fc 29752+ err = 0;
29753+ sbinfo = au_sbi(sb);
29754+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
29755+ goto out;
29756+
29757+ file = sbinfo->si_xib;
29758+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
29759+ goto out;
29760+
29761+ au_xino_lock_dir(sb, file, &ldir);
29762+ /* mnt_want_write() is unnecessary here */
29763+ file = au_xino_create2(sbinfo->si_xib, NULL);
29764+ au_xino_unlock_dir(&ldir);
29765+ err = PTR_ERR(file);
29766+ if (IS_ERR(file))
29767+ goto out;
29768+ fput(sbinfo->si_xib);
29769+ sbinfo->si_xib = file;
29770+
29771+ p = sbinfo->si_xib_buf;
29772+ memset(p, 0, PAGE_SIZE);
29773+ pos = 0;
29774+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
29775+ if (unlikely(sz != PAGE_SIZE)) {
29776+ err = sz;
29777+ AuIOErr("err %d\n", err);
29778+ if (sz >= 0)
29779+ err = -EIO;
29780+ goto out;
29781+ }
29782+
29783+ mutex_lock(&sbinfo->si_xib_mtx);
29784+ /* mnt_want_write() is unnecessary here */
29785+ err = xib_restore(sb);
29786+ mutex_unlock(&sbinfo->si_xib_mtx);
29787+
29788+out:
29789+ return err;
29790+}
29791+
29792+/* ---------------------------------------------------------------------- */
29793+
29794+/*
29795+ * xino mount option handlers
29796+ */
29797+static au_readf_t find_readf(struct file *h_file)
29798+{
29799+ const struct file_operations *fop = h_file->f_op;
29800+
29801+ if (fop) {
29802+ if (fop->read)
29803+ return fop->read;
29804+ if (fop->aio_read)
29805+ return do_sync_read;
29806+ }
29807+ return ERR_PTR(-ENOSYS);
29808+}
29809+
29810+static au_writef_t find_writef(struct file *h_file)
29811+{
29812+ const struct file_operations *fop = h_file->f_op;
29813+
29814+ if (fop) {
29815+ if (fop->write)
29816+ return fop->write;
29817+ if (fop->aio_write)
29818+ return do_sync_write;
29819+ }
29820+ return ERR_PTR(-ENOSYS);
29821+}
29822+
29823+/* xino bitmap */
29824+static void xino_clear_xib(struct super_block *sb)
29825+{
29826+ struct au_sbinfo *sbinfo;
29827+
dece6358
AM
29828+ SiMustWriteLock(sb);
29829+
1facf9fc 29830+ sbinfo = au_sbi(sb);
29831+ sbinfo->si_xread = NULL;
29832+ sbinfo->si_xwrite = NULL;
29833+ if (sbinfo->si_xib)
29834+ fput(sbinfo->si_xib);
29835+ sbinfo->si_xib = NULL;
29836+ free_page((unsigned long)sbinfo->si_xib_buf);
29837+ sbinfo->si_xib_buf = NULL;
29838+}
29839+
29840+static int au_xino_set_xib(struct super_block *sb, struct file *base)
29841+{
29842+ int err;
29843+ loff_t pos;
29844+ struct au_sbinfo *sbinfo;
29845+ struct file *file;
29846+
dece6358
AM
29847+ SiMustWriteLock(sb);
29848+
1facf9fc 29849+ sbinfo = au_sbi(sb);
29850+ file = au_xino_create2(base, sbinfo->si_xib);
29851+ err = PTR_ERR(file);
29852+ if (IS_ERR(file))
29853+ goto out;
29854+ if (sbinfo->si_xib)
29855+ fput(sbinfo->si_xib);
29856+ sbinfo->si_xib = file;
29857+ sbinfo->si_xread = find_readf(file);
29858+ sbinfo->si_xwrite = find_writef(file);
29859+
29860+ err = -ENOMEM;
29861+ if (!sbinfo->si_xib_buf)
29862+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
29863+ if (unlikely(!sbinfo->si_xib_buf))
29864+ goto out_unset;
29865+
29866+ sbinfo->si_xib_last_pindex = 0;
29867+ sbinfo->si_xib_next_bit = 0;
29868+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
29869+ pos = 0;
29870+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
29871+ PAGE_SIZE, &pos);
29872+ if (unlikely(err != PAGE_SIZE))
29873+ goto out_free;
29874+ }
29875+ err = 0;
29876+ goto out; /* success */
29877+
4f0767ce 29878+out_free:
1facf9fc 29879+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
29880+ sbinfo->si_xib_buf = NULL;
29881+ if (err >= 0)
29882+ err = -EIO;
4f0767ce 29883+out_unset:
b752ccd1
AM
29884+ fput(sbinfo->si_xib);
29885+ sbinfo->si_xib = NULL;
29886+ sbinfo->si_xread = NULL;
29887+ sbinfo->si_xwrite = NULL;
4f0767ce 29888+out:
b752ccd1 29889+ return err;
1facf9fc 29890+}
29891+
b752ccd1
AM
29892+/* xino for each branch */
29893+static void xino_clear_br(struct super_block *sb)
29894+{
29895+ aufs_bindex_t bindex, bend;
29896+ struct au_branch *br;
1facf9fc 29897+
b752ccd1
AM
29898+ bend = au_sbend(sb);
29899+ for (bindex = 0; bindex <= bend; bindex++) {
29900+ br = au_sbr(sb, bindex);
29901+ if (!br || !br->br_xino.xi_file)
29902+ continue;
29903+
29904+ fput(br->br_xino.xi_file);
29905+ br->br_xino.xi_file = NULL;
29906+ }
29907+}
29908+
29909+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 29910+{
29911+ int err;
b752ccd1
AM
29912+ ino_t ino;
29913+ aufs_bindex_t bindex, bend, bshared;
29914+ struct {
29915+ struct file *old, *new;
29916+ } *fpair, *p;
29917+ struct au_branch *br;
29918+ struct inode *inode;
29919+ au_writef_t writef;
1facf9fc 29920+
b752ccd1
AM
29921+ SiMustWriteLock(sb);
29922+
29923+ err = -ENOMEM;
29924+ bend = au_sbend(sb);
29925+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
29926+ if (unlikely(!fpair))
1facf9fc 29927+ goto out;
29928+
b752ccd1
AM
29929+ inode = sb->s_root->d_inode;
29930+ ino = AUFS_ROOT_INO;
29931+ writef = au_sbi(sb)->si_xwrite;
29932+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
29933+ br = au_sbr(sb, bindex);
29934+ bshared = is_sb_shared(sb, bindex, bindex - 1);
29935+ if (bshared >= 0) {
29936+ /* shared xino */
29937+ *p = fpair[bshared];
29938+ get_file(p->new);
29939+ }
29940+
29941+ if (!p->new) {
29942+ /* new xino */
29943+ p->old = br->br_xino.xi_file;
29944+ p->new = au_xino_create2(base, br->br_xino.xi_file);
29945+ err = PTR_ERR(p->new);
29946+ if (IS_ERR(p->new)) {
29947+ p->new = NULL;
29948+ goto out_pair;
29949+ }
29950+ }
29951+
29952+ err = au_xino_do_write(writef, p->new,
29953+ au_h_iptr(inode, bindex)->i_ino, ino);
29954+ if (unlikely(err))
29955+ goto out_pair;
29956+ }
29957+
29958+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
29959+ br = au_sbr(sb, bindex);
29960+ if (br->br_xino.xi_file)
29961+ fput(br->br_xino.xi_file);
29962+ get_file(p->new);
29963+ br->br_xino.xi_file = p->new;
29964+ }
1facf9fc 29965+
4f0767ce 29966+out_pair:
b752ccd1
AM
29967+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
29968+ if (p->new)
29969+ fput(p->new);
29970+ else
29971+ break;
29972+ kfree(fpair);
4f0767ce 29973+out:
1facf9fc 29974+ return err;
29975+}
b752ccd1
AM
29976+
29977+void au_xino_clr(struct super_block *sb)
29978+{
29979+ struct au_sbinfo *sbinfo;
29980+
29981+ au_xigen_clr(sb);
29982+ xino_clear_xib(sb);
29983+ xino_clear_br(sb);
29984+ sbinfo = au_sbi(sb);
29985+ /* lvalue, do not call au_mntflags() */
29986+ au_opt_clr(sbinfo->si_mntflags, XINO);
29987+}
29988+
29989+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
29990+{
29991+ int err, skip;
29992+ struct dentry *parent, *cur_parent;
29993+ struct qstr *dname, *cur_name;
29994+ struct file *cur_xino;
29995+ struct inode *dir;
29996+ struct au_sbinfo *sbinfo;
29997+
29998+ SiMustWriteLock(sb);
29999+
30000+ err = 0;
30001+ sbinfo = au_sbi(sb);
30002+ parent = dget_parent(xino->file->f_dentry);
30003+ if (remount) {
30004+ skip = 0;
30005+ dname = &xino->file->f_dentry->d_name;
30006+ cur_xino = sbinfo->si_xib;
30007+ if (cur_xino) {
30008+ cur_parent = dget_parent(cur_xino->f_dentry);
30009+ cur_name = &cur_xino->f_dentry->d_name;
30010+ skip = (cur_parent == parent
30011+ && dname->len == cur_name->len
30012+ && !memcmp(dname->name, cur_name->name,
30013+ dname->len));
30014+ dput(cur_parent);
30015+ }
30016+ if (skip)
30017+ goto out;
30018+ }
30019+
30020+ au_opt_set(sbinfo->si_mntflags, XINO);
30021+ dir = parent->d_inode;
30022+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
30023+ /* mnt_want_write() is unnecessary here */
30024+ err = au_xino_set_xib(sb, xino->file);
30025+ if (!err)
30026+ err = au_xigen_set(sb, xino->file);
30027+ if (!err)
30028+ err = au_xino_set_br(sb, xino->file);
30029+ mutex_unlock(&dir->i_mutex);
30030+ if (!err)
30031+ goto out; /* success */
30032+
30033+ /* reset all */
30034+ AuIOErr("failed creating xino(%d).\n", err);
30035+
4f0767ce 30036+out:
b752ccd1
AM
30037+ dput(parent);
30038+ return err;
30039+}
30040+
30041+/* ---------------------------------------------------------------------- */
30042+
30043+/*
30044+ * create a xinofile at the default place/path.
30045+ */
30046+struct file *au_xino_def(struct super_block *sb)
30047+{
30048+ struct file *file;
30049+ char *page, *p;
30050+ struct au_branch *br;
30051+ struct super_block *h_sb;
30052+ struct path path;
30053+ aufs_bindex_t bend, bindex, bwr;
30054+
30055+ br = NULL;
30056+ bend = au_sbend(sb);
30057+ bwr = -1;
30058+ for (bindex = 0; bindex <= bend; bindex++) {
30059+ br = au_sbr(sb, bindex);
30060+ if (au_br_writable(br->br_perm)
30061+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
30062+ bwr = bindex;
30063+ break;
30064+ }
30065+ }
30066+
7f207e10
AM
30067+ if (bwr >= 0) {
30068+ file = ERR_PTR(-ENOMEM);
30069+ page = __getname_gfp(GFP_NOFS);
30070+ if (unlikely(!page))
30071+ goto out;
30072+ path.mnt = br->br_mnt;
30073+ path.dentry = au_h_dptr(sb->s_root, bwr);
30074+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
30075+ file = (void *)p;
30076+ if (!IS_ERR(p)) {
30077+ strcat(p, "/" AUFS_XINO_FNAME);
30078+ AuDbg("%s\n", p);
30079+ file = au_xino_create(sb, p, /*silent*/0);
30080+ if (!IS_ERR(file))
30081+ au_xino_brid_set(sb, br->br_id);
30082+ }
30083+ __putname(page);
30084+ } else {
30085+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
30086+ if (IS_ERR(file))
30087+ goto out;
30088+ h_sb = file->f_dentry->d_sb;
30089+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
30090+ pr_err("xino doesn't support %s(%s)\n",
30091+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
30092+ fput(file);
30093+ file = ERR_PTR(-EINVAL);
30094+ }
30095+ if (!IS_ERR(file))
30096+ au_xino_brid_set(sb, -1);
30097+ }
0c5527e5 30098+
7f207e10
AM
30099+out:
30100+ return file;
30101+}
30102+
30103+/* ---------------------------------------------------------------------- */
30104+
30105+int au_xino_path(struct seq_file *seq, struct file *file)
30106+{
30107+ int err;
30108+
30109+ err = au_seq_path(seq, &file->f_path);
30110+ if (unlikely(err < 0))
30111+ goto out;
30112+
30113+ err = 0;
30114+#define Deleted "\\040(deleted)"
30115+ seq->count -= sizeof(Deleted) - 1;
30116+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
30117+ sizeof(Deleted) - 1));
30118+#undef Deleted
30119+
30120+out:
30121+ return err;
30122+}
30123diff -urN /usr/share/empty/include/linux/aufs_type.h linux/include/linux/aufs_type.h
30124--- /usr/share/empty/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
53392da6
AM
30125+++ linux/include/linux/aufs_type.h 2011-08-24 13:30:24.737979976 +0200
30126@@ -0,0 +1,211 @@
7f207e10 30127+/*
027c5e7a 30128+ * Copyright (C) 2005-2011 Junjiro R. Okajima
7f207e10
AM
30129+ *
30130+ * This program, aufs is free software; you can redistribute it and/or modify
30131+ * it under the terms of the GNU General Public License as published by
30132+ * the Free Software Foundation; either version 2 of the License, or
30133+ * (at your option) any later version.
30134+ *
30135+ * This program is distributed in the hope that it will be useful,
30136+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30137+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30138+ * GNU General Public License for more details.
30139+ *
30140+ * You should have received a copy of the GNU General Public License
30141+ * along with this program; if not, write to the Free Software
30142+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30143+ */
30144+
30145+#ifndef __AUFS_TYPE_H__
30146+#define __AUFS_TYPE_H__
30147+
30148+#include <linux/ioctl.h>
30149+#include <linux/kernel.h>
30150+#include <linux/limits.h>
30151+#include <linux/types.h>
30152+
53392da6 30153+#define AUFS_VERSION "3.0-20110822"
7f207e10
AM
30154+
30155+/* todo? move this to linux-2.6.19/include/magic.h */
30156+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
30157+
30158+/* ---------------------------------------------------------------------- */
30159+
30160+#ifdef CONFIG_AUFS_BRANCH_MAX_127
30161+typedef __s8 aufs_bindex_t;
30162+#define AUFS_BRANCH_MAX 127
30163+#else
30164+typedef __s16 aufs_bindex_t;
30165+#ifdef CONFIG_AUFS_BRANCH_MAX_511
30166+#define AUFS_BRANCH_MAX 511
30167+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
30168+#define AUFS_BRANCH_MAX 1023
30169+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
30170+#define AUFS_BRANCH_MAX 32767
30171+#endif
30172+#endif
30173+
30174+#ifdef __KERNEL__
30175+#ifndef AUFS_BRANCH_MAX
30176+#error unknown CONFIG_AUFS_BRANCH_MAX value
30177+#endif
30178+#endif /* __KERNEL__ */
30179+
30180+/* ---------------------------------------------------------------------- */
30181+
30182+#define AUFS_NAME "aufs"
30183+#define AUFS_FSTYPE AUFS_NAME
30184+
30185+#define AUFS_ROOT_INO 2
30186+#define AUFS_FIRST_INO 11
30187+
30188+#define AUFS_WH_PFX ".wh."
30189+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
30190+#define AUFS_WH_TMP_LEN 4
30191+/* a limit for rmdir/rename a dir */
30192+#define AUFS_MAX_NAMELEN (NAME_MAX \
30193+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
30194+ - 1 /* dot */\
30195+ - AUFS_WH_TMP_LEN) /* hex */
30196+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
30197+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
30198+#define AUFS_XINO_TRUNC_INIT 64 /* blocks */
30199+#define AUFS_XINO_TRUNC_STEP 4 /* blocks */
30200+#define AUFS_DIRWH_DEF 3
30201+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 30202+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
30203+#define AUFS_RDBLK_DEF 512 /* bytes */
30204+#define AUFS_RDHASH_DEF 32
30205+#define AUFS_WKQ_NAME AUFS_NAME "d"
30206+#define AUFS_WKQ_PRE_NAME AUFS_WKQ_NAME "_pre"
027c5e7a
AM
30207+#define AUFS_MFS_DEF_SEC 30 /* seconds */
30208+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
7f207e10
AM
30209+#define AUFS_PLINK_WARN 100 /* number of plinks */
30210+
30211+/* pseudo-link maintenance under /proc */
30212+#define AUFS_PLINK_MAINT_NAME "plink_maint"
30213+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
30214+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
30215+
30216+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
30217+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
30218+
30219+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
30220+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
30221+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
30222+
30223+/* doubly whiteouted */
30224+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
30225+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
30226+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
30227+
30228+/* branch permission */
30229+#define AUFS_BRPERM_RW "rw"
30230+#define AUFS_BRPERM_RO "ro"
30231+#define AUFS_BRPERM_RR "rr"
30232+#define AUFS_BRPERM_WH "wh"
30233+#define AUFS_BRPERM_NLWH "nolwh"
30234+#define AUFS_BRPERM_ROWH AUFS_BRPERM_RO "+" AUFS_BRPERM_WH
30235+#define AUFS_BRPERM_RRWH AUFS_BRPERM_RR "+" AUFS_BRPERM_WH
30236+#define AUFS_BRPERM_RWNLWH AUFS_BRPERM_RW "+" AUFS_BRPERM_NLWH
30237+
30238+/* ---------------------------------------------------------------------- */
30239+
30240+/* ioctl */
30241+enum {
30242+ /* readdir in userspace */
30243+ AuCtl_RDU,
30244+ AuCtl_RDU_INO,
30245+
30246+ /* pathconf wrapper */
027c5e7a
AM
30247+ AuCtl_WBR_FD,
30248+
30249+ /* busy inode */
30250+ AuCtl_IBUSY
7f207e10
AM
30251+};
30252+
30253+/* borrowed from linux/include/linux/kernel.h */
30254+#ifndef ALIGN
30255+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
30256+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
30257+#endif
30258+
30259+/* borrowed from linux/include/linux/compiler-gcc3.h */
30260+#ifndef __aligned
30261+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
30262+#endif
30263+
30264+#ifdef __KERNEL__
30265+#ifndef __packed
7f207e10
AM
30266+#define __packed __attribute__((packed))
30267+#endif
53392da6 30268+#endif
7f207e10
AM
30269+
30270+struct au_rdu_cookie {
30271+ __u64 h_pos;
30272+ __s16 bindex;
30273+ __u8 flags;
30274+ __u8 pad;
30275+ __u32 generation;
30276+} __aligned(8);
30277+
30278+struct au_rdu_ent {
30279+ __u64 ino;
30280+ __s16 bindex;
30281+ __u8 type;
30282+ __u8 nlen;
30283+ __u8 wh;
30284+ char name[0];
30285+} __aligned(8);
30286+
30287+static inline int au_rdu_len(int nlen)
30288+{
30289+ /* include the terminating NULL */
30290+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
30291+ sizeof(__u64));
30292+}
30293+
30294+union au_rdu_ent_ul {
30295+ struct au_rdu_ent __user *e;
30296+ __u64 ul;
30297+};
30298+
30299+enum {
30300+ AufsCtlRduV_SZ,
30301+ AufsCtlRduV_End
30302+};
30303+
30304+struct aufs_rdu {
30305+ /* input */
30306+ union {
30307+ __u64 sz; /* AuCtl_RDU */
30308+ __u64 nent; /* AuCtl_RDU_INO */
30309+ };
30310+ union au_rdu_ent_ul ent;
30311+ __u16 verify[AufsCtlRduV_End];
30312+
30313+ /* input/output */
30314+ __u32 blk;
30315+
30316+ /* output */
30317+ union au_rdu_ent_ul tail;
30318+ /* number of entries which were added in a single call */
30319+ __u64 rent;
30320+ __u8 full;
30321+ __u8 shwh;
30322+
30323+ struct au_rdu_cookie cookie;
30324+} __aligned(8);
30325+
027c5e7a
AM
30326+struct aufs_ibusy {
30327+ __u64 ino, h_ino;
30328+ __s16 bindex;
30329+} __aligned(8);
30330+
7f207e10
AM
30331+#define AuCtlType 'A'
30332+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
30333+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
30334+#define AUFS_CTL_WBR_FD _IO(AuCtlType, AuCtl_WBR_FD)
027c5e7a 30335+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
7f207e10
AM
30336+
30337+#endif /* __AUFS_TYPE_H__ */
87a755f4 30338
This page took 4.594248 seconds and 4 git commands to generate.