]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs2.patch
up to 2.6.32.63, use .xz patch
[packages/kernel.git] / kernel-aufs2.patch
CommitLineData
1308ab2a 1diff -uprN -x .git linux-2.6.31/Documentation/ABI/testing/debugfs-aufs aufs2-2.6.git/Documentation/ABI/testing/debugfs-aufs
2--- linux-2.6.31/Documentation/ABI/testing/debugfs-aufs 1970-01-01 00:00:00.000000000 +0000
3+++ aufs2-2.6.git/Documentation/ABI/testing/debugfs-aufs 2009-09-14 14:52:35.032396516 +0000
1facf9fc 4@@ -0,0 +1,40 @@
5+What: /debug/aufs/si_<id>/
6+Date: March 2009
7+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
8+Description:
9+ Under /debug/aufs, a directory named si_<id> is created
10+ per aufs mount, where <id> is a unique id generated
11+ internally.
12+
13+What: /debug/aufs/si_<id>/xib
14+Date: March 2009
15+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
16+Description:
17+ It shows the consumed blocks by xib (External Inode Number
18+ Bitmap), its block size and file size.
19+ When the aufs mount option 'noxino' is specified, it
20+ will be empty. About XINO files, see
21+ Documentation/filesystems/aufs/aufs.5 in detail.
22+
23+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
24+Date: March 2009
25+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
26+Description:
27+ It shows the consumed blocks by xino (External Inode Number
28+ Translation Table), its link count, block size and file
29+ size.
30+ When the aufs mount option 'noxino' is specified, it
31+ will be empty. About XINO files, see
32+ Documentation/filesystems/aufs/aufs.5 in detail.
33+
34+What: /debug/aufs/si_<id>/xigen
35+Date: March 2009
36+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
37+Description:
38+ It shows the consumed blocks by xigen (External Inode
39+ Generation Table), its block size and file size.
40+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
41+ be created.
42+ When the aufs mount option 'noxino' is specified, it
43+ will be empty. About XINO files, see
44+ Documentation/filesystems/aufs/aufs.5 in detail.
1308ab2a 45diff -uprN -x .git linux-2.6.31/Documentation/ABI/testing/sysfs-aufs aufs2-2.6.git/Documentation/ABI/testing/sysfs-aufs
46--- linux-2.6.31/Documentation/ABI/testing/sysfs-aufs 1970-01-01 00:00:00.000000000 +0000
47+++ aufs2-2.6.git/Documentation/ABI/testing/sysfs-aufs 2009-09-14 14:52:35.032396516 +0000
1facf9fc 48@@ -0,0 +1,25 @@
49+What: /sys/fs/aufs/si_<id>/
50+Date: March 2009
51+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
52+Description:
53+ Under /sys/fs/aufs, a directory named si_<id> is created
54+ per aufs mount, where <id> is a unique id generated
55+ internally.
56+
57+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
58+Date: March 2009
59+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
60+Description:
61+ It shows the absolute path of a member directory (which
62+ is called branch) in aufs, and its permission.
63+
64+What: /sys/fs/aufs/si_<id>/xi_path
65+Date: March 2009
66+Contact: J. R. Okajima <hooanon05@yahoo.co.jp>
67+Description:
68+ It shows the absolute path of XINO (External Inode Number
69+ Bitmap, Translation Table and Generation Table) file
70+ even if it is the default path.
71+ When the aufs mount option 'noxino' is specified, it
72+ will be empty. About XINO files, see
73+ Documentation/filesystems/aufs/aufs.5 in detail.
1308ab2a 74diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/README aufs2-2.6.git/Documentation/filesystems/aufs/README
75--- linux-2.6.31/Documentation/filesystems/aufs/README 1970-01-01 00:00:00.000000000 +0000
76+++ aufs2-2.6.git/Documentation/filesystems/aufs/README 2009-09-21 21:48:58.761610020 +0000
77@@ -0,0 +1,342 @@
78+
79+Aufs2 -- advanced multi layered unification filesystem version 2
80+http://aufs.sf.net
81+Junjiro R. Okajima
82+
83+
84+0. Introduction
85+----------------------------------------
86+In the early days, aufs was entirely re-designed and re-implemented
87+Unionfs Version 1.x series. After many original ideas, approaches,
88+improvements and implementations, it becomes totally different from
89+Unionfs while keeping the basic features.
90+Recently, Unionfs Version 2.x series begin taking some of the same
91+approaches to aufs1's.
92+Unionfs is being developed by Professor Erez Zadok at Stony Brook
93+University and his team.
94+
95+This version of AUFS, aufs2 has several purposes.
96+- to be reviewed easily and widely.
97+- to make the source files simpler and smaller by dropping several
98+ original features.
99+
100+Through this work, I found some bad things in aufs1 source code and
101+fixed them. Some of the dropped features will be reverted in the future,
102+but not all I'm afraid.
103+Aufs2 supports linux-2.6.27 and later. If you want older kernel version
104+support, try aufs1 from CVS on SourceForge.
105+
106+Note: it becomes clear that "Aufs was rejected. Let's give it up."
107+According to Christoph Hellwig, linux rejects all union-type filesystems
108+but UnionMount.
109+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
110+
111+
112+1. Features
113+----------------------------------------
114+- unite several directories into a single virtual filesystem. The member
115+ directory is called as a branch.
116+- you can specify the permission flags to the branch, which are 'readonly',
117+ 'readwrite' and 'whiteout-able.'
118+- by upper writable branch, internal copyup and whiteout, files/dirs on
119+ readonly branch are modifiable logically.
120+- dynamic branch manipulation, add, del.
121+- etc...
122+
123+Also there are many enhancements in aufs1, such as:
124+- readdir(3) in userspace.
125+- keep inode number by external inode number table
126+- keep the timestamps of file/dir in internal copyup operation
127+- seekable directory, supporting NFS readdir.
128+- support mmap(2) including /proc/PID/exe symlink, without page-copy
129+- whiteout is hardlinked in order to reduce the consumption of inodes
130+ on branch
131+- do not copyup, nor create a whiteout when it is unnecessary
132+- revert a single systemcall when an error occurs in aufs
133+- remount interface instead of ioctl
134+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
135+- loopback mounted filesystem as a branch
136+- kernel thread for removing the dir which has plenty of whiteouts
137+- support copyup sparse file (a file which has a 'hole' in it)
138+- default permission flags for branches
139+- selectable permission flags for ro branch, whether whiteout can
140+ exist or not
141+- export via NFS.
142+- support <sysfs>/fs/aufs and <debugfs>/aufs.
143+- support multiple writable branches, some policies to select one
144+ among multiple writable branches.
145+- a new semantics for link(2) and rename(2) to support multiple
146+ writable branches.
147+- no glibc changes are required.
148+- pseudo hardlink (hardlink over branches)
149+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
150+ including NFS or remote filesystem branch.
151+- and more...
152+
153+Currently these features are dropped temporarily from this version, aufs2.
154+See design/08plan.txt in detail.
155+- test only the highest one for the directory permission (dirperm1)
156+- show whiteout mode (shwh)
157+- copyup on open (coo=)
158+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
159+ (robr)
160+- statistics of aufs thread (/sys/fs/aufs/stat)
161+- delegation mode (dlgt)
162+ a delegation of the internal branch access to support task I/O
163+ accounting, which also supports Linux Security Modules (LSM) mainly
164+ for Suse AppArmor.
165+- intent.open/create (file open in a single lookup)
166+
167+Features or just an idea in the future (see also design/*.txt),
168+- reorder the branch index without del/re-add.
169+- permanent xino files for NFSD
170+- an option for refreshing the opened files after add/del branches
171+- 'move' policy for copy-up between two writable branches, after
172+ checking free space.
173+- O_DIRECT
174+- light version, without branch manipulation. (unnecessary?)
175+- copyup in userspace
176+- inotify in userspace
177+- readv/writev
178+- xattr, acl
179+
180+
181+2. Download
182+----------------------------------------
183+Kindly, one of the aufs users, the Center for Scientific Computing and Free
184+Software (C3SL), Federal University of Parana offered me a public GIT
185+tree space.
186+
187+There are three GIT trees, aufs2-2.6, aufs2-standalone and aufs2-util.
188+While the aufs2-util is always necessary, you need either of aufs2-2.6
189+or aufs2-standalone.
190+
191+The aufs2-2.6 tree includes the whole linux-2.6 GIT tree,
192+git://git.kernel.org/.../torvalds/linux-2.6.git.
193+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
194+build aufs2 as an external kernel module.
195+If you already have linux-2.6 GIT tree, you may want to pull and merge
196+the "aufs2" branch from this tree.
197+
198+On the other hand, the aufs2-standalone tree has only aufs2 source files
199+and a necessary patch, and you can select CONFIG_AUFS_FS=m. In other
200+words, the aufs2-standalone tree is generated from aufs2-2.6 tree by,
201+- extract new files and modifications.
202+- generate some patch files from modifications.
203+- generate a ChangeLog file from git-log.
204+- commit the files newly and no log messages. this is not git-pull.
205+
206+Both of aufs2-2.6 and aufs2-standalone trees have a branch whose name is
207+in form of "aufs2-xx" where "xx" represents the linux kernel version,
208+"linux-2.6.xx".
209+
210+o aufs2-2.6 tree
211+$ git clone --reference /your/linux-2.6/git/tree \
212+ http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git \
213+ aufs2-2.6.git
214+- if you don't have linux-2.6 GIT tree, then remove "--reference ..."
215+$ cd aufs2-2.6.git
216+$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
217+ # aufs2 (no -xx) for the latest -rc version.
218+
219+o aufs2-standalone tree
220+$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-standalone.git \
221+ aufs2-standalone.git
222+$ cd aufs2-standalone.git
223+$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
224+ # aufs2 (no -xx) for the latest -rc version.
225+
226+o aufs2-util tree
227+$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-util.git \
228+ aufs2-util.git
229+$ cd aufs2-util.git
230+- no particular tag/branch currently.
231+
232+o for advanced users
233+$ git clone git://git.kernel.org/.../torvalds/linux-2.6.git linux-2.6.git
234+ It will take very long time.
235+
236+$ cd linux-2.6.git
237+$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
238+$ git checkout -b aufs2-27 v2.6.27
239+$ git pull aufs2 aufs2-27
240+ It may take long time again.
241+ Once pulling completes, you've got linux-2.6.27 and aufs2 for it in a
242+ branch named aufs2-27, and you can configure and build it.
243+
244+Or
245+
246+$ git checkout -t -b aufs2 master
247+$ git pull aufs2 aufs2
248+ then you've got the latest linux kernel and the latest aufs2 in a
249+ branch named aufs2, and you can configure and build it.
250+ But aufs is released once a week, so you may meet a compilation error
251+ due to mismatching between the mainline and aufs2.
252+
253+Or you may want to build linux-2.6.xx.yy instead of linux-2.6.xx, then here
254+is an approach using linux-2.6-stable GIT tree.
255+
256+$ cd linux-2.6.git/..
257+$ git clone -q --reference ./linux-2.6.git git://git.kernel.org/.../linux-2.6-stable.git \
258+ linux-2.6-stable.git
259+ It will take very long time.
260+
261+$ cd linux-2.6-stable.git
262+$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
263+$ git checkout -b aufs2-27.1 v2.6.27.1
264+$ git pull aufs2 aufs2-27
265+ then you've got linux-2.6.27.1 and aufs2 for 2.6.27 in a branch named
266+ aufs2-27.1, and you can configure and build it.
267+ But the changes made by v2.6.xx.yy may conflict with aufs2-xx, since
268+ aufs2-xx is for v2.6.xx only. In this case, you may find some patches
269+ for v2.6.xx.yy in aufs2-standalone.git#aufs2-xx branch if someone else
270+ has ever requested me to support v2.6.xx.yy and I did it.
271+
272+You can also check what was changed by pulling aufs2.
273+$ git diff v2.6.27.1..aufs2-27.1
274+
275+If you want to check the changed files other than fs/aufs, then try this.
276+$ git diff v2.6.27.1..aufs2-27.1 |
277+> awk '
278+> /^diff / {new=1}
279+> /^diff.*aufs/ {new=0}
280+> new {print}
281+> '
282+
283+
284+3. Configuration and Compilation
285+----------------------------------------
286+For aufs2-2.6 tree,
287+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
288+- set other aufs configurations if necessary.
289+
290+For aufs2-standalone tree,
291+There are several ways to build.
292+
293+You may feel why aufs2-standalone.patch needs to export so many kernel
294+symbols. Because you selected aufs2-standalone tree instead of aufs2-2.6
295+tree. The number of necessary symbols to export essentially is zero.
296+All other symbols are for the external module.
297+If you don't like aufs2-standalone.patch, then try aufs2-2.6 tree.
298+
299+1.
300+- apply ./aufs2-kbuild.patch to your kernel source files.
301+- apply ./aufs2-base.patch too.
302+- apply ./aufs2-standalone.patch too, if you have a plan to set
303+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs2-standalone.patch.
304+- copy ./{Documentation,fs,include} files to your kernel source tree.
305+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
306+ =m or =y.
307+- and build your kernel as usual.
308+- install it and reboot your system.
309+
310+2.
311+- module only (CONFIG_AUFS_FS=m).
312+- apply ./aufs2-base.patch to your kernel source files.
313+- apply ./aufs2-standalone.patch too.
314+- build your kernel and reboot.
315+- edit ./config.mk and set other aufs configurations if necessary.
316+ Note: You should read ./fs/aufs/Kconfig carefully which describes
317+ every aufs configurations.
318+- build the module by simple "make".
319+- you can specify ${KDIR} make variable which points to your kernel
320+ source tree.
321+- copy the build ./aufs.ko to /lib/modules/..., and run depmod -a (or
322+ reboot simply).
323+- no need to apply aufs2-kbuild.patch, nor copying source files to your
324+ kernel source tree.
325+
326+And then,
327+- read README in aufs2-util, build and install it
328+- if you want to use readdir(3) in userspace, then run
329+ "make install_ulib" too. And refer to the aufs manual in detail.
330+
331+
332+4. Usage
333+----------------------------------------
334+At first, make sure aufs2-util are installed, and please read the aufs
335+manual, aufs.5 in aufs2-util.git tree.
336+$ man -l aufs.5
337+
338+And then,
339+$ mkdir /tmp/rw /tmp/aufs
340+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
341+
342+Here is another example. The result is equivalent.
343+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
344+ Or
345+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
346+# mount -o remount,append:${HOME} /tmp/aufs
347+
348+Then, you can see whole tree of your home dir through /tmp/aufs. If
349+you modify a file under /tmp/aufs, the one on your home directory is
350+not affected, instead the same named file will be newly created under
351+/tmp/rw. And all of your modification to a file will be applied to
352+the one under /tmp/rw. This is called the file based Copy on Write
353+(COW) method.
354+Aufs mount options are described in aufs.5.
355+
356+Additionally, there are some sample usages of aufs which are a
357+diskless system with network booting, and LiveCD over NFS.
358+See sample dir in CVS tree on SourceForge.
359+
360+
361+5. Contact
362+----------------------------------------
363+When you have any problems or strange behaviour in aufs, please let me
364+know with:
365+- /proc/mounts (instead of the output of mount(8))
366+- /sys/module/aufs/*
367+- /sys/fs/aufs/* (if you have them)
368+- /debug/aufs/* (if you have them)
369+- linux kernel version
370+ if your kernel is not plain, for example modified by distributor,
371+ the url where i can download its source is necessary too.
372+- aufs version which was printed at loading the module or booting the
373+ system, instead of the date you downloaded.
374+- configuration (define/undefine CONFIG_AUFS_xxx)
375+- kernel configuration or /proc/config.gz (if you have it)
376+- behaviour which you think to be incorrect
377+- actual operation, reproducible one is better
378+- mailto: aufs-users at lists.sourceforge.net
379+
380+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
381+and Feature Requests) on SourceForge. Please join and write to
382+aufs-users ML.
383+
384+
385+6. Acknowledgements
386+----------------------------------------
387+Thanks to everyone who have tried and are using aufs, whoever
388+have reported a bug or any feedback.
389+
390+Especially donors:
391+Tomas Matejicek(slax.org) made a donation (much more than once).
392+Dai Itasaka made a donation (2007/8).
393+Chuck Smith made a donation (2008/4, 10 and 12).
394+Henk Schoneveld made a donation (2008/9).
395+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
396+Francois Dupoux made a donation (2008/11).
397+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
398+aufs2 GIT tree (2009/2).
399+William Grant made a donation (2009/3).
400+Patrick Lane made a donation (2009/4).
401+The Mail Archive (mail-archive.com) made donations (2009/5).
402+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
403+
404+Thank you very much.
405+Donations are always, including future donations, very important and
406+helpful for me to keep on developing aufs.
407+
408+
409+7.
410+----------------------------------------
411+If you are an experienced user, no explanation is needed. Aufs is
412+just a linux filesystem.
413+
414+
415+Enjoy!
416+
417+# Local variables: ;
418+# mode: text;
419+# End: ;
420diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/01intro.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/01intro.txt
421--- linux-2.6.31/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 00:00:00.000000000 +0000
422+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/01intro.txt 2009-09-21 21:48:58.761610020 +0000
423@@ -0,0 +1,137 @@
424+
425+# Copyright (C) 2005-2009 Junjiro R. Okajima
426+#
427+# This program is free software; you can redistribute it and/or modify
428+# it under the terms of the GNU General Public License as published by
429+# the Free Software Foundation; either version 2 of the License, or
430+# (at your option) any later version.
431+#
432+# This program is distributed in the hope that it will be useful,
433+# but WITHOUT ANY WARRANTY; without even the implied warranty of
434+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
435+# GNU General Public License for more details.
436+#
437+# You should have received a copy of the GNU General Public License
438+# along with this program; if not, write to the Free Software
439+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
440+
441+Introduction
442+----------------------------------------
443+
444+aufs [ei ju: ef es] | [a u f s]
445+1. abbrev. for "advanced multi-layered unification filesystem".
446+2. abbrev. for "another unionfs".
447+3. abbrev. for "auf das" in German which means "on the" in English.
448+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
449+ But "Filesystem aufs Filesystem" is hard to understand.
450+
451+AUFS is a filesystem with features:
452+- multi layered stackable unification filesystem, the member directory
453+ is called as a branch.
454+- branch permission and attribute, 'readonly', 'real-readonly',
455+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their
456+ combination.
457+- internal "file copy-on-write".
458+- logical deletion, whiteout.
459+- dynamic branch manipulation, adding, deleting and changing permission.
460+- allow bypassing aufs, user's direct branch access.
461+- external inode number translation table and bitmap which maintains the
462+ persistent aufs inode number.
463+- seekable directory, including NFS readdir.
464+- file mapping, mmap and sharing pages.
465+- pseudo-link, hardlink over branches.
466+- loopback mounted filesystem as a branch.
467+- several policies to select one among multiple writable branches.
468+- revert a single systemcall when an error occurs in aufs.
469+- and more...
470+
471+
472+Multi Layered Stackable Unification Filesystem
473+----------------------------------------------------------------------
474+Most people already know what it is.
475+It is a filesystem which unifies several directories and provides a
476+merged single directory. When users access a file, the access will be
477+passed/re-directed/converted (sorry, I am not sure which English word is
478+correct) to the real file on the member filesystem. The member
479+filesystem is called 'lower filesystem' or 'branch' and has a mode
480+'readonly' and 'readwrite.' And the deletion for a file on the lower
481+readonly branch is handled by creating 'whiteout' on the upper writable
482+branch.
483+
484+On LKML, there have been discussions about UnionMount (Jan Blunck and
485+Bharata B Rao) and Unionfs (Erez Zadok). They took different approaches
486+to implement the merged-view.
487+The former tries putting it into VFS, and the latter implements as a
488+separate filesystem.
489+(If I misunderstand about these implementations, please let me know and
490+I shall correct it. Because it is a long time ago when I read their
491+source files last time).
492+UnionMount's approach will be able to be small, but may be hard to share
493+branches between several UnionMount since the whiteout in it is
494+implemented in the inode on branch filesystem and always
495+shared. According to Bharata's post, readdir does not seem to be
496+finished yet.
497+Unionfs has a longer history. When I started implementing a stacking filesystem
498+(Aug 2005), it already existed. It has virtual super_block, inode,
499+dentry and file objects and they have an array pointing lower same kind
500+objects. After contributing many patches for Unionfs, I re-started my
501+project AUFS (Jun 2006).
502+
503+In AUFS, the structure of the filesystem resembles Unionfs, but I
504+implemented my own ideas, approaches and enhancements and it became
505+totally different one.
506+
507+
508+Several characters/aspects of aufs
509+----------------------------------------------------------------------
510+
511+Aufs has several characters or aspects.
512+1. a filesystem, callee of VFS helper
513+2. sub-VFS, caller of VFS helper for branches
514+3. a virtual filesystem which maintains persistent inode number
515+4. reader/writer of files on branches such like an application
516+
517+1. Callee of VFS Helper
518+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
519+unlink(2) from an application reaches sys_unlink() kernel function and
520+then vfs_unlink() is called. vfs_unlink() is one of the VFS helpers and it
521+calls filesystem specific unlink operation. Actually aufs implements the
522+unlink operation but it behaves like a redirector.
523+
524+2. Caller of VFS Helper for Branches
525+aufs_unlink() passes the unlink request to the branch filesystem as if
526+it were called from VFS. So the called unlink operation of the branch
527+filesystem acts as usual. As a caller of VFS helper, aufs should handle
528+every necessary pre/post operation for the branch filesystem.
529+- acquire the lock for the parent dir on a branch
530+- lookup in a branch
531+- revalidate dentry on a branch
532+- mnt_want_write() for a branch
533+- vfs_unlink() for a branch
534+- mnt_drop_write() for a branch
535+- release the lock on a branch
536+
537+3. Persistent Inode Number
538+One of the most important issues for a filesystem is to maintain inode
539+numbers. This is particularly important to support exporting a
540+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
541+backend block device for its own. But some storage is necessary to
542+maintain inode number. It may be a large space and may not suit to keep
543+in memory. Aufs rents some space from its first writable branch
544+filesystem (by default) and creates file(s) on it. These files are
545+created by aufs internally and removed soon (currently) keeping opened.
546+Note: Because these files are removed, they are totally gone after
547+ unmounting aufs. It means the inode numbers are not persistent
548+ across unmount or reboot. I have a plan to make them really
549+ persistent which will be important for aufs on NFS server.
550+
551+4. Read/Write Files Internally (copy-on-write)
552+Because a branch can be readonly, when you write a file on it, aufs will
553+"copy-up" it to the upper writable branch internally. And then write the
554+originally requested thing to the file. Generally kernel doesn't
555+open/read/write file actively. In aufs, even a single write may cause an
556+internal "file copy". This behaviour is very similar to cp(1) command.
557+
558+Some people may think it is better to pass such work to user space
559+helper, instead of doing in kernel space. Actually I am still thinking
560+about it. But currently I have implemented it in kernel space.
561diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/02struct.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/02struct.txt
562--- linux-2.6.31/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 00:00:00.000000000 +0000
563+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/02struct.txt 2009-09-21 21:48:58.761610020 +0000
564@@ -0,0 +1,218 @@
565+
566+# Copyright (C) 2005-2009 Junjiro R. Okajima
567+#
568+# This program is free software; you can redistribute it and/or modify
569+# it under the terms of the GNU General Public License as published by
570+# the Free Software Foundation; either version 2 of the License, or
571+# (at your option) any later version.
572+#
573+# This program is distributed in the hope that it will be useful,
574+# but WITHOUT ANY WARRANTY; without even the implied warranty of
575+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
576+# GNU General Public License for more details.
577+#
578+# You should have received a copy of the GNU General Public License
579+# along with this program; if not, write to the Free Software
580+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
581+
582+Basic Aufs Internal Structure
583+
584+Superblock/Inode/Dentry/File Objects
585+----------------------------------------------------------------------
586+Like an ordinary filesystem, aufs has its own
587+superblock/inode/dentry/file objects. All these objects have a
588+dynamically allocated array and store the same kind of pointers to the
589+lower filesystem, branch.
590+For example, when you build a union with one readwrite branch and one
591+readonly, mounted /au, /rw and /ro respectively.
592+- /au = /rw + /ro
593+- /ro/fileA exists but /rw/fileA does not
594+
595+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
596+pointers are stored in an aufs dentry. The array in aufs dentry will be,
597+- [0] = NULL
598+- [1] = /ro/fileA
599+
600+This style of an array is essentially same to the aufs
601+superblock/inode/dentry/file objects.
602+
603+Because aufs supports manipulating branches, ie. add/delete/change
604+dynamically, these objects have their own generation. When branches are
605+changed, the generation in aufs superblock is incremented. And the
606+generation in other objects is compared when it is accessed.
607+When the generation in another object is obsolete, aufs refreshes the
608+internal array.
609+
610+
611+Superblock
612+----------------------------------------------------------------------
613+Additionally aufs superblock has some data for policies to select one
614+among multiple writable branches, XIB files, pseudo-links and kobject.
615+See below in detail.
616+About the policies which support copy-down a directory, see policy.txt
617+too.
618+
619+
620+Branch and XINO(External Inode Number Translation Table)
621+----------------------------------------------------------------------
622+Every branch has its own xino (external inode number translation table)
623+file. The xino file is created and unlinked by aufs internally. When two
624+members of a union exist on the same filesystem, they share the single
625+xino file.
626+The struct of a xino file is simple, just a sequence of aufs inode
627+numbers which is indexed by the lower inode number.
628+In the above sample, assume the inode number of /ro/fileA is i111 and
629+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
630+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
631+
632+When the inode numbers are not contiguous, the xino file will be sparse
633+which has a hole in it and doesn't consume as much disk space as it
634+might appear. If your branch filesystem consumes disk space for such
635+holes, then you should specify 'xino=' option at mounting aufs.
636+
637+Also a writable branch has three kinds of "whiteout bases". All these
638+exist when the branch is joined to aufs and the names are
639+whiteout-ed doubly, so that users will never see their names in aufs
640+hierarchy.
641+1. a regular file which will be linked to all whiteouts.
642+2. a directory to store a pseudo-link.
643+3. a directory to store an "orphan-ed" file temporarily.
644+
645+1. Whiteout Base
646+ When you remove a file on a readonly branch, aufs handles it as a
647+ logical deletion and creates a whiteout on the upper writable branch
648+ as a hardlink of this file in order not to consume inode on the
649+ writable branch.
650+2. Pseudo-link Dir
651+ See below, Pseudo-link.
652+3. Step-Parent Dir
653+ When "fileC" exists on the lower readonly branch only and it is
654+ opened and removed with its parent dir, and then user writes
655+ something into it, then aufs copies-up fileC to this
656+ directory. Because there is no other dir to store fileC. After
657+ creating a file under this dir, the file is unlinked.
658+
659+Because aufs supports manipulating branches, ie. add/delete/change
660+dynamically, a branch has its own id. When the branch order changes, aufs
661+finds the new index by searching the branch id.
662+
663+
664+Pseudo-link
665+----------------------------------------------------------------------
666+Assume "fileA" exists on the lower readonly branch only and it is
667+hardlinked to "fileB" on the branch. When you write something to fileA,
668+aufs copies-up it to the upper writable branch. Additionally aufs
669+creates a hardlink under the Pseudo-link Directory of the writable
670+branch. The inode of a pseudo-link is kept in aufs super_block as a
671+simple list. If fileB is read after unlinking fileA, aufs returns
672+filedata from the pseudo-link instead of the lower readonly
673+branch. Because the pseudo-link is based upon the inode, to keep the
674+inode number by xino (see above) is important.
675+
676+All the hardlinks under the Pseudo-link Directory of the writable branch
677+should be restored in a proper location later. Aufs provides a utility
678+to do this. The userspace helpers are executed at remounting and unmounting
679+aufs by default.
680+
681+
682+XIB(external inode number bitmap)
683+----------------------------------------------------------------------
684+In addition to the xino file per a branch, aufs has an external inode number
685+bitmap in a superblock object. It is also a file such like a xino file.
686+It is a simple bitmap to mark whether the aufs inode number is in-use or
687+not.
688+To reduce the file I/O, aufs prepares a single memory page to cache xib.
689+
690+Aufs implements a feature to truncate/refresh both of xino and xib to
691+reduce the number of consumed disk blocks for these files.
692+
693+
694+Virtual or Vertical Dir
695+----------------------------------------------------------------------
696+In order to support multiple layers (branches), aufs readdir operation
697+constructs a virtual dir block on memory. For readdir, aufs calls
698+vfs_readdir() internally for each dir on branches, merges their entries
699+with eliminating the whiteout-ed ones, and sets it to file (dir)
700+object. So the file object has its entry list until it is closed. The
701+entry list will be updated when the file position is zero and becomes
702+old. This decision is made in aufs automatically.
703+
704+The dynamically allocated memory block for the name of entries has a
705+unit of 512 bytes (by default) and stores the names contiguously (no
706+padding). Another block for each entry is handled by kmem_cache too.
707+During building dir blocks, aufs creates a hash list and judges whether
708+the entry is whiteouted by its upper branch or already listed.
709+
710+Some people may call it can be a security hole or invite DoS attack
711+since the opened and once readdir-ed dir (file object) holds its entry
712+list and becomes a pressure for system memory. But I'd say it is similar
713+to files under /proc or /sys. The virtual files in them also holds a
714+memory page (generally) while they are opened. When an idea to reduce
715+memory for them is introduced, it will be applied to aufs too.
716+For those who really hate this situation, I've developed readdir(3)
717+library which operates this merging in userspace. You just need to set
718+LD_PRELOAD environment variable, and aufs will not consume any memory in
719+kernel space for readdir(3).
720+
721+
722+Workqueue
723+----------------------------------------------------------------------
724+Aufs sometimes requires privilege access to a branch. For instance,
725+in copy-up/down operation. When a user process is going to make changes
726+to a file which exists in the lower readonly branch only, and the mode
727+of one of ancestor directories may not be writable by a user
728+process. Here aufs copy-up the file with its ancestors and they may
729+require privilege to set its owner/group/mode/etc.
730+This is a typical case of an application character of aufs (see
731+Introduction).
732+
733+Aufs uses workqueue synchronously for this case. It creates its own
734+workqueue. The workqueue is a kernel thread and has privilege. Aufs
735+passes the request to call mkdir or write (for example), and wait for
736+its completion. This approach solves a problem of a signal handler
737+simply.
738+If aufs didn't adopt the workqueue and changed the privilege of the
739+process, and if the mkdir/write call arises SIGXFSZ or other signal,
740+then the user process might gain a privilege or the generated core file
741+was owned by a superuser. But I have a plan to switch to a new
742+credential approach which will be introduced in linux-2.6.29.
743+
744+Also aufs uses the system global workqueue ("events" kernel thread) too
745+for asynchronous tasks, such like handling inotify, re-creating a
746+whiteout base and etc. This is unrelated to a privilege.
747+Most of aufs operation tries acquiring a rw_semaphore for aufs
748+superblock at the beginning, at the same time waits for the completion
749+of all queued asynchronous tasks.
750+
751+
752+Whiteout
753+----------------------------------------------------------------------
754+The whiteout in aufs is very similar to Unionfs's. That is represented
755+by its filename. UnionMount takes an approach of a file mode, but I am
756+afraid several utilities (find(1) or something) will have to support it.
757+
758+Basically the whiteout represents "logical deletion" which stops aufs to
759+lookup further, but also it represents "dir is opaque" which also stop
760+lookup.
761+
762+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
763+In order to make several functions in a single systemcall to be
764+revertible, aufs adopts an approach to rename a directory to a temporary
765+unique whiteouted name.
766+For example, in rename(2) dir where the target dir already existed, aufs
767+renames the target dir to a temporary unique whiteouted name before the
768+actual rename on a branch and then handles other actions (make it opaque,
769+update the attributes, etc). If an error happens in these actions, aufs
770+simply renames the whiteouted name back and returns an error. If all are
771+succeeded, aufs registers a function to remove the whiteouted unique
772+temporary name completely and asynchronously to the system global
773+workqueue.
774+
775+
776+Copy-up
777+----------------------------------------------------------------------
778+It is a well-known feature or concept.
779+When a user modifies a file on a readonly branch, aufs operates "copy-up"
780+internally and makes change to the new file on the upper writable branch.
781+When the trigger systemcall does not update the timestamps of the parent
782+dir, aufs reverts it after copy-up.
783diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/03lookup.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/03lookup.txt
784--- linux-2.6.31/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 00:00:00.000000000 +0000
785+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/03lookup.txt 2009-09-21 21:48:58.761610020 +0000
786@@ -0,0 +1,104 @@
787+
788+# Copyright (C) 2005-2009 Junjiro R. Okajima
789+#
790+# This program is free software; you can redistribute it and/or modify
791+# it under the terms of the GNU General Public License as published by
792+# the Free Software Foundation; either version 2 of the License, or
793+# (at your option) any later version.
794+#
795+# This program is distributed in the hope that it will be useful,
796+# but WITHOUT ANY WARRANTY; without even the implied warranty of
797+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
798+# GNU General Public License for more details.
799+#
800+# You should have received a copy of the GNU General Public License
801+# along with this program; if not, write to the Free Software
802+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
803+
804+Lookup in a Branch
805+----------------------------------------------------------------------
806+Since aufs has a character of sub-VFS (see Introduction), it operates
807+lookup for branches as VFS does. It may be a heavy work. Generally
808+speaking struct nameidata is a bigger structure and includes many
809+information. But almost all lookup operation in aufs is the simplest
810+case, ie. lookup only an entry directly connected to its parent. Digging
811+down the directory hierarchy is unnecessary.
812+
813+VFS has a function lookup_one_len() for that use, but it is not usable
814+for a branch filesystem which requires struct nameidata. So aufs
815+implements a simple lookup wrapper function. When a branch filesystem
816+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
817+a simplest nameidata and calls lookup_hash().
818+Here aufs applies "a principle in NFSD", ie. if the filesystem supports
819+NFS-export, then it has to support NULL as a nameidata parameter for
820+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
821+aufs tests if ->s_export_op in the branch is NULL or not.
822+
823+When a branch is a remote filesystem, aufs trusts its ->d_revalidate().
824+For d_revalidate, aufs implements three levels of revalidate tests. See
825+"Revalidate Dentry and UDBA" in detail.
826+
827+
828+Loopback Mount
829+----------------------------------------------------------------------
830+Basically aufs supports any type of filesystem and block device for a
831+branch (actually there are some exceptions). But it is prohibited to add
832+a loopback mounted one whose backend file exists in a filesystem which is
833+already added to aufs. The reason is to protect aufs from a recursive
834+lookup. If it was allowed, the aufs lookup operation might re-enter a
835+lookup for the loopback mounted branch in the same context, and will
836+cause a deadlock.
837+
838+
839+Revalidate Dentry and UDBA (User's Direct Branch Access)
840+----------------------------------------------------------------------
841+Generally VFS helpers re-validate a dentry as a part of lookup.
842+0. digging down the directory hierarchy.
843+1. lock the parent dir by its i_mutex.
844+2. lookup the final (child) entry.
845+3. revalidate it.
846+4. call the actual operation (create, unlink, etc.)
847+5. unlock the parent dir
848+
849+If the filesystem implements its ->d_revalidate() (step 3), then it is
850+called. Actually aufs implements it and checks the dentry on a branch is
851+still valid.
852+But it is not enough. Because aufs has to release the lock for the
853+parent dir on a branch at the end of ->lookup() (step 2) and
854+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
855+held by VFS.
856+If the file on a branch is changed directly, eg. bypassing aufs, after
857+aufs released the lock, then the subsequent operation may cause
858+some unpleasant result.
859+
860+This situation is a result of VFS architecture, ->lookup() and
861+->d_revalidate() is separated. But I never say it is wrong. It is a good
862+design from VFS's point of view. It is just not suitable for sub-VFS
863+character in aufs.
864+
865+Aufs supports such case by three level of revalidation which is
866+selectable by user.
867+1. Simple Revalidate
868+ Addition to the native flow in VFS's, confirm the child-parent
869+ relationship on the branch just after locking the parent dir on the
870+ branch in the "actual operation" (step 4). When this validation
871+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
872+ checks the validation of the dentry on branches.
873+2. Monitor Changes Internally by Inotify
874+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
875+ the dentry on the branch, and returns EBUSY if it finds different
876+ dentry.
877+ Additionally, aufs sets the inotify watch for every dir on branches
878+ during it is in cache. When the event is notified, aufs registers a
879+ function to kernel 'events' thread by schedule_work(). And the
880+ function sets some special status to the cached aufs dentry and inode
881+ private data. If they are not cached, then aufs has nothing to
882+ do. When the same file is accessed through aufs (step 0-3) later,
883+ aufs will detect the status and refresh all necessary data.
884+ In this mode, aufs has to ignore the event which is fired by aufs
885+ itself.
886+3. No Extra Validation
887+ This is the simplest test and doesn't add any additional revalidation
888+ test, and skips the revalidation in step 4. It is useful and improves
889+ aufs performance when system surely hide the aufs branches from user,
890+ by over-mounting something (or another method).
891diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/04branch.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/04branch.txt
892--- linux-2.6.31/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 00:00:00.000000000 +0000
893+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/04branch.txt 2009-09-21 21:48:58.761610020 +0000
894@@ -0,0 +1,76 @@
895+
896+# Copyright (C) 2005-2009 Junjiro R. Okajima
897+#
898+# This program is free software; you can redistribute it and/or modify
899+# it under the terms of the GNU General Public License as published by
900+# the Free Software Foundation; either version 2 of the License, or
901+# (at your option) any later version.
902+#
903+# This program is distributed in the hope that it will be useful,
904+# but WITHOUT ANY WARRANTY; without even the implied warranty of
905+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
906+# GNU General Public License for more details.
907+#
908+# You should have received a copy of the GNU General Public License
909+# along with this program; if not, write to the Free Software
910+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
911+
912+Branch Manipulation
913+
914+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
915+and changing its permission/attribute, there are a lot of works to do.
916+
917+
918+Add a Branch
919+----------------------------------------------------------------------
920+o Confirm the adding dir exists outside of aufs, including loopback
921+ mount.
922+- and other various attributes...
923+o Initialize the xino file and whiteout bases if necessary.
924+ See struct.txt.
925+
926+o Check the owner/group/mode of the directory
927+ When the owner/group/mode of the adding directory differs from the
928+ existing branch, aufs issues a warning because it may impose a
929+ security risk.
930+ For example, when a upper writable branch has a world writable empty
931+ top directory, a malicious user can create any files on the writable
932+ branch directly, like copy-up and modify manually. If something like
933+ /etc/{passwd,shadow} exists on the lower readonly branch but not on the upper
934+ writable branch, and the writable branch is world-writable, then a
935+ malicious guy may create /etc/passwd on the writable branch directly
936+ and the infected file will be valid in aufs.
937+ I am afraid it can be a security issue, but nothing to do except
938+ producing a warning.
939+
940+
941+Delete a Branch
942+----------------------------------------------------------------------
943+o Confirm the deleting branch is not busy
944+ To be general, there is one merit to adopt "remount" interface to
945+ manipulate branches. It is to discard caches. At deleting a branch,
946+ aufs checks the still cached (and connected) dentries and inodes. If
947+ there are any, then they are all in-use. An inode without its
948+ corresponding dentry can be alive alone (for example, inotify case).
949+
950+ For the cached one, aufs checks whether the same named entry exists on
951+ other branches.
952+ If the cached one is a directory, because aufs provides a merged view
953+ to users, as long as one dir is left on any branch aufs can show the
954+ dir to users. In this case, the branch can be removed from aufs.
955+ Otherwise aufs rejects deleting the branch.
956+
957+ If any file on the deleting branch is opened by aufs, then aufs
958+ rejects deleting.
959+
960+
961+Modify the Permission of a Branch
962+----------------------------------------------------------------------
963+o Re-initialize or remove the xino file and whiteout bases if necessary.
964+ See struct.txt.
965+
966+o rw --> ro: Confirm the modifying branch is not busy
967+ Aufs rejects the request if any of these conditions are true.
968+ - a file on the branch is mmap-ed.
969+ - a regular file on the branch is opened for write and there is no
970+ same named entry on the upper branch.
971diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/05wbr_policy.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/05wbr_policy.txt
972--- linux-2.6.31/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 00:00:00.000000000 +0000
973+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/05wbr_policy.txt 2009-09-21 21:48:58.761610020 +0000
974@@ -0,0 +1,65 @@
975+
976+# Copyright (C) 2005-2009 Junjiro R. Okajima
977+#
978+# This program is free software; you can redistribute it and/or modify
979+# it under the terms of the GNU General Public License as published by
980+# the Free Software Foundation; either version 2 of the License, or
981+# (at your option) any later version.
982+#
983+# This program is distributed in the hope that it will be useful,
984+# but WITHOUT ANY WARRANTY; without even the implied warranty of
985+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
986+# GNU General Public License for more details.
987+#
988+# You should have received a copy of the GNU General Public License
989+# along with this program; if not, write to the Free Software
990+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
991+
992+Policies to Select One among Multiple Writable Branches
993+----------------------------------------------------------------------
994+When the number of writable branch is more than one, aufs has to decide
995+the target branch for file creation or copy-up. By default, the highest
996+writable branch which has the parent (or ancestor) dir of the target
997+file is chosen (top-down-parent policy).
998+By user's request, aufs implements some other policies to select the
999+writable branch, for file creation two policies, round-robin and
1000+most-free-space policies. For copy-up three policies, top-down-parent,
1001+bottom-up-parent and bottom-up policies.
1002+
1003+As expected, the round-robin policy selects the branch in circular. When
1004+you have two writable branches and creates 10 new files, 5 files will be
1005+created for each branch. mkdir(2) systemcall is an exception. When you
1006+create 10 new directories, all will be created on the same branch.
1007+And the most-free-space policy selects the one which has most free
1008+space among the writable branches. The amount of free space will be
1009+checked by aufs internally, and users can specify its time interval.
1010+
1011+The policies for copy-up are simpler,
1012+top-down-parent is equivalent to the same named one in create policy,
1013+bottom-up-parent selects the writable branch where the parent dir
1014+exists and the nearest upper one from the copyup-source,
1015+bottom-up selects the nearest upper writable branch from the
1016+copyup-source, regardless the existence of the parent dir.
1017+
1018+There are some rules or exceptions to apply these policies.
1019+- If there is a readonly branch above the policy-selected branch and
1020+ the parent dir is marked as opaque (a variation of whiteout), or the
1021+ target (creating) file is whiteout-ed on the upper readonly branch,
1022+ then the result of the policy is ignored and the target file will be
1023+ created on the nearest upper writable branch than the readonly branch.
1024+- If there is a writable branch above the policy-selected branch and
1025+ the parent dir is marked as opaque or the target file is whiteouted
1026+ on the branch, then the result of the policy is ignored and the target
1027+ file will be created on the highest one among the upper writable
1028+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1029+ it as usual.
1030+- link(2) and rename(2) systemcalls are exceptions in every policy.
1031+ They try selecting the branch where the source exists as possible
1032+ since copyup a large file will take long time. If it can't be,
1033+ ie. the branch where the source exists is readonly, then they will
1034+ follow the copyup policy.
1035+- There is an exception for rename(2) when the target exists.
1036+ If the rename target exists, aufs compares the index of the branches
1037+ where the source and the target exists and selects the higher
1038+ one. If the selected branch is readonly, then aufs follows the
1039+ copyup policy.
1040diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/06fmode_exec.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/06fmode_exec.txt
1041--- linux-2.6.31/Documentation/filesystems/aufs/design/06fmode_exec.txt 1970-01-01 00:00:00.000000000 +0000
1042+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/06fmode_exec.txt 2009-09-21 21:48:58.761610020 +0000
1043@@ -0,0 +1,33 @@
1044+
1045+# Copyright (C) 2005-2009 Junjiro R. Okajima
1046+#
1047+# This program is free software; you can redistribute it and/or modify
1048+# it under the terms of the GNU General Public License as published by
1049+# the Free Software Foundation; either version 2 of the License, or
1050+# (at your option) any later version.
1051+#
1052+# This program is distributed in the hope that it will be useful,
1053+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1054+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1055+# GNU General Public License for more details.
1056+#
1057+# You should have received a copy of the GNU General Public License
1058+# along with this program; if not, write to the Free Software
1059+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1060+
1061+FMODE_EXEC and deny_write()
1062+----------------------------------------------------------------------
1063+Generally Unix prevents an executing file from writing its filedata.
1064+In linux it is implemented by deny_write() and allow_write().
1065+When a file is executed by exec() family, open_exec() (and sys_uselib())
1066+they open the file and call deny_write(). If the file is aufs's virtual
1067+one, it has no meaning. The file which deny_write() is really necessary
1068+is the file on a branch. But the FMODE_EXEC flag is not passed to
1069+->open() operation. So aufs adopts a dirty trick.
1070+
1071+- in order to get FMODE_EXEC, aufs ->lookup() and ->d_revalidate() set
1072+ nd->intent.open.file->private_data to nd->intent.open.flags temporarily.
1073+- in aufs ->open(), when FMODE_EXEC is set in file->private_data, it
1074+ calls deny_write() for the file on a branch.
1075+- when the aufs file is released, allow_write() for the file on a branch
1076+ is called.
1077diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/07mmap.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/07mmap.txt
1078--- linux-2.6.31/Documentation/filesystems/aufs/design/07mmap.txt 1970-01-01 00:00:00.000000000 +0000
1079+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/07mmap.txt 2009-09-21 21:48:58.761610020 +0000
1080@@ -0,0 +1,53 @@
1081+
1082+# Copyright (C) 2005-2009 Junjiro R. Okajima
1083+#
1084+# This program is free software; you can redistribute it and/or modify
1085+# it under the terms of the GNU General Public License as published by
1086+# the Free Software Foundation; either version 2 of the License, or
1087+# (at your option) any later version.
1088+#
1089+# This program is distributed in the hope that it will be useful,
1090+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1091+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1092+# GNU General Public License for more details.
1093+#
1094+# You should have received a copy of the GNU General Public License
1095+# along with this program; if not, write to the Free Software
1096+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1097+
1098+mmap(2) -- File Memory Mapping
1099+----------------------------------------------------------------------
1100+In aufs, the file-mapped pages are shared between the file on a branch
1101+and the virtual one in aufs by overriding vm_operation, particularly
1102+->fault().
1103+
1104+In aufs_mmap(),
1105+- get and store vm_ops of the real file on a branch.
1106+- map the file of aufs by generic_file_mmap() and set aufs's vm
1107+ operations.
1108+
1109+In aufs_fault(),
1110+- get the file of aufs from the passed vma, sleep if needed.
1111+- get the real file on a branch from the aufs file.
1112+- a race may happen. for instance a multithreaded library. so some lock
1113+ is implemented.
1114+- call ->fault() in the previously stored vm_ops with setting the
1115+ real file on a branch to vm_file.
1116+- restore vm_file and wake_up if someone else got sleep.
1117+
1118+When a branch is added to or deleted from aufs, the same-named file may
1119+unveil and its contents will be replaced by the new one when a process
1120+read(2) through previously opened file.
1121+(Some users may not want to refresh the filedata. For such users, I
1122+have a plan to implement a mount option 'refrof' which decides to
1123+refresh the opened files or not. See plan.txt too.)
1124+In this case, an already mapped file will not be updated since the
1125+contents are a part of a process already and it should not be changed by
1126+aufs branch manipulation. (Even if MAP_SHARED is specified, currently).
1127+Of course, in case of the deleting branch has a busy file, it cannot be
1128+deleted from the union.
1129+
1130+In Unionfs, it took an approach which the memory pages mapped to
1131+filedata are copied from the lower (real) file into the Unionfs's
1132+virtual one and handles it by address_space operations. Recently Unionfs
1133+changed it to this approach which aufs adopted since Jul 2006.
1134diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/08export.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/08export.txt
1135--- linux-2.6.31/Documentation/filesystems/aufs/design/08export.txt 1970-01-01 00:00:00.000000000 +0000
1136+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/08export.txt 2009-09-21 21:48:58.761610020 +0000
1137@@ -0,0 +1,59 @@
1138+
1139+# Copyright (C) 2005-2009 Junjiro R. Okajima
1140+#
1141+# This program is free software; you can redistribute it and/or modify
1142+# it under the terms of the GNU General Public License as published by
1143+# the Free Software Foundation; either version 2 of the License, or
1144+# (at your option) any later version.
1145+#
1146+# This program is distributed in the hope that it will be useful,
1147+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1148+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1149+# GNU General Public License for more details.
1150+#
1151+# You should have received a copy of the GNU General Public License
1152+# along with this program; if not, write to the Free Software
1153+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1154+
1155+Export Aufs via NFS
1156+----------------------------------------------------------------------
1157+Here is an approach.
1158+- like xino/xib, add a new file 'xigen' which stores aufs inode
1159+ generation.
1160+- iget_locked(): initialize aufs inode generation for a new inode, and
1161+ store it in xigen file.
1162+- destroy_inode(): increment aufs inode generation and store it in xigen
1163+ file. it is necessary even if it is not unlinked, because any data of
1164+ inode may be changed by UDBA.
1165+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1166+ build file handle by
1167+ + branch id (4 bytes)
1168+ + superblock generation (4 bytes)
1169+ + inode number (4 or 8 bytes)
1170+ + parent dir inode number (4 or 8 bytes)
1171+ + inode generation (4 bytes)
1172+ + return value of exportfs_encode_fh() for the parent on a branch (4
1173+ bytes)
1174+ + file handle for a branch (by exportfs_encode_fh())
1175+- fh_to_dentry():
1176+ + find the index of a branch from its id in handle, and check it
1177+ still exists in aufs.
1178+ + 1st level: get the inode number from handle and search it in cache.
1179+ + 2nd level: if not found, get the parent inode number from handle and
1180+ search it in cache. and then open the parent dir, find the matching
1181+ inode number by vfs_readdir() and get its name, and call
1182+ lookup_one_len() for the target dentry.
1183+ + 3rd level: if the parent dir is not cached, call
1184+ exportfs_decode_fh() for a branch and get the parent on a branch,
1185+ build a pathname of it, convert it to a pathname in aufs, call
1186+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
1187+ the 2nd level.
1188+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1189+ for every branch, but not itself. to get this, (currently) aufs
1190+ searches in current->nsproxy->mnt_ns list. it may not be a good
1191+ idea, but I didn't get other approach.
1192+ + test the generation of the gotten inode.
1193+- every inode operation: they may get EBUSY due to UDBA. in this case,
1194+ convert it into ESTALE for NFSD.
1195+- readdir(): call lockdep_on/off() because filldir in NFSD calls
1196+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
1197diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/09shwh.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/09shwh.txt
1198--- linux-2.6.31/Documentation/filesystems/aufs/design/09shwh.txt 1970-01-01 00:00:00.000000000 +0000
1199+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/09shwh.txt 2009-09-21 21:48:58.761610020 +0000
1200@@ -0,0 +1,53 @@
1201+
1202+# Copyright (C) 2005-2009 Junjiro R. Okajima
1203+#
1204+# This program is free software; you can redistribute it and/or modify
1205+# it under the terms of the GNU General Public License as published by
1206+# the Free Software Foundation; either version 2 of the License, or
1207+# (at your option) any later version.
1208+#
1209+# This program is distributed in the hope that it will be useful,
1210+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1211+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212+# GNU General Public License for more details.
1213+#
1214+# You should have received a copy of the GNU General Public License
1215+# along with this program; if not, write to the Free Software
1216+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1217+
1218+Show Whiteout Mode (shwh)
1219+----------------------------------------------------------------------
1220+Generally aufs hides the name of whiteouts. But in some cases, to show
1221+them is very useful for users. For instance, creating a new middle layer
1222+(branch) by merging existing layers.
1223+
1224+(borrowing aufs1 HOW-TO from a user, Michael Towers)
1225+When you have three branches,
1226+- Bottom: 'system', squashfs (underlying base system), read-only
1227+- Middle: 'mods', squashfs, read-only
1228+- Top: 'overlay', ram (tmpfs), read-write
1229+
1230+The top layer is loaded at boot time and saved at shutdown, to preserve
1231+the changes made to the system during the session.
1232+When larger changes have been made, or smaller changes have accumulated,
1233+the size of the saved top layer data grows. At this point, it would be
1234+nice to be able to merge the two overlay branches ('mods' and 'overlay')
1235+and rewrite the 'mods' squashfs, clearing the top layer and thus
1236+restoring save and load speed.
1237+
1238+This merging is simplified by the use of another aufs mount, of just the
1239+two overlay branches using the 'shwh' option.
1240+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1241+ aufs /livesys/merge_union
1242+
1243+A merged view of these two branches is then available at
1244+/livesys/merge_union, and the new feature is that the whiteouts are
1245+visible!
1246+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1247+writing to all branches. Also the default mode for all branches is 'ro'.
1248+It is now possible to save the combined contents of the two overlay
1249+branches to a new squashfs, e.g.:
1250+# mksquashfs /livesys/merge_union /path/to/newmods.squash
1251+
1252+This new squashfs archive can be stored on the boot device and the
1253+initramfs will use it to replace the old one at the next boot.
1254diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/99plan.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/99plan.txt
1255--- linux-2.6.31/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 00:00:00.000000000 +0000
1256+++ aufs2-2.6.git/Documentation/filesystems/aufs/design/99plan.txt 2009-09-21 21:48:58.765776628 +0000
1257@@ -0,0 +1,96 @@
1258+
1259+# Copyright (C) 2005-2009 Junjiro R. Okajima
1260+#
1261+# This program is free software; you can redistribute it and/or modify
1262+# it under the terms of the GNU General Public License as published by
1263+# the Free Software Foundation; either version 2 of the License, or
1264+# (at your option) any later version.
1265+#
1266+# This program is distributed in the hope that it will be useful,
1267+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1268+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1269+# GNU General Public License for more details.
1270+#
1271+# You should have received a copy of the GNU General Public License
1272+# along with this program; if not, write to the Free Software
1273+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1274+
1275+Plan
1276+
1277+Restoring some features which were implemented in aufs1.
1278+They were dropped in aufs2 in order to make source files simpler and
1279+easier to be reviewed.
1280+
1281+
1282+Test Only the Highest One for the Directory Permission (dirperm1 option)
1283+----------------------------------------------------------------------
1284+Let's try case study.
1285+- aufs has two branches, upper readwrite and lower readonly.
1286+ /au = /rw + /ro
1287+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1288+- user invoked "chmod a+rx /au/dirA"
1289+- then "dirA" becomes world readable?
1290+
1291+In this case, /ro/dirA is still 0700 since it exists in a readonly branch,
1292+or it may be on a natively readonly filesystem. If aufs respects the lower
1293+branch, it should not respond to readdir requests from other users. But the
1294+user allowed it by chmod. Should aufs really reject showing the entries
1295+under /ro/dirA?
1296+
1297+To be honest, I don't have a good solution for this case. So I
1298+implemented the 'dirperm1' and 'nodirperm1' options in aufs1, and left it to
1299+users.
1300+When dirperm1 is specified, aufs checks only the highest one for the
1301+directory permission, and shows the entries. Otherwise, as usual, checks
1302+every dir existing on all branches and rejects the request.
1303+
1304+As a side effect, the dirperm1 option improves the performance of aufs
1305+because the number of permission checks is reduced.
1306+
1307+
1308+Being Another Aufs's Readonly Branch (robr)
1309+----------------------------------------------------------------------
1310+Aufs1 allows aufs to be another aufs's readonly branch.
1311+This feature was developed at a user's request. But it may not be used
1312+currently.
1313+
1314+
1315+Copy-up on Open (coo=)
1316+----------------------------------------------------------------------
1317+By default the internal copy-up is executed when it is really necessary.
1318+It is not done when a file is opened for writing, but when write(2) is
1319+done. Users who have many (over 100) branches want to know and analyse
1320+when and what file is copied-up. To insert a new upper branch which
1321+contains such files only may improve the performance of aufs.
1322+
1323+Aufs1 implemented "coo=none | leaf | all" option.
1324+
1325+
1326+Refresh the Opened File (refrof)
1327+----------------------------------------------------------------------
1328+This option is implemented in aufs1 but incomplete.
1329+
1330+When user reads from a file, he expects to get its latest filedata
1331+generally. If the file is removed and a new same named file is created,
1332+the content he gets is unchanged, ie. the unlinked filedata.
1333+
1334+Let's try case study again.
1335+- aufs has two branches.
1336+ /au = /rw + /ro
1337+- "fileA" exists under /ro, but not under /rw.
1338+- user opened "/au/fileA".
1339+- he or someone else inserts a branch (/new) between /rw and /ro.
1340+ /au = /rw + /new + /ro
1341+- the new branch has "fileA".
1342+- user reads from the opened "fileA"
1343+- which filedata should aufs return, from /ro or /new?
1344+
1345+Some people say it has to be "from /ro" and it is a semantics of Unix.
1346+The others say it should be "from /new" because the file is not removed
1347+and it is equivalent to the case of someone else modifies the file.
1348+
1349+Here again I don't have a best and final answer. I got an idea to
1350+implement the 'refrof' and 'norefrof' options. When 'refrof' (REFResh the
1351+Opened File) is specified (by default), aufs returns the filedata from
1352+/new.
1353+Otherwise from /ro.
1354diff -uprN -x .git linux-2.6.31/fs/Kconfig aufs2-2.6.git/fs/Kconfig
1355--- linux-2.6.31/fs/Kconfig 2009-09-09 22:13:59.000000000 +0000
1356+++ aufs2-2.6.git/fs/Kconfig 2009-09-21 21:49:23.315008102 +0000
1357@@ -187,6 +187,7 @@ source "fs/sysv/Kconfig"
1358 source "fs/ufs/Kconfig"
1359 source "fs/exofs/Kconfig"
1360 source "fs/nilfs2/Kconfig"
1361+source "fs/aufs/Kconfig"
1362
1363 endif # MISC_FILESYSTEMS
1364
1365diff -uprN -x .git linux-2.6.31/fs/Makefile aufs2-2.6.git/fs/Makefile
1366--- linux-2.6.31/fs/Makefile 2009-09-09 22:13:59.000000000 +0000
1367+++ aufs2-2.6.git/fs/Makefile 2009-09-14 14:52:35.065948532 +0000
1368@@ -124,3 +124,4 @@ obj-$(CONFIG_OCFS2_FS) += ocfs2/
1369 obj-$(CONFIG_BTRFS_FS) += btrfs/
1370 obj-$(CONFIG_GFS2_FS) += gfs2/
1371 obj-$(CONFIG_EXOFS_FS) += exofs/
1372+obj-$(CONFIG_AUFS_FS) += aufs/
1373diff -uprN -x .git linux-2.6.31/fs/aufs/Kconfig aufs2-2.6.git/fs/aufs/Kconfig
1374--- linux-2.6.31/fs/aufs/Kconfig 1970-01-01 00:00:00.000000000 +0000
1375+++ aufs2-2.6.git/fs/aufs/Kconfig 2009-09-21 21:49:23.374524295 +0000
1376@@ -0,0 +1,140 @@
1377+config AUFS_FS
1378+ bool "Aufs (Advanced multi layered unification filesystem) support"
1379+ depends on EXPERIMENTAL
1380+ help
1381+ Aufs is a stackable unification filesystem such as Unionfs,
1382+ which unifies several directories and provides a merged single
1383+ directory.
1384+ In the early days, aufs was entirely re-designed and
1385+ re-implemented Unionfs Version 1.x series. Introducing many
1386+ original ideas, approaches and improvements, it becomes totally
1387+ different from Unionfs while keeping the basic features.
1388+
1389+if AUFS_FS
1390+choice
1391+ prompt "Maximum number of branches"
1392+ default AUFS_BRANCH_MAX_127
1393+ help
1394+ Specifies the maximum number of branches (or member directories)
1395+ in a single aufs. The larger value consumes more system
1396+ resources and has a minor impact to performance.
1397+config AUFS_BRANCH_MAX_127
1398+ bool "127"
1399+ help
1400+ Specifies the maximum number of branches (or member directories)
1401+ in a single aufs. The larger value consumes more system
1402+ resources and has a minor impact to performance.
1403+config AUFS_BRANCH_MAX_511
1404+ bool "511"
1405+ help
1406+ Specifies the maximum number of branches (or member directories)
1407+ in a single aufs. The larger value consumes more system
1408+ resources and has a minor impact to performance.
1409+config AUFS_BRANCH_MAX_1023
1410+ bool "1023"
1411+ help
1412+ Specifies the maximum number of branches (or member directories)
1413+ in a single aufs. The larger value consumes more system
1414+ resources and has a minor impact to performance.
1415+config AUFS_BRANCH_MAX_32767
1416+ bool "32767"
1417+ help
1418+ Specifies the maximum number of branches (or member directories)
1419+ in a single aufs. The larger value consumes more system
1420+ resources and has a minor impact to performance.
1421+endchoice
1422+
1423+config AUFS_HINOTIFY
1424+ bool "Use inotify to detect actions on a branch"
1425+ depends on INOTIFY
1426+ help
1427+ If you want to modify files on branches directly, eg. bypassing aufs,
1428+ and want aufs to detect the changes of them fully, then enable this
1429+ option and use 'udba=inotify' mount option.
1430+ It will have a negative impact to the performance.
1431+ See detail in aufs.5.
1432+
1433+config AUFS_EXPORT
1434+ bool "NFS-exportable aufs"
1435+ depends on EXPORTFS = y
1436+ help
1437+ If you want to export your mounted aufs via NFS, then enable this
1438+ option. There are several requirements for this configuration.
1439+ See detail in aufs.5.
1440+
1441+config AUFS_RDU
1442+ bool "Readdir in userspace"
1443+ help
1444+ If you have millions of files under a single aufs directory, and
1445+ meet the out of memory, then enable this option and set
1446+ environment variables for your readdir(3).
1447+ See detail in aufs.5.
1448+
1449+config AUFS_SHWH
1450+ bool "Show whiteouts"
1451+ help
1452+ If you want to make the whiteouts in aufs visible, then enable
1453+ this option and specify 'shwh' mount option. Although it may
1454+ sound like philosophy or something, technically it
1455+ simply shows the name of the whiteout while keeping its behaviour.
1456+
1457+config AUFS_BR_RAMFS
1458+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
1459+ help
1460+ If you want to use ramfs as an aufs branch fs, then enable this
1461+ option. Generally tmpfs is recommended.
1462+ Aufs prohibits it from being a branch fs by default, because
1463+ initramfs generally becomes unusable after switch_root or something
1464+ similar. If you set initramfs as an aufs branch and boot your
1465+ system by switch_root, you will easily meet a problem since the
1466+ files in initramfs may be inaccessible.
1467+ Unless you are going to use ramfs as an aufs branch fs without
1468+ switch_root or something, leave it N.
1469+
1470+config AUFS_BR_FUSE
1471+ bool "Fuse fs as an aufs branch"
1472+ depends on FUSE_FS
1473+ select AUFS_POLL
1474+ help
1475+ If you want to use fuse-based userspace filesystem as an aufs
1476+ branch fs, then enable this option.
1477+ It implements the internal poll(2) operation which is
1478+ implemented by fuse only (currently).
1479+
1480+config AUFS_DEBUG
1481+ bool "Debug aufs"
1482+ help
1483+ Enable this to compile aufs internal debug code.
1484+ It will have a negative impact to the performance.
1485+
1486+config AUFS_MAGIC_SYSRQ
1487+ bool
1488+ depends on AUFS_DEBUG && MAGIC_SYSRQ
1489+ default y
1490+ help
1491+ Automatic configuration for internal use.
1492+ When aufs supports Magic SysRq, enabled automatically.
1493+
1494+config AUFS_BDEV_LOOP
1495+ bool
1496+ depends on BLK_DEV_LOOP
1497+ default y
1498+ help
1499+ Automatic configuration for internal use.
1500+ Convert =[ym] into =y.
1501+
1502+config AUFS_INO_T_64
1503+ bool
1504+ depends on AUFS_EXPORT
1505+ depends on 64BIT && !(ALPHA || S390)
1506+ default y
1507+ help
1508+ Automatic configuration for internal use.
1509+ /* typedef unsigned long/int __kernel_ino_t */
1510+ /* alpha and s390x are int */
1511+
1512+config AUFS_POLL
1513+ bool
1514+ help
1515+ Automatic configuration for internal use.
1516+endif
1517diff -uprN -x .git linux-2.6.31/fs/aufs/Makefile aufs2-2.6.git/fs/aufs/Makefile
1518--- linux-2.6.31/fs/aufs/Makefile 1970-01-01 00:00:00.000000000 +0000
1519+++ aufs2-2.6.git/fs/aufs/Makefile 2009-09-21 21:49:23.374524295 +0000
1520@@ -0,0 +1,23 @@
1521+
1522+include ${srctree}/${src}/magic.mk
1523+
1524+obj-$(CONFIG_AUFS_FS) += aufs.o
1525+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
1526+ wkq.o vfsub.o dcsub.o \
1527+ cpup.o whout.o plink.o wbr_policy.o \
1528+ dinfo.o dentry.o \
1529+ finfo.o file.o f_op.o \
1530+ dir.o vdir.o \
1531+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
1532+ ioctl.o
1533+
1534+# all are boolean
1535+aufs-$(CONFIG_SYSFS) += sysfs.o
1536+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
1537+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
1538+aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o
1539+aufs-$(CONFIG_AUFS_EXPORT) += export.o
1540+aufs-$(CONFIG_AUFS_POLL) += poll.o
1541+aufs-$(CONFIG_AUFS_RDU) += rdu.o
1542+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
1543+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
1544diff -uprN -x .git linux-2.6.31/fs/aufs/aufs.h aufs2-2.6.git/fs/aufs/aufs.h
1545--- linux-2.6.31/fs/aufs/aufs.h 1970-01-01 00:00:00.000000000 +0000
1546+++ aufs2-2.6.git/fs/aufs/aufs.h 2009-09-21 21:49:23.374524295 +0000
1facf9fc 1547@@ -0,0 +1,51 @@
1facf9fc 1548+/*
1549+ * Copyright (C) 2005-2009 Junjiro R. Okajima
1550+ *
1551+ * This program, aufs is free software; you can redistribute it and/or modify
1552+ * it under the terms of the GNU General Public License as published by
1553+ * the Free Software Foundation; either version 2 of the License, or
1554+ * (at your option) any later version.
dece6358
AM
1555+ *
1556+ * This program is distributed in the hope that it will be useful,
1557+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1558+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1559+ * GNU General Public License for more details.
1560+ *
1561+ * You should have received a copy of the GNU General Public License
1562+ * along with this program; if not, write to the Free Software
1563+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 1564+ */
1565+
1566+/*
1567+ * all header files
1568+ */
1569+
1570+#ifndef __AUFS_H__
1571+#define __AUFS_H__
1572+
1573+#ifdef __KERNEL__
1574+
1facf9fc 1575+#include "debug.h"
1576+
1577+#include "branch.h"
1578+#include "cpup.h"
1579+#include "dcsub.h"
1580+#include "dbgaufs.h"
1581+#include "dentry.h"
1582+#include "dir.h"
1583+#include "file.h"
1584+#include "fstype.h"
1585+#include "inode.h"
1586+#include "loop.h"
1587+#include "module.h"
1588+#include "opts.h"
1589+#include "rwsem.h"
1590+#include "spl.h"
1591+#include "super.h"
1592+#include "sysaufs.h"
1593+#include "vfsub.h"
1594+#include "whout.h"
1595+#include "wkq.h"
1596+
1597+#endif /* __KERNEL__ */
1598+#endif /* __AUFS_H__ */
1308ab2a 1599diff -uprN -x .git linux-2.6.31/fs/aufs/branch.c aufs2-2.6.git/fs/aufs/branch.c
1600--- linux-2.6.31/fs/aufs/branch.c 1970-01-01 00:00:00.000000000 +0000
1601+++ aufs2-2.6.git/fs/aufs/branch.c 2009-09-21 21:49:23.374524295 +0000
1602@@ -0,0 +1,970 @@
1facf9fc 1603+/*
1604+ * Copyright (C) 2005-2009 Junjiro R. Okajima
1605+ *
1606+ * This program, aufs is free software; you can redistribute it and/or modify
1607+ * it under the terms of the GNU General Public License as published by
1608+ * the Free Software Foundation; either version 2 of the License, or
1609+ * (at your option) any later version.
dece6358
AM
1610+ *
1611+ * This program is distributed in the hope that it will be useful,
1612+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1613+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1614+ * GNU General Public License for more details.
1615+ *
1616+ * You should have received a copy of the GNU General Public License
1617+ * along with this program; if not, write to the Free Software
1618+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 1619+ */
1620+
1621+/*
1622+ * branch management
1623+ */
1624+
dece6358 1625+#include <linux/file.h>
1facf9fc 1626+#include "aufs.h"
1627+
1628+/*
1629+ * free a single branch
+ *
+ * Releases what @br holds, in this order: fput() of the xino file when one
+ * is set, destruction of xi_nondir_mtx, dput() of every wbr_wh[] dentry and
+ * destruction of wbr_wh_rwsem when the branch has a wbr, mntput() of
+ * br_mnt, and finally kfree() of the wbr and of @br itself.
+ * br_count and wbr_wh_running are asserted (AuDebugOn) to be zero.
1630+ */
1631+static void au_br_do_free(struct au_branch *br)
1632+{
1633+ int i;
1634+ struct au_wbr *wbr;
1635+
1636+ if (br->br_xino.xi_file)
1637+ fput(br->br_xino.xi_file);
1638+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
1639+
1640+ AuDebugOn(atomic_read(&br->br_count));
1641+
1642+ wbr = br->br_wbr;
1643+ if (wbr) {
1644+ for (i = 0; i < AuBrWh_Last; i++)
1645+ dput(wbr->wbr_wh[i]);
1646+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 1647+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 1648+ }
1649+
1650+ /* some filesystems acquire extra lock */
1651+ lockdep_off();
1652+ mntput(br->br_mnt);
1653+ lockdep_on();
1654+
1655+ kfree(wbr); /* wbr may be NULL here (branch without a wbr) */
1656+ kfree(br);
1657+}
1658+
1659+/*
1660+ * frees all branches
+ *
+ * Calls au_br_do_free() on each of the si_bend + 1 entries of
+ * @sbinfo->si_branch, walking the array from index 0 upwards.
+ * The caller must hold si_rwsem for writing (AuRwMustWriteLock).
+ * The si_branch array itself and si_bend are not touched here.
1661+ */
1662+void au_br_free(struct au_sbinfo *sbinfo)
1663+{
1664+ aufs_bindex_t bmax;
1665+ struct au_branch **br;
1666+
dece6358
AM
1667+ AuRwMustWriteLock(&sbinfo->si_rwsem);
1668+
1facf9fc 1669+ bmax = sbinfo->si_bend + 1; /* number of branches */
1670+ br = sbinfo->si_branch;
1671+ while (bmax--)
1672+ au_br_do_free(*br++);
1673+}
1674+
1675+/*
1676+ * find the index of a branch which is specified by @br_id.
+ *
+ * Scans the branches of @sb linearly from index 0 to au_sbend(sb) and
+ * returns the first index whose au_sbr_id() equals @br_id, or -1 when no
+ * branch matches.
1677+ */
1678+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1679+{
1680+ aufs_bindex_t bindex, bend;
1681+
1682+ bend = au_sbend(sb);
1683+ for (bindex = 0; bindex <= bend; bindex++)
1684+ if (au_sbr_id(sb, bindex) == br_id)
1685+ return bindex;
1686+ return -1;
1687+}
1688+
1689+/* ---------------------------------------------------------------------- */
1690+
1691+/*
1692+ * add a branch
1693+ */
1694+/*
+ * test whether two branch root dentries overlap.
+ *
+ * Returns non-zero when @h_d1 and @h_d2 are the same dentry, when
+ * au_test_subdir() reports a relation in either direction, or when
+ * au_test_loopback_overlap() reports an overlap in either direction.
+ * Returns 0 otherwise.
+ */
1695+static int test_overlap(struct super_block *sb, struct dentry *h_d1,
1696+ struct dentry *h_d2)
1697+{
1698+ if (unlikely(h_d1 == h_d2))
1699+ return 1;
1700+ return !!au_test_subdir(h_d1, h_d2)
1701+ || !!au_test_subdir(h_d2, h_d1)
1702+ || au_test_loopback_overlap(sb, h_d1, h_d2)
1703+ || au_test_loopback_overlap(sb, h_d2, h_d1);
1704+}
1705+
1706+/*
1707+ * returns a newly allocated branch. @new_nbranch is a number of branches
1708+ * after adding a branch.
+ *
+ * A wbr is allocated too, but only when @perm is writable
+ * (au_br_writable()); otherwise br_wbr is NULL.
+ * Besides the allocation, the branch array of the sbinfo and the arrays of
+ * the root dentry info and root inode info are resized to @new_nbranch via
+ * au_sbr_realloc(), au_di_realloc() and au_ii_realloc().
+ * Apart from br_wbr, the fields of the returned branch are left
+ * uninitialized (kmalloc).
+ * Returns ERR_PTR(-ENOMEM) when any allocation or resize fails; whatever
+ * was allocated here is freed before returning the error.
1709+ */
1710+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1711+ int perm)
1712+{
1713+ struct au_branch *add_branch;
1714+ struct dentry *root;
1715+
1716+ root = sb->s_root;
1717+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1718+ if (unlikely(!add_branch))
1719+ goto out;
1720+
1721+ add_branch->br_wbr = NULL;
1722+ if (au_br_writable(perm)) {
1723+ /* may be freed separately at changing the branch permission */
1724+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1725+ GFP_NOFS);
1726+ if (unlikely(!add_branch->br_wbr))
1727+ goto out_br;
1728+ }
1729+
1730+ if (unlikely(au_sbr_realloc(au_sbi(sb), new_nbranch)
1731+ || au_di_realloc(au_di(root), new_nbranch)
1732+ || au_ii_realloc(au_ii(root->d_inode), new_nbranch)))
1733+ goto out_wbr;
1734+ return add_branch; /* success */
1735+
1736+ out_wbr:
1737+ kfree(add_branch->br_wbr);
1738+ out_br:
1739+ kfree(add_branch);
1740+ out:
1741+ return ERR_PTR(-ENOMEM);
1742+}
1743+
1744+/*
1745+ * test if the branch permission is legal or not.
+ *
+ * The only rule checked here: a writable branch permission (@brperm) is
+ * rejected when IS_RDONLY(@inode) is true. @path is used only for the
+ * error message.
+ * Returns 0 when the permission is acceptable, -EINVAL otherwise.
1746+ */
1747+static int test_br(struct inode *inode, int brperm, char *path)
1748+{
1749+ int err;
1750+
1751+ err = 0;
1752+ if (unlikely(au_br_writable(brperm) && IS_RDONLY(inode))) {
1753+ AuErr("write permission for readonly mount or inode, %s\n",
1754+ path);
1755+ err = -EINVAL;
1756+ }
1757+
1758+ return err;
1759+}
1760+
1761+/*
+ * test whether the branch described by @add can be added to @sb.
+ *
+ * The checks run in this order, and the first failure decides the result:
+ * - the path is already a branch of the root: 1 when @remount is set,
+ *   -EINVAL otherwise
+ * - the index or the resulting number of branches reaches
+ *   AUFS_BRANCH_MAX: -ENOSPC
+ * - add->bindex is negative or beyond bend + 1: -EDOM
+ * - the inode of the path has no links: -ENOENT
+ * - the path is inside this aufs itself, or on a filesystem reported by
+ *   au_test_fs_unsuppoted(): -EINVAL
+ * - test_br() rejects the permission: its error
+ * - when there is no branch yet, succeeds here without further tests
+ * - the path overlaps an existing branch (test_overlap()): -EINVAL
+ * Finally, when the WARN_PERM mount flag is set, a warning is printed if
+ * mode, uid or gid differ from those of the first branch's root; this does
+ * not change the result.
+ *
1762+ * returns:
1763+ * 0: success, the caller will add it
1764+ * plus: success, it is already unified, the caller should ignore it
1765+ * minus: error
1766+ */
1767+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1768+{
1769+ int err;
1770+ aufs_bindex_t bend, bindex;
1771+ struct dentry *root;
1772+ struct inode *inode, *h_inode;
1773+
1774+ root = sb->s_root;
1775+ bend = au_sbend(sb);
1776+ if (unlikely(bend >= 0
1777+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
1778+ err = 1;
1779+ if (!remount) {
1780+ err = -EINVAL;
1781+ AuErr("%s duplicated\n", add->pathname);
1782+ }
1783+ goto out;
1784+ }
1785+
1786+ err = -ENOSPC; /* -E2BIG; */
1787+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
1788+ || AUFS_BRANCH_MAX - 1 <= bend)) {
1789+ AuErr("number of branches exceeded %s\n", add->pathname);
1790+ goto out;
1791+ }
1792+
1793+ err = -EDOM;
1794+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
1795+ AuErr("bad index %d\n", add->bindex);
1796+ goto out;
1797+ }
1798+
1799+ inode = add->path.dentry->d_inode;
1800+ err = -ENOENT;
1801+ if (unlikely(!inode->i_nlink)) {
1802+ AuErr("no existence %s\n", add->pathname);
1803+ goto out;
1804+ }
1805+
1806+ err = -EINVAL;
1807+ if (unlikely(inode->i_sb == sb)) {
1808+ AuErr("%s must be outside\n", add->pathname);
1809+ goto out;
1810+ }
1811+
1812+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
1813+ AuErr("unsupported filesystem, %s (%s)\n",
1814+ add->pathname, au_sbtype(inode->i_sb));
1815+ goto out;
1816+ }
1817+
1818+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
1819+ if (unlikely(err))
1820+ goto out;
1821+
1822+ if (bend < 0)
1823+ return 0; /* success */
1824+
1825+ err = -EINVAL;
1826+ for (bindex = 0; bindex <= bend; bindex++)
1827+ if (unlikely(test_overlap(sb, add->path.dentry,
1828+ au_h_dptr(root, bindex)))) {
1829+ AuErr("%s is overlapped\n", add->pathname);
1830+ goto out;
1831+ }
1832+
1833+ err = 0;
1834+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
1835+ h_inode = au_h_dptr(root, 0)->d_inode;
1836+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
1837+ || h_inode->i_uid != inode->i_uid
1838+ || h_inode->i_gid != inode->i_gid)
1839+ AuWarn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
1840+ add->pathname,
1841+ inode->i_uid, inode->i_gid,
1842+ (inode->i_mode & S_IALLUGO),
1843+ h_inode->i_uid, h_inode->i_gid,
1844+ (h_inode->i_mode & S_IALLUGO));
1845+ }
1846+
1847+ out:
1848+ return err;
1849+}
1850+
1851+/*
1852+ * initialize or clean the whiteouts for an adding branch
+ *
+ * Runs au_wh_init() on @h_root with br_perm temporarily switched to
+ * @new_perm; the previous br_perm is restored before returning.
+ * Locking around au_wh_init():
+ * - when @br is already one of the branches of @sb (au_br_index() >= 0),
+ *   the hinode of the aufs root at that index is locked via
+ *   au_hin_imtx_lock_nested(); otherwise i_mutex of @h_root's inode is
+ *   taken directly
+ * - when the branch has a wbr, the call is additionally wrapped in
+ *   wbr_wh_write_lock()/wbr_wh_write_unlock()
+ * On success, when a wbr exists but @new_perm is not writable, the wbr is
+ * freed and br_wbr is set to NULL.
+ * Returns the result of au_wh_init().
1853+ */
1854+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
1855+ int new_perm, struct dentry *h_root)
1856+{
1857+ int err, old_perm;
1858+ aufs_bindex_t bindex;
1859+ struct mutex *h_mtx;
1860+ struct au_wbr *wbr;
1861+ struct au_hinode *hdir;
1862+
1863+ wbr = br->br_wbr;
1864+ old_perm = br->br_perm;
1865+ br->br_perm = new_perm;
1866+ hdir = NULL;
1867+ h_mtx = NULL;
1868+ bindex = au_br_index(sb, br->br_id);
1869+ if (0 <= bindex) {
1870+ hdir = au_hi(sb->s_root->d_inode, bindex);
1871+ au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1872+ } else {
1873+ h_mtx = &h_root->d_inode->i_mutex;
1874+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
1875+ }
1876+ if (!wbr)
1877+ err = au_wh_init(h_root, br, sb);
1878+ else {
1879+ wbr_wh_write_lock(wbr);
1880+ err = au_wh_init(h_root, br, sb);
1881+ wbr_wh_write_unlock(wbr);
1882+ }
1883+ if (hdir)
1884+ au_hin_imtx_unlock(hdir);
1885+ else
1886+ mutex_unlock(h_mtx);
1887+ br->br_perm = old_perm;
1888+
1889+ if (!err && wbr && !au_br_writable(new_perm)) {
1890+ kfree(wbr);
1891+ br->br_wbr = NULL;
1892+ }
1893+
1894+ return err;
1895+}
1896+/*
+ * initialize the wbr of a writable branch.
+ *
+ * Initializes wbr_wh_rwsem, zeroes the wbr_wh[] array, resets
+ * wbr_wh_running and wbr_bytes, and then calls au_br_init_wh() with
+ * @perm and the dentry of @path.
+ * @br->br_wbr is dereferenced without a NULL test, so the branch must
+ * have a wbr when this is called.
+ * Returns the result of au_br_init_wh().
+ */
1897+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
1898+ int perm, struct path *path)
1899+{
1900+ int err;
1901+ struct au_wbr *wbr;
1902+
1903+ wbr = br->br_wbr;
dece6358 1904+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 1905+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
1906+ atomic_set(&wbr->wbr_wh_running, 0);
1907+ wbr->wbr_bytes = 0;
1908+
1909+ err = au_br_init_wh(sb, br, perm, path->dentry);
1910+
1911+ return err;
1912+}
1913+
1914+/* initialize a new branch
+ *
+ * Fills in @br from @add: zeroed xino info with an initialized
+ * xi_nondir_mtx, br_perm, br_mnt, zeroed br_count and br_xino_running,
+ * br_xino_upper = AUFS_XINO_TRUNC_INIT and a fresh br_id from
+ * au_new_br_id().
+ * For a writable permission au_wbr_init() is called; when the XINO mount
+ * flag is set au_xino_br() is called, passing the xino file of branch 0.
+ * Only after both succeeded, sysaufs_br_init() runs and the reference on
+ * the vfsmount is taken with mntget().
+ * Returns 0 on success or the error of au_wbr_init()/au_xino_br().
+ *
+ * NOTE(review): on the error paths br_mnt is already set but mntget() has
+ * not been called. The caller visible in this file, au_br_add(), then calls
+ * au_br_do_free(), which does mntput(br->br_mnt). This looks like an
+ * unbalanced mntput() -- confirm, and consider clearing br_mnt on error.
+ */
1915+static int au_br_init(struct au_branch *br, struct super_block *sb,
1916+ struct au_opt_add *add)
1917+{
1918+ int err;
1919+
1920+ err = 0;
1921+ memset(&br->br_xino, 0, sizeof(br->br_xino));
1922+ mutex_init(&br->br_xino.xi_nondir_mtx);
1923+ br->br_perm = add->perm;
1924+ br->br_mnt = add->path.mnt; /* set first, mntget() later */
1925+ atomic_set(&br->br_count, 0);
1926+ br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
1927+ atomic_set(&br->br_xino_running, 0);
1928+ br->br_id = au_new_br_id(sb);
1929+
1930+ if (au_br_writable(add->perm)) {
1931+ err = au_wbr_init(br, sb, add->perm, &add->path);
1932+ if (unlikely(err))
1933+ goto out;
1934+ }
1935+
1936+ if (au_opt_test(au_mntflags(sb), XINO)) {
1937+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
1938+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
1939+ if (unlikely(err)) {
1940+ AuDebugOn(br->br_xino.xi_file);
1941+ goto out;
1942+ }
1943+ }
1944+
1945+ sysaufs_br_init(br);
1946+ mntget(add->path.mnt);
1947+
1948+ out:
1949+ return err;
1950+}
1951+/*
+ * insert @br into the branch array of @sbinfo at @bindex.
+ *
+ * The @amount entries starting at @bindex are shifted up by one slot with
+ * memmove(), @br is stored at @bindex and si_bend is incremented.
+ * @bend is the last branch index before the insertion; when it is
+ * negative si_bend is forced to 0.
+ * The array must already have room for the extra entry; no allocation is
+ * done here. The caller must hold si_rwsem for writing.
+ */
1952+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
1953+ struct au_branch *br, aufs_bindex_t bend,
1954+ aufs_bindex_t amount)
1955+{
1956+ struct au_branch **brp;
1957+
dece6358
AM
1958+ AuRwMustWriteLock(&sbinfo->si_rwsem);
1959+
1facf9fc 1960+ brp = sbinfo->si_branch + bindex;
1961+ memmove(brp + 1, brp, sizeof(*brp) * amount);
1962+ *brp = br;
1963+ sbinfo->si_bend++;
1964+ if (unlikely(bend < 0))
1965+ sbinfo->si_bend = 0;
1966+}
1967+/*
+ * open a slot at @bindex in the lower-dentry array of @dinfo.
+ *
+ * The @amount entries starting at @bindex are shifted up by one slot with
+ * memmove(), the freed slot is reset by au_h_dentry_init() and di_bend is
+ * incremented. @bend is the last branch index before the insertion; when
+ * it is negative di_bstart is set to 0.
+ * The array must already have room for the extra entry; no allocation is
+ * done here. The caller must hold di_rwsem for writing.
+ */
1968+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
1969+ aufs_bindex_t bend, aufs_bindex_t amount)
1970+{
1971+ struct au_hdentry *hdp;
1972+
1308ab2a 1973+ AuRwMustWriteLock(&dinfo->di_rwsem);
1974+
1facf9fc 1975+ hdp = dinfo->di_hdentry + bindex;
1976+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
1977+ au_h_dentry_init(hdp);
1978+ dinfo->di_bend++;
1979+ if (unlikely(bend < 0))
1980+ dinfo->di_bstart = 0;
1981+}
1982+/*
+ * open a slot at @bindex in the lower-inode array of @iinfo.
+ *
+ * The @amount entries starting at @bindex are shifted up by one slot with
+ * memmove(), the freed slot gets hi_inode = NULL and au_hin_init(.., NULL),
+ * and ii_bend is incremented. @bend is the last branch index before the
+ * insertion; when it is negative ii_bstart is set to 0.
+ * The array must already have room for the extra entry; no allocation is
+ * done here. The caller must hold ii_rwsem for writing.
+ */
1983+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
1984+ aufs_bindex_t bend, aufs_bindex_t amount)
1985+{
1986+ struct au_hinode *hip;
1987+
1308ab2a 1988+ AuRwMustWriteLock(&iinfo->ii_rwsem);
1989+
1facf9fc 1990+ hip = iinfo->ii_hinode + bindex;
1991+ memmove(hip + 1, hip, sizeof(*hip) * amount);
1992+ hip->hi_inode = NULL;
1993+ au_hin_init(hip, NULL);
1994+ iinfo->ii_bend++;
1995+ if (unlikely(bend < 0))
1996+ iinfo->ii_bstart = 0;
1997+}
1998+/*
+ * link an initialized branch @br into @sb at index @bindex.
+ *
+ * After au_plink_block_maintain(), a slot is opened at @bindex in the
+ * sbinfo branch array and in the root's lower-dentry and lower-inode
+ * arrays (au_br_do_add_brp/hdp/hip); "amount" is the number of existing
+ * entries at or after @bindex that have to move.
+ * The new slot of the root is then filled with a reference to @h_dentry
+ * (dget) and to its inode (au_igrab).
+ */
1999+static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2000+ struct au_branch *br, aufs_bindex_t bindex)
2001+{
2002+ struct dentry *root;
2003+ struct inode *root_inode;
2004+ aufs_bindex_t bend, amount;
2005+
2006+ root = sb->s_root;
2007+ root_inode = root->d_inode;
2008+ au_plink_block_maintain(sb);
2009+ bend = au_sbend(sb);
2010+ amount = bend + 1 - bindex;
2011+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2012+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2013+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2014+ au_set_h_dptr(root, bindex, dget(h_dentry));
2015+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2016+ /*flags*/0);
2017+}
2018+/*
+ * add the branch described by @add to @sb.
+ *
+ * Steps: test_add() validates the request (a positive result means the
+ * branch is already unified and is treated as success without doing
+ * anything), au_br_alloc() allocates the branch and grows the arrays for
+ * bend + 2 branches, au_br_init() initializes it, and au_br_do_add() links
+ * it in at add->bindex. On remount the sysaufs branch entries from that
+ * index on are removed before and re-added after the insertion.
+ * When the branch became the first one (index 0), the root inode
+ * attributes are copied up and s_maxbytes is taken from the branch's
+ * superblock; otherwise the root's link count is adjusted by
+ * au_add_nlink().
+ * The i_mutex of the root inode must be held (IMustLock).
+ * Returns 0 on success or a negative error.
+ */
2019+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2020+{
2021+ int err;
1facf9fc 2022+ aufs_bindex_t bend, add_bindex;
2023+ struct dentry *root, *h_dentry;
2024+ struct inode *root_inode;
2025+ struct au_branch *add_branch;
2026+
2027+ root = sb->s_root;
2028+ root_inode = root->d_inode;
2029+ IMustLock(root_inode);
2030+ err = test_add(sb, add, remount);
2031+ if (unlikely(err < 0))
2032+ goto out;
2033+ if (err) {
2034+ err = 0;
2035+ goto out; /* success */
2036+ }
2037+
2038+ bend = au_sbend(sb);
2039+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
2040+ err = PTR_ERR(add_branch);
2041+ if (IS_ERR(add_branch))
2042+ goto out;
2043+
2044+ err = au_br_init(add_branch, sb, add);
2045+ if (unlikely(err)) {
2046+ au_br_do_free(add_branch);
2047+ goto out;
2048+ }
2049+
2050+ add_bindex = add->bindex;
2051+ h_dentry = add->path.dentry;
2052+ if (!remount)
2053+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2054+ else {
2055+ sysaufs_brs_del(sb, add_bindex);
2056+ au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2057+ sysaufs_brs_add(sb, add_bindex);
2058+ }
2059+
1308ab2a 2060+ if (!add_bindex) {
1facf9fc 2061+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 2062+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2063+ } else
1facf9fc 2064+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 2065+
2066+ /*
2067+ * this test/set prevents aufs from handling unnecessary inotify events
2068+ * of xino files, in a case of re-adding a writable branch which was
2069+ * once detached from aufs.
2070+ */
2071+ if (au_xino_brid(sb) < 0
2072+ && au_br_writable(add_branch->br_perm)
2073+ && !au_test_fs_bad_xino(h_dentry->d_sb)
2074+ && add_branch->br_xino.xi_file
2075+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2076+ au_xino_brid_set(sb, add_branch->br_id);
2077+
2078+ out:
2079+ return err;
2080+}
2081+
2082+/* ---------------------------------------------------------------------- */
2083+
2084+/*
2085+ * delete a branch
2086+ */
2087+
2088+/* to show the line number, do not make it inlined function; emits AuInfo(fmt, args) only when @do_info is non-zero */
2089+#define AuVerbose(do_info, fmt, args...) do { \
2090+ if (do_info) \
2091+ AuInfo(fmt, ##args); \
2092+} while (0)
2093+
2094+/*
2095+ * test if the branch is deletable or not.
+ *
+ * Collects the dentries below @root with au_dcsub_pages() and examines
+ * each one under its dinfo read lock. A dentry whose dentry or inode
+ * generation differs from @sigen is first revalidated with
+ * au_reval_dpath() under the write lock, which is then downgraded.
+ * A dentry makes the branch busy when it has a lower dentry on @bindex and
+ * is either not a directory, or a directory that exists on that single
+ * branch only (bstart == bend).
+ * Returns 0 when nothing is busy, -EBUSY for the first busy dentry
+ * (reported through AuVerbose when the VERBOSE mount flag is set), or the
+ * error of au_dpages_init()/au_dcsub_pages()/au_reval_dpath().
2096+ */
2097+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
2098+ unsigned int sigen)
2099+{
2100+ int err, i, j, ndentry;
2101+ aufs_bindex_t bstart, bend;
2102+ unsigned char verbose;
2103+ struct au_dcsub_pages dpages;
2104+ struct au_dpage *dpage;
2105+ struct dentry *d;
2106+ struct inode *inode;
2107+
2108+ err = au_dpages_init(&dpages, GFP_NOFS);
2109+ if (unlikely(err))
2110+ goto out;
2111+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
2112+ if (unlikely(err))
2113+ goto out_dpages;
2114+
2115+ verbose = !!au_opt_test(au_mntflags(root->d_sb), VERBOSE);
2116+ for (i = 0; !err && i < dpages.ndpage; i++) {
2117+ dpage = dpages.dpages + i;
2118+ ndentry = dpage->ndentry;
2119+ for (j = 0; !err && j < ndentry; j++) {
2120+ d = dpage->dentries[j];
2121+ AuDebugOn(!atomic_read(&d->d_count));
2122+ inode = d->d_inode;
2123+ if (au_digen(d) == sigen && au_iigen(inode) == sigen)
2124+ di_read_lock_child(d, AuLock_IR);
2125+ else {
2126+ di_write_lock_child(d);
2127+ err = au_reval_dpath(d, sigen);
2128+ if (!err)
2129+ di_downgrade_lock(d, AuLock_IR);
2130+ else {
2131+ di_write_unlock(d);
2132+ break;
2133+ }
2134+ }
2135+
2136+ bstart = au_dbstart(d);
2137+ bend = au_dbend(d);
2138+ if (bstart <= bindex
2139+ && bindex <= bend
2140+ && au_h_dptr(d, bindex)
2141+ && (!S_ISDIR(inode->i_mode) || bstart == bend)) {
2142+ err = -EBUSY;
2143+ AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
2144+ }
2145+ di_read_unlock(d, AuLock_IR);
2146+ }
2147+ }
2148+
2149+ out_dpages:
2150+ au_dpages_free(&dpages);
2151+ out:
2152+ return err;
2153+}
2154+/*
+ * test if any dentry-less inode of @sb still uses the branch @bindex.
+ *
+ * Walks sb->s_inodes and skips every inode that has at least one dentry
+ * (non-empty i_dentry list). Each remaining inode is examined under its
+ * iinfo read lock; when its generation differs from @sigen it is first
+ * refreshed with au_refresh_hinode_self() under the write lock, which is
+ * then downgraded.
+ * An inode makes the branch busy when it has a lower inode on @bindex and
+ * is either not a directory, or a directory that exists on that single
+ * branch only (bstart == bend).
+ * Returns 0 when nothing is busy, -EBUSY for the first busy inode
+ * (reported through AuVerbose when the VERBOSE mount flag is set), or the
+ * error of au_refresh_hinode_self().
+ */
2155+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
2156+ unsigned int sigen)
2157+{
2158+ int err;
2159+ struct inode *i;
2160+ aufs_bindex_t bstart, bend;
2161+ unsigned char verbose;
2162+
2163+ err = 0;
2164+ verbose = !!au_opt_test(au_mntflags(sb), VERBOSE);
2165+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
2166+ AuDebugOn(!atomic_read(&i->i_count));
2167+ if (!list_empty(&i->i_dentry))
2168+ continue;
2169+
2170+ if (au_iigen(i) == sigen)
2171+ ii_read_lock_child(i);
2172+ else {
2173+ ii_write_lock_child(i);
2174+ err = au_refresh_hinode_self(i, /*do_attr*/1);
2175+ if (!err)
2176+ ii_downgrade_lock(i);
2177+ else {
2178+ ii_write_unlock(i);
2179+ break;
2180+ }
2181+ }
2182+
2183+ bstart = au_ibstart(i);
2184+ bend = au_ibend(i);
2185+ if (bstart <= bindex
2186+ && bindex <= bend
2187+ && au_h_iptr(i, bindex)
2188+ && (!S_ISDIR(i->i_mode) || bstart == bend)) {
2189+ err = -EBUSY;
2190+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
2191+ ii_read_unlock(i);
2192+ break;
2193+ }
2194+ ii_read_unlock(i);
2195+ }
2196+
2197+ return err;
2198+}
2199+/*
+ * test if anything below @root still uses the branch @bindex.
+ *
+ * Runs test_dentry_busy() and, when that finds nothing, test_inode_busy(),
+ * both with the current generation of the superblock.
+ * The dinfo write lock of @root is released for the duration of the tests
+ * and re-acquired (as a child lock) before returning, so the caller must
+ * hold it on entry and still holds it on return.
+ * Returns 0 when the branch is not busy, otherwise the error of the
+ * failing test (-EBUSY when something is in use).
+ */
2200+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex)
2201+{
2202+ int err;
2203+ unsigned int sigen;
2204+
2205+ sigen = au_sigen(root->d_sb);
2206+ DiMustNoWaiters(root);
2207+ IiMustNoWaiters(root->d_inode);
2208+ di_write_unlock(root);
2209+ err = test_dentry_busy(root, bindex, sigen);
2210+ if (!err)
2211+ err = test_inode_busy(root->d_sb, bindex, sigen);
2212+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2213+
2214+ return err;
2215+}
2216+/*
+ * remove the entry at @bindex from the branch array of @sbinfo.
+ *
+ * Entries above @bindex are moved down by one slot, the now unused last
+ * slot (index @bend, the last index before the removal) is set to NULL and
+ * si_bend is decremented.
+ * The array is then shrunk to @bend entries with krealloc(); a failure of
+ * that call is ignored and the old, larger array stays in use.
+ * The caller must hold si_rwsem for writing.
+ */
2217+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2218+ const aufs_bindex_t bindex,
2219+ const aufs_bindex_t bend)
2220+{
2221+ struct au_branch **brp, **p;
2222+
dece6358
AM
2223+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2224+
1facf9fc 2225+ brp = sbinfo->si_branch + bindex;
2226+ if (bindex < bend)
2227+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2228+ sbinfo->si_branch[0 + bend] = NULL;
2229+ sbinfo->si_bend--;
2230+
2231+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, GFP_NOFS);
2232+ if (p)
2233+ sbinfo->si_branch = p;
2234+}
2235+/*
+ * remove the entry at @bindex from the lower-dentry array of @dinfo.
+ *
+ * Entries above @bindex are moved down by one slot, hd_dentry of the now
+ * unused last slot (index @bend, the last index before the removal) is set
+ * to NULL and di_bend is decremented.
+ * The array is then shrunk to @bend entries with krealloc(); a failure of
+ * that call is ignored and the old, larger array stays in use.
+ * The caller must hold di_rwsem for writing.
+ */
2236+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2237+ const aufs_bindex_t bend)
2238+{
2239+ struct au_hdentry *hdp, *p;
2240+
1308ab2a 2241+ AuRwMustWriteLock(&dinfo->di_rwsem);
2242+
1facf9fc 2243+ hdp = dinfo->di_hdentry + bindex;
2244+ if (bindex < bend)
2245+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bend - bindex));
2246+ dinfo->di_hdentry[0 + bend].hd_dentry = NULL;
2247+ dinfo->di_bend--;
2248+
2249+ p = krealloc(dinfo->di_hdentry, sizeof(*p) * bend, GFP_NOFS);
2250+ if (p)
2251+ dinfo->di_hdentry = p;
2252+}
2253+/*
+ * remove the entry at @bindex from the lower-inode array of @iinfo.
+ *
+ * Entries above @bindex are moved down by one slot, the now unused last
+ * slot (index @bend, the last index before the removal) gets
+ * hi_inode = NULL and au_hin_init(.., NULL), and ii_bend is decremented.
+ * The array is then shrunk to @bend entries with krealloc(); a failure of
+ * that call is ignored and the old, larger array stays in use.
+ * The caller must hold ii_rwsem for writing.
+ */
2254+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2255+ const aufs_bindex_t bend)
2256+{
2257+ struct au_hinode *hip, *p;
2258+
1308ab2a 2259+ AuRwMustWriteLock(&iinfo->ii_rwsem);
2260+
1facf9fc 2261+ hip = iinfo->ii_hinode + bindex;
2262+ if (bindex < bend)
2263+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2264+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
2265+ au_hin_init(iinfo->ii_hinode + bend, NULL);
2266+ iinfo->ii_bend--;
2267+
2268+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, GFP_NOFS);
2269+ if (p)
2270+ iinfo->ii_hinode = p;
2271+}
2272+/* detach branch @bindex (@br) from @sb: put the root's lower dentry/inode, hand @br to au_br_do_free(), compact the arrays */
2273+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2274+ struct au_branch *br)
2275+{
2276+ aufs_bindex_t bend;
2277+ struct au_sbinfo *sbinfo;
2278+ struct dentry *root;
2279+ struct inode *inode;
2280+
dece6358
AM
2281+ SiMustWriteLock(sb);
2282+
1facf9fc 2283+ root = sb->s_root;
2284+ inode = root->d_inode;
2285+ au_plink_block_maintain(sb);
2286+ sbinfo = au_sbi(sb);
2287+ bend = sbinfo->si_bend;
2288+ /* @br must not be used by the caller once au_br_do_free() has run */
2289+ dput(au_h_dptr(root, bindex));
2290+ au_hiput(au_hi(inode, bindex));
2291+ au_br_do_free(br);
2292+ /* bend was sampled before removal; each helper decrements its own end index */
2293+ au_br_do_del_brp(sbinfo, bindex, bend);
2294+ au_br_do_del_hdp(au_di(root), bindex, bend);
2295+ au_br_do_del_hip(au_ii(inode), bindex, bend);
2296+}
2297+
2298+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2299+{
2300+ int err, rerr, i;
2301+ unsigned int mnt_flags;
2302+ aufs_bindex_t bindex, bend, br_id;
2303+ unsigned char do_wh, verbose;
2304+ struct au_branch *br;
2305+ struct au_wbr *wbr;
2306+
2307+ err = 0;
2308+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2309+ if (bindex < 0) {
2310+ if (remount)
2311+ goto out; /* success */
2312+ err = -ENOENT;
2313+ AuErr("%s no such branch\n", del->pathname);
2314+ goto out;
2315+ }
2316+ AuDbg("bindex b%d\n", bindex);
2317+
2318+ err = -EBUSY;
2319+ mnt_flags = au_mntflags(sb);
2320+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
2321+ bend = au_sbend(sb);
2322+ if (unlikely(!bend)) {
2323+ AuVerbose(verbose, "no more branches left\n");
2324+ goto out;
2325+ }
2326+ br = au_sbr(sb, bindex);
2327+ i = atomic_read(&br->br_count);
2328+ if (unlikely(i)) {
2329+ AuVerbose(verbose, "%d file(s) opened\n", i);
2330+ goto out;
2331+ }
2332+
2333+ wbr = br->br_wbr;
2334+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2335+ if (do_wh) {
1308ab2a 2336+ /* instead of WbrWhMustWriteLock(wbr) */
2337+ SiMustWriteLock(sb);
1facf9fc 2338+ for (i = 0; i < AuBrWh_Last; i++) {
2339+ dput(wbr->wbr_wh[i]);
2340+ wbr->wbr_wh[i] = NULL;
2341+ }
2342+ }
2343+
2344+ err = test_children_busy(sb->s_root, bindex);
2345+ if (unlikely(err)) {
2346+ if (do_wh)
2347+ goto out_wh;
2348+ goto out;
2349+ }
2350+
2351+ err = 0;
2352+ br_id = br->br_id;
2353+ if (!remount)
2354+ au_br_do_del(sb, bindex, br);
2355+ else {
2356+ sysaufs_brs_del(sb, bindex);
2357+ au_br_do_del(sb, bindex, br);
2358+ sysaufs_brs_add(sb, bindex);
2359+ }
2360+
1308ab2a 2361+ if (!bindex) {
1facf9fc 2362+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
1308ab2a 2363+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2364+ } else
1facf9fc 2365+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2366+ if (au_opt_test(mnt_flags, PLINK))
2367+ au_plink_half_refresh(sb, br_id);
2368+
1facf9fc 2369+ if (au_xino_brid(sb) == br->br_id)
2370+ au_xino_brid_set(sb, -1);
2371+ goto out; /* success */
2372+
2373+ out_wh:
2374+ /* revert */
2375+ rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2376+ if (rerr)
2377+ AuWarn("failed re-creating base whiteout, %s. (%d)\n",
2378+ del->pathname, rerr);
2379+ out:
2380+ return err;
2381+}
2382+
2383+/* ---------------------------------------------------------------------- */
2384+
2385+/*
2386+ * change a branch permission
2387+ */
2388+
dece6358
AM
2389+static void au_warn_ima(void)
2390+{
2391+#ifdef CONFIG_IMA
1308ab2a 2392+ /* warn on RW -> RO: IMA doesn't support mark_files_ro(); the message ends with a newline like the other AuWarn() calls */
dece6358
AM
2393+ AuWarn("RW -> RO makes IMA to produce wrong message\n");
2394+#endif
2395+}
2396+/* nonzero when permission @a is whiteout-able (au_br_whable) but permission @b is not */
1facf9fc 2397+static int do_need_sigen_inc(int a, int b)
2398+{
2399+ return au_br_whable(a) && !au_br_whable(b);
2400+}
2401+/* nonzero when whiteout-ability differs between @old and @new, in either direction */
2402+static int need_sigen_inc(int old, int new)
2403+{
2404+ return do_need_sigen_inc(old, new)
2405+ || do_need_sigen_inc(new, old);
2406+}
2407+/* clear FMODE_WRITE on the lower files of regular files opened for write on branch @bindex; -EBUSY if a scanned file is mmapped, -ENOMEM on allocation failure */
2408+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2409+{
2410+ int err;
2411+ unsigned long n, ul, bytes, files;
2412+ aufs_bindex_t bstart;
2413+ struct file *file, *hf, **a;
2414+ const int step_bytes = 1024, /* memory allocation unit */
2415+ step_files = step_bytes / sizeof(*a);
2416+ /* pass 1: collect the lower files into a[], which grows by step_bytes at a time */
2417+ err = -ENOMEM;
2418+ n = 0;
2419+ bytes = step_bytes;
2420+ files = step_files;
2421+ a = kmalloc(bytes, GFP_NOFS);
2422+ if (unlikely(!a))
2423+ goto out;
2424+
2425+ /* no need file_list_lock() since sbinfo is locked? deferred? */
2426+ list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
2427+ if (special_file(file->f_dentry->d_inode->i_mode))
2428+ continue;
2429+
2430+ AuDbg("%.*s\n", AuDLNPair(file->f_dentry));
2431+ fi_read_lock(file);
2432+ if (unlikely(au_test_mmapped(file))) {
2433+ err = -EBUSY;
2434+ FiMustNoWaiters(file);
2435+ fi_read_unlock(file);
2436+ goto out_free;
2437+ }
2438+ /* only regular files opened for write whose start branch is @bindex are collected */
2439+ bstart = au_fbstart(file);
2440+ if (!S_ISREG(file->f_dentry->d_inode->i_mode)
2441+ || !(file->f_mode & FMODE_WRITE)
2442+ || bstart != bindex) {
2443+ FiMustNoWaiters(file);
2444+ fi_read_unlock(file);
2445+ continue;
2446+ }
2447+
2448+ hf = au_h_fptr(file, bstart);
2449+ FiMustNoWaiters(file);
2450+ fi_read_unlock(file);
2451+ /* store hf, growing a[] by one step first when it is full */
2452+ if (n < files)
2453+ a[n++] = hf;
2454+ else {
2455+ void *p;
2456+
2457+ err = -ENOMEM;
2458+ bytes += step_bytes;
2459+ files += step_files;
2460+ p = krealloc(a, bytes, GFP_NOFS);
2461+ if (p) {
2462+ a = p;
2463+ a[n++] = hf;
2464+ } else
2465+ goto out_free;
2466+ }
2467+ }
2468+ /* pass 2: drop write access on every collected lower file */
2469+ err = 0;
dece6358
AM
2470+ if (n)
2471+ au_warn_ima();
1facf9fc 2472+ for (ul = 0; ul < n; ul++) {
2473+ /* todo: already flushed? */
2474+ /* cf. fs/super.c:mark_files_ro() */
2475+ hf = a[ul];
2476+ hf->f_mode &= ~FMODE_WRITE;
2477+ if (!file_check_writeable(hf)) {
2478+ file_release_write(hf);
2479+ mnt_drop_write(hf->f_vfsmnt);
2480+ }
2481+ }
2482+
2483+ out_free:
2484+ kfree(a);
2485+ out:
2486+ return err;
2487+}
2488+/* change the permission of the branch rooted at mod->h_root to mod->perm; ORs into *do_update when whiteout-ability changes */
2489+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2490+ int *do_update)
2491+{
2492+ int err, rerr;
2493+ aufs_bindex_t bindex;
1308ab2a 2494+ struct path path;
1facf9fc 2495+ struct dentry *root;
2496+ struct au_branch *br;
2497+
2498+ root = sb->s_root;
2499+ au_plink_block_maintain(sb);
2500+ bindex = au_find_dbindex(root, mod->h_root);
2501+ if (bindex < 0) {
2502+ if (remount)
2503+ return 0; /* success */
2504+ err = -ENOENT;
2505+ AuErr("%s no such branch\n", mod->path);
2506+ goto out;
2507+ }
2508+ AuDbg("bindex b%d\n", bindex);
2509+
2510+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2511+ if (unlikely(err))
2512+ goto out;
2513+ /* nothing to do when the permission is unchanged */
2514+ br = au_sbr(sb, bindex);
2515+ if (br->br_perm == mod->perm)
2516+ return 0; /* success */
2517+ /* currently writable: redo the whiteout setup for the new perm, and on rw --> ro also demote open files */
2518+ if (au_br_writable(br->br_perm)) {
2519+ /* remove whiteout base */
2520+ err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2521+ if (unlikely(err))
2522+ goto out;
2523+
2524+ if (!au_br_writable(mod->perm)) {
2525+ /* rw --> ro, file might be mmapped */
2526+ DiMustNoWaiters(root);
2527+ IiMustNoWaiters(root->d_inode);
2528+ di_write_unlock(root);
2529+ err = au_br_mod_files_ro(sb, bindex);
2530+ /* aufs_write_lock() calls ..._child() */
2531+ di_write_lock_child(root);
2532+ /* on failure, try to rebuild br_wbr for the old perm; if that fails as well the new perm is kept */
2533+ if (unlikely(err)) {
2534+ rerr = -ENOMEM;
2535+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2536+ GFP_NOFS);
1308ab2a 2537+ if (br->br_wbr) {
2538+ path.mnt = br->br_mnt;
2539+ path.dentry = mod->h_root;
2540+ rerr = au_wbr_init(br, sb, br->br_perm,
2541+ &path);
2542+ }
1facf9fc 2543+ if (unlikely(rerr)) {
2544+ AuIOErr("nested error %d (%d)\n",
2545+ rerr, err);
2546+ br->br_perm = mod->perm;
2547+ }
2548+ }
2549+ }
2550+ } else if (au_br_writable(mod->perm)) {
2551+ /* ro --> rw */
2552+ err = -ENOMEM;
2553+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2554+ if (br->br_wbr) {
1308ab2a 2555+ path.mnt = br->br_mnt;
2556+ path.dentry = mod->h_root;
1facf9fc 2557+ err = au_wbr_init(br, sb, mod->perm, &path);
2558+ if (unlikely(err)) {
2559+ kfree(br->br_wbr);
2560+ br->br_wbr = NULL;
2561+ }
2562+ }
2563+ }
2564+ /* commit the new permission only on success */
2565+ if (!err) {
2566+ *do_update |= need_sigen_inc(br->br_perm, mod->perm);
2567+ br->br_perm = mod->perm;
2568+ }
2569+
2570+ out:
2571+ return err;
2572+}
1308ab2a 2573diff -uprN -x .git linux-2.6.31/fs/aufs/branch.h aufs2-2.6.git/fs/aufs/branch.h
2574--- linux-2.6.31/fs/aufs/branch.h 1970-01-01 00:00:00.000000000 +0000
2575+++ aufs2-2.6.git/fs/aufs/branch.h 2009-09-21 21:49:23.377863284 +0000
dece6358 2576@@ -0,0 +1,219 @@
1facf9fc 2577+/*
2578+ * Copyright (C) 2005-2009 Junjiro R. Okajima
2579+ *
2580+ * This program, aufs is free software; you can redistribute it and/or modify
2581+ * it under the terms of the GNU General Public License as published by
2582+ * the Free Software Foundation; either version 2 of the License, or
2583+ * (at your option) any later version.
dece6358
AM
2584+ *
2585+ * This program is distributed in the hope that it will be useful,
2586+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2587+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2588+ * GNU General Public License for more details.
2589+ *
2590+ * You should have received a copy of the GNU General Public License
2591+ * along with this program; if not, write to the Free Software
2592+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2593+ */
2594+
2595+/*
2596+ * branch filesystems and xino for them
2597+ */
2598+
2599+#ifndef __AUFS_BRANCH_H__
2600+#define __AUFS_BRANCH_H__
2601+
2602+#ifdef __KERNEL__
2603+
2604+#include <linux/fs.h>
2605+#include <linux/mount.h>
1facf9fc 2606+#include <linux/aufs_type.h>
2607+#include "rwsem.h"
2608+#include "super.h"
2609+
2610+/* ---------------------------------------------------------------------- */
2611+
2612+/* a xino file; embedded in struct au_branch as br_xino */
2613+struct au_xino_file {
2614+ struct file *xi_file;
2615+ struct mutex xi_nondir_mtx;
2616+
2617+ /* todo: make xino files an array to support huge inode number */
2618+ /* NOTE(review): xi_dbgaufs is presumably this file's debugfs entry — confirm against the debugfs code */
2619+#ifdef CONFIG_DEBUG_FS
2620+ struct dentry *xi_dbgaufs;
2621+#endif
2622+};
2623+
2624+/* members for writable branch only */
2625+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
2626+struct au_wbr {
dece6358 2627+ struct au_rwsem wbr_wh_rwsem; /* locked through the wbr_wh_*() helpers */
1facf9fc 2628+ struct dentry *wbr_wh[AuBrWh_Last]; /* indexed by AuBrWh_*; entries may be NULL */
2629+ atomic_t wbr_wh_running;
2630+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
2631+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
2632+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
2633+
2634+ /* mfs mode */
2635+ unsigned long long wbr_bytes;
2636+};
2637+
2638+/* protected by superblock rwsem */
2639+struct au_branch {
2640+ struct au_xino_file br_xino;
2641+
2642+ aufs_bindex_t br_id;
2643+
2644+ int br_perm; /* one of AuBrPerm_* */
2645+ struct vfsmount *br_mnt;
2646+ atomic_t br_count; /* decremented by au_sbr_put() */
2647+
2648+ struct au_wbr *br_wbr; /* members for a writable branch; may be NULL */
2649+
2650+ /* xino truncation */
2651+ blkcnt_t br_xino_upper; /* watermark in blocks */
2652+ atomic_t br_xino_running;
2653+
2654+#ifdef CONFIG_SYSFS
2655+ /* an entry under sysfs per mount-point */
2656+ char br_name[8];
2657+ struct attribute br_attr;
2658+#endif
2659+};
2660+
2661+/* ---------------------------------------------------------------------- */
2662+
2663+/* branch permission and attribute; stored in au_branch.br_perm, classified by au_br_writable() and au_br_whable() */
2664+enum {
2665+ AuBrPerm_RW, /* writable, linkable wh */
2666+ AuBrPerm_RO, /* readonly, no wh */
2667+ AuBrPerm_RR, /* natively readonly, no wh */
2668+
2669+ AuBrPerm_RWNoLinkWH, /* writable, un-linkable whiteouts */
2670+
2671+ AuBrPerm_ROWH, /* not writable, whiteout-able */
2672+ AuBrPerm_RRWH, /* not writable, whiteout-able */
2673+
2674+ AuBrPerm_Last /* number of permission values above */
2675+};
2676+/* nonzero when @brperm is AuBrPerm_RW or AuBrPerm_RWNoLinkWH */
2677+static inline int au_br_writable(int brperm)
2678+{
2679+ return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
2680+}
2681+/* nonzero when @brperm is AuBrPerm_RW, AuBrPerm_ROWH or AuBrPerm_RRWH (note: not RWNoLinkWH) */
2682+static inline int au_br_whable(int brperm)
2683+{
2684+ return brperm == AuBrPerm_RW
2685+ || brperm == AuBrPerm_ROWH
2686+ || brperm == AuBrPerm_RRWH;
2687+}
2688+/* -EROFS when the branch's superblock is MS_RDONLY or its permission is not writable, otherwise 0 */
2689+static inline int au_br_rdonly(struct au_branch *br)
2690+{
2691+ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
2692+ || !au_br_writable(br->br_perm))
2693+ ? -EROFS : 0;
2694+}
2695+/* with CONFIG_AUFS_HINOTIFY: nonzero unless @brperm is AuBrPerm_RR or AuBrPerm_RRWH; without it: always 0 */
2696+static inline int au_br_hinotifyable(int brperm __maybe_unused)
2697+{
2698+#ifdef CONFIG_AUFS_HINOTIFY
2699+ return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH;
2700+#else
2701+ return 0;
2702+#endif
2703+}
2704+
2705+/* ---------------------------------------------------------------------- */
2706+
2707+/* branch.c */
2708+struct au_sbinfo;
2709+void au_br_free(struct au_sbinfo *sinfo);
2710+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
2711+struct au_opt_add;
2712+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
2713+struct au_opt_del;
2714+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
2715+struct au_opt_mod;
2716+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2717+ int *do_update);
2718+
2719+/* xino.c */
2720+static const loff_t au_loff_max = LLONG_MAX;
2721+
2722+int au_xib_trunc(struct super_block *sb);
2723+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
2724+ loff_t *pos);
2725+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
2726+ loff_t *pos);
2727+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
2728+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
2729+ino_t au_xino_new_ino(struct super_block *sb);
2730+int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2731+ ino_t ino);
2732+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2733+ ino_t ino);
2734+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2735+ ino_t *ino);
2736+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
2737+ struct file *base_file, int do_test);
2738+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
2739+
2740+struct au_opt_xino;
2741+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
2742+void au_xino_clr(struct super_block *sb);
2743+struct file *au_xino_def(struct super_block *sb);
2744+int au_xino_path(struct seq_file *seq, struct file *file);
2745+
2746+/* ---------------------------------------------------------------------- */
2747+
2748+/* Superblock to branch: br_id of the branch at @bindex, looked up with au_sbr() */
2749+static inline
2750+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
2751+{
2752+ return au_sbr(sb, bindex)->br_id;
2753+}
2754+/* br_mnt (vfsmount) of the branch at @bindex */
2755+static inline
2756+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
2757+{
2758+ return au_sbr(sb, bindex)->br_mnt;
2759+}
2760+/* superblock of the mount (mnt_sb) behind the branch at @bindex */
2761+static inline
2762+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
2763+{
2764+ return au_sbr_mnt(sb, bindex)->mnt_sb;
2765+}
2766+/* atomically decrement br_count of the branch at @bindex; the new value is discarded */
2767+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
2768+{
dece6358 2769+ atomic_dec_return(&au_sbr(sb, bindex)->br_count);
1facf9fc 2770+}
2771+/* br_perm (an AuBrPerm_* value) of the branch at @bindex */
2772+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
2773+{
2774+ return au_sbr(sb, bindex)->br_perm;
2775+}
2776+/* au_br_whable() applied to the permission of the branch at @bindex */
2777+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
2778+{
2779+ return au_br_whable(au_sbr_perm(sb, bindex));
2780+}
2781+
2782+/* ---------------------------------------------------------------------- */
2783+
2784+/*
2785+ * wbr_wh_read_lock, wbr_wh_write_lock
2786+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
2787+ */
2788+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
2789+
dece6358
AM
2790+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
2791+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
2792+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
2793+
1facf9fc 2794+#endif /* __KERNEL__ */
2795+#endif /* __AUFS_BRANCH_H__ */
1308ab2a 2796diff -uprN -x .git linux-2.6.31/fs/aufs/cpup.c aufs2-2.6.git/fs/aufs/cpup.c
2797--- linux-2.6.31/fs/aufs/cpup.c 1970-01-01 00:00:00.000000000 +0000
2798+++ aufs2-2.6.git/fs/aufs/cpup.c 2009-09-21 21:49:23.377863284 +0000
2799@@ -0,0 +1,1048 @@
1facf9fc 2800+/*
2801+ * Copyright (C) 2005-2009 Junjiro R. Okajima
2802+ *
2803+ * This program, aufs is free software; you can redistribute it and/or modify
2804+ * it under the terms of the GNU General Public License as published by
2805+ * the Free Software Foundation; either version 2 of the License, or
2806+ * (at your option) any later version.
dece6358
AM
2807+ *
2808+ * This program is distributed in the hope that it will be useful,
2809+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2810+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2811+ * GNU General Public License for more details.
2812+ *
2813+ * You should have received a copy of the GNU General Public License
2814+ * along with this program; if not, write to the Free Software
2815+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 2816+ */
2817+
2818+/*
2819+ * copy-up functions, see wbr_policy.c for copy-down
2820+ */
2821+
dece6358 2822+#include <linux/file.h>
1facf9fc 2823+#include <linux/fs_stack.h>
dece6358 2824+#include <linux/mm.h>
1facf9fc 2825+#include <linux/uaccess.h>
2826+#include "aufs.h"
2827+/* OR @src's i_flags into @dst, except those in mask; add NOATIME/NOCMTIME when au_test_fs_notime(dst->i_sb) */
2828+void au_cpup_attr_flags(struct inode *dst, struct inode *src)
2829+{
2830+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
2831+ | S_NOATIME | S_NOCMTIME;
2832+ /* flags already set on @dst are never cleared here */
2833+ dst->i_flags |= src->i_flags & ~mask;
2834+ if (au_test_fs_notime(dst->i_sb))
2835+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
2836+}
2837+/* copy the times and the size from the lower inode on the start branch (au_ibstart) to @inode */
2838+void au_cpup_attr_timesizes(struct inode *inode)
2839+{
2840+ struct inode *h_inode;
2841+
2842+ h_inode = au_h_iptr(inode, au_ibstart(inode));
2843+ fsstack_copy_attr_times(inode, h_inode);
2844+ vfsub_copy_inode_size(inode, h_inode);
2845+}
2846+/* set @inode's i_nlink from its lower inode(s); without @force, a non-dir that passes au_plink_test() under PLINK is left alone */
2847+void au_cpup_attr_nlink(struct inode *inode, int force)
2848+{
2849+ struct inode *h_inode;
2850+ struct super_block *sb;
2851+ aufs_bindex_t bindex, bend;
2852+
2853+ sb = inode->i_sb;
2854+ bindex = au_ibstart(inode);
2855+ h_inode = au_h_iptr(inode, bindex);
2856+ if (!force
2857+ && !S_ISDIR(h_inode->i_mode)
2858+ && au_opt_test(au_mntflags(sb), PLINK)
2859+ && au_plink_test(inode))
2860+ return;
2861+ /* start from the nlink of the lower inode on the start branch */
2862+ inode->i_nlink = h_inode->i_nlink;
2863+
2864+ /*
2865+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
2866+ * it may include whplink directory.
2867+ */
2868+ if (S_ISDIR(h_inode->i_mode)) {
2869+ bend = au_ibend(inode);
2870+ for (bindex++; bindex <= bend; bindex++) {
2871+ h_inode = au_h_iptr(inode, bindex);
2872+ if (h_inode)
2873+ au_add_nlink(inode, h_inode);
2874+ }
2875+ }
2876+}
2877+/* copy mode, uid, gid, times, size and flags from the lower inode on the start branch to @inode */
2878+void au_cpup_attr_changeable(struct inode *inode)
2879+{
2880+ struct inode *h_inode;
2881+
2882+ h_inode = au_h_iptr(inode, au_ibstart(inode));
2883+ inode->i_mode = h_inode->i_mode;
2884+ inode->i_uid = h_inode->i_uid;
2885+ inode->i_gid = h_inode->i_gid;
2886+ au_cpup_attr_timesizes(inode);
2887+ au_cpup_attr_flags(inode, h_inode);
2888+}
2889+/* record @h_inode's i_generation and superblock in @inode's iinfo (ii_higen, ii_hsb1) */
2890+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
2891+{
2892+ struct au_iinfo *iinfo = au_ii(inode);
2893+
1308ab2a 2894+ IiMustWriteLock(inode);
2895+
1facf9fc 2896+ iinfo->ii_higen = h_inode->i_generation;
2897+ iinfo->ii_hsb1 = h_inode->i_sb;
2898+}
2899+/* refresh all copied attributes of @inode from the lower inode on its start branch; nlink only while i_nlink > 0 */
2900+void au_cpup_attr_all(struct inode *inode, int force)
2901+{
2902+ struct inode *h_inode;
2903+
2904+ h_inode = au_h_iptr(inode, au_ibstart(inode));
2905+ au_cpup_attr_changeable(inode);
2906+ if (inode->i_nlink > 0)
2907+ au_cpup_attr_nlink(inode, force);
2908+ inode->i_rdev = h_inode->i_rdev;
2909+ inode->i_blkbits = h_inode->i_blkbits;
2910+ au_cpup_igen(inode, h_inode);
2911+}
2912+
2913+/* ---------------------------------------------------------------------- */
2914+
2915+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
2916+
2917+/* keep the timestamps of the parent dir when cpup */
2918+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
2919+ struct path *h_path)
2920+{
2921+ struct inode *h_inode;
2922+ /* remember @h_path and its inode's atime/mtime in @dt; au_dtime_revert() writes them back */
2923+ dt->dt_dentry = dentry;
2924+ dt->dt_h_path = *h_path;
2925+ h_inode = h_path->dentry->d_inode;
2926+ dt->dt_atime = h_inode->i_atime;
2927+ dt->dt_mtime = h_inode->i_mtime;
2928+ /* smp_mb(); */
2929+}
2930+/* write the atime/mtime saved in @dt back to dt->dt_h_path; a failure is only warned about */
2931+void au_dtime_revert(struct au_dtime *dt)
2932+{
2933+ struct iattr attr;
2934+ int err;
2935+
2936+ attr.ia_atime = dt->dt_atime;
2937+ attr.ia_mtime = dt->dt_mtime;
2938+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
2939+ | ATTR_ATIME | ATTR_ATIME_SET;
2940+
2941+ err = vfsub_notify_change(&dt->dt_h_path, &attr);
2942+ if (unlikely(err))
2943+ AuWarn("restoring timestamps failed(%d). ignored\n", err);
2944+}
2945+
2946+/* ---------------------------------------------------------------------- */
2947+/* copy uid, gid, atime, mtime (and mode, when it differs and the target is not a symlink) from @h_src to @dst's lower dentry on @bindex */
2948+static noinline_for_stack
2949+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
2950+{
2951+ int err, sbits;
2952+ struct iattr ia;
2953+ struct path h_path;
1308ab2a 2954+ struct inode *h_isrc, *h_idst;
1facf9fc 2955+
2956+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 2957+ h_idst = h_path.dentry->d_inode;
1facf9fc 2958+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
2959+ h_isrc = h_src->d_inode;
1308ab2a 2960+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 2961+ | ATTR_ATIME | ATTR_MTIME
2962+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
1facf9fc 2963+ ia.ia_uid = h_isrc->i_uid;
2964+ ia.ia_gid = h_isrc->i_gid;
2965+ ia.ia_atime = h_isrc->i_atime;
2966+ ia.ia_mtime = h_isrc->i_mtime;
1308ab2a 2967+ if (h_idst->i_mode != h_isrc->i_mode
2968+ && !S_ISLNK(h_idst->i_mode)) {
2969+ ia.ia_valid |= ATTR_MODE;
2970+ ia.ia_mode = h_isrc->i_mode;
2971+ }
2972+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
2973+ au_cpup_attr_flags(h_idst, h_isrc);
1facf9fc 2974+ err = vfsub_notify_change(&h_path, &ia);
2975+ /* on nfs, a source with setuid/setgid bits gets its mode applied once more in a second call */
2976+ /* is this nfs only? */
2977+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
2978+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
2979+ ia.ia_mode = h_isrc->i_mode;
2980+ err = vfsub_notify_change(&h_path, &ia);
2981+ }
2982+
2983+ return err;
2984+}
2985+
2986+/* ---------------------------------------------------------------------- */
2987+
2988+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
2989+ char *buf, unsigned long blksize)
2990+{
2991+ int err;
2992+ size_t sz, rbytes, wbytes;
2993+ unsigned char all_zero;
2994+ char *p, *zp;
2995+ struct mutex *h_mtx;
2996+ /* reduce stack usage */
2997+ struct iattr *ia;
2998+
2999+ zp = page_address(ZERO_PAGE(0));
3000+ if (unlikely(!zp))
3001+ return -ENOMEM; /* possible? */
3002+
3003+ err = 0;
3004+ all_zero = 0;
3005+ while (len) {
3006+ AuDbg("len %lld\n", len);
3007+ sz = blksize;
3008+ if (len < blksize)
3009+ sz = len;
3010+
3011+ rbytes = 0;
3012+ /* todo: signal_pending? */
3013+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
3014+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3015+ err = rbytes;
3016+ }
3017+ if (unlikely(err < 0))
3018+ break;
3019+
3020+ all_zero = 0;
3021+ if (len >= rbytes && rbytes == blksize)
3022+ all_zero = !memcmp(buf, zp, rbytes);
3023+ if (!all_zero) {
3024+ wbytes = rbytes;
3025+ p = buf;
3026+ while (wbytes) {
3027+ size_t b;
3028+
3029+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3030+ err = b;
3031+ /* todo: signal_pending? */
3032+ if (unlikely(err == -EAGAIN || err == -EINTR))
3033+ continue;
3034+ if (unlikely(err < 0))
3035+ break;
3036+ wbytes -= b;
3037+ p += b;
3038+ }
3039+ } else {
3040+ loff_t res;
3041+
3042+ AuLabel(hole);
3043+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3044+ err = res;
3045+ if (unlikely(res < 0))
3046+ break;
3047+ }
3048+ len -= rbytes;
3049+ err = 0;
3050+ }
3051+
3052+ /* the last block may be a hole */
3053+ if (!err && all_zero) {
3054+ AuLabel(last hole);
3055+
3056+ err = 1;
3057+ if (au_test_nfs(dst->f_dentry->d_sb)) {
3058+ /* nfs requires this step to make last hole */
3059+ /* is this only nfs? */
3060+ do {
3061+ /* todo: signal_pending? */
3062+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3063+ } while (err == -EAGAIN || err == -EINTR);
3064+ if (err == 1)
3065+ dst->f_pos--;
3066+ }
3067+
3068+ if (err == 1) {
3069+ ia = (void *)buf;
3070+ ia->ia_size = dst->f_pos;
3071+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3072+ ia->ia_file = dst;
3073+ h_mtx = &dst->f_dentry->d_inode->i_mutex;
3074+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3075+ err = vfsub_notify_change(&dst->f_path, ia);
3076+ mutex_unlock(h_mtx);
3077+ }
3078+ }
3079+
3080+ return err;
3081+}
3082+
3083+int au_copy_file(struct file *dst, struct file *src, loff_t len)
3084+{
3085+ int err;
3086+ unsigned long blksize;
3087+ unsigned char do_kfree;
3088+ char *buf;
3089+
3090+ err = -ENOMEM;
3091+ blksize = dst->f_dentry->d_sb->s_blocksize;
3092+ if (!blksize || PAGE_SIZE < blksize)
3093+ blksize = PAGE_SIZE;
3094+ AuDbg("blksize %lu\n", blksize);
3095+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
3096+ if (do_kfree)
3097+ buf = kmalloc(blksize, GFP_NOFS);
3098+ else
3099+ buf = (void *)__get_free_page(GFP_NOFS);
3100+ if (unlikely(!buf))
3101+ goto out;
3102+
3103+ if (len > (1 << 22))
3104+ AuDbg("copying a large file %lld\n", (long long)len);
3105+
3106+ src->f_pos = 0;
3107+ dst->f_pos = 0;
3108+ err = au_do_copy_file(dst, src, len, buf, blksize);
3109+ if (do_kfree)
3110+ kfree(buf);
3111+ else
3112+ free_page((unsigned long)buf);
3113+
3114+ out:
3115+ return err;
3116+}
3117+
3118+/*
3119+ * to support a sparse file which is opened with O_APPEND,
3120+ * we need to close the file.
3121+ */
3122+static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
3123+ aufs_bindex_t bsrc, loff_t len)
3124+{
3125+ int err, i;
3126+ enum { SRC, DST };
3127+ struct {
3128+ aufs_bindex_t bindex;
3129+ unsigned int flags;
3130+ struct dentry *dentry;
3131+ struct file *file;
3132+ void *label, *label_file;
3133+ } *f, file[] = {
3134+ {
3135+ .bindex = bsrc,
3136+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3137+ .file = NULL,
3138+ .label = &&out,
3139+ .label_file = &&out_src
3140+ },
3141+ {
3142+ .bindex = bdst,
3143+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3144+ .file = NULL,
3145+ .label = &&out_src,
3146+ .label_file = &&out_dst
3147+ }
3148+ };
3149+ struct super_block *sb;
3150+ /* .label is the unwind target when the open fails, .label_file when the opened file has no f_op */
3151+ /* bsrc branch can be ro/rw. */
3152+ sb = dentry->d_sb;
3153+ f = file;
3154+ for (i = 0; i < 2; i++, f++) {
3155+ f->dentry = au_h_dptr(dentry, f->bindex);
3156+ f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3157+ err = PTR_ERR(f->file);
3158+ if (IS_ERR(f->file))
3159+ goto *f->label;
3160+ err = -EINVAL;
3161+ if (unlikely(!f->file->f_op))
3162+ goto *f->label_file;
3163+ }
3164+ /* both lower files are open: copy @len bytes, then fall through the unwind labels */
3165+ /* try stopping to update while we copyup */
3166+ IMustLock(file[SRC].dentry->d_inode);
3167+ err = au_copy_file(file[DST].file, file[SRC].file, len);
3168+ /* each fput() is paired with au_sbr_put() on the same branch */
3169+ out_dst:
3170+ fput(file[DST].file);
3171+ au_sbr_put(sb, file[DST].bindex);
3172+ out_src:
3173+ fput(file[SRC].file);
3174+ au_sbr_put(sb, file[SRC].bindex);
3175+ out:
3176+ return err;
3177+}
3178+/* copy the data of a regular file from @bsrc to @bdst; @len == -1 or larger than the source size means the whole file; on failure @h_path is unlinked */
3179+static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3180+ aufs_bindex_t bsrc, loff_t len,
3181+ struct inode *h_dir, struct path *h_path)
3182+{
3183+ int err, rerr;
3184+ loff_t l;
3185+ /* clamp @len to the source size; an empty copy skips au_cp_regular() entirely */
3186+ err = 0;
3187+ l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3188+ if (len == -1 || l < len)
3189+ len = l;
3190+ if (len)
3191+ err = au_cp_regular(dentry, bdst, bsrc, len);
3192+ if (!err)
3193+ goto out; /* success */
3194+ /* the copy failed: remove the target; if that fails too, report -EIO */
3195+ rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3196+ if (rerr) {
3197+ AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3198+ AuDLNPair(h_path->dentry), err, rerr);
3199+ err = -EIO;
3200+ }
3201+
3202+ out:
3203+ return err;
3204+}
3205+
3206+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3207+ struct inode *h_dir)
3208+{
3209+ int err, symlen;
3210+ mm_segment_t old_fs;
3211+ char *sym;
3212+
3213+ err = -ENOSYS;
3214+ if (unlikely(!h_src->d_inode->i_op->readlink))
3215+ goto out;
3216+
3217+ err = -ENOMEM;
3218+ sym = __getname();
3219+ if (unlikely(!sym))
3220+ goto out;
3221+
3222+ old_fs = get_fs();
3223+ set_fs(KERNEL_DS);
3224+ symlen = h_src->d_inode->i_op->readlink(h_src, (char __user *)sym,
3225+ PATH_MAX);
3226+ err = symlen;
3227+ set_fs(old_fs);
3228+
3229+ if (symlen > 0) {
3230+ sym[symlen] = 0;
3231+ err = vfsub_symlink(h_dir, h_path, sym);
3232+ }
3233+ __putname(sym);
3234+
3235+ out:
3236+ return err;
3237+}
3238+
3239+/* return with the lower dst inode is locked */
3240+static noinline_for_stack
3241+int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3242+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3243+ struct dentry *dst_parent)
3244+{
3245+ int err;
3246+ umode_t mode;
3247+ unsigned int mnt_flags;
3248+ unsigned char isdir;
3249+ const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3250+ struct au_dtime dt;
3251+ struct path h_path;
3252+ struct dentry *h_src, *h_dst, *h_parent;
3253+ struct inode *h_inode, *h_dir;
3254+ struct super_block *sb;
3255+ /* create on @bdst a lower object of the same type as the one on @bsrc; regular files also get their data copied */
3256+ /* bsrc branch can be ro/rw. */
3257+ h_src = au_h_dptr(dentry, bsrc);
3258+ h_inode = h_src->d_inode;
3259+ AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3260+
3261+ /* try stopping to be referenced while we are creating */
3262+ h_dst = au_h_dptr(dentry, bdst);
3263+ h_parent = h_dst->d_parent; /* dir inode is locked */
3264+ h_dir = h_parent->d_inode;
3265+ IMustLock(h_dir);
3266+ AuDebugOn(h_parent != h_dst->d_parent);
3267+ /* with the DTIME flag the lower parent's atime/mtime are saved here and restored before returning */
3268+ sb = dentry->d_sb;
3269+ h_path.mnt = au_sbr_mnt(sb, bdst);
3270+ if (do_dt) {
3271+ h_path.dentry = h_parent;
3272+ au_dtime_store(&dt, dst_parent, &h_path);
3273+ }
3274+ h_path.dentry = h_dst;
3275+ /* dispatch on the source file type */
3276+ isdir = 0;
3277+ mode = h_inode->i_mode;
3278+ switch (mode & S_IFMT) {
3279+ case S_IFREG:
3280+ /* try stopping to update while we are referencing */
3281+ IMustLock(h_inode);
3282+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3283+ if (!err)
3284+ err = au_do_cpup_regular
3285+ (dentry, bdst, bsrc, len,
3286+ au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3287+ break;
3288+ case S_IFDIR:
3289+ isdir = 1;
3290+ err = vfsub_mkdir(h_dir, &h_path, mode);
3291+ if (!err) {
3292+ /*
3293+ * strange behaviour from the users view,
3294+ * particularly setattr case
3295+ */
3296+ if (au_ibstart(dst_parent->d_inode) == bdst)
3297+ au_cpup_attr_nlink(dst_parent->d_inode,
3298+ /*force*/1);
3299+ au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3300+ }
3301+ break;
3302+ case S_IFLNK:
3303+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3304+ break;
3305+ case S_IFCHR:
3306+ case S_IFBLK:
3307+ AuDebugOn(!capable(CAP_MKNOD));
3308+ /*FALLTHROUGH*/
3309+ case S_IFIFO:
3310+ case S_IFSOCK:
3311+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3312+ break;
3313+ default:
3314+ AuIOErr("Unknown inode type 0%o\n", mode);
3315+ err = -EIO;
3316+ }
3317+ /* when all conditions hold, store ino 0 for the source inode in @bsrc's xino; this runs regardless of err */
3318+ mnt_flags = au_mntflags(sb);
3319+ if (!au_opt_test(mnt_flags, UDBA_NONE)
3320+ && !isdir
3321+ && au_opt_test(mnt_flags, XINO)
3322+ && h_inode->i_nlink == 1
3323+ /* todo: unnecessary? */
3324+ /* && dentry->d_inode->i_nlink == 1 */
3325+ && bdst < bsrc
3326+ && !au_ftest_cpup(flags, KEEPLINO))
1308ab2a 3327+ au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 3328+ /* ignore this error */
3329+
3330+ if (do_dt)
3331+ au_dtime_revert(&dt);
3332+ return err;
3333+}
3334+
3335+/*
3336+ * copyup the @dentry from @bsrc to @bdst.
3337+ * the caller must set the both of lower dentries.
3338+ * @len is for truncating when it is -1 copyup the entire file.
3339+ * in link/rename cases, @dst_parent may be different from the real one.
3340+ */
3341+static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3342+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3343+ struct dentry *dst_parent)
3344+{
3345+ int err, rerr;
3346+ aufs_bindex_t old_ibstart;
3347+ unsigned char isdir, plink;
3348+ struct au_dtime dt;
3349+ struct path h_path;
3350+ struct dentry *h_src, *h_dst, *h_parent;
3351+ struct inode *dst_inode, *h_dir, *inode;
3352+ struct super_block *sb;
3353+
3354+ AuDebugOn(bsrc <= bdst);
3355+
3356+ sb = dentry->d_sb;
3357+ h_path.mnt = au_sbr_mnt(sb, bdst);
3358+ h_dst = au_h_dptr(dentry, bdst);
3359+ h_parent = h_dst->d_parent; /* dir inode is locked */
3360+ h_dir = h_parent->d_inode;
3361+ IMustLock(h_dir);
3362+
3363+ h_src = au_h_dptr(dentry, bsrc);
3364+ inode = dentry->d_inode;
3365+
3366+ if (!dst_parent)
3367+ dst_parent = dget_parent(dentry);
3368+ else
3369+ dget(dst_parent);
3370+
3371+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
3372+ dst_inode = au_h_iptr(inode, bdst);
3373+ if (dst_inode) {
3374+ if (unlikely(!plink)) {
3375+ err = -EIO;
3376+ AuIOErr("i%lu exists on a upper branch "
3377+ "but plink is disabled\n", inode->i_ino);
3378+ goto out;
3379+ }
3380+
3381+ if (dst_inode->i_nlink) {
3382+ const int do_dt = au_ftest_cpup(flags, DTIME);
3383+
3384+ h_src = au_plink_lkup(inode, bdst);
3385+ err = PTR_ERR(h_src);
3386+ if (IS_ERR(h_src))
3387+ goto out;
3388+ if (unlikely(!h_src->d_inode)) {
3389+ err = -EIO;
3390+ AuIOErr("i%lu exists on a upper branch "
3391+ "but plink is broken\n", inode->i_ino);
3392+ dput(h_src);
3393+ goto out;
3394+ }
3395+
3396+ if (do_dt) {
3397+ h_path.dentry = h_parent;
3398+ au_dtime_store(&dt, dst_parent, &h_path);
3399+ }
3400+ h_path.dentry = h_dst;
3401+ err = vfsub_link(h_src, h_dir, &h_path);
3402+ if (do_dt)
3403+ au_dtime_revert(&dt);
3404+ dput(h_src);
3405+ goto out;
3406+ } else
3407+ /* todo: cpup_wh_file? */
3408+ /* udba work */
3409+ au_update_brange(inode, 1);
3410+ }
3411+
3412+ old_ibstart = au_ibstart(inode);
3413+ err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3414+ if (unlikely(err))
3415+ goto out;
3416+ dst_inode = h_dst->d_inode;
3417+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3418+
3419+ err = cpup_iattr(dentry, bdst, h_src);
3420+ isdir = S_ISDIR(dst_inode->i_mode);
3421+ if (!err) {
3422+ if (bdst < old_ibstart)
3423+ au_set_ibstart(inode, bdst);
3424+ au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3425+ au_hi_flags(inode, isdir));
3426+ mutex_unlock(&dst_inode->i_mutex);
3427+ if (!isdir
3428+ && h_src->d_inode->i_nlink > 1
3429+ && plink)
3430+ au_plink_append(inode, bdst, h_dst);
3431+ goto out; /* success */
3432+ }
3433+
3434+ /* revert */
3435+ h_path.dentry = h_parent;
3436+ mutex_unlock(&dst_inode->i_mutex);
3437+ au_dtime_store(&dt, dst_parent, &h_path);
3438+ h_path.dentry = h_dst;
3439+ if (!isdir)
3440+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3441+ else
3442+ rerr = vfsub_rmdir(h_dir, &h_path);
3443+ au_dtime_revert(&dt);
3444+ if (rerr) {
3445+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3446+ err = -EIO;
3447+ }
3448+
3449+ out:
3450+ dput(dst_parent);
3451+ return err;
3452+}
3453+
3454+struct au_cpup_single_args {
3455+ int *errp;
3456+ struct dentry *dentry;
3457+ aufs_bindex_t bdst, bsrc;
3458+ loff_t len;
3459+ unsigned int flags;
3460+ struct dentry *dst_parent;
3461+};
3462+
3463+static void au_call_cpup_single(void *args)
3464+{
3465+ struct au_cpup_single_args *a = args;
3466+ *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3467+ a->flags, a->dst_parent);
3468+}
3469+
3470+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3471+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3472+ struct dentry *dst_parent)
3473+{
3474+ int err, wkq_err;
3475+ umode_t mode;
3476+ struct dentry *h_dentry;
3477+
3478+ h_dentry = au_h_dptr(dentry, bsrc);
3479+ mode = h_dentry->d_inode->i_mode & S_IFMT;
3480+ if ((mode != S_IFCHR && mode != S_IFBLK)
3481+ || capable(CAP_MKNOD))
3482+ err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3483+ dst_parent);
3484+ else {
3485+ struct au_cpup_single_args args = {
3486+ .errp = &err,
3487+ .dentry = dentry,
3488+ .bdst = bdst,
3489+ .bsrc = bsrc,
3490+ .len = len,
3491+ .flags = flags,
3492+ .dst_parent = dst_parent
3493+ };
3494+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3495+ if (unlikely(wkq_err))
3496+ err = wkq_err;
3497+ }
3498+
3499+ return err;
3500+}
3501+
3502+/*
3503+ * copyup the @dentry from the first active lower branch to @bdst,
3504+ * using au_cpup_single().
3505+ */
3506+static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3507+ unsigned int flags)
3508+{
3509+ int err;
3510+ aufs_bindex_t bsrc, bend;
3511+
3512+ bend = au_dbend(dentry);
3513+ for (bsrc = bdst + 1; bsrc <= bend; bsrc++)
3514+ if (au_h_dptr(dentry, bsrc))
3515+ break;
3516+
3517+ err = au_lkup_neg(dentry, bdst);
3518+ if (!err) {
3519+ err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3520+ if (!err)
3521+ return 0; /* success */
3522+
3523+ /* revert */
3524+ au_set_h_dptr(dentry, bdst, NULL);
3525+ au_set_dbstart(dentry, bsrc);
3526+ }
3527+
3528+ return err;
3529+}
3530+
3531+struct au_cpup_simple_args {
3532+ int *errp;
3533+ struct dentry *dentry;
3534+ aufs_bindex_t bdst;
3535+ loff_t len;
3536+ unsigned int flags;
3537+};
3538+
3539+static void au_call_cpup_simple(void *args)
3540+{
3541+ struct au_cpup_simple_args *a = args;
3542+ *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
3543+}
3544+
3545+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3546+ unsigned int flags)
3547+{
3548+ int err, wkq_err;
3549+ unsigned char do_sio;
3550+ struct dentry *parent;
3551+ struct inode *h_dir;
3552+
3553+ parent = dget_parent(dentry);
3554+ h_dir = au_h_iptr(parent->d_inode, bdst);
3555+ do_sio = !!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE);
3556+ if (!do_sio) {
3557+ /*
3558+ * testing CAP_MKNOD is for generic fs,
3559+ * but CAP_FSETID is for xfs only, currently.
3560+ */
3561+ umode_t mode = dentry->d_inode->i_mode;
3562+ do_sio = (((mode & (S_IFCHR | S_IFBLK))
3563+ && !capable(CAP_MKNOD))
3564+ || ((mode & (S_ISUID | S_ISGID))
3565+ && !capable(CAP_FSETID)));
3566+ }
3567+ if (!do_sio)
3568+ err = au_cpup_simple(dentry, bdst, len, flags);
3569+ else {
3570+ struct au_cpup_simple_args args = {
3571+ .errp = &err,
3572+ .dentry = dentry,
3573+ .bdst = bdst,
3574+ .len = len,
3575+ .flags = flags
3576+ };
3577+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
3578+ if (unlikely(wkq_err))
3579+ err = wkq_err;
3580+ }
3581+
3582+ dput(parent);
3583+ return err;
3584+}
3585+
3586+/* ---------------------------------------------------------------------- */
3587+
3588+/*
3589+ * copyup the deleted file for writing.
3590+ */
3591+static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
3592+ struct dentry *wh_dentry, struct file *file,
3593+ loff_t len)
3594+{
3595+ int err;
3596+ aufs_bindex_t bstart;
3597+ struct au_dinfo *dinfo;
3598+ struct dentry *h_d_dst, *h_d_start;
3599+
3600+ dinfo = au_di(dentry);
1308ab2a 3601+ AuRwMustWriteLock(&dinfo->di_rwsem);
3602+
1facf9fc 3603+ bstart = dinfo->di_bstart;
3604+ h_d_dst = dinfo->di_hdentry[0 + bdst].hd_dentry;
3605+ dinfo->di_bstart = bdst;
3606+ dinfo->di_hdentry[0 + bdst].hd_dentry = wh_dentry;
3607+ h_d_start = dinfo->di_hdentry[0 + bstart].hd_dentry;
3608+ if (file)
3609+ dinfo->di_hdentry[0 + bstart].hd_dentry
3610+ = au_h_fptr(file, au_fbstart(file))->f_dentry;
3611+ err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
3612+ /*h_parent*/NULL);
3613+ if (!err && file) {
3614+ err = au_reopen_nondir(file);
3615+ dinfo->di_hdentry[0 + bstart].hd_dentry = h_d_start;
3616+ }
3617+ dinfo->di_hdentry[0 + bdst].hd_dentry = h_d_dst;
3618+ dinfo->di_bstart = bstart;
3619+
3620+ return err;
3621+}
3622+
3623+static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3624+ struct file *file)
3625+{
3626+ int err;
3627+ struct au_dtime dt;
3628+ struct dentry *parent, *h_parent, *wh_dentry;
3629+ struct au_branch *br;
3630+ struct path h_path;
3631+
3632+ br = au_sbr(dentry->d_sb, bdst);
3633+ parent = dget_parent(dentry);
3634+ h_parent = au_h_dptr(parent, bdst);
3635+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
3636+ err = PTR_ERR(wh_dentry);
3637+ if (IS_ERR(wh_dentry))
3638+ goto out;
3639+
3640+ h_path.dentry = h_parent;
3641+ h_path.mnt = br->br_mnt;
3642+ au_dtime_store(&dt, parent, &h_path);
3643+ err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
3644+ if (unlikely(err))
3645+ goto out_wh;
3646+
3647+ dget(wh_dentry);
3648+ h_path.dentry = wh_dentry;
3649+ err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
3650+ if (unlikely(err)) {
3651+ AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
3652+ AuDLNPair(wh_dentry), err);
3653+ err = -EIO;
3654+ }
3655+ au_dtime_revert(&dt);
3656+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
3657+
3658+ out_wh:
3659+ dput(wh_dentry);
3660+ out:
3661+ dput(parent);
3662+ return err;
3663+}
3664+
3665+struct au_cpup_wh_args {
3666+ int *errp;
3667+ struct dentry *dentry;
3668+ aufs_bindex_t bdst;
3669+ loff_t len;
3670+ struct file *file;
3671+};
3672+
3673+static void au_call_cpup_wh(void *args)
3674+{
3675+ struct au_cpup_wh_args *a = args;
3676+ *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
3677+}
3678+
3679+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3680+ struct file *file)
3681+{
3682+ int err, wkq_err;
3683+ struct dentry *parent, *h_orph, *h_parent, *h_dentry;
3684+ struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
3685+ struct au_wbr *wbr;
3686+
3687+ parent = dget_parent(dentry);
3688+ dir = parent->d_inode;
3689+ h_orph = NULL;
3690+ h_parent = NULL;
3691+ h_dir = au_igrab(au_h_iptr(dir, bdst));
3692+ h_tmpdir = h_dir;
3693+ if (!h_dir->i_nlink) {
3694+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
3695+ h_orph = wbr->wbr_orph;
3696+
3697+ h_parent = dget(au_h_dptr(parent, bdst));
3698+ au_set_h_dptr(parent, bdst, NULL);
3699+ au_set_h_dptr(parent, bdst, dget(h_orph));
3700+ h_tmpdir = h_orph->d_inode;
3701+ au_set_h_iptr(dir, bdst, NULL, 0);
3702+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
3703+
3704+ /* this temporary unlock is safe */
3705+ if (file)
3706+ h_dentry = au_h_fptr(file, au_fbstart(file))->f_dentry;
3707+ else
3708+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
3709+ h_inode = h_dentry->d_inode;
3710+ IMustLock(h_inode);
3711+ mutex_unlock(&h_inode->i_mutex);
dece6358 3712+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
1facf9fc 3713+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
3714+ }
3715+
3716+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE))
3717+ err = au_cpup_wh(dentry, bdst, len, file);
3718+ else {
3719+ struct au_cpup_wh_args args = {
3720+ .errp = &err,
3721+ .dentry = dentry,
3722+ .bdst = bdst,
3723+ .len = len,
3724+ .file = file
3725+ };
3726+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
3727+ if (unlikely(wkq_err))
3728+ err = wkq_err;
3729+ }
3730+
3731+ if (h_orph) {
3732+ mutex_unlock(&h_tmpdir->i_mutex);
3733+ au_set_h_iptr(dir, bdst, NULL, 0);
3734+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
3735+ au_set_h_dptr(parent, bdst, NULL);
3736+ au_set_h_dptr(parent, bdst, h_parent);
3737+ }
3738+ iput(h_dir);
3739+ dput(parent);
3740+
3741+ return err;
3742+}
3743+
3744+/* ---------------------------------------------------------------------- */
3745+
3746+/*
3747+ * generic routine for both copy-up and copy-down.
3748+ */
3749+/* cf. revalidate function in file.c */
3750+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
3751+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
3752+ struct dentry *h_parent, void *arg),
3753+ void *arg)
3754+{
3755+ int err;
3756+ struct au_pin pin;
3757+ struct dentry *d, *parent, *h_parent, *real_parent;
3758+
3759+ err = 0;
3760+ parent = dget_parent(dentry);
3761+ if (IS_ROOT(parent))
3762+ goto out;
3763+
3764+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
3765+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
3766+
3767+ /* do not use au_dpage */
3768+ real_parent = parent;
3769+ while (1) {
3770+ dput(parent);
3771+ parent = dget_parent(dentry);
3772+ h_parent = au_h_dptr(parent, bdst);
3773+ if (h_parent)
3774+ goto out; /* success */
3775+
3776+ /* find top dir which is necessary to cpup */
3777+ do {
3778+ d = parent;
3779+ dput(parent);
3780+ parent = dget_parent(d);
3781+ di_read_lock_parent3(parent, !AuLock_IR);
3782+ h_parent = au_h_dptr(parent, bdst);
3783+ di_read_unlock(parent, !AuLock_IR);
3784+ } while (!h_parent);
3785+
3786+ if (d != real_parent)
3787+ di_write_lock_child3(d);
3788+
3789+ /* somebody else might create while we were sleeping */
3790+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
3791+ if (au_h_dptr(d, bdst))
3792+ au_update_dbstart(d);
3793+
3794+ au_pin_set_dentry(&pin, d);
3795+ err = au_do_pin(&pin);
3796+ if (!err) {
3797+ err = cp(d, bdst, h_parent, arg);
3798+ au_unpin(&pin);
3799+ }
3800+ }
3801+
3802+ if (d != real_parent)
3803+ di_write_unlock(d);
3804+ if (unlikely(err))
3805+ break;
3806+ }
3807+
3808+ out:
3809+ dput(parent);
3810+ return err;
3811+}
3812+
3813+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
3814+ struct dentry *h_parent __maybe_unused ,
3815+ void *arg __maybe_unused)
3816+{
3817+ return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
3818+}
3819+
3820+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
3821+{
3822+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
3823+}
3824+
3825+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
3826+{
3827+ int err;
3828+ struct dentry *parent;
3829+ struct inode *dir;
3830+
3831+ parent = dget_parent(dentry);
3832+ dir = parent->d_inode;
3833+ err = 0;
3834+ if (au_h_iptr(dir, bdst))
3835+ goto out;
3836+
3837+ di_read_unlock(parent, AuLock_IR);
3838+ di_write_lock_parent(parent);
3839+ /* someone else might change our inode while we were sleeping */
3840+ if (!au_h_iptr(dir, bdst))
3841+ err = au_cpup_dirs(dentry, bdst);
3842+ di_downgrade_lock(parent, AuLock_IR);
3843+
3844+ out:
3845+ dput(parent);
3846+ return err;
3847+}
1308ab2a 3848diff -uprN -x .git linux-2.6.31/fs/aufs/cpup.h aufs2-2.6.git/fs/aufs/cpup.h
3849--- linux-2.6.31/fs/aufs/cpup.h 1970-01-01 00:00:00.000000000 +0000
3850+++ aufs2-2.6.git/fs/aufs/cpup.h 2009-09-21 21:49:23.377863284 +0000
dece6358 3851@@ -0,0 +1,81 @@
1facf9fc 3852+/*
3853+ * Copyright (C) 2005-2009 Junjiro R. Okajima
3854+ *
3855+ * This program, aufs is free software; you can redistribute it and/or modify
3856+ * it under the terms of the GNU General Public License as published by
3857+ * the Free Software Foundation; either version 2 of the License, or
3858+ * (at your option) any later version.
dece6358
AM
3859+ *
3860+ * This program is distributed in the hope that it will be useful,
3861+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3862+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3863+ * GNU General Public License for more details.
3864+ *
3865+ * You should have received a copy of the GNU General Public License
3866+ * along with this program; if not, write to the Free Software
3867+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3868+ */
3869+
3870+/*
3871+ * copy-up/down functions
3872+ */
3873+
3874+#ifndef __AUFS_CPUP_H__
3875+#define __AUFS_CPUP_H__
3876+
3877+#ifdef __KERNEL__
3878+
dece6358
AM
3879+#include <linux/path.h>
3880+#include <linux/time.h>
1facf9fc 3881+#include <linux/aufs_type.h>
3882+
dece6358
AM
3883+struct inode;
3884+struct file;
3885+
1facf9fc 3886+void au_cpup_attr_flags(struct inode *dst, struct inode *src);
3887+void au_cpup_attr_timesizes(struct inode *inode);
3888+void au_cpup_attr_nlink(struct inode *inode, int force);
3889+void au_cpup_attr_changeable(struct inode *inode);
3890+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
3891+void au_cpup_attr_all(struct inode *inode, int force);
3892+
3893+/* ---------------------------------------------------------------------- */
3894+
3895+/* cpup flags */
3896+#define AuCpup_DTIME 1 /* do dtime_store/revert */
3897+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
3898+ for link(2) */
3899+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
3900+#define au_fset_cpup(flags, name) { (flags) |= AuCpup_##name; }
3901+#define au_fclr_cpup(flags, name) { (flags) &= ~AuCpup_##name; }
3902+
3903+int au_copy_file(struct file *dst, struct file *src, loff_t len);
3904+int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3905+ aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3906+ struct dentry *dst_parent);
3907+int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3908+ unsigned int flags);
3909+int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3910+ struct file *file);
3911+
3912+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
3913+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
3914+ struct dentry *h_parent, void *arg),
3915+ void *arg);
3916+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
3917+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
3918+
3919+/* ---------------------------------------------------------------------- */
3920+
3921+/* keep timestamps when copyup */
3922+struct au_dtime {
3923+ struct dentry *dt_dentry;
3924+ struct path dt_h_path;
3925+ struct timespec dt_atime, dt_mtime;
3926+};
3927+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3928+ struct path *h_path);
3929+void au_dtime_revert(struct au_dtime *dt);
3930+
3931+#endif /* __KERNEL__ */
3932+#endif /* __AUFS_CPUP_H__ */
1308ab2a 3933diff -uprN -x .git linux-2.6.31/fs/aufs/dbgaufs.c aufs2-2.6.git/fs/aufs/dbgaufs.c
3934--- linux-2.6.31/fs/aufs/dbgaufs.c 1970-01-01 00:00:00.000000000 +0000
3935+++ aufs2-2.6.git/fs/aufs/dbgaufs.c 2009-09-21 21:49:23.377863284 +0000
dece6358 3936@@ -0,0 +1,331 @@
1facf9fc 3937+/*
3938+ * Copyright (C) 2005-2009 Junjiro R. Okajima
3939+ *
3940+ * This program, aufs is free software; you can redistribute it and/or modify
3941+ * it under the terms of the GNU General Public License as published by
3942+ * the Free Software Foundation; either version 2 of the License, or
3943+ * (at your option) any later version.
dece6358
AM
3944+ *
3945+ * This program is distributed in the hope that it will be useful,
3946+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3947+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3948+ * GNU General Public License for more details.
3949+ *
3950+ * You should have received a copy of the GNU General Public License
3951+ * along with this program; if not, write to the Free Software
3952+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 3953+ */
3954+
3955+/*
3956+ * debugfs interface
3957+ */
3958+
3959+#include <linux/debugfs.h>
3960+#include "aufs.h"
3961+
3962+#ifndef CONFIG_SYSFS
3963+#error DEBUG_FS depends upon SYSFS
3964+#endif
3965+
3966+static struct dentry *dbgaufs;
3967+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
3968+
3969+/* 20 is the max number of decimal digits of a 64-bit ulong */
3970+struct dbgaufs_arg {
3971+ int n;
3972+ char a[20 * 4];
3973+};
3974+
3975+/*
3976+ * common function for all XINO files
3977+ */
3978+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
3979+ struct file *file)
3980+{
3981+ kfree(file->private_data);
3982+ return 0;
3983+}
3984+
3985+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
3986+{
3987+ int err;
3988+ struct kstat st;
3989+ struct dbgaufs_arg *p;
3990+
3991+ err = -ENOMEM;
3992+ p = kmalloc(sizeof(*p), GFP_NOFS);
3993+ if (unlikely(!p))
3994+ goto out;
3995+
3996+ err = 0;
3997+ p->n = 0;
3998+ file->private_data = p;
3999+ if (!xf)
4000+ goto out;
4001+
4002+ err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4003+ if (!err) {
4004+ if (do_fcnt)
4005+ p->n = snprintf
4006+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4007+ (long)file_count(xf), st.blocks, st.blksize,
4008+ (long long)st.size);
4009+ else
4010+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4011+ st.blocks, st.blksize,
4012+ (long long)st.size);
4013+ AuDebugOn(p->n >= sizeof(p->a));
4014+ } else {
4015+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4016+ err = 0;
4017+ }
4018+
4019+ out:
4020+ return err;
4021+
4022+}
4023+
4024+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4025+ size_t count, loff_t *ppos)
4026+{
4027+ struct dbgaufs_arg *p;
4028+
4029+ p = file->private_data;
4030+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4031+}
4032+
4033+/* ---------------------------------------------------------------------- */
4034+
4035+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4036+{
4037+ int err;
4038+ struct au_sbinfo *sbinfo;
4039+ struct super_block *sb;
4040+
4041+ sbinfo = inode->i_private;
4042+ sb = sbinfo->si_sb;
4043+ si_noflush_read_lock(sb);
4044+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4045+ si_read_unlock(sb);
4046+ return err;
4047+}
4048+
4049+static const struct file_operations dbgaufs_xib_fop = {
4050+ .open = dbgaufs_xib_open,
4051+ .release = dbgaufs_xi_release,
4052+ .read = dbgaufs_xi_read
4053+};
4054+
4055+/* ---------------------------------------------------------------------- */
4056+
4057+#define DbgaufsXi_PREFIX "xi"
4058+
4059+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4060+{
4061+ int err;
4062+ long l;
4063+ struct au_sbinfo *sbinfo;
4064+ struct super_block *sb;
4065+ struct file *xf;
4066+ struct qstr *name;
4067+
4068+ err = -ENOENT;
4069+ xf = NULL;
4070+ name = &file->f_dentry->d_name;
4071+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4072+ || memcmp(name->name, DbgaufsXi_PREFIX,
4073+ sizeof(DbgaufsXi_PREFIX) - 1)))
4074+ goto out;
4075+ err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4076+ if (unlikely(err))
4077+ goto out;
4078+
4079+ sbinfo = inode->i_private;
4080+ sb = sbinfo->si_sb;
4081+ si_noflush_read_lock(sb);
4082+ if (l <= au_sbend(sb)) {
4083+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4084+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4085+ } else
4086+ err = -ENOENT;
4087+ si_read_unlock(sb);
4088+
4089+ out:
4090+ return err;
4091+}
4092+
4093+static const struct file_operations dbgaufs_xino_fop = {
4094+ .open = dbgaufs_xino_open,
4095+ .release = dbgaufs_xi_release,
4096+ .read = dbgaufs_xi_read
4097+};
4098+
4099+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4100+{
4101+ aufs_bindex_t bend;
4102+ struct au_branch *br;
4103+ struct au_xino_file *xi;
4104+
4105+ if (!au_sbi(sb)->si_dbgaufs)
4106+ return;
4107+
4108+ bend = au_sbend(sb);
4109+ for (; bindex <= bend; bindex++) {
4110+ br = au_sbr(sb, bindex);
4111+ xi = &br->br_xino;
4112+ if (xi->xi_dbgaufs) {
4113+ debugfs_remove(xi->xi_dbgaufs);
4114+ xi->xi_dbgaufs = NULL;
4115+ }
4116+ }
4117+}
4118+
4119+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4120+{
4121+ struct au_sbinfo *sbinfo;
4122+ struct dentry *parent;
4123+ struct au_branch *br;
4124+ struct au_xino_file *xi;
4125+ aufs_bindex_t bend;
4126+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
4127+
4128+ sbinfo = au_sbi(sb);
4129+ parent = sbinfo->si_dbgaufs;
4130+ if (!parent)
4131+ return;
4132+
4133+ bend = au_sbend(sb);
4134+ for (; bindex <= bend; bindex++) {
4135+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4136+ br = au_sbr(sb, bindex);
4137+ xi = &br->br_xino;
4138+ AuDebugOn(xi->xi_dbgaufs);
4139+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4140+ sbinfo, &dbgaufs_xino_fop);
4141+ /* ignore an error */
4142+ if (unlikely(!xi->xi_dbgaufs))
4143+ AuWarn1("failed %s under debugfs\n", name);
4144+ }
4145+}
4146+
4147+/* ---------------------------------------------------------------------- */
4148+
4149+#ifdef CONFIG_AUFS_EXPORT
4150+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4151+{
4152+ int err;
4153+ struct au_sbinfo *sbinfo;
4154+ struct super_block *sb;
4155+
4156+ sbinfo = inode->i_private;
4157+ sb = sbinfo->si_sb;
4158+ si_noflush_read_lock(sb);
4159+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4160+ si_read_unlock(sb);
4161+ return err;
4162+}
4163+
4164+static const struct file_operations dbgaufs_xigen_fop = {
4165+ .open = dbgaufs_xigen_open,
4166+ .release = dbgaufs_xi_release,
4167+ .read = dbgaufs_xi_read
4168+};
4169+
4170+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4171+{
4172+ int err;
4173+
dece6358
AM
4174+ /*
4175+ * This function is a dynamic '__init' function actually,
4176+ * so the tiny check for si_rwsem is unnecessary.
4177+ */
4178+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4179+
1facf9fc 4180+ err = -EIO;
4181+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
4182+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4183+ &dbgaufs_xigen_fop);
4184+ if (sbinfo->si_dbgaufs_xigen)
4185+ err = 0;
4186+
4187+ return err;
4188+}
4189+#else
4190+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4191+{
4192+ return 0;
4193+}
4194+#endif /* CONFIG_AUFS_EXPORT */
4195+
4196+/* ---------------------------------------------------------------------- */
4197+
4198+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4199+{
dece6358
AM
4200+ /*
4201+ * This function is a dynamic '__init' function actually,
4202+ * so the tiny check for si_rwsem is unnecessary.
4203+ */
4204+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4205+
1facf9fc 4206+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
4207+ sbinfo->si_dbgaufs = NULL;
4208+ kobject_put(&sbinfo->si_kobj);
4209+}
4210+
4211+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4212+{
4213+ int err;
4214+ char name[SysaufsSiNameLen];
4215+
dece6358
AM
4216+ /*
4217+ * This function is a dynamic '__init' function actually,
4218+ * so the tiny check for si_rwsem is unnecessary.
4219+ */
4220+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4221+
1facf9fc 4222+ err = -ENOENT;
4223+ if (!dbgaufs) {
4224+ AuErr1("/debug/aufs is uninitialized\n");
4225+ goto out;
4226+ }
4227+
4228+ err = -EIO;
4229+ sysaufs_name(sbinfo, name);
4230+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4231+ if (unlikely(!sbinfo->si_dbgaufs))
4232+ goto out;
4233+ kobject_get(&sbinfo->si_kobj);
4234+
4235+ sbinfo->si_dbgaufs_xib = debugfs_create_file
4236+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4237+ &dbgaufs_xib_fop);
4238+ if (unlikely(!sbinfo->si_dbgaufs_xib))
4239+ goto out_dir;
4240+
4241+ err = dbgaufs_xigen_init(sbinfo);
4242+ if (!err)
4243+ goto out; /* success */
4244+
4245+ out_dir:
4246+ dbgaufs_si_fin(sbinfo);
4247+ out:
4248+ return err;
4249+}
4250+
4251+/* ---------------------------------------------------------------------- */
4252+
4253+void dbgaufs_fin(void)
4254+{
4255+ debugfs_remove(dbgaufs);
4256+}
4257+
4258+int __init dbgaufs_init(void)
4259+{
4260+ int err;
4261+
4262+ err = -EIO;
4263+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4264+ if (dbgaufs)
4265+ err = 0;
4266+ return err;
4267+}
1308ab2a 4268diff -uprN -x .git linux-2.6.31/fs/aufs/dbgaufs.h aufs2-2.6.git/fs/aufs/dbgaufs.h
4269--- linux-2.6.31/fs/aufs/dbgaufs.h 1970-01-01 00:00:00.000000000 +0000
4270+++ aufs2-2.6.git/fs/aufs/dbgaufs.h 2009-09-21 21:49:23.377863284 +0000
dece6358 4271@@ -0,0 +1,79 @@
1facf9fc 4272+/*
4273+ * Copyright (C) 2005-2009 Junjiro R. Okajima
4274+ *
4275+ * This program, aufs is free software; you can redistribute it and/or modify
4276+ * it under the terms of the GNU General Public License as published by
4277+ * the Free Software Foundation; either version 2 of the License, or
4278+ * (at your option) any later version.
dece6358
AM
4279+ *
4280+ * This program is distributed in the hope that it will be useful,
4281+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4282+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4283+ * GNU General Public License for more details.
4284+ *
4285+ * You should have received a copy of the GNU General Public License
4286+ * along with this program; if not, write to the Free Software
4287+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4288+ */
4289+
4290+/*
4291+ * debugfs interface
4292+ */
4293+
4294+#ifndef __DBGAUFS_H__
4295+#define __DBGAUFS_H__
4296+
4297+#ifdef __KERNEL__
4298+
dece6358 4299+#include <linux/init.h>
1facf9fc 4300+#include <linux/aufs_type.h>
4301+
dece6358 4302+struct super_block;
1facf9fc 4303+struct au_sbinfo;
dece6358 4304+
1facf9fc 4305+#ifdef CONFIG_DEBUG_FS
4306+/* dbgaufs.c */
4307+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4308+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4309+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4310+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4311+void dbgaufs_fin(void);
4312+int __init dbgaufs_init(void);
4313+
4314+#else
4315+
4316+static inline
4317+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4318+{
4319+ /* empty */
4320+}
4321+
4322+static inline
4323+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4324+{
4325+ /* empty */
4326+}
4327+
4328+static inline
4329+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4330+{
4331+ /* empty */
4332+}
4333+
4334+static inline
4335+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4336+{
4337+ return 0;
4338+}
4339+
4340+#define dbgaufs_fin() do {} while (0)
4341+
4342+static inline
4343+int __init dbgaufs_init(void)
4344+{
4345+ return 0;
4346+}
4347+#endif /* CONFIG_DEBUG_FS */
4348+
4349+#endif /* __KERNEL__ */
4350+#endif /* __DBGAUFS_H__ */
1308ab2a 4351diff -uprN -x .git linux-2.6.31/fs/aufs/dcsub.c aufs2-2.6.git/fs/aufs/dcsub.c
4352--- linux-2.6.31/fs/aufs/dcsub.c 1970-01-01 00:00:00.000000000 +0000
4353+++ aufs2-2.6.git/fs/aufs/dcsub.c 2009-09-21 21:49:23.377863284 +0000
dece6358 4354@@ -0,0 +1,223 @@
1facf9fc 4355+/*
4356+ * Copyright (C) 2005-2009 Junjiro R. Okajima
4357+ *
4358+ * This program, aufs is free software; you can redistribute it and/or modify
4359+ * it under the terms of the GNU General Public License as published by
4360+ * the Free Software Foundation; either version 2 of the License, or
4361+ * (at your option) any later version.
dece6358
AM
4362+ *
4363+ * This program is distributed in the hope that it will be useful,
4364+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4365+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4366+ * GNU General Public License for more details.
4367+ *
4368+ * You should have received a copy of the GNU General Public License
4369+ * along with this program; if not, write to the Free Software
4370+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4371+ */
4372+
4373+/*
4374+ * sub-routines for dentry cache
4375+ */
4376+
4377+#include "aufs.h"
4378+
4379+static void au_dpage_free(struct au_dpage *dpage)
4380+{
4381+ int i;
4382+ struct dentry **p;
4383+
4384+ p = dpage->dentries;
4385+ for (i = 0; i < dpage->ndentry; i++)
4386+ dput(*p++);
4387+ free_page((unsigned long)dpage->dentries);
4388+}
4389+
4390+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4391+{
4392+ int err;
4393+ void *p;
4394+
4395+ err = -ENOMEM;
4396+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4397+ if (unlikely(!dpages->dpages))
4398+ goto out;
4399+
4400+ p = (void *)__get_free_page(gfp);
4401+ if (unlikely(!p))
4402+ goto out_dpages;
4403+
4404+ dpages->dpages[0].ndentry = 0;
4405+ dpages->dpages[0].dentries = p;
4406+ dpages->ndpage = 1;
4407+ return 0; /* success */
4408+
4409+ out_dpages:
4410+ kfree(dpages->dpages);
4411+ out:
4412+ return err;
4413+}
4414+/* free every page in @dpages (dropping the dentry refs), then the array */
4415+void au_dpages_free(struct au_dcsub_pages *dpages)
4416+{
4417+ int i;
4418+ struct au_dpage *p;
4419+
4420+ p = dpages->dpages;
4421+ for (i = 0; i < dpages->ndpage; i++)
4422+ au_dpage_free(p++);
4423+ kfree(dpages->dpages);
4424+}
4425+/* store dget(@dentry) in the last page, adding a new page when it is full */
4426+static int au_dpages_append(struct au_dcsub_pages *dpages,
4427+ struct dentry *dentry, gfp_t gfp)
4428+{
4429+ int err, sz;
4430+ struct au_dpage *dpage;
4431+ void *p;
4432+
4433+ dpage = dpages->dpages + dpages->ndpage - 1;
4434+ sz = PAGE_SIZE / sizeof(dentry); /* dentry pointers per page */
4435+ if (unlikely(dpage->ndentry >= sz)) {
4436+ AuLabel(new dpage);
4437+ err = -ENOMEM;
4438+ sz = dpages->ndpage * sizeof(*dpages->dpages); /* current array bytes */
4439+ p = au_kzrealloc(dpages->dpages, sz,
4440+ sz + sizeof(*dpages->dpages), gfp);
4441+ if (unlikely(!p))
4442+ goto out;
4443+
4444+ dpages->dpages = p;
4445+ dpage = dpages->dpages + dpages->ndpage;
4446+ p = (void *)__get_free_page(gfp);
4447+ if (unlikely(!p))
4448+ goto out; /* the grown array stays; ndpage is not bumped */
4449+
4450+ dpage->ndentry = 0;
4451+ dpage->dentries = p;
4452+ dpages->ndpage++;
4453+ }
4454+
4455+ dpage->dentries[dpage->ndentry++] = dget(dentry);
4456+ return 0; /* success */
4457+
4458+ out:
4459+ return err;
4460+}
4461+/* under dcache_lock, walk @root's subtree and append matches to @dpages */
4462+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4463+ au_dpages_test test, void *arg)
4464+{
4465+ int err;
4466+ struct dentry *this_parent = root;
4467+ struct list_head *next;
4468+ struct super_block *sb = root->d_sb;
4469+
4470+ err = 0;
4471+ spin_lock(&dcache_lock); /* hence GFP_ATOMIC for the appends below */
4472+ repeat:
4473+ next = this_parent->d_subdirs.next;
4474+ resume: /* reached on descent and again when returning from a child */
4475+ if (this_parent->d_sb == sb
4476+ && !IS_ROOT(this_parent)
4477+ && atomic_read(&this_parent->d_count)
4478+ && this_parent->d_inode
4479+ && (!test || test(this_parent, arg))) { /* NULL @test accepts all */
4480+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4481+ if (unlikely(err))
4482+ goto out;
4483+ }
4484+
4485+ while (next != &this_parent->d_subdirs) {
4486+ struct list_head *tmp = next;
4487+ struct dentry *dentry = list_entry(tmp, struct dentry,
4488+ d_u.d_child);
4489+ next = tmp->next;
4490+ if (/*d_unhashed(dentry) || */!dentry->d_inode)
4491+ continue;
4492+ if (!list_empty(&dentry->d_subdirs)) {
4493+ this_parent = dentry; /* descend; it is tested at 'resume' */
4494+ goto repeat;
4495+ }
4496+ if (dentry->d_sb == sb
4497+ && atomic_read(&dentry->d_count)
4498+ && (!test || test(dentry, arg))) {
4499+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4500+ if (unlikely(err))
4501+ goto out;
4502+ }
4503+ }
4504+
4505+ if (this_parent != root) {
4506+ next = this_parent->d_u.d_child.next; /* continue with the next sibling */
4507+ this_parent = this_parent->d_parent; /* dcache_lock is locked */
4508+ goto resume; /* NOTE(review): re-tests the parent; may append it twice? */
4509+ }
4510+ out:
4511+ spin_unlock(&dcache_lock);
4512+ return err;
4513+}
4514+/* append @dentry (when @do_include) and its ancestors up to the IS_ROOT one */
4515+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4516+ int do_include, au_dpages_test test, void *arg)
4517+{
4518+ int err;
4519+
4520+ err = 0;
4521+ spin_lock(&dcache_lock); /* hence GFP_ATOMIC for the appends below */
4522+ if (do_include && (!test || test(dentry, arg))) { /* NULL @test accepts all */
4523+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4524+ if (unlikely(err))
4525+ goto out;
4526+ }
4527+ while (!IS_ROOT(dentry)) {
4528+ dentry = dentry->d_parent; /* dcache_lock is locked */
4529+ if (!test || test(dentry, arg)) {
4530+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4531+ if (unlikely(err))
4532+ break;
4533+ }
4534+ }
4535+
4536+ out:
4537+ spin_unlock(&dcache_lock);
4538+
4539+ return err;
4540+}
4541+/* search @d1 and its ancestors for @d2; NULL when @d2 is not among them */
4542+struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2)
4543+{
4544+ struct dentry *trap, **dentries;
4545+ int err, i, j;
4546+ struct au_dcsub_pages dpages;
4547+ struct au_dpage *dpage;
4548+
4549+ trap = ERR_PTR(-ENOMEM); /* returned if either setup step below fails */
4550+ err = au_dpages_init(&dpages, GFP_NOFS);
4551+ if (unlikely(err))
4552+ goto out;
4553+ err = au_dcsub_pages_rev(&dpages, d1, /*do_include*/1, NULL, NULL);
4554+ if (unlikely(err))
4555+ goto out_dpages;
4556+
4557+ trap = d1; /* returned as is when @d2 == @d1 */
4558+ for (i = 0; !err && i < dpages.ndpage; i++) {
4559+ dpage = dpages.dpages + i;
4560+ dentries = dpage->dentries;
4561+ for (j = 0; !err && j < dpage->ndentry; j++) {
4562+ struct dentry *d;
4563+
4564+ d = dentries[j];
4565+ err = (d == d2); /* err doubles as the "found" flag here */
4566+ if (!err)
4567+ trap = d; /* last entry seen before reaching @d2 */
4568+ }
4569+ }
4570+ if (!err)
4571+ trap = NULL;
4572+
4573+ out_dpages:
4574+ au_dpages_free(&dpages);
4575+ out:
4576+ return trap;
4577+}
1308ab2a 4578diff -uprN -x .git linux-2.6.31/fs/aufs/dcsub.h aufs2-2.6.git/fs/aufs/dcsub.h
4579--- linux-2.6.31/fs/aufs/dcsub.h 1970-01-01 00:00:00.000000000 +0000
4580+++ aufs2-2.6.git/fs/aufs/dcsub.h 2009-09-21 21:49:23.377863284 +0000
dece6358 4581@@ -0,0 +1,54 @@
1facf9fc 4582+/*
4583+ * Copyright (C) 2005-2009 Junjiro R. Okajima
4584+ *
4585+ * This program, aufs is free software; you can redistribute it and/or modify
4586+ * it under the terms of the GNU General Public License as published by
4587+ * the Free Software Foundation; either version 2 of the License, or
4588+ * (at your option) any later version.
dece6358
AM
4589+ *
4590+ * This program is distributed in the hope that it will be useful,
4591+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4592+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4593+ * GNU General Public License for more details.
4594+ *
4595+ * You should have received a copy of the GNU General Public License
4596+ * along with this program; if not, write to the Free Software
4597+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4598+ */
4599+
4600+/*
4601+ * sub-routines for dentry cache
4602+ */
4603+
4604+#ifndef __AUFS_DCSUB_H__
4605+#define __AUFS_DCSUB_H__
4606+
4607+#ifdef __KERNEL__
4608+
dece6358
AM
4609+#include <linux/types.h>
4610+
4611+struct dentry;
1facf9fc 4612+/* one page of dentry pointers */
4613+struct au_dpage {
4614+ int ndentry; /* number of used entries in dentries */
4615+ struct dentry **dentries; /* array kept in a single page */
4616+};
4617+/* growable array of au_dpage */
4618+struct au_dcsub_pages {
4619+ int ndpage; /* number of elements in dpages */
4620+ struct au_dpage *dpages;
4621+};
4622+
4623+/* ---------------------------------------------------------------------- */
4624+/* an au_dpages_test returns non-zero to accept a dentry; NULL accepts all */
4625+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
4626+void au_dpages_free(struct au_dcsub_pages *dpages);
4627+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
4628+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4629+ au_dpages_test test, void *arg);
4630+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4631+ int do_include, au_dpages_test test, void *arg);
4632+struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2);
4633+
4634+#endif /* __KERNEL__ */
4635+#endif /* __AUFS_DCSUB_H__ */
1308ab2a 4636diff -uprN -x .git linux-2.6.31/fs/aufs/debug.c aufs2-2.6.git/fs/aufs/debug.c
4637--- linux-2.6.31/fs/aufs/debug.c 1970-01-01 00:00:00.000000000 +0000
4638+++ aufs2-2.6.git/fs/aufs/debug.c 2009-09-21 21:49:23.377863284 +0000
4639@@ -0,0 +1,431 @@
1facf9fc 4640+/*
4641+ * Copyright (C) 2005-2009 Junjiro R. Okajima
4642+ *
4643+ * This program, aufs is free software; you can redistribute it and/or modify
4644+ * it under the terms of the GNU General Public License as published by
4645+ * the Free Software Foundation; either version 2 of the License, or
4646+ * (at your option) any later version.
dece6358
AM
4647+ *
4648+ * This program is distributed in the hope that it will be useful,
4649+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4650+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4651+ * GNU General Public License for more details.
4652+ *
4653+ * You should have received a copy of the GNU General Public License
4654+ * along with this program; if not, write to the Free Software
4655+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 4656+ */
4657+
4658+/*
4659+ * debug print functions
4660+ */
4661+
dece6358
AM
4662+#include <linux/module.h>
4663+#include <linux/vt_kern.h>
1facf9fc 4664+#include "aufs.h"
4665+/* non-zero enables dpri(); writable as the module parameter "debug" */
4666+int aufs_debug;
4667+MODULE_PARM_DESC(debug, "debug print");
4668+module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
4669+/* dpri(): printk() at level au_plevel, only while au_debug_test() is true */
4670+char *au_plevel = KERN_DEBUG;
4671+#define dpri(fmt, arg...) do { \
4672+ if (au_debug_test()) \
4673+ printk("%s" fmt, au_plevel, ##arg); \
4674+} while (0)
4675+
4676+/* ---------------------------------------------------------------------- */
4677+/* print bindex, name and name length of every entry hashed in @whlist */
4678+void au_dpri_whlist(struct au_nhash *whlist)
4679+{
4680+ unsigned long ul, n;
4681+ struct hlist_head *head;
4682+ struct au_vdir_wh *tpos;
4683+ struct hlist_node *pos;
4684+
4685+ n = whlist->nh_num;
4686+ head = whlist->nh_head;
4687+ for (ul = 0; ul < n; ul++) { /* one pass per hash head */
4688+ hlist_for_each_entry(tpos, pos, head, wh_hash)
4689+ dpri("b%d, %.*s, %d\n",
4690+ tpos->wh_bindex,
4691+ tpos->wh_str.len, tpos->wh_str.name,
4692+ tpos->wh_str.len);
4693+ head++;
4694+ }
4695+}
4696+/* print a summary of @vdir followed by the address of each deblk */
4697+void au_dpri_vdir(struct au_vdir *vdir)
4698+{
4699+ unsigned long ul;
4700+ union au_vdir_deblk_p p;
4701+ unsigned char *o;
4702+
4703+ if (!vdir || IS_ERR(vdir)) { /* a NULL @vdir is printed as "err 0" */
4704+ dpri("err %ld\n", PTR_ERR(vdir));
4705+ return;
4706+ }
4707+
4708+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
4709+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
4710+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
4711+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
4712+ p.deblk = vdir->vd_deblk[ul];
4713+ o = p.deblk;
4714+ dpri("[%lu]: %p\n", ul, o);
4715+ }
4716+}
4717+/* print one line describing @inode; returns -1 for a NULL/error pointer */
4718+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode,
4719+ struct dentry *wh)
4720+{
4721+ char *n = NULL;
4722+ int l = 0;
4723+
4724+ if (!inode || IS_ERR(inode)) {
4725+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
4726+ return -1;
4727+ }
4728+
4729+ /* the type of i_blocks depends upon CONFIG_LSF */
4730+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
4731+ && sizeof(inode->i_blocks) != sizeof(u64));
4732+ if (wh) { /* remember @wh's name for the tail of the line */
4733+ n = (void *)wh->d_name.name;
4734+ l = wh->d_name.len;
4735+ }
4736+
4737+ dpri("i%d: i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
4738+ " ct %lld, np %lu, st 0x%lx, f 0x%x, g %x%s%.*s\n",
4739+ bindex,
4740+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
4741+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
4742+ i_size_read(inode), (unsigned long long)inode->i_blocks,
4743+ (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
4744+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
4745+ inode->i_state, inode->i_flags, inode->i_generation,
4746+ l ? ", wh " : "", l, n);
4747+ return 0;
4748+}
4749+/* print @inode; for an aufs inode also print ii_hinode[bstart..bend] */
4750+void au_dpri_inode(struct inode *inode)
4751+{
4752+ struct au_iinfo *iinfo;
4753+ aufs_bindex_t bindex;
4754+ int err;
4755+
4756+ err = do_pri_inode(-1, inode, NULL); /* -1: the inode itself, no index */
4757+ if (err || !au_test_aufs(inode->i_sb))
4758+ return;
4759+
4760+ iinfo = au_ii(inode);
4761+ if (!iinfo)
4762+ return;
4763+ dpri("i-1: bstart %d, bend %d, gen %d\n",
4764+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
4765+ if (iinfo->ii_bstart < 0)
4766+ return;
4767+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++)
4768+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode,
4769+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
4770+}
4771+/* print one line describing @dentry, then its inode; -1 for NULL/error */
4772+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
4773+{
4774+ struct dentry *wh = NULL;
4775+
4776+ if (!dentry || IS_ERR(dentry)) {
4777+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
4778+ return -1;
4779+ }
4780+ /* do not call dget_parent() here */
4781+ dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
4782+ bindex,
4783+ AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
4784+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
4785+ atomic_read(&dentry->d_count), dentry->d_flags);
4786+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
4787+ struct au_iinfo *iinfo = au_ii(dentry->d_inode);
4788+ if (iinfo) /* pick hi_whdentry so that do_pri_inode() prints its name */
4789+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
4790+ }
4791+ do_pri_inode(bindex, dentry->d_inode, wh);
4792+ return 0;
4793+}
4794+/* print @dentry; for an aufs dentry also print di_hdentry[bstart..bend] */
4795+void au_dpri_dentry(struct dentry *dentry)
4796+{
4797+ struct au_dinfo *dinfo;
4798+ aufs_bindex_t bindex;
4799+ int err;
4800+
4801+ err = do_pri_dentry(-1, dentry); /* -1: the dentry itself, no index */
4802+ if (err || !au_test_aufs(dentry->d_sb))
4803+ return;
4804+
4805+ dinfo = au_di(dentry);
4806+ if (!dinfo)
4807+ return;
4808+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
4809+ dinfo->di_bstart, dinfo->di_bend,
4810+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
4811+ if (dinfo->di_bstart < 0)
4812+ return;
4813+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4814+ do_pri_dentry(bindex, dinfo->di_hdentry[0 + bindex].hd_dentry);
4815+}
4816+/* print one line describing @file, then its dentry; -1 for NULL/error */
4817+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
4818+{
4819+ char a[32];
4820+
4821+ if (!file || IS_ERR(file)) {
4822+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
4823+ return -1;
4824+ }
4825+ a[0] = 0; /* empty suffix unless the mmapped state is added below */
4826+ if (bindex < 0
4827+ && file->f_dentry
4828+ && au_test_aufs(file->f_dentry->d_sb)
4829+ && au_fi(file))
4830+ snprintf(a, sizeof(a), ", mmapped %d", au_test_mmapped(file));
4831+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, pos %llu%s\n",
4832+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
4833+ file->f_pos, a);
4834+ if (file->f_dentry)
4835+ do_pri_dentry(bindex, file->f_dentry);
4836+ return 0;
4837+}
4838+/* print @file; for an aufs file also print fi_hfile[bstart..bend] */
4839+void au_dpri_file(struct file *file)
4840+{
4841+ struct au_finfo *finfo;
4842+ aufs_bindex_t bindex;
4843+ int err;
4844+
4845+ err = do_pri_file(-1, file); /* -1: the file itself, no index */
4846+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
4847+ return;
4848+
4849+ finfo = au_fi(file);
4850+ if (!finfo)
4851+ return;
4852+ if (finfo->fi_bstart < 0)
4853+ return;
4854+ for (bindex = finfo->fi_bstart; bindex <= finfo->fi_bend; bindex++) {
4855+ struct au_hfile *hf;
4856+
4857+ hf = finfo->fi_hfile + bindex;
4858+ do_pri_file(bindex, hf ? hf->hf_file : NULL);
4859+ }
4860+}
4861+/* print one line about @br and its super_block; -1 if br/mnt/sb is bad */
4862+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
4863+{
4864+ struct vfsmount *mnt;
4865+ struct super_block *sb;
4866+
4867+ if (!br || IS_ERR(br))
4868+ goto out;
4869+ mnt = br->br_mnt;
4870+ if (!mnt || IS_ERR(mnt))
4871+ goto out;
4872+ sb = mnt->mnt_sb;
4873+ if (!sb || IS_ERR(sb))
4874+ goto out;
4875+
4876+ dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
4877+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt(BIAS) %d, active %d, "
4878+ "xino %d\n",
4879+ bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
4880+ au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
4881+ sb->s_flags, sb->s_count - S_BIAS,
4882+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
4883+ return 0;
4884+
4885+ out:
4886+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); /* always reports @br itself */
4887+ return -1;
4888+}
4889+/* print @sb via a temporary fake branch, then each branch of an aufs sb */
4890+void au_dpri_sb(struct super_block *sb)
4891+{
4892+ struct au_sbinfo *sbinfo;
4893+ aufs_bindex_t bindex;
4894+ int err;
4895+ /* to reduce stack size */
4896+ struct {
4897+ struct vfsmount mnt;
4898+ struct au_branch fake;
4899+ } *a;
4900+
4901+ /* this function can be called from magic sysrq */
4902+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
4903+ if (unlikely(!a)) {
4904+ dpri("no memory\n");
4905+ return;
4906+ }
4907+
4908+ a->mnt.mnt_sb = sb; /* fake branch whose mount points at @sb */
4909+ a->fake.br_perm = 0;
4910+ a->fake.br_mnt = &a->mnt;
4911+ a->fake.br_xino.xi_file = NULL;
4912+ atomic_set(&a->fake.br_count, 0);
4913+ smp_mb(); /* atomic_set */
4914+ err = do_pri_br(-1, &a->fake);
4915+ kfree(a);
4916+ dpri("dev 0x%x\n", sb->s_dev);
4917+ if (err || !au_test_aufs(sb))
4918+ return;
4919+
4920+ sbinfo = au_sbi(sb);
4921+ if (!sbinfo)
4922+ return;
4923+ dpri("nw %d, gen %u, kobj %d\n",
4924+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
4925+ atomic_read(&sbinfo->si_kobj.kref.refcount));
4926+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
4927+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
4928+}
4929+
4930+/* ---------------------------------------------------------------------- */
4931+/* sleep again until schedule_timeout_uninterruptible() returns zero */
4932+void au_dbg_sleep_jiffy(int jiffy)
4933+{
4934+ while (jiffy)
4935+ jiffy = schedule_timeout_uninterruptible(jiffy);
4936+}
4937+/* print the name of every ATTR_* bit set in ia->ia_valid, then ia_file */
4938+void au_dbg_iattr(struct iattr *ia)
4939+{
4940+#define AuBit(name) if (ia->ia_valid & ATTR_ ## name) \
4941+ dpri(#name "\n")
4942+ AuBit(MODE);
4943+ AuBit(UID);
4944+ AuBit(GID);
4945+ AuBit(SIZE);
4946+ AuBit(ATIME);
4947+ AuBit(MTIME);
4948+ AuBit(CTIME);
4949+ AuBit(ATIME_SET);
4950+ AuBit(MTIME_SET);
4951+ AuBit(FORCE);
4952+ AuBit(ATTR_FLAG);
4953+ AuBit(KILL_SUID);
4954+ AuBit(KILL_SGID);
4955+ AuBit(FILE);
4956+ AuBit(KILL_PRIV);
4957+ AuBit(OPEN);
4958+ AuBit(TIMES_SET);
4959+#undef AuBit
4960+ dpri("ia_file %p\n", ia->ia_file);
4961+}
4962+
4963+/* ---------------------------------------------------------------------- */
4964+/* assert: @dentry is a non-root directory and au_digen(parent) == @sigen */
4965+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
4966+{
4967+ struct dentry *parent;
4968+
4969+ parent = dget_parent(dentry);
4970+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)
4971+ || IS_ROOT(dentry)
4972+ || au_digen(parent) != sigen);
4973+ dput(parent);
4974+}
4975+/* assert: @dentry is not a directory and au_digen(parent) == @sigen */
4976+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
4977+{
4978+ struct dentry *parent;
4979+
4980+ parent = dget_parent(dentry);
4981+ AuDebugOn(S_ISDIR(dentry->d_inode->i_mode)
4982+ || au_digen(parent) != sigen);
4983+ dput(parent);
4984+}
4985+/* assert: au_digen() of @parent and of each of its ancestors is @sigen */
4986+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
4987+{
4988+ int err, i, j;
4989+ struct au_dcsub_pages dpages;
4990+ struct au_dpage *dpage;
4991+ struct dentry **dentries;
4992+
4993+ err = au_dpages_init(&dpages, GFP_NOFS);
4994+ AuDebugOn(err);
4995+ err = au_dcsub_pages_rev(&dpages, parent, /*do_include*/1, NULL, NULL);
4996+ AuDebugOn(err);
4997+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* last page first */
4998+ dpage = dpages.dpages + i;
4999+ dentries = dpage->dentries;
5000+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
5001+ AuDebugOn(au_digen(dentries[j]) != sigen);
5002+ }
5003+ au_dpages_free(&dpages);
5004+}
5005+/* assert: no hf_file/hf_br is left set in fi_hfile[fi_bstart..fi_bend] */
5006+void au_dbg_verify_hf(struct au_finfo *finfo)
5007+{
5008+ struct au_hfile *hf;
5009+ aufs_bindex_t bend, bindex;
5010+
5011+ if (finfo->fi_bstart >= 0) {
5012+ bend = finfo->fi_bend;
5013+ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++) {
5014+ hf = finfo->fi_hfile + bindex;
5015+ AuDebugOn(hf->hf_file || hf->hf_br);
5016+ }
5017+ }
5018+}
5019+/* au_dbg_blocked() and BUG() when au_test_wkq(current) is true */
5020+void au_dbg_verify_kthread(void)
5021+{
5022+ if (au_test_wkq(current)) {
5023+ au_dbg_blocked();
5024+ BUG();
5025+ }
5026+}
5027+
5028+/* ---------------------------------------------------------------------- */
5029+/* apply the compile-time AuForce* overrides to @sbinfo */
5030+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
5031+{
5032+#ifdef AuForceNoPlink
5033+ au_opt_clr(sbinfo->si_mntflags, PLINK);
5034+#endif
5035+#ifdef AuForceNoXino
5036+ au_opt_clr(sbinfo->si_mntflags, XINO);
5037+#endif
5038+#ifdef AuForceNoRefrof
5039+ au_opt_clr(sbinfo->si_mntflags, REFROF);
5040+#endif
5041+#ifdef AuForceHinotify
5042+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HINOTIFY);
5043+#endif
1308ab2a 5044+#ifdef AuForceRd0
5045+ sbinfo->si_rdblk = 0;
5046+ sbinfo->si_rdhash = 0;
5047+#endif
1facf9fc 5048+}
5049+/* check two type assumptions, warn on 4K stacks, apply AuForceNoBrs; 0 */
5050+int __init au_debug_init(void)
5051+{
5052+ aufs_bindex_t bindex;
5053+ struct au_vdir_destr destr;
5054+
5055+ bindex = -1;
5056+ AuDebugOn(bindex >= 0); /* aufs_bindex_t must be a signed type */
5057+
5058+ destr.len = -1;
5059+ AuDebugOn(destr.len < NAME_MAX); /* len's max value must reach NAME_MAX */
5060+
5061+#ifdef CONFIG_4KSTACKS
5062+ AuWarn("CONFIG_4KSTACKS is defined.\n");
5063+#endif
5064+
5065+#ifdef AuForceNoBrs
5066+ sysaufs_brs = 0;
5067+#endif
5068+
5069+ return 0;
5070+}
1308ab2a 5071diff -uprN -x .git linux-2.6.31/fs/aufs/debug.h aufs2-2.6.git/fs/aufs/debug.h
5072--- linux-2.6.31/fs/aufs/debug.h 1970-01-01 00:00:00.000000000 +0000
5073+++ aufs2-2.6.git/fs/aufs/debug.h 2009-09-21 21:49:23.377863284 +0000
5074@@ -0,0 +1,261 @@
1facf9fc 5075+/*
5076+ * Copyright (C) 2005-2009 Junjiro R. Okajima
5077+ *
5078+ * This program, aufs is free software; you can redistribute it and/or modify
5079+ * it under the terms of the GNU General Public License as published by
5080+ * the Free Software Foundation; either version 2 of the License, or
5081+ * (at your option) any later version.
dece6358
AM
5082+ *
5083+ * This program is distributed in the hope that it will be useful,
5084+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5085+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5086+ * GNU General Public License for more details.
5087+ *
5088+ * You should have received a copy of the GNU General Public License
5089+ * along with this program; if not, write to the Free Software
5090+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5091+ */
5092+
5093+/*
5094+ * debug print functions
5095+ */
5096+
5097+#ifndef __AUFS_DEBUG_H__
5098+#define __AUFS_DEBUG_H__
5099+
5100+#ifdef __KERNEL__
5101+
1308ab2a 5102+#include <asm/system.h>
dece6358
AM
5103+#include <linux/bug.h>
5104+/* #include <linux/err.h> */
1308ab2a 5105+#include <linux/init.h>
dece6358 5106+/* #include <linux/kernel.h> */
1facf9fc 5107+#include <linux/delay.h>
dece6358
AM
5108+/* #include <linux/kd.h> */
5109+/* #include <linux/vt_kern.h> */
1facf9fc 5110+#include <linux/sysrq.h>
5111+#include <linux/aufs_type.h>
5112+
5113+#ifdef CONFIG_AUFS_DEBUG
5114+#define AuDebugOn(a) BUG_ON(a)
5115+/* au_debug(n) stores the debug level; au_debug_test() reads it back */
5116+/* module parameter */
5117+extern int aufs_debug;
5118+static inline void au_debug(int n)
5119+{
5120+ aufs_debug = n;
5121+ smp_mb();
5122+}
5123+
5124+static inline int au_debug_test(void)
5125+{
5126+ return aufs_debug;
5127+}
5128+#else
5129+#define AuDebugOn(a) do {} while (0)
5130+#define au_debug(n) do {} while (0) /* takes (and ignores) the level, as above */
5131+static inline int au_debug_test(void)
5132+{
5133+ return 0;
5134+}
5135+#endif /* CONFIG_AUFS_DEBUG */
5136+
5137+/* ---------------------------------------------------------------------- */
5138+
5139+/* debug print */
5140+/* AuDpri() tags each message with function, line, comm and pid */
5141+#define AuDpri(lvl, fmt, arg...) \
5142+ printk(lvl AUFS_NAME " %s:%d:%s[%d]: " fmt, \
5143+ __func__, __LINE__, current->comm, current->pid, ##arg)
5144+#define AuDbg(fmt, arg...) do { \
5145+ if (au_debug_test()) \
1308ab2a 5146+ AuDpri(KERN_DEBUG, "DEBUG: " fmt, ##arg); \
1facf9fc 5147+} while (0)
5148+#define AuLabel(l) AuDbg(#l "\n")
5149+#define AuInfo(fmt, arg...) AuDpri(KERN_INFO, fmt, ##arg)
5150+#define AuWarn(fmt, arg...) AuDpri(KERN_WARNING, fmt, ##arg)
5151+#define AuErr(fmt, arg...) AuDpri(KERN_ERR, fmt, ##arg)
5152+#define AuIOErr(fmt, arg...) AuErr("I/O Error, " fmt, ##arg)
5153+#define AuWarn1(fmt, arg...) do { \
5154+ static unsigned char _c; \
5155+ if (!_c++) \
5156+ AuWarn(fmt, ##arg); \
5157+} while (0)
5158+/* Au*1(): print only while the per-call-site counter _c is zero */
5159+#define AuErr1(fmt, arg...) do { \
5160+ static unsigned char _c; \
5161+ if (!_c++) \
5162+ AuErr(fmt, ##arg); \
5163+} while (0)
5164+/* note: _c is an unsigned char and is incremented on every call */
5165+#define AuIOErr1(fmt, arg...) do { \
5166+ static unsigned char _c; \
5167+ if (!_c++) \
5168+ AuIOErr(fmt, ##arg); \
5169+} while (0)
5170+/* AuUnsupport(): error message with the fixed text below, plus a stack dump */
5171+#define AuUnsupportMsg "This operation is not supported." \
5172+ " Please report this application to aufs-users ML."
5173+#define AuUnsupport(fmt, args...) do { \
5174+ AuErr(AuUnsupportMsg "\n" fmt, ##args); \
5175+ dump_stack(); \
5176+} while (0)
5177+/* AuTraceErr()/AuTraceErrPtr(): AuDbg() a negative errno / an ERR_PTR value */
5178+#define AuTraceErr(e) do { \
5179+ if (unlikely((e) < 0)) \
5180+ AuDbg("err %d\n", (int)(e)); \
5181+} while (0)
5182+
5183+#define AuTraceErrPtr(p) do { \
5184+ if (IS_ERR(p)) \
5185+ AuDbg("err %ld\n", PTR_ERR(p)); \
5186+} while (0)
5187+
5188+/* dirty macros for debug print, use with "%.*s" and caution */
5189+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
5190+#define AuDLNPair(d) AuLNPair(&(d)->d_name)
5191+
5192+/* ---------------------------------------------------------------------- */
5193+/* with CONFIG_AUFS_DEBUG: real printers and checks; otherwise empty stubs */
5194+struct au_sbinfo;
5195+struct au_finfo;
dece6358 5196+struct dentry;
1facf9fc 5197+#ifdef CONFIG_AUFS_DEBUG
5198+extern char *au_plevel;
5199+struct au_nhash;
5200+void au_dpri_whlist(struct au_nhash *whlist);
5201+struct au_vdir;
5202+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 5203+struct inode;
1facf9fc 5204+void au_dpri_inode(struct inode *inode);
5205+void au_dpri_dentry(struct dentry *dentry);
dece6358 5206+struct file;
1facf9fc 5207+void au_dpri_file(struct file *filp);
dece6358 5208+struct super_block;
1facf9fc 5209+void au_dpri_sb(struct super_block *sb);
5210+
5211+void au_dbg_sleep_jiffy(int jiffy);
dece6358 5212+struct iattr;
1facf9fc 5213+void au_dbg_iattr(struct iattr *ia);
5214+/* consistency checks; a failure is reported through AuDebugOn() or BUG() */
5215+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5216+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5217+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
5218+void au_dbg_verify_hf(struct au_finfo *finfo);
5219+void au_dbg_verify_kthread(void);
5220+
5221+int __init au_debug_init(void);
5222+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5223+#define AuDbgWhlist(w) do { \
5224+ AuDbg(#w "\n"); \
5225+ au_dpri_whlist(w); \
5226+} while (0)
5227+
5228+#define AuDbgVdir(v) do { \
5229+ AuDbg(#v "\n"); \
5230+ au_dpri_vdir(v); \
5231+} while (0)
5232+
5233+#define AuDbgInode(i) do { \
5234+ AuDbg(#i "\n"); \
5235+ au_dpri_inode(i); \
5236+} while (0)
5237+
5238+#define AuDbgDentry(d) do { \
5239+ AuDbg(#d "\n"); \
5240+ au_dpri_dentry(d); \
5241+} while (0)
5242+
5243+#define AuDbgFile(f) do { \
5244+ AuDbg(#f "\n"); \
5245+ au_dpri_file(f); \
5246+} while (0)
5247+
5248+#define AuDbgSb(sb) do { \
5249+ AuDbg(#sb "\n"); \
5250+ au_dpri_sb(sb); \
5251+} while (0)
5252+
5253+#define AuDbgSleep(sec) do { \
5254+ AuDbg("sleep %d sec\n", sec); \
5255+ ssleep(sec); \
5256+} while (0)
5257+
5258+#define AuDbgSleepJiffy(jiffy) do { \
5259+ AuDbg("sleep %d jiffies\n", jiffy); \
5260+ au_dbg_sleep_jiffy(jiffy); \
5261+} while (0)
5262+
5263+#define AuDbgIAttr(ia) do { \
5264+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5265+ au_dbg_iattr(ia); \
5266+} while (0)
5267+#else /* !CONFIG_AUFS_DEBUG: everything below does nothing */
5268+static inline void au_dbg_verify_dir_parent(struct dentry *dentry,
5269+ unsigned int sigen)
5270+{
5271+ /* empty */
5272+}
5273+static inline void au_dbg_verify_nondir_parent(struct dentry *dentry,
dece6358 5274+ unsigned int sigen)
1facf9fc 5275+{
5276+ /* empty */
5277+}
5278+static inline void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5279+{
5280+ /* empty */
5281+}
5282+static inline void au_dbg_verify_hf(struct au_finfo *finfo)
5283+{
5284+ /* empty */
5285+}
5286+static inline void au_dbg_verify_kthread(void)
5287+{
5288+ /* empty */
5289+}
5290+
5291+static inline int au_debug_init(void)
5292+{
5293+ return 0;
5294+}
5295+static inline void au_debug_sbinfo_init(struct au_sbinfo *sbinfo)
5296+{
5297+ /* empty */
5298+}
5299+#define AuDbgWhlist(w) do {} while (0)
5300+#define AuDbgVdir(v) do {} while (0)
5301+#define AuDbgInode(i) do {} while (0)
5302+#define AuDbgDentry(d) do {} while (0)
5303+#define AuDbgFile(f) do {} while (0)
5304+#define AuDbgSb(sb) do {} while (0)
5305+#define AuDbgSleep(sec) do {} while (0)
5306+#define AuDbgSleepJiffy(jiffy) do {} while (0)
5307+#define AuDbgIAttr(ia) do {} while (0)
5308+#endif /* CONFIG_AUFS_DEBUG */
5309+
5310+/* ---------------------------------------------------------------------- */
5311+/* magic sysrq hooks; au_dbg_blocked() also needs CONFIG_HW_CONSOLE */
5312+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5313+int __init au_sysrq_init(void);
5314+void au_sysrq_fin(void);
5315+
5316+#ifdef CONFIG_HW_CONSOLE
5317+#define au_dbg_blocked() do { \
5318+ WARN_ON(1); \
5319+ handle_sysrq('w', vc_cons[fg_console].d->vc_tty); \
5320+} while (0)
5321+#else
5322+#define au_dbg_blocked() do {} while (0)
5323+#endif
5324+
5325+#else /* !CONFIG_AUFS_MAGIC_SYSRQ: no-op stubs */
5326+static inline int au_sysrq_init(void)
5327+{
5328+ return 0;
5329+}
5330+#define au_sysrq_fin() do {} while (0)
5331+#define au_dbg_blocked() do {} while (0)
5332+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5333+
5334+#endif /* __KERNEL__ */
5335+#endif /* __AUFS_DEBUG_H__ */
1308ab2a 5336diff -uprN -x .git linux-2.6.31/fs/aufs/dentry.c aufs2-2.6.git/fs/aufs/dentry.c
5337--- linux-2.6.31/fs/aufs/dentry.c 1970-01-01 00:00:00.000000000 +0000
5338+++ aufs2-2.6.git/fs/aufs/dentry.c 2009-09-21 21:49:23.377863284 +0000
5339@@ -0,0 +1,879 @@
1facf9fc 5340+/*
5341+ * Copyright (C) 2005-2009 Junjiro R. Okajima
5342+ *
5343+ * This program, aufs is free software; you can redistribute it and/or modify
5344+ * it under the terms of the GNU General Public License as published by
5345+ * the Free Software Foundation; either version 2 of the License, or
5346+ * (at your option) any later version.
dece6358
AM
5347+ *
5348+ * This program is distributed in the hope that it will be useful,
5349+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5350+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5351+ * GNU General Public License for more details.
5352+ *
5353+ * You should have received a copy of the GNU General Public License
5354+ * along with this program; if not, write to the Free Software
5355+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 5356+ */
5357+
5358+/*
5359+ * lookup and dentry operations
5360+ */
5361+
dece6358 5362+#include <linux/namei.h>
1facf9fc 5363+#include "aufs.h"
5364+/* copy @nd into @h_nd with OPEN/CREATE/FOLLOW cleared, or zero @h_nd */
5365+static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5366+{
5367+ if (nd) {
5368+ *h_nd = *nd;
5369+
5370+ /*
5371+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5372+ * due to whiteout and branch permission.
5373+ */
5374+ h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
5375+ | LOOKUP_FOLLOW);
5376+ /* unnecessary? */
5377+ h_nd->intent.open.file = NULL;
5378+ } else
5379+ memset(h_nd, 0, sizeof(*h_nd));
5380+}
5381+/* arguments of au_lkup_one() plus @errp for its result */
5382+struct au_lkup_one_args {
5383+ struct dentry **errp;
5384+ struct qstr *name;
5385+ struct dentry *h_parent;
5386+ struct au_branch *br;
5387+ struct nameidata *nd;
5388+};
5389+/* look up @name under @h_parent on branch @br; a dentry or an ERR_PTR */
5390+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5391+ struct au_branch *br, struct nameidata *nd)
5392+{
5393+ struct dentry *h_dentry;
5394+ int err;
5395+ struct nameidata h_nd;
5396+
5397+ if (au_test_fs_null_nd(h_parent->d_sb)) /* plain lookup, @nd is not used */
5398+ return vfsub_lookup_one_len(name->name, h_parent, name->len);
5399+
5400+ au_h_nd(&h_nd, nd);
5401+ h_nd.path.dentry = h_parent;
5402+ h_nd.path.mnt = br->br_mnt;
5403+
5404+ err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
5405+ h_dentry = ERR_PTR(err);
5406+ if (!err) {
5407+ path_get(&h_nd.path); /* hold the path across the lookup */
5408+ h_dentry = vfsub_lookup_hash(&h_nd);
5409+ path_put(&h_nd.path);
5410+ }
5411+
5412+ return h_dentry;
5413+}
5414+/* void * adapter: run au_lkup_one() and store the result in *a->errp */
5415+static void au_call_lkup_one(void *args)
5416+{
5417+ struct au_lkup_one_args *a = args;
5418+ *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5419+}
5420+/* bit flags for au_do_lookup_args.flags, with test/set/clear helpers */
5421+#define AuLkup_ALLOW_NEG 1
5422+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
5423+#define au_fset_lkup(flags, name) { (flags) |= AuLkup_##name; }
5424+#define au_fclr_lkup(flags, name) { (flags) &= ~AuLkup_##name; }
5425+/* options for au_do_lookup() */
5426+struct au_do_lookup_args {
5427+ unsigned int flags; /* AuLkup_* bits */
5428+ mode_t type; /* when non-zero, the S_IFMT type the found inode must have */
5429+ struct nameidata *nd;
5430+};
5431+/* look up @dentry's name under @h_parent on branch @bindex */
5432+/*
5433+ * returns positive/negative dentry, NULL or an error.
5434+ * NULL means whiteout-ed or not-found.
5435+ */
5436+static struct dentry*
5437+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
5438+ aufs_bindex_t bindex, struct qstr *wh_name,
5439+ struct au_do_lookup_args *args)
5440+{
5441+ struct dentry *h_dentry;
5442+ struct inode *h_inode, *inode;
5443+ struct qstr *name;
5444+ struct au_branch *br;
5445+ int wh_found, opq;
5446+ unsigned char wh_able;
5447+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
5448+
5449+ name = &dentry->d_name;
5450+ wh_found = 0;
5451+ br = au_sbr(dentry->d_sb, bindex);
5452+ wh_able = !!au_br_whable(br->br_perm);
5453+ if (wh_able) /* the whiteout test is made only on such a branch */
5454+ wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
5455+ h_dentry = ERR_PTR(wh_found);
5456+ if (!wh_found)
5457+ goto real_lookup;
5458+ if (unlikely(wh_found < 0)) /* error from au_wh_test() */
5459+ goto out;
5460+
5461+ /* We found a whiteout */
5462+ /* au_set_dbend(dentry, bindex); */
5463+ au_set_dbwh(dentry, bindex);
5464+ if (!allow_neg)
5465+ return NULL; /* success */
5466+
5467+ real_lookup:
5468+ h_dentry = au_lkup_one(name, h_parent, br, args->nd);
5469+ if (IS_ERR(h_dentry))
5470+ goto out;
5471+
5472+ h_inode = h_dentry->d_inode;
5473+ if (!h_inode) {
5474+ if (!allow_neg)
5475+ goto out_neg;
5476+ } else if (wh_found /* found, but whiteout-ed or of the wrong type */
5477+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
5478+ goto out_neg;
5479+
5480+ if (au_dbend(dentry) <= bindex)
5481+ au_set_dbend(dentry, bindex);
5482+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
5483+ au_set_dbstart(dentry, bindex);
5484+ au_set_h_dptr(dentry, bindex, h_dentry);
5485+
5486+ inode = dentry->d_inode;
5487+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
5488+ || (inode && !S_ISDIR(inode->i_mode)))
5489+ goto out; /* success */
5490+
5491+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
5492+ opq = au_diropq_test(h_dentry, br);
5493+ mutex_unlock(&h_inode->i_mutex);
5494+ if (opq > 0)
5495+ au_set_dbdiropq(dentry, bindex);
5496+ else if (unlikely(opq < 0)) { /* error: unset the h_dentry stored above */
5497+ au_set_h_dptr(dentry, bindex, NULL);
5498+ h_dentry = ERR_PTR(opq);
5499+ }
5500+ goto out;
5501+
5502+ out_neg:
5503+ dput(h_dentry);
5504+ h_dentry = NULL;
5505+ out:
5506+ return h_dentry;
5507+}
5508+
dece6358
AM
5509+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
5510+{
5511+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
5512+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
5513+ return -EPERM;
5514+ return 0;
5515+}
5516+
1facf9fc 5517+/*
5518+ * returns the number of lower positive dentries,
5519+ * otherwise an error.
5520+ * can be called at unlinking with @type is zero.
5521+ */
5522+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
5523+ struct nameidata *nd)
5524+{
5525+ int npositive, err;
5526+ aufs_bindex_t bindex, btail, bdiropq;
5527+ unsigned char isdir;
5528+ struct qstr whname;
5529+ struct au_do_lookup_args args = {
5530+ .flags = 0,
5531+ .type = type,
5532+ .nd = nd
5533+ };
5534+ const struct qstr *name = &dentry->d_name;
5535+ struct dentry *parent;
5536+ struct inode *inode;
5537+
1facf9fc 5538+ parent = dget_parent(dentry);
dece6358
AM
5539+ err = au_test_shwh(dentry->d_sb, name);
5540+ if (unlikely(err))
1facf9fc 5541+ goto out;
5542+
5543+ err = au_wh_name_alloc(&whname, name);
5544+ if (unlikely(err))
5545+ goto out;
5546+
5547+ inode = dentry->d_inode;
5548+ isdir = !!(inode && S_ISDIR(inode->i_mode));
5549+ if (!type)
5550+ au_fset_lkup(args.flags, ALLOW_NEG);
5551+
5552+ npositive = 0;
5553+ btail = au_dbtaildir(parent);
5554+ for (bindex = bstart; bindex <= btail; bindex++) {
5555+ struct dentry *h_parent, *h_dentry;
5556+ struct inode *h_inode, *h_dir;
5557+
5558+ h_dentry = au_h_dptr(dentry, bindex);
5559+ if (h_dentry) {
5560+ if (h_dentry->d_inode)
5561+ npositive++;
5562+ if (type != S_IFDIR)
5563+ break;
5564+ continue;
5565+ }
5566+ h_parent = au_h_dptr(parent, bindex);
5567+ if (!h_parent)
5568+ continue;
5569+ h_dir = h_parent->d_inode;
5570+ if (!h_dir || !S_ISDIR(h_dir->i_mode))
5571+ continue;
5572+
5573+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
5574+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
5575+ &args);
5576+ mutex_unlock(&h_dir->i_mutex);
5577+ err = PTR_ERR(h_dentry);
5578+ if (IS_ERR(h_dentry))
5579+ goto out_wh;
5580+ au_fclr_lkup(args.flags, ALLOW_NEG);
5581+
5582+ if (au_dbwh(dentry) >= 0)
5583+ break;
5584+ if (!h_dentry)
5585+ continue;
5586+ h_inode = h_dentry->d_inode;
5587+ if (!h_inode)
5588+ continue;
5589+ npositive++;
5590+ if (!args.type)
5591+ args.type = h_inode->i_mode & S_IFMT;
5592+ if (args.type != S_IFDIR)
5593+ break;
5594+ else if (isdir) {
5595+ /* the type of lower may be different */
5596+ bdiropq = au_dbdiropq(dentry);
5597+ if (bdiropq >= 0 && bdiropq <= bindex)
5598+ break;
5599+ }
5600+ }
5601+
5602+ if (npositive) {
5603+ AuLabel(positive);
5604+ au_update_dbstart(dentry);
5605+ }
5606+ err = npositive;
5607+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
5608+ && au_dbstart(dentry) < 0))
5609+ /* both of real entry and whiteout found */
5610+ err = -EIO;
5611+
5612+ out_wh:
5613+ kfree(whname.name);
5614+ out:
5615+ dput(parent);
5616+ return err;
5617+}
5618+
5619+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
5620+ struct au_branch *br)
5621+{
5622+ struct dentry *dentry;
5623+ int wkq_err;
5624+
5625+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
5626+ dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
5627+ else {
5628+ struct au_lkup_one_args args = {
5629+ .errp = &dentry,
5630+ .name = name,
5631+ .h_parent = parent,
5632+ .br = br,
5633+ .nd = NULL
5634+ };
5635+
5636+ wkq_err = au_wkq_wait(au_call_lkup_one, &args);
5637+ if (unlikely(wkq_err))
5638+ dentry = ERR_PTR(wkq_err);
5639+ }
5640+
5641+ return dentry;
5642+}
5643+
5644+/*
5645+ * lookup @dentry on @bindex which should be negative.
5646+ */
5647+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
5648+{
5649+ int err;
5650+ struct dentry *parent, *h_parent, *h_dentry;
5651+ struct qstr *name;
5652+
5653+ name = &dentry->d_name;
5654+ parent = dget_parent(dentry);
5655+ h_parent = au_h_dptr(parent, bindex);
5656+ h_dentry = au_sio_lkup_one(name, h_parent,
5657+ au_sbr(dentry->d_sb, bindex));
5658+ err = PTR_ERR(h_dentry);
5659+ if (IS_ERR(h_dentry))
5660+ goto out;
5661+ if (unlikely(h_dentry->d_inode)) {
5662+ err = -EIO;
5663+ AuIOErr("b%d %.*s should be negative.\n",
5664+ bindex, AuDLNPair(h_dentry));
5665+ dput(h_dentry);
5666+ goto out;
5667+ }
5668+
5669+ if (bindex < au_dbstart(dentry))
5670+ au_set_dbstart(dentry, bindex);
5671+ if (au_dbend(dentry) < bindex)
5672+ au_set_dbend(dentry, bindex);
5673+ au_set_h_dptr(dentry, bindex, h_dentry);
5674+ err = 0;
5675+
5676+ out:
5677+ dput(parent);
5678+ return err;
5679+}
5680+
5681+/* ---------------------------------------------------------------------- */
5682+
5683+/* subset of struct inode */
5684+struct au_iattr {
5685+ unsigned long i_ino;
5686+ /* unsigned int i_nlink; */
5687+ uid_t i_uid;
5688+ gid_t i_gid;
5689+ u64 i_version;
5690+/*
5691+ loff_t i_size;
5692+ blkcnt_t i_blocks;
5693+*/
5694+ umode_t i_mode;
5695+};
5696+
5697+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
5698+{
5699+ ia->i_ino = h_inode->i_ino;
5700+ /* ia->i_nlink = h_inode->i_nlink; */
5701+ ia->i_uid = h_inode->i_uid;
5702+ ia->i_gid = h_inode->i_gid;
5703+ ia->i_version = h_inode->i_version;
5704+/*
5705+ ia->i_size = h_inode->i_size;
5706+ ia->i_blocks = h_inode->i_blocks;
5707+*/
5708+ ia->i_mode = (h_inode->i_mode & S_IFMT);
5709+}
5710+
5711+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
5712+{
5713+ return ia->i_ino != h_inode->i_ino
5714+ /* || ia->i_nlink != h_inode->i_nlink */
5715+ || ia->i_uid != h_inode->i_uid
5716+ || ia->i_gid != h_inode->i_gid
5717+ || ia->i_version != h_inode->i_version
5718+/*
5719+ || ia->i_size != h_inode->i_size
5720+ || ia->i_blocks != h_inode->i_blocks
5721+*/
5722+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
5723+}
5724+
5725+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
5726+ struct au_branch *br)
5727+{
5728+ int err;
5729+ struct au_iattr ia;
5730+ struct inode *h_inode;
5731+ struct dentry *h_d;
5732+ struct super_block *h_sb;
5733+
5734+ err = 0;
5735+ memset(&ia, -1, sizeof(ia));
5736+ h_sb = h_dentry->d_sb;
5737+ h_inode = h_dentry->d_inode;
5738+ if (h_inode)
5739+ au_iattr_save(&ia, h_inode);
5740+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
5741+ /* nfs d_revalidate may return 0 for negative dentry */
5742+ /* fuse d_revalidate always return 0 for negative dentry */
5743+ goto out;
5744+
5745+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
5746+ h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
5747+ err = PTR_ERR(h_d);
5748+ if (IS_ERR(h_d))
5749+ goto out;
5750+
5751+ err = 0;
5752+ if (unlikely(h_d != h_dentry
5753+ || h_d->d_inode != h_inode
5754+ || (h_inode && au_iattr_test(&ia, h_inode))))
5755+ err = au_busy_or_stale();
5756+ dput(h_d);
5757+
5758+ out:
5759+ AuTraceErr(err);
5760+ return err;
5761+}
5762+
5763+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
5764+ struct dentry *h_parent, struct au_branch *br)
5765+{
5766+ int err;
5767+
5768+ err = 0;
5769+ if (udba == AuOpt_UDBA_REVAL) {
5770+ IMustLock(h_dir);
5771+ err = (h_dentry->d_parent->d_inode != h_dir);
5772+ } else if (udba == AuOpt_UDBA_HINOTIFY)
5773+ err = au_h_verify_dentry(h_dentry, h_parent, br);
5774+
5775+ return err;
5776+}
5777+
5778+/* ---------------------------------------------------------------------- */
5779+
5780+static void au_do_refresh_hdentry(struct au_hdentry *p, struct au_dinfo *dinfo,
5781+ struct dentry *parent)
5782+{
5783+ struct dentry *h_d, *h_dp;
5784+ struct au_hdentry tmp, *q;
5785+ struct super_block *sb;
5786+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
5787+
1308ab2a 5788+ AuRwMustWriteLock(&dinfo->di_rwsem);
5789+
1facf9fc 5790+ bend = dinfo->di_bend;
5791+ bwh = dinfo->di_bwh;
5792+ bdiropq = dinfo->di_bdiropq;
5793+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
5794+ h_d = p->hd_dentry;
5795+ if (!h_d)
5796+ continue;
5797+
5798+ h_dp = dget_parent(h_d);
5799+ if (h_dp == au_h_dptr(parent, bindex)) {
5800+ dput(h_dp);
5801+ continue;
5802+ }
5803+
5804+ new_bindex = au_find_dbindex(parent, h_dp);
5805+ dput(h_dp);
5806+ if (dinfo->di_bwh == bindex)
5807+ bwh = new_bindex;
5808+ if (dinfo->di_bdiropq == bindex)
5809+ bdiropq = new_bindex;
5810+ if (new_bindex < 0) {
5811+ au_hdput(p);
5812+ p->hd_dentry = NULL;
5813+ continue;
5814+ }
5815+
5816+ /* swap two lower dentries, and loop again */
5817+ q = dinfo->di_hdentry + new_bindex;
5818+ tmp = *q;
5819+ *q = *p;
5820+ *p = tmp;
5821+ if (tmp.hd_dentry) {
5822+ bindex--;
5823+ p--;
5824+ }
5825+ }
5826+
5827+ sb = parent->d_sb;
5828+ dinfo->di_bwh = -1;
5829+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
5830+ dinfo->di_bwh = bwh;
5831+
5832+ dinfo->di_bdiropq = -1;
5833+ if (bdiropq >= 0
5834+ && bdiropq <= au_sbend(sb)
5835+ && au_sbr_whable(sb, bdiropq))
5836+ dinfo->di_bdiropq = bdiropq;
5837+
5838+ bend = au_dbend(parent);
5839+ p = dinfo->di_hdentry;
5840+ for (bindex = 0; bindex <= bend; bindex++, p++)
5841+ if (p->hd_dentry) {
5842+ dinfo->di_bstart = bindex;
5843+ break;
5844+ }
5845+
5846+ p = dinfo->di_hdentry + bend;
5847+ for (bindex = bend; bindex >= 0; bindex--, p--)
5848+ if (p->hd_dentry) {
5849+ dinfo->di_bend = bindex;
5850+ break;
5851+ }
5852+}
5853+
5854+/*
5855+ * returns the number of found lower positive dentries,
5856+ * otherwise an error.
5857+ */
5858+int au_refresh_hdentry(struct dentry *dentry, mode_t type)
5859+{
5860+ int npositive, err;
5861+ unsigned int sigen;
5862+ aufs_bindex_t bstart;
5863+ struct au_dinfo *dinfo;
5864+ struct super_block *sb;
5865+ struct dentry *parent;
5866+
1308ab2a 5867+ DiMustWriteLock(dentry);
5868+
1facf9fc 5869+ sb = dentry->d_sb;
5870+ AuDebugOn(IS_ROOT(dentry));
5871+ sigen = au_sigen(sb);
5872+ parent = dget_parent(dentry);
5873+ AuDebugOn(au_digen(parent) != sigen
5874+ || au_iigen(parent->d_inode) != sigen);
5875+
5876+ dinfo = au_di(dentry);
5877+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
5878+ npositive = err;
5879+ if (unlikely(err))
5880+ goto out;
5881+ au_do_refresh_hdentry(dinfo->di_hdentry + dinfo->di_bstart, dinfo,
5882+ parent);
5883+
5884+ npositive = 0;
5885+ bstart = au_dbstart(parent);
5886+ if (type != S_IFDIR && dinfo->di_bstart == bstart)
5887+ goto out_dgen; /* success */
5888+
5889+ npositive = au_lkup_dentry(dentry, bstart, type, /*nd*/NULL);
5890+ if (npositive < 0)
5891+ goto out;
5892+ if (dinfo->di_bwh >= 0 && dinfo->di_bwh <= dinfo->di_bstart)
5893+ d_drop(dentry);
5894+
5895+ out_dgen:
5896+ au_update_digen(dentry);
5897+ out:
5898+ dput(parent);
5899+ AuTraceErr(npositive);
5900+ return npositive;
5901+}
5902+
5903+static noinline_for_stack
5904+int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
5905+ struct dentry *dentry, aufs_bindex_t bindex)
5906+{
5907+ int err, valid;
5908+ int (*reval)(struct dentry *, struct nameidata *);
5909+
5910+ err = 0;
5911+ reval = NULL;
5912+ if (h_dentry->d_op)
5913+ reval = h_dentry->d_op->d_revalidate;
5914+ if (!reval)
5915+ goto out;
5916+
5917+ AuDbg("b%d\n", bindex);
5918+ if (au_test_fs_null_nd(h_dentry->d_sb))
5919+ /* it may return tri-state */
5920+ valid = reval(h_dentry, NULL);
5921+ else {
5922+ struct nameidata h_nd;
5923+ int locked;
5924+ struct dentry *parent;
5925+
5926+ au_h_nd(&h_nd, nd);
5927+ parent = nd->path.dentry;
5928+ locked = (nd && nd->path.dentry != dentry);
5929+ if (locked)
5930+ di_read_lock_parent(parent, AuLock_IR);
5931+ BUG_ON(bindex > au_dbend(parent));
5932+ h_nd.path.dentry = au_h_dptr(parent, bindex);
5933+ BUG_ON(!h_nd.path.dentry);
5934+ h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
5935+ path_get(&h_nd.path);
5936+ valid = reval(h_dentry, &h_nd);
5937+ path_put(&h_nd.path);
5938+ if (locked)
5939+ di_read_unlock(parent, AuLock_IR);
5940+ }
5941+
5942+ if (unlikely(valid < 0))
5943+ err = valid;
5944+ else if (!valid)
5945+ err = -EINVAL;
5946+
5947+ out:
5948+ AuTraceErr(err);
5949+ return err;
5950+}
5951+
5952+/* todo: remove this */
5953+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
5954+ struct nameidata *nd, int do_udba)
5955+{
5956+ int err;
5957+ umode_t mode, h_mode;
5958+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
5959+ unsigned char plus, unhashed, is_root, h_plus;
5960+ struct inode *first, *h_inode, *h_cached_inode;
5961+ struct dentry *h_dentry;
5962+ struct qstr *name, *h_name;
5963+
5964+ err = 0;
5965+ plus = 0;
5966+ mode = 0;
5967+ first = NULL;
5968+ ibs = -1;
5969+ ibe = -1;
5970+ unhashed = !!d_unhashed(dentry);
5971+ is_root = !!IS_ROOT(dentry);
5972+ name = &dentry->d_name;
5973+
5974+ /*
5975+ * Theoretically, REVAL test should be unnecessary in case of INOTIFY.
5976+ * But inotify doesn't fire some necessary events,
5977+ * IN_ATTRIB for atime/nlink/pageio
5978+ * IN_DELETE for NFS dentry
5979+ * Let's do REVAL test too.
5980+ */
5981+ if (do_udba && inode) {
5982+ mode = (inode->i_mode & S_IFMT);
5983+ plus = (inode->i_nlink > 0);
5984+ first = au_h_iptr(inode, au_ibstart(inode));
5985+ ibs = au_ibstart(inode);
5986+ ibe = au_ibend(inode);
5987+ }
5988+
5989+ bstart = au_dbstart(dentry);
5990+ btail = bstart;
5991+ if (inode && S_ISDIR(inode->i_mode))
5992+ btail = au_dbtaildir(dentry);
5993+ for (bindex = bstart; bindex <= btail; bindex++) {
5994+ h_dentry = au_h_dptr(dentry, bindex);
5995+ if (!h_dentry)
5996+ continue;
5997+
5998+ AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
5999+ h_name = &h_dentry->d_name;
6000+ if (unlikely(do_udba
6001+ && !is_root
6002+ && (unhashed != !!d_unhashed(h_dentry)
6003+ || name->len != h_name->len
6004+ || memcmp(name->name, h_name->name, name->len))
6005+ )) {
6006+ AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6007+ unhashed, d_unhashed(h_dentry),
6008+ AuDLNPair(dentry), AuDLNPair(h_dentry));
6009+ goto err;
6010+ }
6011+
6012+ err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6013+ if (unlikely(err))
6014+ /* do not goto err, to keep the errno */
6015+ break;
6016+
6017+ /* todo: plink too? */
6018+ if (!do_udba)
6019+ continue;
6020+
6021+ /* UDBA tests */
6022+ h_inode = h_dentry->d_inode;
6023+ if (unlikely(!!inode != !!h_inode))
6024+ goto err;
6025+
6026+ h_plus = plus;
6027+ h_mode = mode;
6028+ h_cached_inode = h_inode;
6029+ if (h_inode) {
6030+ h_mode = (h_inode->i_mode & S_IFMT);
6031+ h_plus = (h_inode->i_nlink > 0);
6032+ }
6033+ if (inode && ibs <= bindex && bindex <= ibe)
6034+ h_cached_inode = au_h_iptr(inode, bindex);
6035+
6036+ if (unlikely(plus != h_plus
6037+ || mode != h_mode
6038+ || h_cached_inode != h_inode))
6039+ goto err;
6040+ continue;
6041+
6042+ err:
6043+ err = -EINVAL;
6044+ break;
6045+ }
6046+
6047+ return err;
6048+}
6049+
6050+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6051+{
6052+ int err;
6053+ struct dentry *parent;
6054+ struct inode *inode;
6055+
6056+ inode = dentry->d_inode;
6057+ if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
6058+ return 0;
6059+
6060+ parent = dget_parent(dentry);
6061+ di_read_lock_parent(parent, AuLock_IR);
6062+ AuDebugOn(au_digen(parent) != sigen
6063+ || au_iigen(parent->d_inode) != sigen);
6064+ au_dbg_verify_gen(parent, sigen);
6065+
6066+ /* returns a number of positive dentries */
6067+ err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
6068+ if (err >= 0)
6069+ err = au_refresh_hinode(inode, dentry);
6070+
6071+ di_read_unlock(parent, AuLock_IR);
6072+ dput(parent);
6073+ return err;
6074+}
6075+
6076+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6077+{
6078+ int err;
6079+ struct dentry *d, *parent;
6080+ struct inode *inode;
6081+
6082+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS))
6083+ return simple_reval_dpath(dentry, sigen);
6084+
6085+ /* slow loop, keep it simple and stupid */
6086+ /* cf: au_cpup_dirs() */
6087+ err = 0;
6088+ parent = NULL;
6089+ while (au_digen(dentry) != sigen
6090+ || au_iigen(dentry->d_inode) != sigen) {
6091+ d = dentry;
6092+ while (1) {
6093+ dput(parent);
6094+ parent = dget_parent(d);
6095+ if (au_digen(parent) == sigen
6096+ && au_iigen(parent->d_inode) == sigen)
6097+ break;
6098+ d = parent;
6099+ }
6100+
6101+ inode = d->d_inode;
6102+ if (d != dentry)
6103+ di_write_lock_child(d);
6104+
6105+ /* someone might update our dentry while we were sleeping */
6106+ if (au_digen(d) != sigen || au_iigen(d->d_inode) != sigen) {
6107+ di_read_lock_parent(parent, AuLock_IR);
6108+ /* returns a number of positive dentries */
6109+ err = au_refresh_hdentry(d, inode->i_mode & S_IFMT);
6110+ if (err >= 0)
6111+ err = au_refresh_hinode(inode, d);
6112+ di_read_unlock(parent, AuLock_IR);
6113+ }
6114+
6115+ if (d != dentry)
6116+ di_write_unlock(d);
6117+ dput(parent);
6118+ if (unlikely(err))
6119+ break;
6120+ }
6121+
6122+ return err;
6123+}
6124+
6125+/*
6126+ * if valid returns 1, otherwise 0.
6127+ */
6128+static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6129+{
6130+ int valid, err;
6131+ unsigned int sigen;
6132+ unsigned char do_udba;
6133+ struct super_block *sb;
6134+ struct inode *inode;
6135+
6136+ err = -EINVAL;
6137+ sb = dentry->d_sb;
6138+ inode = dentry->d_inode;
6139+ aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW);
6140+ sigen = au_sigen(sb);
6141+ if (au_digen(dentry) != sigen) {
6142+ AuDebugOn(IS_ROOT(dentry));
6143+ if (inode)
6144+ err = au_reval_dpath(dentry, sigen);
6145+ if (unlikely(err))
6146+ goto out_dgrade;
6147+ AuDebugOn(au_digen(dentry) != sigen);
6148+ }
6149+ if (inode && au_iigen(inode) != sigen) {
6150+ AuDebugOn(IS_ROOT(dentry));
6151+ err = au_refresh_hinode(inode, dentry);
6152+ if (unlikely(err))
6153+ goto out_dgrade;
6154+ AuDebugOn(au_iigen(inode) != sigen);
6155+ }
6156+ di_downgrade_lock(dentry, AuLock_IR);
6157+
6158+ AuDebugOn(au_digen(dentry) != sigen);
6159+ AuDebugOn(inode && au_iigen(inode) != sigen);
6160+ err = -EINVAL;
6161+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6162+ if (do_udba && inode) {
6163+ aufs_bindex_t bstart = au_ibstart(inode);
6164+
6165+ if (bstart >= 0
6166+ && au_test_higen(inode, au_h_iptr(inode, bstart)))
6167+ goto out;
6168+ }
6169+
6170+ err = h_d_revalidate(dentry, inode, nd, do_udba);
6171+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0))
6172+ /* both of real entry and whiteout found */
6173+ err = -EIO;
6174+ goto out;
6175+
6176+ out_dgrade:
6177+ di_downgrade_lock(dentry, AuLock_IR);
6178+ out:
1facf9fc 6179+ aufs_read_unlock(dentry, AuLock_IR);
6180+ AuTraceErr(err);
6181+ valid = !err;
6182+ if (!valid)
6183+ AuDbg("%.*s invalid\n", AuDLNPair(dentry));
6184+ return valid;
6185+}
6186+
6187+static void aufs_d_release(struct dentry *dentry)
6188+{
6189+ struct au_dinfo *dinfo;
6190+ aufs_bindex_t bend, bindex;
6191+
6192+ dinfo = dentry->d_fsdata;
6193+ if (!dinfo)
6194+ return;
6195+
6196+ /* dentry may not be revalidated */
6197+ bindex = dinfo->di_bstart;
6198+ if (bindex >= 0) {
6199+ struct au_hdentry *p;
6200+
6201+ bend = dinfo->di_bend;
6202+ p = dinfo->di_hdentry + bindex;
6203+ while (bindex++ <= bend) {
6204+ if (p->hd_dentry)
6205+ au_hdput(p);
6206+ p++;
6207+ }
6208+ }
6209+ kfree(dinfo->di_hdentry);
dece6358 6210+ AuRwDestroy(&dinfo->di_rwsem);
1facf9fc 6211+ au_cache_free_dinfo(dinfo);
6212+ au_hin_di_reinit(dentry);
6213+}
6214+
6215+struct dentry_operations aufs_dop = {
6216+ .d_revalidate = aufs_d_revalidate,
6217+ .d_release = aufs_d_release
6218+};
1308ab2a 6219diff -uprN -x .git linux-2.6.31/fs/aufs/dentry.h aufs2-2.6.git/fs/aufs/dentry.h
6220--- linux-2.6.31/fs/aufs/dentry.h 1970-01-01 00:00:00.000000000 +0000
6221+++ aufs2-2.6.git/fs/aufs/dentry.h 2009-09-21 21:49:23.399892755 +0000
6222@@ -0,0 +1,231 @@
1facf9fc 6223+/*
6224+ * Copyright (C) 2005-2009 Junjiro R. Okajima
6225+ *
6226+ * This program, aufs is free software; you can redistribute it and/or modify
6227+ * it under the terms of the GNU General Public License as published by
6228+ * the Free Software Foundation; either version 2 of the License, or
6229+ * (at your option) any later version.
dece6358
AM
6230+ *
6231+ * This program is distributed in the hope that it will be useful,
6232+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6233+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6234+ * GNU General Public License for more details.
6235+ *
6236+ * You should have received a copy of the GNU General Public License
6237+ * along with this program; if not, write to the Free Software
6238+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 6239+ */
6240+
6241+/*
6242+ * lookup and dentry operations
6243+ */
6244+
6245+#ifndef __AUFS_DENTRY_H__
6246+#define __AUFS_DENTRY_H__
6247+
6248+#ifdef __KERNEL__
6249+
dece6358 6250+#include <linux/dcache.h>
1facf9fc 6251+#include <linux/aufs_type.h>
6252+#include "rwsem.h"
6253+
/* make a single member structure for future use */
/* todo: remove this structure */
struct au_hdentry {
	struct dentry	*hd_dentry;	/* dentry on one lower branch */
};
6259+
6260+struct au_dinfo {
6261+ atomic_t di_generation;
6262+
dece6358 6263+ struct au_rwsem di_rwsem;
1facf9fc 6264+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
6265+ struct au_hdentry *di_hdentry;
6266+};
6267+
6268+/* ---------------------------------------------------------------------- */
6269+
6270+/* dentry.c */
6271+extern struct dentry_operations aufs_dop;
6272+struct au_branch;
6273+struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
6274+ struct au_branch *br, struct nameidata *nd);
6275+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6276+ struct au_branch *br);
6277+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6278+ struct dentry *h_parent, struct au_branch *br);
6279+
6280+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6281+ struct nameidata *nd);
6282+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
6283+int au_refresh_hdentry(struct dentry *dentry, mode_t type);
6284+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
6285+
6286+/* dinfo.c */
6287+int au_alloc_dinfo(struct dentry *dentry);
6288+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
6289+
6290+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
6291+void di_read_unlock(struct dentry *d, int flags);
6292+void di_downgrade_lock(struct dentry *d, int flags);
6293+void di_write_lock(struct dentry *d, unsigned int lsc);
6294+void di_write_unlock(struct dentry *d);
6295+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
6296+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
6297+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
6298+
6299+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
6300+aufs_bindex_t au_dbtail(struct dentry *dentry);
6301+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
6302+
6303+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
6304+ struct dentry *h_dentry);
6305+void au_update_digen(struct dentry *dentry);
6306+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
6307+void au_update_dbstart(struct dentry *dentry);
6308+void au_update_dbend(struct dentry *dentry);
6309+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
6310+
6311+/* ---------------------------------------------------------------------- */
6312+
6313+static inline struct au_dinfo *au_di(struct dentry *dentry)
6314+{
6315+ return dentry->d_fsdata;
6316+}
6317+
6318+/* ---------------------------------------------------------------------- */
6319+
/* lock subclass for dinfo */
enum {
	/* child first */
	AuLsc_DI_CHILD,
	/* rename(2), link(2), and cpup at hinotify */
	AuLsc_DI_CHILD2,
	/* copyup dirs */
	AuLsc_DI_CHILD3,
	AuLsc_DI_PARENT,
	AuLsc_DI_PARENT2,
	AuLsc_DI_PARENT3
};
6329+
6330+/*
6331+ * di_read_lock_child, di_write_lock_child,
6332+ * di_read_lock_child2, di_write_lock_child2,
6333+ * di_read_lock_child3, di_write_lock_child3,
6334+ * di_read_lock_parent, di_write_lock_parent,
6335+ * di_read_lock_parent2, di_write_lock_parent2,
6336+ * di_read_lock_parent3, di_write_lock_parent3,
6337+ */
6338+#define AuReadLockFunc(name, lsc) \
6339+static inline void di_read_lock_##name(struct dentry *d, int flags) \
6340+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
6341+
6342+#define AuWriteLockFunc(name, lsc) \
6343+static inline void di_write_lock_##name(struct dentry *d) \
6344+{ di_write_lock(d, AuLsc_DI_##lsc); }
6345+
6346+#define AuRWLockFuncs(name, lsc) \
6347+ AuReadLockFunc(name, lsc) \
6348+ AuWriteLockFunc(name, lsc)
6349+
6350+AuRWLockFuncs(child, CHILD);
6351+AuRWLockFuncs(child2, CHILD2);
6352+AuRWLockFuncs(child3, CHILD3);
6353+AuRWLockFuncs(parent, PARENT);
6354+AuRWLockFuncs(parent2, PARENT2);
6355+AuRWLockFuncs(parent3, PARENT3);
6356+
6357+#undef AuReadLockFunc
6358+#undef AuWriteLockFunc
6359+#undef AuRWLockFuncs
6360+
/* apply the AuRwMust*() checks to the dinfo rwsem of dentry @d */
#define DiMustNoWaiters(d)	AuRwMustNoWaiters(&au_di(d)->di_rwsem)
#define DiMustAnyLock(d)	AuRwMustAnyLock(&au_di(d)->di_rwsem)
#define DiMustWriteLock(d)	AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 6364+
6365+/* ---------------------------------------------------------------------- */
6366+
6367+/* todo: memory barrier? */
6368+static inline unsigned int au_digen(struct dentry *d)
6369+{
6370+ return atomic_read(&au_di(d)->di_generation);
6371+}
6372+
6373+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
6374+{
6375+ hdentry->hd_dentry = NULL;
6376+}
6377+
6378+static inline void au_hdput(struct au_hdentry *hd)
6379+{
6380+ dput(hd->hd_dentry);
6381+}
6382+
6383+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
6384+{
1308ab2a 6385+ DiMustAnyLock(dentry);
1facf9fc 6386+ return au_di(dentry)->di_bstart;
6387+}
6388+
6389+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
6390+{
1308ab2a 6391+ DiMustAnyLock(dentry);
1facf9fc 6392+ return au_di(dentry)->di_bend;
6393+}
6394+
6395+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
6396+{
1308ab2a 6397+ DiMustAnyLock(dentry);
1facf9fc 6398+ return au_di(dentry)->di_bwh;
6399+}
6400+
6401+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
6402+{
1308ab2a 6403+ DiMustAnyLock(dentry);
1facf9fc 6404+ return au_di(dentry)->di_bdiropq;
6405+}
6406+
6407+/* todo: hard/soft set? */
6408+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
6409+{
1308ab2a 6410+ DiMustWriteLock(dentry);
1facf9fc 6411+ au_di(dentry)->di_bstart = bindex;
6412+}
6413+
6414+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
6415+{
1308ab2a 6416+ DiMustWriteLock(dentry);
1facf9fc 6417+ au_di(dentry)->di_bend = bindex;
6418+}
6419+
6420+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
6421+{
1308ab2a 6422+ DiMustWriteLock(dentry);
1facf9fc 6423+ /* dbwh can be outside of bstart - bend range */
6424+ au_di(dentry)->di_bwh = bindex;
6425+}
6426+
6427+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
6428+{
1308ab2a 6429+ DiMustWriteLock(dentry);
1facf9fc 6430+ au_di(dentry)->di_bdiropq = bindex;
6431+}
6432+
6433+/* ---------------------------------------------------------------------- */
6434+
6435+#ifdef CONFIG_AUFS_HINOTIFY
6436+static inline void au_digen_dec(struct dentry *d)
6437+{
dece6358 6438+ atomic_dec_return(&au_di(d)->di_generation);
1facf9fc 6439+}
6440+
6441+static inline void au_hin_di_reinit(struct dentry *dentry)
6442+{
6443+ dentry->d_fsdata = NULL;
6444+}
6445+#else
6446+static inline void au_hin_di_reinit(struct dentry *dentry __maybe_unused)
6447+{
6448+ /* empty */
6449+}
6450+#endif /* CONFIG_AUFS_HINOTIFY */
6451+
6452+#endif /* __KERNEL__ */
6453+#endif /* __AUFS_DENTRY_H__ */
1308ab2a 6454diff -uprN -x .git linux-2.6.31/fs/aufs/dinfo.c aufs2-2.6.git/fs/aufs/dinfo.c
6455--- linux-2.6.31/fs/aufs/dinfo.c 1970-01-01 00:00:00.000000000 +0000
6456+++ aufs2-2.6.git/fs/aufs/dinfo.c 2009-09-21 21:49:23.399892755 +0000
6457@@ -0,0 +1,367 @@
1facf9fc 6458+/*
6459+ * Copyright (C) 2005-2009 Junjiro R. Okajima
6460+ *
6461+ * This program, aufs is free software; you can redistribute it and/or modify
6462+ * it under the terms of the GNU General Public License as published by
6463+ * the Free Software Foundation; either version 2 of the License, or
6464+ * (at your option) any later version.
dece6358
AM
6465+ *
6466+ * This program is distributed in the hope that it will be useful,
6467+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6468+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6469+ * GNU General Public License for more details.
6470+ *
6471+ * You should have received a copy of the GNU General Public License
6472+ * along with this program; if not, write to the Free Software
6473+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 6474+ */
6475+
6476+/*
6477+ * dentry private data
6478+ */
6479+
6480+#include "aufs.h"
6481+
6482+int au_alloc_dinfo(struct dentry *dentry)
6483+{
6484+ struct au_dinfo *dinfo;
6485+ struct super_block *sb;
6486+ int nbr;
6487+
6488+ dinfo = au_cache_alloc_dinfo();
6489+ if (unlikely(!dinfo))
6490+ goto out;
6491+
6492+ sb = dentry->d_sb;
6493+ nbr = au_sbend(sb) + 1;
6494+ if (nbr <= 0)
6495+ nbr = 1;
6496+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
6497+ if (unlikely(!dinfo->di_hdentry))
6498+ goto out_dinfo;
6499+
6500+ atomic_set(&dinfo->di_generation, au_sigen(sb));
6501+ /* smp_mb(); */ /* atomic_set */
dece6358 6502+ au_rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD);
1facf9fc 6503+ dinfo->di_bstart = -1;
6504+ dinfo->di_bend = -1;
6505+ dinfo->di_bwh = -1;
6506+ dinfo->di_bdiropq = -1;
6507+
6508+ dentry->d_fsdata = dinfo;
6509+ dentry->d_op = &aufs_dop;
6510+ return 0; /* success */
6511+
6512+ out_dinfo:
6513+ au_cache_free_dinfo(dinfo);
6514+ out:
6515+ return -ENOMEM;
6516+}
6517+
6518+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
6519+{
6520+ int err, sz;
6521+ struct au_hdentry *hdp;
6522+
1308ab2a 6523+ AuRwMustWriteLock(&dinfo->di_rwsem);
6524+
1facf9fc 6525+ err = -ENOMEM;
6526+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
6527+ if (!sz)
6528+ sz = sizeof(*hdp);
6529+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
6530+ if (hdp) {
6531+ dinfo->di_hdentry = hdp;
6532+ err = 0;
6533+ }
6534+
6535+ return err;
6536+}
6537+
6538+/* ---------------------------------------------------------------------- */
6539+
6540+static void do_ii_write_lock(struct inode *inode, unsigned int lsc) /* map the lock subclass @lsc to the matching ii_write_lock_*() call */
6541+{
6542+ switch (lsc) {
6543+ case AuLsc_DI_CHILD:
6544+ ii_write_lock_child(inode);
6545+ break;
6546+ case AuLsc_DI_CHILD2:
6547+ ii_write_lock_child2(inode);
6548+ break;
6549+ case AuLsc_DI_CHILD3:
6550+ ii_write_lock_child3(inode);
6551+ break;
6552+ case AuLsc_DI_PARENT:
6553+ ii_write_lock_parent(inode);
6554+ break;
6555+ case AuLsc_DI_PARENT2:
6556+ ii_write_lock_parent2(inode);
6557+ break;
6558+ case AuLsc_DI_PARENT3:
6559+ ii_write_lock_parent3(inode);
6560+ break;
6561+ default:
6562+ BUG(); /* any other subclass is a caller bug */
6563+ }
6564+}
6565+
6566+static void do_ii_read_lock(struct inode *inode, unsigned int lsc) /* map the lock subclass @lsc to the matching ii_read_lock_*() call */
6567+{
6568+ switch (lsc) {
6569+ case AuLsc_DI_CHILD:
6570+ ii_read_lock_child(inode);
6571+ break;
6572+ case AuLsc_DI_CHILD2:
6573+ ii_read_lock_child2(inode);
6574+ break;
6575+ case AuLsc_DI_CHILD3:
6576+ ii_read_lock_child3(inode);
6577+ break;
6578+ case AuLsc_DI_PARENT:
6579+ ii_read_lock_parent(inode);
6580+ break;
6581+ case AuLsc_DI_PARENT2:
6582+ ii_read_lock_parent2(inode);
6583+ break;
6584+ case AuLsc_DI_PARENT3:
6585+ ii_read_lock_parent3(inode);
6586+ break;
6587+ default:
6588+ BUG(); /* any other subclass is a caller bug */
6589+ }
6590+}
6591+
6592+void di_read_lock(struct dentry *d, int flags, unsigned int lsc) /* read-lock @d's di_rwsem (subclass @lsc), then optionally lock its inode info as selected by @flags */
6593+{
dece6358 6594+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 6595+ if (d->d_inode) { /* a dentry without an inode has no inode info to lock */
6596+ if (au_ftest_lock(flags, IW)) /* IW takes precedence over IR */
6597+ do_ii_write_lock(d->d_inode, lsc);
6598+ else if (au_ftest_lock(flags, IR))
6599+ do_ii_read_lock(d->d_inode, lsc);
6600+ }
6601+}
6602+
6603+void di_read_unlock(struct dentry *d, int flags) /* undo di_read_lock(): release the inode-info lock selected by @flags first, then di_rwsem */
6604+{
6605+ if (d->d_inode) {
6606+ if (au_ftest_lock(flags, IW)) /* same IW-before-IR precedence as di_read_lock() */
6607+ ii_write_unlock(d->d_inode);
6608+ else if (au_ftest_lock(flags, IR))
6609+ ii_read_unlock(d->d_inode);
6610+ }
dece6358 6611+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 6612+}
6613+
6614+void di_downgrade_lock(struct dentry *d, int flags) /* downgrade @d's di_rwsem via au_rw_dgrade_lock(); the inode-info lock is downgraded too when IR is set in @flags */
6615+{
1facf9fc 6616+ if (d->d_inode && au_ftest_lock(flags, IR))
6617+ ii_downgrade_lock(d->d_inode);
dece6358 6618+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 6619+}
6620+
6621+void di_write_lock(struct dentry *d, unsigned int lsc) /* write-lock @d's di_rwsem (subclass @lsc) and, if @d has an inode, its inode info as well */
6622+{
dece6358 6623+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 6624+ if (d->d_inode)
6625+ do_ii_write_lock(d->d_inode, lsc);
6626+}
6627+
6628+void di_write_unlock(struct dentry *d) /* undo di_write_lock(): inode info first (if any), then di_rwsem */
6629+{
6630+ if (d->d_inode)
6631+ ii_write_unlock(d->d_inode);
dece6358 6632+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 6633+}
6634+
6635+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) /* write-lock two distinct dentries of one superblock with the CHILD and CHILD2 subclasses */
6636+{
6637+ AuDebugOn(d1 == d2
6638+ || d1->d_inode == d2->d_inode
6639+ || d1->d_sb != d2->d_sb);
6640+
6641+ if (isdir && au_test_subdir(d1, d2)) { /* lock order depends on the au_test_subdir(d1, d2) relation */
6642+ di_write_lock_child(d1);
6643+ di_write_lock_child2(d2);
6644+ } else {
6645+ /* there should be no races */
6646+ di_write_lock_child(d2);
6647+ di_write_lock_child2(d1);
6648+ }
6649+}
6650+
6651+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) /* same as di_write_lock2_child() but with the PARENT and PARENT2 subclasses */
6652+{
6653+ AuDebugOn(d1 == d2
6654+ || d1->d_inode == d2->d_inode
6655+ || d1->d_sb != d2->d_sb);
6656+
6657+ if (isdir && au_test_subdir(d1, d2)) { /* lock order depends on the au_test_subdir(d1, d2) relation */
6658+ di_write_lock_parent(d1);
6659+ di_write_lock_parent2(d2);
6660+ } else {
6661+ /* there should be no races */
6662+ di_write_lock_parent(d2);
6663+ di_write_lock_parent2(d1);
6664+ }
6665+}
6666+
6667+void di_write_unlock2(struct dentry *d1, struct dentry *d2) /* write-unlock both dentries */
6668+{
6669+ di_write_unlock(d1);
6670+ if (d1->d_inode == d2->d_inode) /* shared inode: its info lock was already dropped through d1 */
dece6358 6671+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 6672+ else
6673+ di_write_unlock(d2);
6674+}
6675+
6676+/* ---------------------------------------------------------------------- */
6677+
6678+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) /* return the lower dentry stored at @bindex, or NULL; caller must hold the dinfo lock */
6679+{
6680+ struct dentry *d;
6681+
1308ab2a 6682+ DiMustAnyLock(dentry);
6683+
1facf9fc 6684+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) /* empty range, or @bindex below the first valid index */
6685+ return NULL;
6686+ AuDebugOn(bindex < 0);
6687+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; /* note: no upper bound check against dbend here */
6688+ AuDebugOn(d && (atomic_read(&d->d_count) <= 0));
6689+ return d;
6690+}
6691+
6692+aufs_bindex_t au_dbtail(struct dentry *dentry) /* return dbend, shortened by the au_dbwh() index when that lies inside the range */
6693+{
6694+ aufs_bindex_t bend, bwh;
6695+
6696+ bend = au_dbend(dentry);
6697+ if (0 <= bend) {
6698+ bwh = au_dbwh(dentry);
6699+ if (!bwh)
6700+ return bwh; /* bwh == 0: the tail is index 0 */
6701+ if (0 < bwh && bwh < bend)
6702+ return bwh - 1; /* stop just before bwh */
6703+ }
6704+ return bend; /* negative bend, negative bwh, or bwh >= bend */
6705+}
6706+
6707+aufs_bindex_t au_dbtaildir(struct dentry *dentry) /* au_dbtail() further capped at au_dbdiropq() when that index is valid and smaller */
6708+{
6709+ aufs_bindex_t bend, bopq;
6710+
6711+ bend = au_dbtail(dentry);
6712+ if (0 <= bend) {
6713+ bopq = au_dbdiropq(dentry);
6714+ if (0 <= bopq && bopq < bend)
6715+ bend = bopq; /* unlike bwh in au_dbtail(), the bopq index itself is kept */
6716+ }
6717+ return bend;
6718+}
6719+
6720+/* ---------------------------------------------------------------------- */
6721+
6722+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, /* store @h_dentry (may be NULL) in the slot at @bindex; dinfo must be write-locked */
6723+ struct dentry *h_dentry)
6724+{
6725+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
6726+
1308ab2a 6727+ DiMustWriteLock(dentry);
6728+
1facf9fc 6729+ if (hd->hd_dentry)
6730+ au_hdput(hd); /* release whatever the slot held before */
6731+ hd->hd_dentry = h_dentry; /* no reference is taken here; presumably the caller passes one in -- confirm */
6732+}
6733+
6734+void au_update_digen(struct dentry *dentry) /* copy the superblock's current generation (au_sigen) into the dentry's di_generation */
6735+{
6736+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
6737+ /* smp_mb(); */ /* atomic_set */
6738+}
6739+
6740+void au_update_dbrange(struct dentry *dentry, int do_put_zero) /* shrink [di_bstart, di_bend] to the first and last populated slots; dinfo must be write-locked */
6741+{
6742+ struct au_dinfo *dinfo;
6743+ struct dentry *h_d;
6744+
1308ab2a 6745+ DiMustWriteLock(dentry);
6746+
1facf9fc 6747+ dinfo = au_di(dentry);
6748+ if (!dinfo || dinfo->di_bstart < 0) /* nothing to do for a missing dinfo or an empty range */
6749+ return;
6750+
6751+ if (do_put_zero) { /* optionally drop lower dentries that have no inode */
6752+ aufs_bindex_t bindex, bend;
6753+
6754+ bend = dinfo->di_bend;
6755+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
6756+ h_d = dinfo->di_hdentry[0 + bindex].hd_dentry;
6757+ if (h_d && !h_d->d_inode)
6758+ au_set_h_dptr(dentry, bindex, NULL);
6759+ }
6760+ }
6761+
6762+ dinfo->di_bstart = -1; /* rescan upward from index 0 for the first populated slot */
6763+ while (++dinfo->di_bstart <= dinfo->di_bend)
6764+ if (dinfo->di_hdentry[0 + dinfo->di_bstart].hd_dentry)
6765+ break;
6766+ if (dinfo->di_bstart > dinfo->di_bend) { /* every slot is empty: mark the range as empty */
6767+ dinfo->di_bstart = -1;
6768+ dinfo->di_bend = -1;
6769+ return;
6770+ }
6771+
6772+ dinfo->di_bend++; /* pre-increment so the loop below tests the old di_bend first */
6773+ while (0 <= --dinfo->di_bend)
6774+ if (dinfo->di_hdentry[0 + dinfo->di_bend].hd_dentry)
6775+ break;
6776+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
6777+}
6778+
6779+void au_update_dbstart(struct dentry *dentry) /* advance dbstart to the first lower dentry that has an inode, dropping inode-less ones on the way */
6780+{
6781+ aufs_bindex_t bindex, bend;
6782+ struct dentry *h_dentry;
6783+
6784+ bend = au_dbend(dentry);
6785+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
6786+ h_dentry = au_h_dptr(dentry, bindex);
6787+ if (!h_dentry)
6788+ continue;
6789+ if (h_dentry->d_inode) {
6790+ au_set_dbstart(dentry, bindex);
6791+ return;
6792+ }
6793+ au_set_h_dptr(dentry, bindex, NULL); /* lower dentry without an inode: release it */
6794+ }
6795+}
6796+
6797+void au_update_dbend(struct dentry *dentry) /* lower dbend to the last lower dentry that has an inode, dropping inode-less ones on the way */
6798+{
6799+ aufs_bindex_t bindex, bstart;
6800+ struct dentry *h_dentry;
6801+
6802+ bstart = au_dbstart(dentry);
6803+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { /* scan downward from dbend to dbstart, mirroring au_update_dbstart() */
6804+ h_dentry = au_h_dptr(dentry, bindex);
6805+ if (!h_dentry)
6806+ continue;
6807+ if (h_dentry->d_inode) {
6808+ au_set_dbend(dentry, bindex);
6809+ return;
6810+ }
6811+ au_set_h_dptr(dentry, bindex, NULL); /* lower dentry without an inode: release it */
6812+ }
6813+}
6814+
6815+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) /* return the index in [dbstart, dbend] whose lower dentry is @h_dentry, or -1 */
6816+{
6817+ aufs_bindex_t bindex, bend;
6818+
6819+ bend = au_dbend(dentry);
6820+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
6821+ if (au_h_dptr(dentry, bindex) == h_dentry)
6822+ return bindex;
6823+ return -1; /* not found */
6824+}
1308ab2a 6825diff -uprN -x .git linux-2.6.31/fs/aufs/dir.c aufs2-2.6.git/fs/aufs/dir.c
6826--- linux-2.6.31/fs/aufs/dir.c 1970-01-01 00:00:00.000000000 +0000
6827+++ aufs2-2.6.git/fs/aufs/dir.c 2009-09-21 21:49:23.399892755 +0000
6828@@ -0,0 +1,593 @@
1facf9fc 6829+/*
6830+ * Copyright (C) 2005-2009 Junjiro R. Okajima
6831+ *
6832+ * This program, aufs is free software; you can redistribute it and/or modify
6833+ * it under the terms of the GNU General Public License as published by
6834+ * the Free Software Foundation; either version 2 of the License, or
6835+ * (at your option) any later version.
dece6358
AM
6836+ *
6837+ * This program is distributed in the hope that it will be useful,
6838+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6839+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6840+ * GNU General Public License for more details.
6841+ *
6842+ * You should have received a copy of the GNU General Public License
6843+ * along with this program; if not, write to the Free Software
6844+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 6845+ */
6846+
6847+/*
6848+ * directory operations
6849+ */
6850+
dece6358 6851+#include <linux/file.h>
1facf9fc 6852+#include <linux/fs_stack.h>
6853+#include "aufs.h"
6854+
6855+void au_add_nlink(struct inode *dir, struct inode *h_dir) /* add the lower directory's link count, minus 2, to @dir->i_nlink */
6856+{
6857+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
6858+
6859+ dir->i_nlink += h_dir->i_nlink - 2;
6860+ if (h_dir->i_nlink < 2)
6861+ dir->i_nlink += 2; /* h_dir has fewer than 2 links: add the 2 back, net change is h_dir->i_nlink */
6862+}
6863+
6864+void au_sub_nlink(struct inode *dir, struct inode *h_dir) /* inverse of au_add_nlink(): remove the lower directory's contribution from @dir->i_nlink */
6865+{
6866+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
6867+
6868+ dir->i_nlink -= h_dir->i_nlink - 2;
6869+ if (h_dir->i_nlink < 2)
6870+ dir->i_nlink -= 2; /* same correction as in au_add_nlink(), with the opposite sign */
6871+}
6872+
1308ab2a 6873+loff_t au_dir_size(struct file *file, struct dentry *dentry) /* sum the i_size of the lower directories of @file (or of @dentry when @file is NULL) and turn it into a bounded size */
6874+{
6875+ loff_t sz;
6876+ aufs_bindex_t bindex, bend;
6877+ struct file *h_file;
6878+ struct dentry *h_dentry;
6879+
6880+ sz = 0;
6881+ if (file) {
6882+ AuDebugOn(!file->f_dentry);
6883+ AuDebugOn(!file->f_dentry->d_inode);
6884+ AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
6885+
6886+ bend = au_fbend(file);
6887+ for (bindex = au_fbstart(file);
6888+ bindex <= bend && sz < KMALLOC_MAX_SIZE; /* stop summing once the cap is reached */
6889+ bindex++) {
6890+ h_file = au_h_fptr(file, bindex);
6891+ if (h_file
6892+ && h_file->f_dentry
6893+ && h_file->f_dentry->d_inode)
6894+ sz += i_size_read(h_file->f_dentry->d_inode);
6895+ }
6896+ } else {
6897+ AuDebugOn(!dentry);
6898+ AuDebugOn(!dentry->d_inode);
6899+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
6900+
6901+ bend = au_dbtaildir(dentry);
6902+ for (bindex = au_dbstart(dentry);
6903+ bindex <= bend && sz < KMALLOC_MAX_SIZE; /* stop summing once the cap is reached */
6904+ bindex++) {
6905+ h_dentry = au_h_dptr(dentry, bindex);
6906+ if (h_dentry && h_dentry->d_inode)
6907+ sz += i_size_read(h_dentry->d_inode);
6908+ }
6909+ }
6910+ if (sz < KMALLOC_MAX_SIZE)
6911+ sz = roundup_pow_of_two(sz);
6912+ if (sz > KMALLOC_MAX_SIZE)
6913+ sz = KMALLOC_MAX_SIZE; /* never return more than KMALLOC_MAX_SIZE */
6914+ else if (sz < NAME_MAX) {
6915+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
6916+ sz = AUFS_RDBLK_DEF; /* too small to hold one name: fall back to the default block size */
6917+ }
6918+ return sz;
6919+}
6920+
1facf9fc 6921+/* ---------------------------------------------------------------------- */
6922+
6923+static int reopen_dir(struct file *file) /* re-sync @file's lower files with its dentry's current range; returns 0 or the au_h_open() error */
6924+{
6925+ int err;
6926+ unsigned int flags;
6927+ aufs_bindex_t bindex, btail, bstart;
6928+ struct dentry *dentry, *h_dentry;
6929+ struct file *h_file;
6930+
6931+ /* open all lower dirs */
6932+ dentry = file->f_dentry;
6933+ bstart = au_dbstart(dentry);
6934+ for (bindex = au_fbstart(file); bindex < bstart; bindex++) /* drop lower files below the dentry's new start */
6935+ au_set_h_fptr(file, bindex, NULL);
6936+ au_set_fbstart(file, bstart);
6937+
6938+ btail = au_dbtaildir(dentry);
6939+ for (bindex = au_fbend(file); btail < bindex; bindex--) /* drop lower files above the dentry's new tail */
6940+ au_set_h_fptr(file, bindex, NULL);
6941+ au_set_fbend(file, btail);
6942+
1308ab2a 6943+ spin_lock(&file->f_lock);
1facf9fc 6944+ flags = file->f_flags; /* snapshot f_flags under f_lock */
1308ab2a 6945+ spin_unlock(&file->f_lock);
1facf9fc 6946+ for (bindex = bstart; bindex <= btail; bindex++) {
6947+ h_dentry = au_h_dptr(dentry, bindex);
6948+ if (!h_dentry)
6949+ continue;
6950+ h_file = au_h_fptr(file, bindex);
6951+ if (h_file)
6952+ continue; /* already open at this index */
6953+
6954+ h_file = au_h_open(dentry, bindex, flags, file);
6955+ err = PTR_ERR(h_file);
6956+ if (IS_ERR(h_file))
6957+ goto out; /* close all? */
6958+ au_set_h_fptr(file, bindex, h_file);
6959+ }
6960+ au_update_figen(file);
6961+ /* todo: necessary? */
6962+ /* file->f_ra = h_file->f_ra; */
6963+ err = 0;
6964+
6965+ out:
6966+ return err;
6967+}
6968+
6969+static int do_open_dir(struct file *file, int flags) /* open every lower directory in [dbstart, au_dbtaildir()]; on failure all lower files are released again */
6970+{
6971+ int err;
6972+ aufs_bindex_t bindex, btail;
6973+ struct dentry *dentry, *h_dentry;
6974+ struct file *h_file;
6975+
1308ab2a 6976+ FiMustWriteLock(file);
6977+
1facf9fc 6978+ err = 0;
6979+ dentry = file->f_dentry;
6980+ au_set_fvdir_cache(file, NULL);
6981+ au_fi(file)->fi_maintain_plink = 0;
6982+ file->f_version = dentry->d_inode->i_version;
6983+ bindex = au_dbstart(dentry);
6984+ au_set_fbstart(file, bindex);
6985+ btail = au_dbtaildir(dentry);
6986+ au_set_fbend(file, btail);
6987+ for (; !err && bindex <= btail; bindex++) {
6988+ h_dentry = au_h_dptr(dentry, bindex);
6989+ if (!h_dentry)
6990+ continue;
6991+
6992+ h_file = au_h_open(dentry, bindex, flags, file);
6993+ if (IS_ERR(h_file)) {
6994+ err = PTR_ERR(h_file);
6995+ break;
6996+ }
6997+ au_set_h_fptr(file, bindex, h_file);
6998+ }
6999+ au_update_figen(file); /* called on the error path as well */
7000+ /* todo: necessary? */
7001+ /* file->f_ra = h_file->f_ra; */
7002+ if (!err)
7003+ return 0; /* success */
7004+
7005+ /* close all */
7006+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
7007+ au_set_h_fptr(file, bindex, NULL);
7008+ au_set_fbstart(file, -1); /* mark the file's range as empty */
7009+ au_set_fbend(file, -1);
7010+ return err;
7011+}
7012+
7013+static int aufs_open_dir(struct inode *inode __maybe_unused, /* ->open for directories: au_do_open() with do_open_dir() as the callback */
7014+ struct file *file)
7015+{
7016+ return au_do_open(file, do_open_dir);
7017+}
7018+
7019+static int aufs_release_dir(struct inode *inode __maybe_unused, /* ->release for directories: free the vdir cache and the file info; always returns 0 */
7020+ struct file *file)
7021+{
7022+ struct au_vdir *vdir_cache;
7023+ struct super_block *sb;
7024+ struct au_sbinfo *sbinfo;
7025+
7026+ sb = file->f_dentry->d_sb;
7027+ si_noflush_read_lock(sb);
7028+ fi_write_lock(file);
7029+ vdir_cache = au_fvdir_cache(file);
7030+ if (vdir_cache)
7031+ au_vdir_free(vdir_cache);
7032+ if (au_fi(file)->fi_maintain_plink) { /* this file was flagged fi_maintain_plink: clear the sb-wide status and wake waiters */
7033+ sbinfo = au_sbi(sb);
dece6358
AM
7034+ /* clear the flag without write-lock */
7035+ sbinfo->au_si_status &= ~AuSi_MAINTAIN_PLINK;
1308ab2a 7036+ smp_mb(); /* presumably orders the flag clear before the wake-up -- confirm against the waiter */
1facf9fc 7037+ wake_up_all(&sbinfo->si_plink_wq);
7038+ }
7039+ fi_write_unlock(file);
7040+ au_finfo_fin(file);
7041+ si_read_unlock(sb);
7042+ return 0;
7043+}
7044+
7045+/* ---------------------------------------------------------------------- */
7046+
7047+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) /* fsync every lower directory of @dentry that is not read-only, without a struct file; stops at the first error */
7048+{
7049+ int err;
7050+ aufs_bindex_t bend, bindex;
7051+ struct inode *inode;
7052+ struct super_block *sb;
7053+
7054+ err = 0;
7055+ sb = dentry->d_sb;
7056+ inode = dentry->d_inode;
7057+ IMustLock(inode);
7058+ bend = au_dbend(dentry);
7059+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
7060+ struct path h_path;
7061+ struct inode *h_inode;
7062+
7063+ if (au_test_ro(sb, bindex, inode)) /* skip indices that au_test_ro() reports as read-only */
7064+ continue;
7065+ h_path.dentry = au_h_dptr(dentry, bindex);
7066+ if (!h_path.dentry)
7067+ continue;
7068+ h_inode = h_path.dentry->d_inode;
7069+ if (!h_inode)
7070+ continue;
7071+
7072+ /* no mnt_want_write() */
7073+ /* cf. fs/nfsd/vfs.c and fs/nfsd/nfs4recover.c */
7074+ /* todo: inotify fired? */
7075+ h_path.mnt = au_sbr_mnt(sb, bindex);
7076+ mutex_lock(&h_inode->i_mutex);
7077+ err = filemap_fdatawrite(h_inode->i_mapping);
7078+ AuDebugOn(!h_inode->i_fop);
7079+ if (!err && h_inode->i_fop->fsync)
7080+ err = h_inode->i_fop->fsync(NULL, h_path.dentry,
7081+ datasync);
7082+ if (!err)
7083+ err = filemap_fdatawrite(h_inode->i_mapping); /* NOTE(review): second filemap_fdatawrite(); a write/fsync/wait sequence would call filemap_fdatawait() here -- confirm intent */
7084+ if (!err)
7085+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
7086+ mutex_unlock(&h_inode->i_mutex);
7087+ }
7088+
7089+ return err;
7090+}
7091+
7092+static int au_do_fsync_dir(struct file *file, int datasync) /* revalidate and lock @file, then vfs_fsync() each lower file that is not read-only; stops at the first error */
7093+{
7094+ int err;
7095+ aufs_bindex_t bend, bindex;
7096+ struct file *h_file;
7097+ struct super_block *sb;
7098+ struct inode *inode;
7099+ struct mutex *h_mtx;
7100+
7101+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7102+ if (unlikely(err))
7103+ goto out;
7104+
7105+ sb = file->f_dentry->d_sb;
7106+ inode = file->f_dentry->d_inode;
7107+ bend = au_fbend(file);
7108+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
7109+ h_file = au_h_fptr(file, bindex);
7110+ if (!h_file || au_test_ro(sb, bindex, inode))
7111+ continue;
7112+
7113+ err = vfs_fsync(h_file, h_file->f_dentry, datasync);
7114+ if (!err) {
7115+ h_mtx = &h_file->f_dentry->d_inode->i_mutex; /* the attribute refresh runs under the lower inode's i_mutex */
7116+ mutex_lock(h_mtx);
7117+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
7118+ /*ignore*/
7119+ mutex_unlock(h_mtx);
7120+ }
7121+ }
7122+
7123+ out:
7124+ return err; /* the locks from au_reval_and_lock_fdi() are not released here; aufs_fsync_dir() does that */
7125+}
7126+
7127+/*
7128+ * ->fsync for directories. @file may be NULL, then only @dentry is used.
7129+ */
7130+static int aufs_fsync_dir(struct file *file, struct dentry *dentry,
7131+ int datasync)
7132+{
7133+ int err;
7134+ struct super_block *sb;
7135+
7136+ IMustLock(dentry->d_inode);
7137+
7138+ err = 0;
7139+ sb = dentry->d_sb;
7140+ si_noflush_read_lock(sb);
7141+ if (file)
7142+ err = au_do_fsync_dir(file, datasync); /* locks the file and dentry info itself via au_reval_and_lock_fdi() */
7143+ else {
7144+ di_write_lock_child(dentry);
7145+ err = au_do_fsync_dir_no_file(dentry, datasync);
7146+ }
7147+ au_cpup_attr_timesizes(dentry->d_inode);
7148+ di_write_unlock(dentry); /* NOTE(review): also reached when au_do_fsync_dir() failed in au_reval_and_lock_fdi(); aufs_readdir() treats that failure as holding no locks -- confirm */
7149+ if (file)
7150+ fi_write_unlock(file);
7151+
7152+ si_read_unlock(sb);
7153+ return err;
7154+}
7155+
7156+/* ---------------------------------------------------------------------- */
7157+
7158+static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) /* ->readdir: build the virtual directory with au_vdir_init(), then emit entries through @filldir */
7159+{
7160+ int err;
7161+ struct dentry *dentry;
7162+ struct inode *inode;
7163+ struct super_block *sb;
7164+
7165+ dentry = file->f_dentry;
7166+ inode = dentry->d_inode;
7167+ IMustLock(inode);
7168+
7169+ sb = dentry->d_sb;
7170+ si_read_lock(sb, AuLock_FLUSH);
7171+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7172+ if (unlikely(err))
7173+ goto out; /* on failure only the si lock is released below */
7174+ err = au_vdir_init(file);
7175+ di_downgrade_lock(dentry, AuLock_IR); /* the write lock is no longer needed once au_vdir_init() has run */
7176+ if (unlikely(err))
7177+ goto out_unlock;
7178+
7179+ if (!au_test_nfsd(current)) {
7180+ err = au_vdir_fill_de(file, dirent, filldir);
7181+ fsstack_copy_attr_atime(inode,
7182+ au_h_iptr(inode, au_ibstart(inode)));
7183+ } else {
7184+ /*
7185+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
7186+ * encode_fh() and others.
7187+ */
7188+ struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode)); /* fetched before the dentry/inode info locks are dropped */
7189+
7190+ di_read_unlock(dentry, AuLock_IR);
7191+ si_read_unlock(sb);
7192+ lockdep_off();
7193+ err = au_vdir_fill_de(file, dirent, filldir);
7194+ lockdep_on();
7195+ fsstack_copy_attr_atime(inode, h_inode);
7196+ fi_write_unlock(file);
7197+
7198+ AuTraceErr(err);
7199+ return err; /* every lock was already released in this branch */
7200+ }
7201+
7202+ out_unlock:
7203+ di_read_unlock(dentry, AuLock_IR);
7204+ fi_write_unlock(file);
7205+ out:
7206+ si_read_unlock(sb);
7207+ return err;
7208+}
7209+
7210+/* ---------------------------------------------------------------------- */
7211+
7212+#define AuTestEmpty_WHONLY 1 /* flag bits for test_empty_arg.flags */
dece6358
AM
7213+#define AuTestEmpty_CALLED (1 << 1) /* set by test_empty_cb() on every call */
7214+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 7215+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7216+#define au_fset_testempty(flags, name) { (flags) |= AuTestEmpty_##name; }
7217+#define au_fclr_testempty(flags, name) { (flags) &= ~AuTestEmpty_##name; }
7218+
dece6358
AM
7219+#ifndef CONFIG_AUFS_SHWH
7220+#undef AuTestEmpty_SHWH
7221+#define AuTestEmpty_SHWH 0 /* without CONFIG_AUFS_SHWH the flag is 0, so tests for it are always false */
7222+#endif
7223+
1facf9fc 7224+struct test_empty_arg { /* state shared between do_test_empty() and its readdir callback test_empty_cb() */
1308ab2a 7225+ struct au_nhash *whlist;
1facf9fc 7226+ unsigned int flags; /* AuTestEmpty_* bits */
7227+ int err; /* result of the last callback invocation */
7228+ aufs_bindex_t bindex; /* index of the lower directory being read */
7229+};
7230+
7231+static int test_empty_cb(void *__arg, const char *__name, int namelen, /* readdir callback: record whiteout names, report -ENOTEMPTY for other entries in WHONLY mode */
dece6358
AM
7232+ loff_t offset __maybe_unused, u64 ino,
7233+ unsigned int d_type)
1facf9fc 7234+{
7235+ struct test_empty_arg *arg = __arg;
7236+ char *name = (void *)__name;
7237+
7238+ arg->err = 0;
7239+ au_fset_testempty(arg->flags, CALLED);
7240+ /* smp_mb(); */
7241+ if (name[0] == '.'
7242+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
7243+ goto out; /* success */
7244+
7245+ if (namelen <= AUFS_WH_PFX_LEN
7246+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { /* name does not carry the AUFS_WH_PFX prefix */
7247+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 7248+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 7249+ arg->err = -ENOTEMPTY; /* only in WHONLY mode, and only when the name is not in whlist */
7250+ goto out;
7251+ }
7252+
7253+ name += AUFS_WH_PFX_LEN; /* strip the prefix and remember the bare name */
7254+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 7255+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 7256+ arg->err = au_nhash_append_wh
1308ab2a 7257+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 7258+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 7259+
7260+ out:
7261+ /* smp_mb(); */
7262+ AuTraceErr(arg->err);
7263+ return arg->err;
7264+}
7265+
7266+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) /* open the lower directory at arg->bindex and feed all its entries to test_empty_cb() */
7267+{
7268+ int err;
7269+ struct file *h_file;
7270+
7271+ h_file = au_h_open(dentry, arg->bindex,
7272+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
7273+ /*file*/NULL);
7274+ err = PTR_ERR(h_file);
7275+ if (IS_ERR(h_file))
7276+ goto out;
7277+
7278+ err = 0;
7279+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
7280+ && !h_file->f_dentry->d_inode->i_nlink) /* lower dir has no links left: report success without reading it */
7281+ goto out_put;
7282+
7283+ do {
7284+ arg->err = 0;
7285+ au_fclr_testempty(arg->flags, CALLED);
7286+ /* smp_mb(); */
7287+ err = vfsub_readdir(h_file, test_empty_cb, arg);
7288+ if (err >= 0)
7289+ err = arg->err; /* a successful readdir yields the callback's verdict */
7290+ } while (!err && au_ftest_testempty(arg->flags, CALLED)); /* repeat until a pass invokes the callback zero times */
7291+
7292+ out_put:
7293+ fput(h_file);
7294+ au_sbr_put(dentry->d_sb, arg->bindex); /* presumably pairs with a reference taken inside au_h_open() -- confirm */
7295+ out:
7296+ return err;
7297+}
7298+
7299+struct do_test_empty_args { /* argument bundle for call_do_test_empty() */
7300+ int *errp; /* where the result of do_test_empty() is stored */
7301+ struct dentry *dentry;
7302+ struct test_empty_arg *arg;
7303+};
7304+
7305+static void call_do_test_empty(void *args) /* void* trampoline passed to au_wkq_wait(): unpack @args and run do_test_empty() */
7306+{
7307+ struct do_test_empty_args *a = args;
7308+ *a->errp = do_test_empty(a->dentry, a->arg);
7309+}
7310+
7311+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) /* run do_test_empty() directly, or through au_wkq_wait() when the permission test returns non-zero */
7312+{
7313+ int err, wkq_err;
7314+ struct dentry *h_dentry;
7315+ struct inode *h_inode;
7316+
7317+ h_dentry = au_h_dptr(dentry, arg->bindex);
7318+ h_inode = h_dentry->d_inode;
7319+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
7320+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); /* NOTE(review): presumably non-zero means the current task may not read the lower dir -- confirm */
7321+ mutex_unlock(&h_inode->i_mutex);
7322+ if (!err)
7323+ err = do_test_empty(dentry, arg);
7324+ else {
7325+ struct do_test_empty_args args = {
7326+ .errp = &err,
7327+ .dentry = dentry,
7328+ .arg = arg
7329+ };
7330+ unsigned int flags = arg->flags; /* saved so the worker's flag changes can be undone */
7331+
7332+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
7333+ if (unlikely(wkq_err))
7334+ err = wkq_err; /* an au_wkq_wait() failure overrides err */
7335+ arg->flags = flags;
7336+ }
7337+
7338+ return err;
7339+}
7340+
7341+int au_test_empty_lower(struct dentry *dentry) /* test the lower directories of @dentry for emptiness using a private whiteout list; 0 or a negative error such as -ENOTEMPTY */
7342+{
7343+ int err;
1308ab2a 7344+ unsigned int rdhash;
1facf9fc 7345+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 7346+ struct au_nhash whlist;
1facf9fc 7347+ struct test_empty_arg arg;
1facf9fc 7348+
dece6358
AM
7349+ SiMustAnyLock(dentry->d_sb);
7350+
1308ab2a 7351+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
7352+ if (!rdhash)
7353+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); /* si_rdhash is zero: estimate it from the directory size */
7354+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 7355+ if (unlikely(err))
1facf9fc 7356+ goto out;
7357+
1facf9fc 7358+ arg.flags = 0;
1308ab2a 7359+ arg.whlist = &whlist;
7360+ bstart = au_dbstart(dentry);
dece6358
AM
7361+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7362+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 7363+ arg.bindex = bstart;
7364+ err = do_test_empty(dentry, &arg); /* first pass on bstart without WHONLY: only collects whiteouts */
7365+ if (unlikely(err))
7366+ goto out_whlist;
7367+
7368+ au_fset_testempty(arg.flags, WHONLY); /* from here on, any entry not in whlist means -ENOTEMPTY */
7369+ btail = au_dbtaildir(dentry);
7370+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
7371+ struct dentry *h_dentry;
7372+
7373+ h_dentry = au_h_dptr(dentry, bindex);
7374+ if (h_dentry && h_dentry->d_inode) {
7375+ arg.bindex = bindex;
7376+ err = do_test_empty(dentry, &arg);
7377+ }
7378+ }
7379+
7380+ out_whlist:
1308ab2a 7381+ au_nhash_wh_free(&whlist);
1facf9fc 7382+ out:
7383+ return err;
7384+}
7385+
7386+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) /* test all lower directories in WHONLY mode against the caller's @whlist, via sio_test_empty() */
7387+{
7388+ int err;
7389+ struct test_empty_arg arg;
7390+ aufs_bindex_t bindex, btail;
7391+
7392+ err = 0;
1308ab2a 7393+ arg.whlist = whlist;
1facf9fc 7394+ arg.flags = AuTestEmpty_WHONLY; /* unlike au_test_empty_lower(), WHONLY applies from the first index on */
dece6358
AM
7395+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7396+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 7397+ btail = au_dbtaildir(dentry);
7398+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
7399+ struct dentry *h_dentry;
7400+
7401+ h_dentry = au_h_dptr(dentry, bindex);
7402+ if (h_dentry && h_dentry->d_inode) {
7403+ arg.bindex = bindex;
7404+ err = sio_test_empty(dentry, &arg);
7405+ }
7406+ }
7407+
7408+ return err;
7409+}
7410+
7411+/* ---------------------------------------------------------------------- */
7412+
7413+const struct file_operations aufs_dir_fop = { /* file operations for aufs directories */
7414+ .read = generic_read_dir,
7415+ .readdir = aufs_readdir,
7416+ .unlocked_ioctl = aufs_ioctl_dir,
7417+ .open = aufs_open_dir,
7418+ .release = aufs_release_dir,
7419+ .flush = aufs_flush,
7420+ .fsync = aufs_fsync_dir
7421+};
1308ab2a 7422diff -uprN -x .git linux-2.6.31/fs/aufs/dir.h aufs2-2.6.git/fs/aufs/dir.h
7423--- linux-2.6.31/fs/aufs/dir.h 1970-01-01 00:00:00.000000000 +0000
7424+++ aufs2-2.6.git/fs/aufs/dir.h 2009-09-21 21:49:23.399892755 +0000
7425@@ -0,0 +1,127 @@
1facf9fc 7426+/*
7427+ * Copyright (C) 2005-2009 Junjiro R. Okajima
7428+ *
7429+ * This program, aufs is free software; you can redistribute it and/or modify
7430+ * it under the terms of the GNU General Public License as published by
7431+ * the Free Software Foundation; either version 2 of the License, or
7432+ * (at your option) any later version.
dece6358
AM
7433+ *
7434+ * This program is distributed in the hope that it will be useful,
7435+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7436+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7437+ * GNU General Public License for more details.
7438+ *
7439+ * You should have received a copy of the GNU General Public License
7440+ * along with this program; if not, write to the Free Software
7441+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7442+ */
7443+
7444+/*
7445+ * directory operations
7446+ */
7447+
7448+#ifndef __AUFS_DIR_H__
7449+#define __AUFS_DIR_H__
7450+
7451+#ifdef __KERNEL__
7452+
7453+#include <linux/fs.h>
7454+#include <linux/aufs_type.h>
7455+
7456+/* ---------------------------------------------------------------------- */
7457+
7458+/* need to be faster and smaller */
7459+
7460+struct au_nhash { /* name hash: nh_head points at hlist heads, nh_num is presumably their count -- confirm in vdir.c */
dece6358
AM
7461+ unsigned int nh_num;
7462+ struct hlist_head *nh_head;
1facf9fc 7463+};
7464+
7465+struct au_vdir_destr { /* length-prefixed name; packed */
7466+ unsigned char len; /* one byte, so at most 255 */
7467+ unsigned char name[0]; /* zero-length trailing array: the name bytes follow the struct */
7468+} __packed;
7469+
7470+struct au_vdir_dehstr { /* hash-list node referring to an au_vdir_destr */
7471+ struct hlist_node hash;
7472+ struct au_vdir_destr *str;
7473+};
7474+
7475+struct au_vdir_de { /* one directory entry: inode number, type and name; packed */
7476+ ino_t de_ino;
7477+ unsigned char de_type;
7478+ /* caution: packed */
7479+ struct au_vdir_destr de_str; /* must stay last: it ends in a zero-length array */
7480+} __packed;
7481+
7482+struct au_vdir_wh { /* one whiteout entry on a hash list; packed */
7483+ struct hlist_node wh_hash;
dece6358
AM
7484+#ifdef CONFIG_AUFS_SHWH
7485+ ino_t wh_ino; /* wh_ino and wh_type exist only with CONFIG_AUFS_SHWH */
1facf9fc 7486+ aufs_bindex_t wh_bindex;
dece6358
AM
7487+ unsigned char wh_type;
7488+#else
7489+ aufs_bindex_t wh_bindex;
7490+#endif
7491+ /* caution: packed */
1facf9fc 7492+ struct au_vdir_destr wh_str; /* must stay last: it ends in a zero-length array */
7493+} __packed;
7494+
7495+union au_vdir_deblk_p { /* one pointer viewed either as raw bytes or as an au_vdir_de */
7496+ unsigned char *deblk;
7497+ struct au_vdir_de *de;
7498+};
7499+
7500+struct au_vdir { /* virtual directory cached per file (see au_fvdir_cache() users in dir.c) */
7501+ unsigned char **vd_deblk; /* array of entry blocks; presumably vd_nblk elements -- confirm in vdir.c */
7502+ unsigned long vd_nblk;
1facf9fc 7503+ struct {
7504+ unsigned long ul;
7505+ union au_vdir_deblk_p p;
7506+ } vd_last; /* presumably block index and position of the last entry -- confirm in vdir.c */
7507+
7508+ unsigned long vd_version;
dece6358 7509+ unsigned int vd_deblk_sz;
1facf9fc 7510+ unsigned long vd_jiffy;
7511+};
7512+
7513+/* ---------------------------------------------------------------------- */
7514+
7515+/* dir.c */
7516+extern const struct file_operations aufs_dir_fop;
7517+void au_add_nlink(struct inode *dir, struct inode *h_dir);
7518+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 7519+loff_t au_dir_size(struct file *file, struct dentry *dentry); /* @file may be NULL, then @dentry is used */
1facf9fc 7520+int au_test_empty_lower(struct dentry *dentry);
7521+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
7522+
7523+/* vdir.c */
1308ab2a 7524+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
7525+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
7526+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 7527+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
7528+ int limit);
dece6358
AM
7529+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
7530+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
7531+ unsigned int d_type, aufs_bindex_t bindex,
7532+ unsigned char shwh);
1facf9fc 7533+void au_vdir_free(struct au_vdir *vdir);
7534+int au_vdir_init(struct file *file);
7535+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
7536+
7537+/* ioctl.c */
7538+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
7539+
1308ab2a 7540+#ifdef CONFIG_AUFS_RDU
7541+/* rdu.c */
7542+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
7543+#else
7544+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, /* stub used when CONFIG_AUFS_RDU is off */
7545+ unsigned long arg)
7546+{
7547+ return -EINVAL;
7548+}
7549+#endif
7550+
1facf9fc 7551+#endif /* __KERNEL__ */
7552+#endif /* __AUFS_DIR_H__ */
1308ab2a 7553diff -uprN -x .git linux-2.6.31/fs/aufs/export.c aufs2-2.6.git/fs/aufs/export.c
7554--- linux-2.6.31/fs/aufs/export.c 1970-01-01 00:00:00.000000000 +0000
7555+++ aufs2-2.6.git/fs/aufs/export.c 2009-09-21 21:49:23.399892755 +0000
7556@@ -0,0 +1,746 @@
1facf9fc 7557+/*
dece6358 7558+ * Copyright (C) 2005-2009 Junjiro R. Okajima
1facf9fc 7559+ *
7560+ * This program, aufs is free software; you can redistribute it and/or modify
7561+ * it under the terms of the GNU General Public License as published by
7562+ * the Free Software Foundation; either version 2 of the License, or
7563+ * (at your option) any later version.
dece6358
AM
7564+ *
7565+ * This program is distributed in the hope that it will be useful,
7566+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7567+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7568+ * GNU General Public License for more details.
7569+ *
7570+ * You should have received a copy of the GNU General Public License
7571+ * along with this program; if not, write to the Free Software
7572+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 7573+ */
7574+
7575+/*
7576+ * export via nfs
7577+ */
7578+
7579+#include <linux/exportfs.h>
dece6358 7580+#include <linux/file.h>
1facf9fc 7581+#include <linux/mnt_namespace.h>
dece6358 7582+#include <linux/namei.h>
1308ab2a 7583+#include <linux/nsproxy.h>
1facf9fc 7584+#include <linux/random.h>
7585+#include "aufs.h"
7586+
7587+union conv { /* overlays an ino_t with one or two 32-bit words for encode_ino()/decode_ino() */
7588+#ifdef CONFIG_AUFS_INO_T_64
7589+ __u32 a[2];
7590+#else
7591+ __u32 a[1];
7592+#endif
7593+ ino_t ino;
7594+};
7595+
7596+static ino_t decode_ino(__u32 *a) /* rebuild an ino_t from the 32-bit word(s) at @a; inverse of encode_ino() */
7597+{
7598+ union conv u;
7599+
7600+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); /* the word array must cover ino_t exactly */
7601+ u.a[0] = a[0];
7602+#ifdef CONFIG_AUFS_INO_T_64
7603+ u.a[1] = a[1];
7604+#endif
7605+ return u.ino;
7606+}
7607+
7608+static void encode_ino(__u32 *a, ino_t ino) /* store @ino into the 32-bit word(s) at @a, in host memory order */
7609+{
7610+ union conv u;
7611+
7612+ u.ino = ino;
7613+ a[0] = u.a[0];
7614+#ifdef CONFIG_AUFS_INO_T_64
7615+ a[1] = u.a[1];
7616+#endif
7617+}
7618+
7619+/* NFS file handle: indices of the 32-bit words that make up the handle */
7620+enum {
7621+ Fh_br_id,
7622+ Fh_sigen,
7623+#ifdef CONFIG_AUFS_INO_T_64
7624+ /* support 64bit inode number */
7625+ Fh_ino1,
7626+ Fh_ino2,
7627+ Fh_dir_ino1,
7628+ Fh_dir_ino2,
7629+#else
7630+ Fh_ino1,
7631+ Fh_dir_ino1,
7632+#endif
7633+ Fh_igen,
7634+ Fh_h_type,
7635+ Fh_tail, /* first index after the fixed fields above */
7636+
7637+ Fh_ino = Fh_ino1, /* aliases for the first word of each inode number */
7638+ Fh_dir_ino = Fh_dir_ino1
7639+};
7640+
7641+static int au_test_anon(struct dentry *dentry) /* 1 when DCACHE_DISCONNECTED is set in @dentry->d_flags, else 0 */
7642+{
7643+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
7644+}
7645+
7646+/* ---------------------------------------------------------------------- */
7647+/* inode generation external table */
7648+
7649+int au_xigen_inc(struct inode *inode)
7650+{
7651+ int err;
7652+ loff_t pos;
7653+ ssize_t sz;
7654+ __u32 igen;
7655+ struct super_block *sb;
7656+ struct au_sbinfo *sbinfo;
7657+
7658+ err = 0;
7659+ sb = inode->i_sb;
dece6358
AM
7660+ sbinfo = au_sbi(sb);
7661+ /*
7662+ * temporary workaround for escaping from SiMustAnyLock() in
7663+ * au_mntflags(), since this function is called from au_iinfo_fin().
7664+ */
7665+ if (unlikely(!au_opt_test(sbinfo->si_mntflags, XINO)))
1facf9fc 7666+ goto out;
7667+
7668+ pos = inode->i_ino;
7669+ pos *= sizeof(igen);
7670+ igen = inode->i_generation + 1;
1facf9fc 7671+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
7672+ sizeof(igen), &pos);
7673+ if (sz == sizeof(igen))
7674+ goto out; /* success */
7675+
7676+ err = sz;
7677+ if (unlikely(sz >= 0)) {
7678+ err = -EIO;
7679+ AuIOErr("xigen error (%zd)\n", sz);
7680+ }
7681+
7682+ out:
7683+ return err;
7684+}
7685+
7686+int au_xigen_new(struct inode *inode)
7687+{
7688+ int err;
7689+ loff_t pos;
7690+ ssize_t sz;
7691+ struct super_block *sb;
7692+ struct au_sbinfo *sbinfo;
7693+ struct file *file;
7694+
7695+ err = 0;
7696+ /* todo: dirty, at mount time */
7697+ if (inode->i_ino == AUFS_ROOT_INO)
7698+ goto out;
7699+ sb = inode->i_sb;
dece6358 7700+ SiMustAnyLock(sb);
1facf9fc 7701+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
7702+ goto out;
7703+
7704+ err = -EFBIG;
7705+ pos = inode->i_ino;
7706+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
7707+ AuIOErr1("too large i%lld\n", pos);
7708+ goto out;
7709+ }
7710+ pos *= sizeof(inode->i_generation);
7711+
7712+ err = 0;
7713+ sbinfo = au_sbi(sb);
7714+ file = sbinfo->si_xigen;
7715+ BUG_ON(!file);
7716+
7717+ if (i_size_read(file->f_dentry->d_inode)
7718+ < pos + sizeof(inode->i_generation)) {
7719+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
7720+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
7721+ sizeof(inode->i_generation), &pos);
7722+ } else
7723+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
7724+ sizeof(inode->i_generation), &pos);
7725+ if (sz == sizeof(inode->i_generation))
7726+ goto out; /* success */
7727+
7728+ err = sz;
7729+ if (unlikely(sz >= 0)) {
7730+ err = -EIO;
7731+ AuIOErr("xigen error (%zd)\n", sz);
7732+ }
7733+
7734+ out:
7735+ return err;
7736+}
7737+
7738+int au_xigen_set(struct super_block *sb, struct file *base)
7739+{
7740+ int err;
7741+ struct au_sbinfo *sbinfo;
7742+ struct file *file;
7743+
dece6358
AM
7744+ SiMustWriteLock(sb);
7745+
1facf9fc 7746+ sbinfo = au_sbi(sb);
7747+ file = au_xino_create2(base, sbinfo->si_xigen);
7748+ err = PTR_ERR(file);
7749+ if (IS_ERR(file))
7750+ goto out;
7751+ err = 0;
7752+ if (sbinfo->si_xigen)
7753+ fput(sbinfo->si_xigen);
7754+ sbinfo->si_xigen = file;
7755+
7756+ out:
7757+ return err;
7758+}
7759+
7760+void au_xigen_clr(struct super_block *sb)
7761+{
7762+ struct au_sbinfo *sbinfo;
7763+
dece6358
AM
7764+ SiMustWriteLock(sb);
7765+
1facf9fc 7766+ sbinfo = au_sbi(sb);
7767+ if (sbinfo->si_xigen) {
7768+ fput(sbinfo->si_xigen);
7769+ sbinfo->si_xigen = NULL;
7770+ }
7771+}
7772+
7773+/* ---------------------------------------------------------------------- */
7774+
7775+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
7776+ ino_t dir_ino)
7777+{
7778+ struct dentry *dentry, *d;
7779+ struct inode *inode;
7780+ unsigned int sigen;
7781+
7782+ dentry = NULL;
7783+ inode = ilookup(sb, ino);
7784+ if (!inode)
7785+ goto out;
7786+
7787+ dentry = ERR_PTR(-ESTALE);
7788+ sigen = au_sigen(sb);
7789+ if (unlikely(is_bad_inode(inode)
7790+ || IS_DEADDIR(inode)
7791+ || sigen != au_iigen(inode)))
7792+ goto out_iput;
7793+
7794+ dentry = NULL;
7795+ if (!dir_ino || S_ISDIR(inode->i_mode))
7796+ dentry = d_find_alias(inode);
7797+ else {
7798+ spin_lock(&dcache_lock);
7799+ list_for_each_entry(d, &inode->i_dentry, d_alias)
7800+ if (!au_test_anon(d)
7801+ && d->d_parent->d_inode->i_ino == dir_ino) {
7802+ dentry = dget_locked(d);
7803+ break;
7804+ }
7805+ spin_unlock(&dcache_lock);
7806+ }
7807+ if (unlikely(dentry && sigen != au_digen(dentry))) {
7808+ dput(dentry);
7809+ dentry = ERR_PTR(-ESTALE);
7810+ }
7811+
7812+ out_iput:
7813+ iput(inode);
7814+ out:
7815+ return dentry;
7816+}
7817+
7818+/* ---------------------------------------------------------------------- */
7819+
7820+/* todo: dirty? */
7821+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
7822+static struct vfsmount *au_mnt_get(struct super_block *sb)
7823+{
7824+ struct mnt_namespace *ns;
7825+ struct vfsmount *pos, *mnt;
7826+
7827+ spin_lock(&vfsmount_lock);
7828+ /* no get/put ?? */
7829+ AuDebugOn(!current->nsproxy);
7830+ ns = current->nsproxy->mnt_ns;
7831+ AuDebugOn(!ns);
7832+ mnt = NULL;
7833+ /* the order (reverse) will not be a problem */
7834+ list_for_each_entry(pos, &ns->list, mnt_list)
7835+ if (pos->mnt_sb == sb) {
7836+ mnt = mntget(pos);
7837+ break;
7838+ }
7839+ spin_unlock(&vfsmount_lock);
7840+ AuDebugOn(!mnt);
7841+
7842+ return mnt;
7843+}
7844+
7845+struct au_nfsd_si_lock {
7846+ const unsigned int sigen;
7847+ const aufs_bindex_t br_id;
7848+ unsigned char force_lock;
7849+};
7850+
7851+static aufs_bindex_t si_nfsd_read_lock(struct super_block *sb,
7852+ struct au_nfsd_si_lock *nsi_lock)
7853+{
7854+ aufs_bindex_t bindex;
7855+
7856+ si_read_lock(sb, AuLock_FLUSH);
7857+
7858+ /* branch id may be wrapped around */
7859+ bindex = au_br_index(sb, nsi_lock->br_id);
7860+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
7861+ goto out; /* success */
7862+
7863+ if (!nsi_lock->force_lock)
7864+ si_read_unlock(sb);
7865+ bindex = -1;
7866+
7867+ out:
7868+ return bindex;
7869+}
7870+
7871+struct find_name_by_ino {
7872+ int called, found;
7873+ ino_t ino;
7874+ char *name;
7875+ int namelen;
7876+};
7877+
7878+static int
7879+find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
7880+ u64 ino, unsigned int d_type)
7881+{
7882+ struct find_name_by_ino *a = arg;
7883+
7884+ a->called++;
7885+ if (a->ino != ino)
7886+ return 0;
7887+
7888+ memcpy(a->name, name, namelen);
7889+ a->namelen = namelen;
7890+ a->found = 1;
7891+ return 1;
7892+}
7893+
7894+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
7895+ struct au_nfsd_si_lock *nsi_lock)
7896+{
7897+ struct dentry *dentry, *parent;
7898+ struct file *file;
7899+ struct inode *dir;
7900+ struct find_name_by_ino arg;
7901+ int err;
7902+
7903+ parent = path->dentry;
7904+ if (nsi_lock)
7905+ si_read_unlock(parent->d_sb);
7906+ path_get(path);
1308ab2a 7907+ file = vfsub_dentry_open(path, au_dir_roflags, current_cred());
1facf9fc 7908+ dentry = (void *)file;
7909+ if (IS_ERR(file))
7910+ goto out;
7911+
7912+ dentry = ERR_PTR(-ENOMEM);
7913+ arg.name = __getname();
7914+ if (unlikely(!arg.name))
7915+ goto out_file;
7916+ arg.ino = ino;
7917+ arg.found = 0;
7918+ do {
7919+ arg.called = 0;
7920+ /* smp_mb(); */
7921+ err = vfsub_readdir(file, find_name_by_ino, &arg);
7922+ } while (!err && !arg.found && arg.called);
7923+ dentry = ERR_PTR(err);
7924+ if (unlikely(err))
7925+ goto out_name;
7926+ dentry = ERR_PTR(-ENOENT);
7927+ if (!arg.found)
7928+ goto out_name;
7929+
7930+ /* do not call au_lkup_one() */
7931+ dir = parent->d_inode;
7932+ mutex_lock(&dir->i_mutex);
7933+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
7934+ mutex_unlock(&dir->i_mutex);
7935+ AuTraceErrPtr(dentry);
7936+ if (IS_ERR(dentry))
7937+ goto out_name;
7938+ AuDebugOn(au_test_anon(dentry));
7939+ if (unlikely(!dentry->d_inode)) {
7940+ dput(dentry);
7941+ dentry = ERR_PTR(-ENOENT);
7942+ }
7943+
7944+ out_name:
7945+ __putname(arg.name);
7946+ out_file:
7947+ fput(file);
7948+ out:
7949+ if (unlikely(nsi_lock
7950+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
7951+ if (!IS_ERR(dentry)) {
7952+ dput(dentry);
7953+ dentry = ERR_PTR(-ESTALE);
7954+ }
7955+ AuTraceErrPtr(dentry);
7956+ return dentry;
7957+}
7958+
7959+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
7960+ ino_t dir_ino,
7961+ struct au_nfsd_si_lock *nsi_lock)
7962+{
7963+ struct dentry *dentry;
7964+ struct path path;
7965+
7966+ if (dir_ino != AUFS_ROOT_INO) {
7967+ path.dentry = decode_by_ino(sb, dir_ino, 0);
7968+ dentry = path.dentry;
7969+ if (!path.dentry || IS_ERR(path.dentry))
7970+ goto out;
7971+ AuDebugOn(au_test_anon(path.dentry));
7972+ } else
7973+ path.dentry = dget(sb->s_root);
7974+
7975+ path.mnt = au_mnt_get(sb);
7976+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
7977+ path_put(&path);
7978+
7979+ out:
7980+ AuTraceErrPtr(dentry);
7981+ return dentry;
7982+}
7983+
7984+/* ---------------------------------------------------------------------- */
7985+
7986+static int h_acceptable(void *expv, struct dentry *dentry)
7987+{
7988+ return 1;
7989+}
7990+
7991+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
7992+ char *buf, int len, struct super_block *sb)
7993+{
7994+ char *p;
7995+ int n;
7996+ struct path path;
7997+
7998+ p = d_path(h_rootpath, buf, len);
7999+ if (IS_ERR(p))
8000+ goto out;
8001+ n = strlen(p);
8002+
8003+ path.mnt = h_rootpath->mnt;
8004+ path.dentry = h_parent;
8005+ p = d_path(&path, buf, len);
8006+ if (IS_ERR(p))
8007+ goto out;
8008+ if (n != 1)
8009+ p += n;
8010+
8011+ path.mnt = au_mnt_get(sb);
8012+ path.dentry = sb->s_root;
8013+ p = d_path(&path, buf, len - strlen(p));
8014+ mntput(path.mnt);
8015+ if (IS_ERR(p))
8016+ goto out;
8017+ if (n != 1)
8018+ p[strlen(p)] = '/';
8019+
8020+ out:
8021+ AuTraceErrPtr(p);
8022+ return p;
8023+}
8024+
8025+static
8026+struct dentry *decode_by_path(struct super_block *sb, aufs_bindex_t bindex,
8027+ ino_t ino, __u32 *fh, int fh_len,
8028+ struct au_nfsd_si_lock *nsi_lock)
8029+{
8030+ struct dentry *dentry, *h_parent, *root;
8031+ struct super_block *h_sb;
8032+ char *pathname, *p;
8033+ struct vfsmount *h_mnt;
8034+ struct au_branch *br;
8035+ int err;
8036+ struct path path;
8037+
8038+ br = au_sbr(sb, bindex);
8039+ /* au_br_get(br); */
8040+ h_mnt = br->br_mnt;
8041+ h_sb = h_mnt->mnt_sb;
8042+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
8043+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
8044+ fh_len - Fh_tail, fh[Fh_h_type],
8045+ h_acceptable, /*context*/NULL);
8046+ dentry = h_parent;
8047+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
8048+ AuWarn1("%s decode_fh failed, %ld\n",
8049+ au_sbtype(h_sb), PTR_ERR(h_parent));
8050+ goto out;
8051+ }
8052+ dentry = NULL;
8053+ if (unlikely(au_test_anon(h_parent))) {
8054+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
8055+ au_sbtype(h_sb));
8056+ goto out_h_parent;
8057+ }
8058+
8059+ dentry = ERR_PTR(-ENOMEM);
8060+ pathname = (void *)__get_free_page(GFP_NOFS);
8061+ if (unlikely(!pathname))
8062+ goto out_h_parent;
8063+
8064+ root = sb->s_root;
8065+ path.mnt = h_mnt;
8066+ di_read_lock_parent(root, !AuLock_IR);
8067+ path.dentry = au_h_dptr(root, bindex);
8068+ di_read_unlock(root, !AuLock_IR);
8069+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
8070+ dentry = (void *)p;
8071+ if (IS_ERR(p))
8072+ goto out_pathname;
8073+
8074+ si_read_unlock(sb);
8075+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
8076+ dentry = ERR_PTR(err);
8077+ if (unlikely(err))
8078+ goto out_relock;
8079+
8080+ dentry = ERR_PTR(-ENOENT);
8081+ AuDebugOn(au_test_anon(path.dentry));
8082+ if (unlikely(!path.dentry->d_inode))
8083+ goto out_path;
8084+
8085+ if (ino != path.dentry->d_inode->i_ino)
8086+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
8087+ else
8088+ dentry = dget(path.dentry);
8089+
8090+ out_path:
8091+ path_put(&path);
8092+ out_relock:
8093+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
8094+ if (!IS_ERR(dentry)) {
8095+ dput(dentry);
8096+ dentry = ERR_PTR(-ESTALE);
8097+ }
8098+ out_pathname:
8099+ free_page((unsigned long)pathname);
8100+ out_h_parent:
8101+ dput(h_parent);
8102+ out:
8103+ /* au_br_put(br); */
8104+ AuTraceErrPtr(dentry);
8105+ return dentry;
8106+}
8107+
8108+/* ---------------------------------------------------------------------- */
8109+
8110+static struct dentry *
8111+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
8112+ int fh_type)
8113+{
8114+ struct dentry *dentry;
8115+ __u32 *fh = fid->raw;
8116+ ino_t ino, dir_ino;
8117+ aufs_bindex_t bindex;
8118+ struct au_nfsd_si_lock nsi_lock = {
8119+ .sigen = fh[Fh_sigen],
8120+ .br_id = fh[Fh_br_id],
8121+ .force_lock = 0
8122+ };
8123+
8124+ AuDebugOn(fh_len < Fh_tail);
8125+
8126+ dentry = ERR_PTR(-ESTALE);
8127+ /* branch id may be wrapped around */
8128+ bindex = si_nfsd_read_lock(sb, &nsi_lock);
8129+ if (unlikely(bindex < 0))
8130+ goto out;
8131+ nsi_lock.force_lock = 1;
8132+
8133+ /* is this inode still cached? */
8134+ ino = decode_ino(fh + Fh_ino);
8135+ AuDebugOn(ino == AUFS_ROOT_INO);
8136+ dir_ino = decode_ino(fh + Fh_dir_ino);
8137+ dentry = decode_by_ino(sb, ino, dir_ino);
8138+ if (IS_ERR(dentry))
8139+ goto out_unlock;
8140+ if (dentry)
8141+ goto accept;
8142+
8143+ /* is the parent dir cached? */
8144+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
8145+ if (IS_ERR(dentry))
8146+ goto out_unlock;
8147+ if (dentry)
8148+ goto accept;
8149+
8150+ /* lookup path */
8151+ dentry = decode_by_path(sb, bindex, ino, fh, fh_len, &nsi_lock);
8152+ if (IS_ERR(dentry))
8153+ goto out_unlock;
8154+ if (unlikely(!dentry))
8155+ /* todo?: make it ESTALE */
8156+ goto out_unlock;
8157+
8158+ accept:
8159+ if (dentry->d_inode->i_generation == fh[Fh_igen])
8160+ goto out_unlock; /* success */
8161+
8162+ dput(dentry);
8163+ dentry = ERR_PTR(-ESTALE);
8164+ out_unlock:
8165+ si_read_unlock(sb);
8166+ out:
8167+ AuTraceErrPtr(dentry);
8168+ return dentry;
8169+}
8170+
8171+#if 0 /* reserved for future use */
8172+/* support subtreecheck option */
8173+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
8174+ int fh_len, int fh_type)
8175+{
8176+ struct dentry *parent;
8177+ __u32 *fh = fid->raw;
8178+ ino_t dir_ino;
8179+
8180+ dir_ino = decode_ino(fh + Fh_dir_ino);
8181+ parent = decode_by_ino(sb, dir_ino, 0);
8182+ if (IS_ERR(parent))
8183+ goto out;
8184+ if (!parent)
8185+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
8186+ dir_ino, fh, fh_len);
8187+
8188+ out:
8189+ AuTraceErrPtr(parent);
8190+ return parent;
8191+}
8192+#endif
8193+
8194+/* ---------------------------------------------------------------------- */
8195+
8196+static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
8197+ int connectable)
8198+{
8199+ int err;
8200+ aufs_bindex_t bindex, bend;
8201+ struct super_block *sb, *h_sb;
8202+ struct inode *inode;
8203+ struct dentry *parent, *h_parent;
8204+ struct au_branch *br;
8205+
8206+ AuDebugOn(au_test_anon(dentry));
8207+
8208+ parent = NULL;
8209+ err = -ENOSPC;
8210+ if (unlikely(*max_len <= Fh_tail)) {
8211+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
8212+ goto out;
8213+ }
8214+
8215+ err = FILEID_ROOT;
8216+ if (IS_ROOT(dentry)) {
8217+ AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
8218+ goto out;
8219+ }
8220+
8221+ err = -EIO;
8222+ h_parent = NULL;
8223+ sb = dentry->d_sb;
8224+ aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR);
8225+ parent = dget_parent(dentry);
8226+ di_read_lock_parent(parent, !AuLock_IR);
8227+ inode = dentry->d_inode;
8228+ AuDebugOn(!inode);
8229+#ifdef CONFIG_AUFS_DEBUG
8230+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
8231+ AuWarn1("NFS-exporting requires xino\n");
8232+#endif
8233+
8234+ bend = au_dbtaildir(parent);
8235+ for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
8236+ h_parent = au_h_dptr(parent, bindex);
8237+ if (h_parent) {
8238+ dget(h_parent);
8239+ break;
8240+ }
8241+ }
8242+ if (unlikely(!h_parent))
8243+ goto out_unlock;
8244+
8245+ err = -EPERM;
8246+ br = au_sbr(sb, bindex);
8247+ h_sb = br->br_mnt->mnt_sb;
8248+ if (unlikely(!h_sb->s_export_op)) {
8249+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
8250+ goto out_dput;
8251+ }
8252+
8253+ fh[Fh_br_id] = br->br_id;
8254+ fh[Fh_sigen] = au_sigen(sb);
8255+ encode_ino(fh + Fh_ino, inode->i_ino);
8256+ encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
8257+ fh[Fh_igen] = inode->i_generation;
8258+
8259+ *max_len -= Fh_tail;
8260+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
8261+ max_len,
8262+ /*connectable or subtreecheck*/0);
8263+ err = fh[Fh_h_type];
8264+ *max_len += Fh_tail;
8265+ /* todo: macros? */
8266+ if (err != 255)
8267+ err = 99;
8268+ else
8269+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
8270+
8271+ out_dput:
8272+ dput(h_parent);
8273+ out_unlock:
8274+ di_read_unlock(parent, !AuLock_IR);
8275+ dput(parent);
8276+ aufs_read_unlock(dentry, AuLock_IR);
8277+ out:
8278+ if (unlikely(err < 0))
8279+ err = 255;
8280+ return err;
8281+}
8282+
8283+/* ---------------------------------------------------------------------- */
8284+
8285+static struct export_operations aufs_export_op = {
8286+ .fh_to_dentry = aufs_fh_to_dentry,
8287+ /* .fh_to_parent = aufs_fh_to_parent, */
8288+ .encode_fh = aufs_encode_fh
8289+};
8290+
8291+void au_export_init(struct super_block *sb)
8292+{
8293+ struct au_sbinfo *sbinfo;
8294+ __u32 u;
8295+
8296+ sb->s_export_op = &aufs_export_op;
8297+ sbinfo = au_sbi(sb);
8298+ sbinfo->si_xigen = NULL;
8299+ get_random_bytes(&u, sizeof(u));
8300+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
8301+ atomic_set(&sbinfo->si_xigen_next, u);
8302+}
1308ab2a 8303diff -uprN -x .git linux-2.6.31/fs/aufs/f_op.c aufs2-2.6.git/fs/aufs/f_op.c
8304--- linux-2.6.31/fs/aufs/f_op.c 1970-01-01 00:00:00.000000000 +0000
8305+++ aufs2-2.6.git/fs/aufs/f_op.c 2009-09-21 21:49:23.401607657 +0000
8306@@ -0,0 +1,826 @@
1facf9fc 8307+/*
8308+ * Copyright (C) 2005-2009 Junjiro R. Okajima
8309+ *
8310+ * This program, aufs is free software; you can redistribute it and/or modify
8311+ * it under the terms of the GNU General Public License as published by
8312+ * the Free Software Foundation; either version 2 of the License, or
8313+ * (at your option) any later version.
dece6358
AM
8314+ *
8315+ * This program is distributed in the hope that it will be useful,
8316+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8317+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8318+ * GNU General Public License for more details.
8319+ *
8320+ * You should have received a copy of the GNU General Public License
8321+ * along with this program; if not, write to the Free Software
8322+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 8323+ */
8324+
8325+/*
1308ab2a 8326+ * file and vm operations
1facf9fc 8327+ */
8328+
dece6358 8329+#include <linux/file.h>
1308ab2a 8330+#include <linux/fs_stack.h>
8331+#include <linux/ima.h>
8332+#include <linux/mman.h>
8333+#include <linux/mm.h>
8334+#include <linux/security.h>
1facf9fc 8335+#include "aufs.h"
8336+
1308ab2a 8337+/* common function to regular file and dir */
8338+int aufs_flush(struct file *file, fl_owner_t id)
1facf9fc 8339+{
1308ab2a 8340+ int err;
8341+ aufs_bindex_t bindex, bend;
8342+ struct dentry *dentry;
8343+ struct file *h_file;
1facf9fc 8344+
1308ab2a 8345+ dentry = file->f_dentry;
8346+ si_noflush_read_lock(dentry->d_sb);
8347+ fi_read_lock(file);
8348+ di_read_lock_child(dentry, AuLock_IW);
1facf9fc 8349+
1308ab2a 8350+ err = 0;
8351+ bend = au_fbend(file);
8352+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8353+ h_file = au_h_fptr(file, bindex);
8354+ if (!h_file || !h_file->f_op || !h_file->f_op->flush)
8355+ continue;
1facf9fc 8356+
1308ab2a 8357+ err = h_file->f_op->flush(h_file, id);
8358+ if (!err)
8359+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
8360+ /*ignore*/
dece6358 8361+ }
1308ab2a 8362+ au_cpup_attr_timesizes(dentry->d_inode);
dece6358 8363+
1308ab2a 8364+ di_read_unlock(dentry, AuLock_IW);
8365+ fi_read_unlock(file);
8366+ si_read_unlock(dentry->d_sb);
8367+ return err;
1facf9fc 8368+}
8369+
1308ab2a 8370+/* ---------------------------------------------------------------------- */
8371+
8372+static int do_open_nondir(struct file *file, int flags)
1facf9fc 8373+{
dece6358 8374+ int err;
1308ab2a 8375+ aufs_bindex_t bindex;
8376+ struct file *h_file;
1facf9fc 8377+ struct dentry *dentry;
1308ab2a 8378+ struct au_finfo *finfo;
8379+
8380+ FiMustWriteLock(file);
1facf9fc 8381+
1308ab2a 8382+ err = 0;
1facf9fc 8383+ dentry = file->f_dentry;
1308ab2a 8384+ finfo = au_fi(file);
8385+ finfo->fi_h_vm_ops = NULL;
8386+ finfo->fi_vm_ops = NULL;
8387+ bindex = au_dbstart(dentry);
8388+ /* O_TRUNC is processed already */
8389+ BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
8390+ && (flags & O_TRUNC));
1facf9fc 8391+
1308ab2a 8392+ h_file = au_h_open(dentry, bindex, flags, file);
8393+ if (IS_ERR(h_file))
8394+ err = PTR_ERR(h_file);
8395+ else {
8396+ au_set_fbstart(file, bindex);
8397+ au_set_fbend(file, bindex);
8398+ au_set_h_fptr(file, bindex, h_file);
8399+ au_update_figen(file);
8400+ /* todo: necessary? */
8401+ /* file->f_ra = h_file->f_ra; */
8402+ }
8403+ return err;
8404+}
dece6358 8405+
1308ab2a 8406+static int aufs_open_nondir(struct inode *inode __maybe_unused,
8407+ struct file *file)
8408+{
8409+ return au_do_open(file, do_open_nondir);
8410+}
8411+
8412+static int aufs_release_nondir(struct inode *inode __maybe_unused,
8413+ struct file *file)
8414+{
8415+ struct super_block *sb = file->f_dentry->d_sb;
8416+
8417+ si_noflush_read_lock(sb);
8418+ kfree(au_fi(file)->fi_vm_ops);
8419+ au_finfo_fin(file);
1facf9fc 8420+ si_read_unlock(sb);
1308ab2a 8421+ return 0;
1facf9fc 8422+}
8423+
1308ab2a 8424+/* ---------------------------------------------------------------------- */
8425+
8426+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
8427+ loff_t *ppos)
1facf9fc 8428+{
1308ab2a 8429+ ssize_t err;
1facf9fc 8430+ struct dentry *dentry;
1308ab2a 8431+ struct file *h_file;
8432+ struct super_block *sb;
1facf9fc 8433+
8434+ dentry = file->f_dentry;
1308ab2a 8435+ sb = dentry->d_sb;
8436+ si_read_lock(sb, AuLock_FLUSH);
8437+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8438+ if (unlikely(err))
8439+ goto out;
dece6358 8440+
1308ab2a 8441+ h_file = au_h_fptr(file, au_fbstart(file));
8442+ err = vfsub_read_u(h_file, buf, count, ppos);
dece6358 8443+ /* todo: necessary? */
1facf9fc 8444+ /* file->f_ra = h_file->f_ra; */
1308ab2a 8445+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1facf9fc 8446+
1308ab2a 8447+ di_read_unlock(dentry, AuLock_IR);
8448+ fi_read_unlock(file);
1facf9fc 8449+ out:
1308ab2a 8450+ si_read_unlock(sb);
1facf9fc 8451+ return err;
8452+}
8453+
1308ab2a 8454+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
8455+ size_t count, loff_t *ppos)
1facf9fc 8456+{
1308ab2a 8457+ ssize_t err;
dece6358 8458+ aufs_bindex_t bstart;
1308ab2a 8459+ struct au_pin pin;
8460+ struct dentry *dentry;
dece6358 8461+ struct inode *inode;
dece6358 8462+ struct super_block *sb;
1308ab2a 8463+ struct file *h_file;
8464+ char __user *buf = (char __user *)ubuf;
1facf9fc 8465+
dece6358 8466+ dentry = file->f_dentry;
1308ab2a 8467+ sb = dentry->d_sb;
dece6358 8468+ inode = dentry->d_inode;
1308ab2a 8469+ mutex_lock(&inode->i_mutex);
8470+ si_read_lock(sb, AuLock_FLUSH);
1facf9fc 8471+
1308ab2a 8472+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8473+ if (unlikely(err))
8474+ goto out;
8475+
8476+ err = au_ready_to_write(file, -1, &pin);
8477+ di_downgrade_lock(dentry, AuLock_IR);
8478+ if (unlikely(err))
8479+ goto out_unlock;
1facf9fc 8480+
1308ab2a 8481+ bstart = au_fbstart(file);
8482+ h_file = au_h_fptr(file, bstart);
8483+ au_unpin(&pin);
8484+ err = vfsub_write_u(h_file, buf, count, ppos);
8485+ au_cpup_attr_timesizes(inode);
8486+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8487+
8488+ out_unlock:
8489+ di_read_unlock(dentry, AuLock_IR);
8490+ fi_write_unlock(file);
8491+ out:
8492+ si_read_unlock(sb);
8493+ mutex_unlock(&inode->i_mutex);
dece6358 8494+ return err;
1facf9fc 8495+}
8496+
1308ab2a 8497+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
8498+ unsigned long nv, loff_t pos)
1facf9fc 8499+{
1308ab2a 8500+ ssize_t err;
8501+ struct file *file, *h_file;
8502+ struct dentry *dentry;
1facf9fc 8503+ struct super_block *sb;
1facf9fc 8504+
1308ab2a 8505+ file = kio->ki_filp;
1facf9fc 8506+ dentry = file->f_dentry;
1facf9fc 8507+ sb = dentry->d_sb;
1308ab2a 8508+ si_read_lock(sb, AuLock_FLUSH);
8509+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8510+ if (unlikely(err))
1facf9fc 8511+ goto out;
1facf9fc 8512+
1308ab2a 8513+ err = -ENOSYS;
8514+ h_file = au_h_fptr(file, au_fbstart(file));
8515+ if (h_file->f_op && h_file->f_op->aio_read) {
8516+ err = security_file_permission(h_file, MAY_READ);
1facf9fc 8517+ if (unlikely(err))
1308ab2a 8518+ goto out_unlock;
8519+ if (!is_sync_kiocb(kio)) {
8520+ get_file(h_file);
8521+ fput(file);
8522+ }
8523+ kio->ki_filp = h_file;
8524+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
8525+ /* todo: necessary? */
8526+ /* file->f_ra = h_file->f_ra; */
8527+ fsstack_copy_attr_atime(dentry->d_inode,
8528+ h_file->f_dentry->d_inode);
8529+ } else
8530+ /* currently there is no such fs */
8531+ WARN_ON_ONCE(h_file->f_op && h_file->f_op->read);
1facf9fc 8532+
8533+ out_unlock:
1308ab2a 8534+ di_read_unlock(dentry, AuLock_IR);
8535+ fi_read_unlock(file);
1facf9fc 8536+ out:
1308ab2a 8537+ si_read_unlock(sb);
1facf9fc 8538+ return err;
8539+}
8540+
1308ab2a 8541+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
8542+ unsigned long nv, loff_t pos)
1facf9fc 8543+{
1308ab2a 8544+ ssize_t err;
dece6358
AM
8545+ aufs_bindex_t bstart;
8546+ struct au_pin pin;
1308ab2a 8547+ struct dentry *dentry;
dece6358 8548+ struct inode *inode;
1facf9fc 8549+ struct super_block *sb;
1308ab2a 8550+ struct file *file, *h_file;
1facf9fc 8551+
1308ab2a 8552+ file = kio->ki_filp;
1facf9fc 8553+ dentry = file->f_dentry;
8554+ sb = dentry->d_sb;
dece6358 8555+ inode = dentry->d_inode;
1308ab2a 8556+ mutex_lock(&inode->i_mutex);
8557+ si_read_lock(sb, AuLock_FLUSH);
8558+
8559+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8560+ if (unlikely(err))
1facf9fc 8561+ goto out;
8562+
1308ab2a 8563+ err = au_ready_to_write(file, -1, &pin);
8564+ di_downgrade_lock(dentry, AuLock_IR);
8565+ if (unlikely(err))
8566+ goto out_unlock;
1facf9fc 8567+
1308ab2a 8568+ err = -ENOSYS;
8569+ bstart = au_fbstart(file);
8570+ h_file = au_h_fptr(file, bstart);
8571+ au_unpin(&pin);
8572+ if (h_file->f_op && h_file->f_op->aio_write) {
8573+ err = security_file_permission(h_file, MAY_WRITE);
dece6358
AM
8574+ if (unlikely(err))
8575+ goto out_unlock;
1308ab2a 8576+ if (!is_sync_kiocb(kio)) {
8577+ get_file(h_file);
8578+ fput(file);
8579+ }
8580+ kio->ki_filp = h_file;
8581+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
8582+ au_cpup_attr_timesizes(inode);
8583+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8584+ } else
8585+ /* currently there is no such fs */
8586+ WARN_ON_ONCE(h_file->f_op && h_file->f_op->write);
1facf9fc 8587+
dece6358 8588+ out_unlock:
1308ab2a 8589+ di_read_unlock(dentry, AuLock_IR);
8590+ fi_write_unlock(file);
1facf9fc 8591+ out:
1308ab2a 8592+ si_read_unlock(sb);
8593+ mutex_unlock(&inode->i_mutex);
dece6358 8594+ return err;
1facf9fc 8595+}
8596+
1308ab2a 8597+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
8598+ struct pipe_inode_info *pipe, size_t len,
8599+ unsigned int flags)
1facf9fc 8600+{
1308ab2a 8601+ ssize_t err;
8602+ struct file *h_file;
8603+ struct dentry *dentry;
1facf9fc 8604+ struct super_block *sb;
8605+
1308ab2a 8606+ dentry = file->f_dentry;
8607+ sb = dentry->d_sb;
8608+ si_read_lock(sb, AuLock_FLUSH);
8609+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8610+ if (unlikely(err))
8611+ goto out;
dece6358 8612+
1308ab2a 8613+ err = -EINVAL;
8614+ h_file = au_h_fptr(file, au_fbstart(file));
8615+ if (au_test_loopback_kthread()) {
8616+ file->f_mapping = h_file->f_mapping;
8617+ smp_mb(); /* unnecessary? */
1facf9fc 8618+ }
1308ab2a 8619+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
8620+ /* todo: necessary? */
8621+ /* file->f_ra = h_file->f_ra; */
8622+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
1facf9fc 8623+
1308ab2a 8624+ di_read_unlock(dentry, AuLock_IR);
8625+ fi_read_unlock(file);
1facf9fc 8626+
1308ab2a 8627+ out:
8628+ si_read_unlock(sb);
8629+ return err;
dece6358 8630+}
1facf9fc 8631+
1308ab2a 8632+static ssize_t
8633+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
8634+ size_t len, unsigned int flags)
dece6358 8635+{
1308ab2a 8636+ ssize_t err;
8637+ struct au_pin pin;
dece6358 8638+ struct dentry *dentry;
1308ab2a 8639+ struct inode *inode;
8640+ struct super_block *sb;
8641+ struct file *h_file;
1facf9fc 8642+
dece6358 8643+ dentry = file->f_dentry;
1308ab2a 8644+ inode = dentry->d_inode;
8645+ mutex_lock(&inode->i_mutex);
8646+ sb = dentry->d_sb;
8647+ si_read_lock(sb, AuLock_FLUSH);
1facf9fc 8648+
1308ab2a 8649+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8650+ if (unlikely(err))
8651+ goto out;
8652+
8653+ err = au_ready_to_write(file, -1, &pin);
8654+ di_downgrade_lock(dentry, AuLock_IR);
8655+ if (unlikely(err))
8656+ goto out_unlock;
8657+
8658+ h_file = au_h_fptr(file, au_fbstart(file));
8659+ au_unpin(&pin);
8660+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
8661+ au_cpup_attr_timesizes(inode);
8662+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
dece6358 8663+
1308ab2a 8664+ out_unlock:
8665+ di_read_unlock(dentry, AuLock_IR);
8666+ fi_write_unlock(file);
1facf9fc 8667+ out:
1308ab2a 8668+ si_read_unlock(sb);
8669+ mutex_unlock(&inode->i_mutex);
1facf9fc 8670+ return err;
8671+}
8672+
1308ab2a 8673+/* ---------------------------------------------------------------------- */
8674+
8675+static struct file *au_safe_file(struct vm_area_struct *vma)
8676+{
8677+ struct file *file;
8678+
8679+ file = vma->vm_file;
8680+ if (file->private_data && au_test_aufs(file->f_dentry->d_sb))
8681+ return file;
8682+ return NULL;
8683+}
8684+
8685+static void au_reset_file(struct vm_area_struct *vma, struct file *file)
8686+{
8687+ vma->vm_file = file;
8688+ /* smp_mb(); */ /* flush vm_file */
8689+}
8690+
8691+static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1facf9fc 8692+{
8693+ int err;
1308ab2a 8694+ static DECLARE_WAIT_QUEUE_HEAD(wq);
8695+ struct file *file, *h_file;
8696+ struct au_finfo *finfo;
1facf9fc 8697+
1308ab2a 8698+ /* todo: non-robr mode, use vm_file as it is? */
8699+ wait_event(wq, (file = au_safe_file(vma)));
1facf9fc 8700+
1308ab2a 8701+ /* do not revalidate, no si lock */
8702+ finfo = au_fi(file);
8703+ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8704+ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
1facf9fc 8705+
1308ab2a 8706+ mutex_lock(&finfo->fi_vm_mtx);
8707+ vma->vm_file = h_file;
8708+ err = finfo->fi_h_vm_ops->fault(vma, vmf);
8709+ /* todo: necessary? */
8710+ /* file->f_ra = h_file->f_ra; */
8711+ au_reset_file(vma, file);
8712+ mutex_unlock(&finfo->fi_vm_mtx);
8713+#if 0 /* def CONFIG_SMP */
8714+ /* wake_up_nr(&wq, online_cpu - 1); */
8715+ wake_up_all(&wq);
8716+#else
8717+ wake_up(&wq);
8718+#endif
1facf9fc 8719+
1facf9fc 8720+ return err;
8721+}
8722+
1308ab2a 8723+static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
dece6358 8724+{
1308ab2a 8725+ int err;
8726+ static DECLARE_WAIT_QUEUE_HEAD(wq);
8727+ struct file *file, *h_file;
8728+ struct au_finfo *finfo;
1facf9fc 8729+
1308ab2a 8730+ wait_event(wq, (file = au_safe_file(vma)));
dece6358 8731+
1308ab2a 8732+ finfo = au_fi(file);
8733+ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8734+ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
dece6358 8735+
1308ab2a 8736+ mutex_lock(&finfo->fi_vm_mtx);
8737+ vma->vm_file = h_file;
8738+ err = finfo->fi_h_vm_ops->page_mkwrite(vma, vmf);
8739+ au_reset_file(vma, file);
8740+ mutex_unlock(&finfo->fi_vm_mtx);
8741+ wake_up(&wq);
1facf9fc 8742+
1308ab2a 8743+ return err;
8744+}
1facf9fc 8745+
1308ab2a 8746+static void aufs_vm_close(struct vm_area_struct *vma)
8747+{
8748+ static DECLARE_WAIT_QUEUE_HEAD(wq);
8749+ struct file *file, *h_file;
8750+ struct au_finfo *finfo;
1facf9fc 8751+
1308ab2a 8752+ wait_event(wq, (file = au_safe_file(vma)));
1facf9fc 8753+
1308ab2a 8754+ finfo = au_fi(file);
8755+ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8756+ AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
1facf9fc 8757+
1308ab2a 8758+ mutex_lock(&finfo->fi_vm_mtx);
8759+ vma->vm_file = h_file;
8760+ finfo->fi_h_vm_ops->close(vma);
8761+ au_reset_file(vma, file);
8762+ mutex_unlock(&finfo->fi_vm_mtx);
8763+ wake_up(&wq);
8764+}
8765+
8766+static struct vm_operations_struct aufs_vm_ops = {
8767+ /* .close and .page_mkwrite are not set by default */
8768+ .fault = aufs_fault,
dece6358 8769+};
1facf9fc 8770+
1308ab2a 8771+/* ---------------------------------------------------------------------- */
1facf9fc 8772+
1308ab2a 8773+static unsigned long au_prot_conv(unsigned long flags)
8774+{
8775+ unsigned long prot;
1facf9fc 8776+
1308ab2a 8777+ prot = 0;
8778+ if (flags & VM_READ)
8779+ prot |= PROT_READ;
8780+ if (flags & VM_WRITE)
8781+ prot |= PROT_WRITE;
8782+ if (flags & VM_EXEC)
8783+ prot |= PROT_EXEC;
8784+ return prot;
8785+}
1facf9fc 8786+
1308ab2a 8787+static struct vm_operations_struct *au_vm_ops(struct file *h_file,
8788+ struct vm_area_struct *vma)
8789+{
8790+ struct vm_operations_struct *vm_ops;
8791+ int err;
1facf9fc 8792+
1308ab2a 8793+ vm_ops = ERR_PTR(-ENODEV);
8794+ if (!h_file->f_op || !h_file->f_op->mmap)
8795+ goto out;
1facf9fc 8796+
1308ab2a 8797+ err = ima_file_mmap(h_file, au_prot_conv(vma->vm_flags));
8798+ vm_ops = ERR_PTR(err);
8799+ if (err)
8800+ goto out;
1facf9fc 8801+
1308ab2a 8802+ err = h_file->f_op->mmap(h_file, vma);
8803+ vm_ops = ERR_PTR(err);
8804+ if (unlikely(err))
8805+ goto out;
1facf9fc 8806+
1308ab2a 8807+ vm_ops = vma->vm_ops;
8808+ err = do_munmap(current->mm, vma->vm_start,
8809+ vma->vm_end - vma->vm_start);
8810+ if (unlikely(err)) {
8811+ AuIOErr("failed internal unmapping %.*s, %d\n",
8812+ AuDLNPair(h_file->f_dentry), err);
8813+ vm_ops = ERR_PTR(-EIO);
8814+ }
1facf9fc 8815+
1308ab2a 8816+ out:
8817+ return vm_ops;
8818+}
1facf9fc 8819+
1308ab2a 8820+static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
dece6358 8821+{
1308ab2a 8822+ int err;
8823+ struct vm_operations_struct *h_ops;
1facf9fc 8824+
1308ab2a 8825+ AuRwMustAnyLock(&finfo->fi_rwsem);
1facf9fc 8826+
1308ab2a 8827+ err = 0;
8828+ h_ops = finfo->fi_h_vm_ops;
8829+ AuDebugOn(!h_ops);
8830+ if ((!h_ops->page_mkwrite && !h_ops->close)
8831+ || finfo->fi_vm_ops)
8832+ goto out;
dece6358 8833+
1308ab2a 8834+ err = -ENOMEM;
8835+ finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS);
8836+ if (unlikely(!finfo->fi_vm_ops))
8837+ goto out;
1facf9fc 8838+
1308ab2a 8839+ err = 0;
8840+ if (h_ops->page_mkwrite)
8841+ finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite;
8842+ if (h_ops->close)
8843+ finfo->fi_vm_ops->close = aufs_vm_close;
1facf9fc 8844+
1308ab2a 8845+ vma->vm_ops = finfo->fi_vm_ops;
1facf9fc 8846+
1308ab2a 8847+ out:
8848+ return err;
dece6358 8849+}
1facf9fc 8850+
1308ab2a 8851+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 8852+{
1308ab2a 8853+ int err;
8854+ unsigned char wlock, mmapped;
8855+ struct dentry *dentry;
8856+ struct super_block *sb;
8857+ struct file *h_file;
8858+ struct vm_operations_struct *vm_ops;
1facf9fc 8859+
1308ab2a 8860+ dentry = file->f_dentry;
8861+ wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
8862+ sb = dentry->d_sb;
8863+ si_read_lock(sb, AuLock_FLUSH);
8864+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8865+ if (unlikely(err))
8866+ goto out;
1facf9fc 8867+
1308ab2a 8868+ mmapped = !!au_test_mmapped(file);
8869+ if (wlock) {
8870+ struct au_pin pin;
1facf9fc 8871+
1308ab2a 8872+ err = au_ready_to_write(file, -1, &pin);
8873+ di_downgrade_lock(dentry, AuLock_IR);
8874+ if (unlikely(err))
8875+ goto out_unlock;
8876+ au_unpin(&pin);
8877+ } else
8878+ di_downgrade_lock(dentry, AuLock_IR);
1facf9fc 8879+
1308ab2a 8880+ h_file = au_h_fptr(file, au_fbstart(file));
8881+ if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
8882+ /*
8883+ * by this assignment, f_mapping will differs from aufs inode
8884+ * i_mapping.
8885+ * if someone else mixes the use of f_dentry->d_inode and
8886+ * f_mapping->host, then a problem may arise.
8887+ */
8888+ file->f_mapping = h_file->f_mapping;
8889+ }
1facf9fc 8890+
1308ab2a 8891+ vm_ops = NULL;
8892+ if (!mmapped) {
8893+ vm_ops = au_vm_ops(h_file, vma);
8894+ err = PTR_ERR(vm_ops);
8895+ if (IS_ERR(vm_ops))
8896+ goto out_unlock;
8897+ }
1facf9fc 8898+
1308ab2a 8899+ /*
8900+ * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
8901+ * currently MAP_DENYWRITE from userspace is ignored, but elf loader
8902+ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
8903+ * both of the aufs file and the lower file is deny_write_access()-ed.
8904+ * finally I hope we can skip handlling MAP_DENYWRITE here.
8905+ */
8906+ err = generic_file_mmap(file, vma);
8907+ if (unlikely(err))
8908+ goto out_unlock;
1facf9fc 8909+
1308ab2a 8910+ vma->vm_ops = &aufs_vm_ops;
8911+ if (!mmapped) {
8912+ struct au_finfo *finfo = au_fi(file);
1facf9fc 8913+
1308ab2a 8914+ finfo->fi_h_vm_ops = vm_ops;
8915+ mutex_init(&finfo->fi_vm_mtx);
8916+ }
1facf9fc 8917+
1308ab2a 8918+ err = au_custom_vm_ops(au_fi(file), vma);
8919+ if (unlikely(err))
8920+ goto out_unlock;
1facf9fc 8921+
1308ab2a 8922+ vfsub_file_accessed(h_file);
8923+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8924+
8925+ out_unlock:
8926+ di_read_unlock(dentry, AuLock_IR);
8927+ fi_write_unlock(file);
8928+ out:
8929+ si_read_unlock(sb);
8930+ return err;
dece6358 8931+}
1facf9fc 8932+
1308ab2a 8933+/* ---------------------------------------------------------------------- */
8934+
8935+static int aufs_fsync_nondir(struct file *file, struct dentry *dentry,
8936+ int datasync)
dece6358 8937+{
1308ab2a 8938+ int err;
8939+ struct au_pin pin;
8940+ struct inode *inode;
8941+ struct file *h_file;
8942+ struct super_block *sb;
dece6358 8943+
1308ab2a 8944+ inode = dentry->d_inode;
8945+ IMustLock(file->f_mapping->host);
8946+ if (inode != file->f_mapping->host) {
8947+ mutex_unlock(&file->f_mapping->host->i_mutex);
8948+ mutex_lock(&inode->i_mutex);
1facf9fc 8949+ }
1308ab2a 8950+ IMustLock(inode);
1facf9fc 8951+
1308ab2a 8952+ sb = dentry->d_sb;
8953+ si_read_lock(sb, AuLock_FLUSH);
1facf9fc 8954+
1308ab2a 8955+ err = 0; /* -EBADF; */ /* posix? */
8956+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
8957+ goto out;
8958+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8959+ if (unlikely(err))
8960+ goto out;
1facf9fc 8961+
1308ab2a 8962+ err = au_ready_to_write(file, -1, &pin);
8963+ di_downgrade_lock(dentry, AuLock_IR);
8964+ if (unlikely(err))
8965+ goto out_unlock;
8966+ au_unpin(&pin);
8967+
8968+ err = -EINVAL;
8969+ h_file = au_h_fptr(file, au_fbstart(file));
8970+ if (h_file->f_op && h_file->f_op->fsync) {
8971+ struct dentry *h_d;
8972+ struct mutex *h_mtx;
dece6358 8973+
dece6358 8974+ /*
1308ab2a 8975+ * no filemap_fdatawrite() since aufs file has no its own
8976+ * mapping, but dir.
dece6358 8977+ */
1308ab2a 8978+ h_d = h_file->f_dentry;
8979+ h_mtx = &h_d->d_inode->i_mutex;
8980+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
8981+ err = h_file->f_op->fsync(h_file, h_d, datasync);
8982+ if (!err)
8983+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
8984+ /*ignore*/
8985+ au_cpup_attr_timesizes(inode);
8986+ mutex_unlock(h_mtx);
8987+ }
1facf9fc 8988+
1308ab2a 8989+ out_unlock:
8990+ di_read_unlock(dentry, AuLock_IR);
dece6358 8991+ fi_write_unlock(file);
1308ab2a 8992+ out:
8993+ si_read_unlock(sb);
8994+ if (inode != file->f_mapping->host) {
8995+ mutex_unlock(&inode->i_mutex);
8996+ mutex_lock(&file->f_mapping->host->i_mutex);
8997+ }
8998+ return err;
dece6358
AM
8999+}
9000+
/*
 * no one supports this operation, currently.
 * the function below is compiled out, and kept as a sketch of ->aio_fsync().
 */
#if 0
static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
{
	int err;
	struct au_pin pin;
	struct dentry *dentry, *lower_d;
	struct inode *inode;
	struct file *file, *lower;
	struct super_block *sb;
	struct mutex *lower_mtx;

	file = kio->ki_filp;
	dentry = file->f_dentry;
	inode = dentry->d_inode;
	mutex_lock(&inode->i_mutex);

	sb = dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH);

	err = 0; /* -EBADF; */ /* posix? */
	if (unlikely(!(file->f_mode & FMODE_WRITE)))
		goto out;
	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
	if (unlikely(err))
		goto out;

	err = au_ready_to_write(file, -1, &pin);
	di_downgrade_lock(dentry, AuLock_IR);
	if (unlikely(err))
		goto out_unlock;
	au_unpin(&pin);

	err = -ENOSYS;
	lower = au_h_fptr(file, au_fbstart(file));
	if (!lower->f_op || !lower->f_op->aio_fsync)
		goto out_unlock;

	lower_d = lower->f_dentry;
	lower_mtx = &lower_d->d_inode->i_mutex;
	/* for an async kiocb, move the file reference to the lower file */
	if (!is_sync_kiocb(kio)) {
		get_file(lower);
		fput(file);
	}
	kio->ki_filp = lower;
	err = lower->f_op->aio_fsync(kio, datasync);
	mutex_lock_nested(lower_mtx, AuLsc_I_CHILD);
	if (!err)
		vfsub_update_h_iattr(&lower->f_path, /*did*/NULL);
	/*ignore*/
	au_cpup_attr_timesizes(inode);
	mutex_unlock(lower_mtx);

 out_unlock:
	di_read_unlock(dentry, AuLock_IR);
	fi_write_unlock(file);
 out:
	si_read_unlock(sb);
	mutex_unlock(&inode->i_mutex);
	return err;
}
#endif
1facf9fc 9064+
1308ab2a 9065+static int aufs_fasync(int fd, struct file *file, int flag)
1facf9fc 9066+{
1308ab2a 9067+ int err;
9068+ struct file *h_file;
9069+ struct dentry *dentry;
9070+ struct super_block *sb;
1facf9fc 9071+
1308ab2a 9072+ dentry = file->f_dentry;
9073+ sb = dentry->d_sb;
9074+ si_read_lock(sb, AuLock_FLUSH);
9075+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
9076+ if (unlikely(err))
9077+ goto out;
9078+
9079+ h_file = au_h_fptr(file, au_fbstart(file));
9080+ if (h_file->f_op && h_file->f_op->fasync)
9081+ err = h_file->f_op->fasync(fd, h_file, flag);
9082+
9083+ di_read_unlock(dentry, AuLock_IR);
9084+ fi_read_unlock(file);
1facf9fc 9085+
1308ab2a 9086+ out:
9087+ si_read_unlock(sb);
dece6358 9088+ return err;
1facf9fc 9089+}
1308ab2a 9090+
9091+/* ---------------------------------------------------------------------- */
9092+
/*
 * no one supports this operation, currently.
 * an empty placeholder for ->sendpage(), compiled out.
 */
#if 0
static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
			     size_t len, loff_t *pos, int more)
{
}
#endif
9100+
9101+/* ---------------------------------------------------------------------- */
9102+
9103+const struct file_operations aufs_file_fop = {
9104+ /*
9105+ * while generic_file_llseek/_unlocked() don't use BKL,
9106+ * don't use it since it operates file->f_mapping->host.
9107+ * in aufs, it may be a real file and may confuse users by UDBA.
9108+ */
9109+ /* .llseek = generic_file_llseek, */
9110+
9111+ .read = aufs_read,
9112+ .write = aufs_write,
9113+ .aio_read = aufs_aio_read,
9114+ .aio_write = aufs_aio_write,
9115+#ifdef CONFIG_AUFS_POLL
9116+ .poll = aufs_poll,
9117+#endif
9118+ .mmap = aufs_mmap,
9119+ .open = aufs_open_nondir,
9120+ .flush = aufs_flush,
9121+ .release = aufs_release_nondir,
9122+ .fsync = aufs_fsync_nondir,
9123+ /* .aio_fsync = aufs_aio_fsync_nondir, */
9124+ .fasync = aufs_fasync,
9125+ /* .sendpage = aufs_sendpage, */
9126+ .splice_write = aufs_splice_write,
9127+ .splice_read = aufs_splice_read,
9128+#if 0
9129+ .aio_splice_write = aufs_aio_splice_write,
9130+ .aio_splice_read = aufs_aio_splice_read
9131+#endif
9132+};
9133diff -uprN -x .git linux-2.6.31/fs/aufs/file.c aufs2-2.6.git/fs/aufs/file.c
9134--- linux-2.6.31/fs/aufs/file.c 1970-01-01 00:00:00.000000000 +0000
9135+++ aufs2-2.6.git/fs/aufs/file.c 2009-09-21 21:49:23.401607657 +0000
9136@@ -0,0 +1,568 @@
dece6358
AM
9137+/*
9138+ * Copyright (C) 2005-2009 Junjiro R. Okajima
9139+ *
9140+ * This program, aufs is free software; you can redistribute it and/or modify
9141+ * it under the terms of the GNU General Public License as published by
9142+ * the Free Software Foundation; either version 2 of the License, or
9143+ * (at your option) any later version.
9144+ *
9145+ * This program is distributed in the hope that it will be useful,
9146+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9147+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9148+ * GNU General Public License for more details.
9149+ *
9150+ * You should have received a copy of the GNU General Public License
9151+ * along with this program; if not, write to the Free Software
9152+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9153+ */
1facf9fc 9154+
9155+/*
1308ab2a 9156+ * handling file/dir, and address_space operation
1facf9fc 9157+ */
dece6358
AM
9158+
9159+#include <linux/file.h>
1308ab2a 9160+#include <linux/fsnotify.h>
9161+#include <linux/namei.h>
9162+#include <linux/pagemap.h>
dece6358
AM
9163+#include "aufs.h"
9164+
/*
 * drop flags for writing:
 * clear O_WRONLY, O_RDWR, O_APPEND, O_CREAT and O_TRUNC from @flags, then
 * add O_RDONLY and O_NOATIME.
 */
unsigned int au_file_roflags(unsigned int flags)
{
	const unsigned int wr_mask
		= O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC;

	return (flags & ~wr_mask) | O_RDONLY | O_NOATIME;
}
9172+
1308ab2a 9173+/* common functions to regular file and dir */
9174+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9175+ struct file *file)
1facf9fc 9176+{
dece6358 9177+ struct file *h_file;
1308ab2a 9178+ struct dentry *h_dentry;
9179+ struct inode *h_inode;
9180+ struct super_block *sb;
9181+ struct au_branch *br;
9182+ int err, exec_flag;
9183+ struct path h_path;
1facf9fc 9184+
1308ab2a 9185+ /* a race condition can happen between open and unlink/rmdir */
9186+ h_file = ERR_PTR(-ENOENT);
9187+ h_dentry = au_h_dptr(dentry, bindex);
9188+ if (au_test_nfsd(current) && !h_dentry)
9189+ goto out;
9190+ h_inode = h_dentry->d_inode;
9191+ if (au_test_nfsd(current) && !h_inode)
9192+ goto out;
9193+ if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry))
9194+ || !h_inode))
9195+ goto out;
1facf9fc 9196+
1308ab2a 9197+ sb = dentry->d_sb;
9198+ br = au_sbr(sb, bindex);
9199+ h_file = ERR_PTR(-EACCES);
9200+ exec_flag = flags & vfsub_fmode_to_uint(FMODE_EXEC);
9201+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
9202+ goto out;
1facf9fc 9203+
1308ab2a 9204+ /* drop flags for writing */
9205+ if (au_test_ro(sb, bindex, dentry->d_inode))
9206+ flags = au_file_roflags(flags);
9207+ flags &= ~O_CREAT;
9208+ atomic_inc(&br->br_count);
9209+ h_path.dentry = h_dentry;
9210+ h_path.mnt = br->br_mnt;
9211+ path_get(&h_path);
9212+ h_file = vfsub_dentry_open(&h_path, flags, current_cred());
9213+ if (IS_ERR(h_file))
9214+ goto out_br;
1facf9fc 9215+
1308ab2a 9216+ if (exec_flag) {
9217+ err = deny_write_access(h_file);
9218+ if (unlikely(err)) {
9219+ fput(h_file);
9220+ h_file = ERR_PTR(err);
9221+ goto out_br;
9222+ }
9223+ }
9224+ fsnotify_open(h_dentry);
9225+ goto out; /* success */
dece6358 9226+
1308ab2a 9227+ out_br:
9228+ atomic_dec(&br->br_count);
9229+ out:
9230+ return h_file;
1facf9fc 9231+}
9232+
1308ab2a 9233+int au_do_open(struct file *file, int (*open)(struct file *file, int flags))
dece6358 9234+{
1308ab2a 9235+ int err;
9236+ unsigned int flags;
dece6358 9237+ struct dentry *dentry;
dece6358 9238+ struct super_block *sb;
1facf9fc 9239+
dece6358
AM
9240+ dentry = file->f_dentry;
9241+ sb = dentry->d_sb;
9242+ si_read_lock(sb, AuLock_FLUSH);
1308ab2a 9243+ err = au_finfo_init(file);
dece6358
AM
9244+ if (unlikely(err))
9245+ goto out;
1facf9fc 9246+
1308ab2a 9247+ di_read_lock_child(dentry, AuLock_IR);
9248+ spin_lock(&file->f_lock);
9249+ flags = file->f_flags;
9250+ spin_unlock(&file->f_lock);
9251+ err = open(file, flags);
dece6358 9252+ di_read_unlock(dentry, AuLock_IR);
1308ab2a 9253+
9254+ fi_write_unlock(file);
9255+ if (unlikely(err))
9256+ au_finfo_fin(file);
dece6358
AM
9257+ out:
9258+ si_read_unlock(sb);
9259+ return err;
9260+}
1facf9fc 9261+
1308ab2a 9262+int au_reopen_nondir(struct file *file)
dece6358 9263+{
1308ab2a 9264+ int err;
9265+ unsigned int flags;
9266+ aufs_bindex_t bstart, bindex, bend;
dece6358 9267+ struct dentry *dentry;
1308ab2a 9268+ struct file *h_file, *h_file_tmp;
1facf9fc 9269+
dece6358 9270+ dentry = file->f_dentry;
1308ab2a 9271+ bstart = au_dbstart(dentry);
9272+ h_file_tmp = NULL;
9273+ if (au_fbstart(file) == bstart) {
9274+ h_file = au_h_fptr(file, bstart);
9275+ if (file->f_mode == h_file->f_mode)
9276+ return 0; /* success */
9277+ h_file_tmp = h_file;
9278+ get_file(h_file_tmp);
9279+ au_set_h_fptr(file, bstart, NULL);
9280+ }
9281+ AuDebugOn(au_fbstart(file) < bstart
9282+ || au_fi(file)->fi_hfile[0 + bstart].hf_file);
1facf9fc 9283+
1308ab2a 9284+ spin_lock(&file->f_lock);
9285+ flags = file->f_flags & ~O_TRUNC;
9286+ spin_unlock(&file->f_lock);
9287+ h_file = au_h_open(dentry, bstart, flags, file);
9288+ err = PTR_ERR(h_file);
9289+ if (IS_ERR(h_file))
9290+ goto out; /* todo: close all? */
1facf9fc 9291+
1308ab2a 9292+ err = 0;
9293+ au_set_fbstart(file, bstart);
9294+ au_set_h_fptr(file, bstart, h_file);
9295+ au_update_figen(file);
9296+ /* todo: necessary? */
9297+ /* file->f_ra = h_file->f_ra; */
1facf9fc 9298+
1308ab2a 9299+ /* close lower files */
9300+ bend = au_fbend(file);
9301+ for (bindex = bstart + 1; bindex <= bend; bindex++)
9302+ au_set_h_fptr(file, bindex, NULL);
9303+ au_set_fbend(file, bstart);
1facf9fc 9304+
dece6358 9305+ out:
1308ab2a 9306+ if (h_file_tmp)
9307+ fput(h_file_tmp);
dece6358
AM
9308+ return err;
9309+}
1facf9fc 9310+
1308ab2a 9311+/* ---------------------------------------------------------------------- */
9312+
9313+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
9314+ struct dentry *hi_wh)
dece6358 9315+{
1308ab2a 9316+ int err;
9317+ aufs_bindex_t bstart;
9318+ struct au_dinfo *dinfo;
9319+ struct dentry *h_dentry;
1facf9fc 9320+
1308ab2a 9321+ dinfo = au_di(file->f_dentry);
9322+ AuRwMustWriteLock(&dinfo->di_rwsem);
1facf9fc 9323+
1308ab2a 9324+ bstart = dinfo->di_bstart;
9325+ dinfo->di_bstart = btgt;
9326+ h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry;
9327+ dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh;
9328+ err = au_reopen_nondir(file);
9329+ dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry;
9330+ dinfo->di_bstart = bstart;
1facf9fc 9331+
dece6358
AM
9332+ return err;
9333+}
1facf9fc 9334+
1308ab2a 9335+static int au_ready_to_write_wh(struct file *file, loff_t len,
9336+ aufs_bindex_t bcpup)
1facf9fc 9337+{
1308ab2a 9338+ int err;
dece6358 9339+ struct inode *inode;
1308ab2a 9340+ struct dentry *dentry, *hi_wh;
dece6358 9341+ struct super_block *sb;
1facf9fc 9342+
dece6358 9343+ dentry = file->f_dentry;
dece6358 9344+ inode = dentry->d_inode;
1308ab2a 9345+ hi_wh = au_hi_wh(inode, bcpup);
9346+ if (!hi_wh)
9347+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
9348+ else
9349+ /* already copied-up after unlink */
9350+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 9351+
1308ab2a 9352+ sb = dentry->d_sb;
9353+ if (!err && inode->i_nlink > 1 && au_opt_test(au_mntflags(sb), PLINK))
9354+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
1facf9fc 9355+
1308ab2a 9356+ return err;
9357+}
1facf9fc 9358+
1308ab2a 9359+/*
9360+ * prepare the @file for writing.
9361+ */
9362+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
9363+{
9364+ int err;
9365+ aufs_bindex_t bstart, bcpup;
9366+ struct dentry *dentry, *parent, *h_dentry;
9367+ struct inode *h_inode, *inode;
9368+ struct super_block *sb;
9369+
9370+ dentry = file->f_dentry;
9371+ sb = dentry->d_sb;
dece6358 9372+ bstart = au_fbstart(file);
1308ab2a 9373+ inode = dentry->d_inode;
9374+ err = au_test_ro(sb, bstart, inode);
9375+ if (!err && (au_h_fptr(file, bstart)->f_mode & FMODE_WRITE)) {
9376+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
9377+ goto out;
9378+ }
1facf9fc 9379+
1308ab2a 9380+ /* need to cpup */
9381+ parent = dget_parent(dentry);
9382+ di_write_lock_parent(parent);
9383+ err = AuWbrCopyup(au_sbi(sb), dentry);
9384+ bcpup = err;
9385+ if (unlikely(err < 0))
9386+ goto out_dgrade;
9387+ err = 0;
1facf9fc 9388+
1308ab2a 9389+ if (!au_h_dptr(parent, bcpup)) {
9390+ err = au_cpup_dirs(dentry, bcpup);
9391+ if (unlikely(err))
9392+ goto out_dgrade;
9393+ }
1facf9fc 9394+
1308ab2a 9395+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
9396+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
dece6358 9397+ if (unlikely(err))
1308ab2a 9398+ goto out_dgrade;
1facf9fc 9399+
1308ab2a 9400+ h_dentry = au_h_fptr(file, bstart)->f_dentry;
9401+ h_inode = h_dentry->d_inode;
9402+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
9403+ if (d_unhashed(dentry) /* || d_unhashed(h_dentry) */
9404+ /* || !h_inode->i_nlink */) {
9405+ err = au_ready_to_write_wh(file, len, bcpup);
9406+ di_downgrade_lock(parent, AuLock_IR);
9407+ } else {
9408+ di_downgrade_lock(parent, AuLock_IR);
9409+ if (!au_h_dptr(dentry, bcpup))
9410+ err = au_sio_cpup_simple(dentry, bcpup, len,
9411+ AuCpup_DTIME);
9412+ if (!err)
9413+ err = au_reopen_nondir(file);
dece6358 9414+ }
1308ab2a 9415+ mutex_unlock(&h_inode->i_mutex);
1facf9fc 9416+
1308ab2a 9417+ if (!err) {
9418+ au_pin_set_parent_lflag(pin, /*lflag*/0);
9419+ goto out_dput; /* success */
9420+ }
9421+ au_unpin(pin);
9422+ goto out_unlock;
1facf9fc 9423+
1308ab2a 9424+ out_dgrade:
9425+ di_downgrade_lock(parent, AuLock_IR);
9426+ out_unlock:
9427+ di_read_unlock(parent, AuLock_IR);
9428+ out_dput:
9429+ dput(parent);
dece6358 9430+ out:
dece6358 9431+ return err;
1facf9fc 9432+}
9433+
1308ab2a 9434+/* ---------------------------------------------------------------------- */
9435+
9436+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 9437+{
1308ab2a 9438+ int err;
9439+ aufs_bindex_t bstart;
dece6358 9440+ struct au_pin pin;
1308ab2a 9441+ struct au_finfo *finfo;
9442+ struct dentry *dentry, *parent, *hi_wh;
dece6358
AM
9443+ struct inode *inode;
9444+ struct super_block *sb;
1facf9fc 9445+
1308ab2a 9446+ FiMustWriteLock(file);
9447+
9448+ err = 0;
9449+ finfo = au_fi(file);
dece6358 9450+ dentry = file->f_dentry;
dece6358 9451+ sb = dentry->d_sb;
1308ab2a 9452+ inode = dentry->d_inode;
9453+ bstart = au_ibstart(inode);
9454+ if (bstart == finfo->fi_bstart)
dece6358 9455+ goto out;
1facf9fc 9456+
1308ab2a 9457+ parent = dget_parent(dentry);
9458+ if (au_test_ro(sb, bstart, inode)) {
9459+ di_read_lock_parent(parent, !AuLock_IR);
9460+ err = AuWbrCopyup(au_sbi(sb), dentry);
9461+ bstart = err;
9462+ di_read_unlock(parent, !AuLock_IR);
9463+ if (unlikely(err < 0))
9464+ goto out_parent;
9465+ err = 0;
9466+ }
1facf9fc 9467+
1308ab2a 9468+ di_read_lock_parent(parent, AuLock_IR);
9469+ hi_wh = au_hi_wh(inode, bstart);
9470+ if (au_opt_test(au_mntflags(sb), PLINK)
9471+ && au_plink_test(inode)
9472+ && !d_unhashed(dentry)) {
9473+ err = au_test_and_cpup_dirs(dentry, bstart);
9474+ if (unlikely(err))
9475+ goto out_unlock;
9476+
9477+ /* always superio. */
9478+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
9479+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
9480+ if (!err)
9481+ err = au_sio_cpup_simple(dentry, bstart, -1,
9482+ AuCpup_DTIME);
9483+ au_unpin(&pin);
9484+ } else if (hi_wh) {
9485+ /* already copied-up after unlink */
9486+ err = au_reopen_wh(file, bstart, hi_wh);
9487+ *need_reopen = 0;
9488+ }
1facf9fc 9489+
dece6358 9490+ out_unlock:
1308ab2a 9491+ di_read_unlock(parent, AuLock_IR);
9492+ out_parent:
9493+ dput(parent);
dece6358 9494+ out:
dece6358 9495+ return err;
1facf9fc 9496+}
9497+
1308ab2a 9498+static void au_do_refresh_file(struct file *file)
1facf9fc 9499+{
1308ab2a 9500+ aufs_bindex_t bindex, bend, new_bindex, brid;
9501+ struct au_hfile *p, tmp, *q;
1facf9fc 9502+ struct au_finfo *finfo;
1308ab2a 9503+ struct super_block *sb;
1facf9fc 9504+
1308ab2a 9505+ FiMustWriteLock(file);
1facf9fc 9506+
1308ab2a 9507+ sb = file->f_dentry->d_sb;
1facf9fc 9508+ finfo = au_fi(file);
1308ab2a 9509+ p = finfo->fi_hfile + finfo->fi_bstart;
9510+ brid = p->hf_br->br_id;
9511+ bend = finfo->fi_bend;
9512+ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) {
9513+ if (!p->hf_file)
9514+ continue;
1facf9fc 9515+
1308ab2a 9516+ new_bindex = au_br_index(sb, p->hf_br->br_id);
9517+ if (new_bindex == bindex)
9518+ continue;
9519+ if (new_bindex < 0) {
9520+ au_set_h_fptr(file, bindex, NULL);
9521+ continue;
9522+ }
1facf9fc 9523+
1308ab2a 9524+ /* swap two lower inode, and loop again */
9525+ q = finfo->fi_hfile + new_bindex;
9526+ tmp = *q;
9527+ *q = *p;
9528+ *p = tmp;
9529+ if (tmp.hf_file) {
9530+ bindex--;
9531+ p--;
9532+ }
9533+ }
1facf9fc 9534+
1308ab2a 9535+ p = finfo->fi_hfile;
9536+ if (!au_test_mmapped(file) && !d_unhashed(file->f_dentry)) {
9537+ bend = au_sbend(sb);
9538+ for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend;
9539+ finfo->fi_bstart++, p++)
9540+ if (p->hf_file) {
9541+ if (p->hf_file->f_dentry
9542+ && p->hf_file->f_dentry->d_inode)
9543+ break;
9544+ else
9545+ au_hfput(p, file);
9546+ }
9547+ } else {
9548+ bend = au_br_index(sb, brid);
9549+ for (finfo->fi_bstart = 0; finfo->fi_bstart < bend;
9550+ finfo->fi_bstart++, p++)
9551+ if (p->hf_file)
9552+ au_hfput(p, file);
9553+ bend = au_sbend(sb);
dece6358 9554+ }
1facf9fc 9555+
1308ab2a 9556+ p = finfo->fi_hfile + bend;
9557+ for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart;
9558+ finfo->fi_bend--, p--)
9559+ if (p->hf_file) {
9560+ if (p->hf_file->f_dentry
9561+ && p->hf_file->f_dentry->d_inode)
9562+ break;
9563+ else
9564+ au_hfput(p, file);
9565+ }
9566+ AuDebugOn(finfo->fi_bend < finfo->fi_bstart);
1facf9fc 9567+}
9568+
1308ab2a 9569+/*
9570+ * after branch manipulating, refresh the file.
9571+ */
9572+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 9573+{
1308ab2a 9574+ int err, need_reopen;
9575+ struct dentry *dentry;
9576+ aufs_bindex_t bend, bindex;
1facf9fc 9577+
1308ab2a 9578+ dentry = file->f_dentry;
9579+ err = au_fi_realloc(au_fi(file), au_sbend(dentry->d_sb) + 1);
9580+ if (unlikely(err))
dece6358 9581+ goto out;
1308ab2a 9582+ au_do_refresh_file(file);
1facf9fc 9583+
dece6358 9584+ err = 0;
1308ab2a 9585+ need_reopen = 1;
9586+ if (!au_test_mmapped(file))
9587+ err = au_file_refresh_by_inode(file, &need_reopen);
9588+ if (!err && need_reopen && !d_unhashed(dentry))
9589+ err = reopen(file);
9590+ if (!err) {
9591+ au_update_figen(file);
9592+ return 0; /* success */
9593+ }
1facf9fc 9594+
1308ab2a 9595+ /* error, close all lower files */
9596+ bend = au_fbend(file);
9597+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
9598+ au_set_h_fptr(file, bindex, NULL);
1facf9fc 9599+
dece6358
AM
9600+ out:
9601+ return err;
1facf9fc 9602+}
9603+
1308ab2a 9604+/* common function to regular file and dir */
9605+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
9606+ int wlock)
1facf9fc 9607+{
dece6358 9608+ int err;
1308ab2a 9609+ unsigned int sigen, figen;
9610+ aufs_bindex_t bstart;
9611+ unsigned char pseudo_link;
dece6358 9612+ struct dentry *dentry;
1facf9fc 9613+
1308ab2a 9614+ err = 0;
dece6358 9615+ dentry = file->f_dentry;
1308ab2a 9616+ sigen = au_sigen(dentry->d_sb);
9617+ fi_write_lock(file);
9618+ figen = au_figen(file);
9619+ di_write_lock_child(dentry);
9620+ bstart = au_dbstart(dentry);
9621+ pseudo_link = (bstart != au_ibstart(dentry->d_inode));
9622+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
9623+ if (!wlock) {
9624+ di_downgrade_lock(dentry, AuLock_IR);
9625+ fi_downgrade_lock(file);
9626+ }
9627+ goto out; /* success */
dece6358 9628+ }
1facf9fc 9629+
1308ab2a 9630+ AuDbg("sigen %d, figen %d\n", sigen, figen);
9631+ if (sigen != au_digen(dentry)
9632+ || sigen != au_iigen(dentry->d_inode)) {
9633+ err = au_reval_dpath(dentry, sigen);
9634+ if (unlikely(err < 0))
9635+ goto out;
9636+ AuDebugOn(au_digen(dentry) != sigen
9637+ || au_iigen(dentry->d_inode) != sigen);
dece6358 9638+ }
1facf9fc 9639+
1308ab2a 9640+ err = refresh_file(file, reopen);
9641+ if (!err) {
9642+ if (!wlock) {
9643+ di_downgrade_lock(dentry, AuLock_IR);
9644+ fi_downgrade_lock(file);
9645+ }
9646+ } else {
9647+ di_write_unlock(dentry);
dece6358 9648+ fi_write_unlock(file);
1308ab2a 9649+ }
9650+
dece6358 9651+ out:
dece6358 9652+ return err;
1facf9fc 9653+}
9654+
dece6358 9655+/* ---------------------------------------------------------------------- */
1facf9fc 9656+
1308ab2a 9657+/* cf. aufs_nopage() */
9658+/* for madvise(2) */
9659+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1facf9fc 9660+{
1308ab2a 9661+ unlock_page(page);
9662+ return 0;
9663+}
dece6358 9664+
/*
 * they will never be called.
 * under CONFIG_AUFS_DEBUG, every stub calls AuUnsupport() and returns zero
 * (or nothing).
 */
#ifdef CONFIG_AUFS_DEBUG
static int aufs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_writepage(struct page *page, struct writeback_control *wbc)
{
	AuUnsupport();
	return 0;
}

static void aufs_sync_page(struct page *page)
{
	AuUnsupport();
}

static int aufs_set_page_dirty(struct page *page)
{
	AuUnsupport();
	return 0;
}

static void aufs_invalidatepage(struct page *page, unsigned long offset)
{
	AuUnsupport();
}

static int aufs_releasepage(struct page *page, gfp_t gfp)
{
	AuUnsupport();
	return 0;
}

static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	AuUnsupport();
	return 0;
}
#endif /* CONFIG_AUFS_DEBUG */
dece6358 9691+
1308ab2a 9692+struct address_space_operations aufs_aop = {
9693+ .readpage = aufs_readpage,
9694+#ifdef CONFIG_AUFS_DEBUG
9695+ .writepage = aufs_writepage,
9696+ .sync_page = aufs_sync_page,
9697+ .set_page_dirty = aufs_set_page_dirty,
9698+ .write_begin = aufs_write_begin,
9699+ .write_end = aufs_write_end,
9700+ .invalidatepage = aufs_invalidatepage,
9701+ .releasepage = aufs_releasepage,
9702+ .direct_IO = aufs_direct_IO,
9703+#endif /* CONFIG_AUFS_DEBUG */
dece6358 9704+};
1308ab2a 9705diff -uprN -x .git linux-2.6.31/fs/aufs/file.h aufs2-2.6.git/fs/aufs/file.h
9706--- linux-2.6.31/fs/aufs/file.h 1970-01-01 00:00:00.000000000 +0000
9707+++ aufs2-2.6.git/fs/aufs/file.h 2009-09-21 21:49:23.401607657 +0000
9708@@ -0,0 +1,175 @@
dece6358
AM
9709+/*
9710+ * Copyright (C) 2005-2009 Junjiro R. Okajima
9711+ *
9712+ * This program, aufs is free software; you can redistribute it and/or modify
9713+ * it under the terms of the GNU General Public License as published by
9714+ * the Free Software Foundation; either version 2 of the License, or
9715+ * (at your option) any later version.
9716+ *
9717+ * This program is distributed in the hope that it will be useful,
9718+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9719+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9720+ * GNU General Public License for more details.
9721+ *
9722+ * You should have received a copy of the GNU General Public License
9723+ * along with this program; if not, write to the Free Software
9724+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9725+ */
9726+
9727+/*
1308ab2a 9728+ * file operations
dece6358
AM
9729+ */
9730+
1308ab2a 9731+#ifndef __AUFS_FILE_H__
9732+#define __AUFS_FILE_H__
dece6358
AM
9733+
9734+#ifdef __KERNEL__
9735+
dece6358 9736+#include <linux/fs.h>
1308ab2a 9737+#include <linux/poll.h>
dece6358 9738+#include <linux/aufs_type.h>
1308ab2a 9739+#include "rwsem.h"
dece6358 9740+
1308ab2a 9741+struct au_branch;
9742+struct au_hfile {
9743+ struct file *hf_file;
9744+ struct au_branch *hf_br;
9745+};
1facf9fc 9746+
1308ab2a 9747+struct au_vdir;
9748+struct au_finfo {
9749+ atomic_t fi_generation;
dece6358 9750+
1308ab2a 9751+ struct au_rwsem fi_rwsem;
9752+ struct au_hfile *fi_hfile;
9753+ aufs_bindex_t fi_bstart, fi_bend;
1facf9fc 9754+
1308ab2a 9755+ union {
9756+ /* non-dir only */
9757+ struct {
9758+ struct vm_operations_struct *fi_h_vm_ops;
9759+ struct vm_operations_struct *fi_vm_ops;
9760+ struct mutex fi_vm_mtx;
9761+ };
1facf9fc 9762+
1308ab2a 9763+ /* dir only */
9764+ struct {
9765+ struct au_vdir *fi_vdir_cache;
9766+ int fi_maintain_plink;
9767+ };
9768+ };
9769+};
dece6358 9770+
1308ab2a 9771+/* ---------------------------------------------------------------------- */
1facf9fc 9772+
1308ab2a 9773+/* file.c */
9774+extern struct address_space_operations aufs_aop;
9775+unsigned int au_file_roflags(unsigned int flags);
9776+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9777+ struct file *file);
9778+int au_do_open(struct file *file, int (*open)(struct file *file, int flags));
9779+int au_reopen_nondir(struct file *file);
9780+struct au_pin;
9781+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
9782+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
9783+ int wlock);
1facf9fc 9784+
1308ab2a 9785+/* poll.c */
9786+#ifdef CONFIG_AUFS_POLL
9787+unsigned int aufs_poll(struct file *file, poll_table *wait);
1facf9fc 9788+#endif
1facf9fc 9789+
1308ab2a 9790+/* f_op.c */
9791+extern const struct file_operations aufs_file_fop;
9792+int aufs_flush(struct file *file, fl_owner_t id);
1facf9fc 9793+
1308ab2a 9794+/* finfo.c */
9795+void au_hfput(struct au_hfile *hf, struct file *file);
9796+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
9797+ struct file *h_file);
dece6358 9798+
1308ab2a 9799+void au_update_figen(struct file *file);
dece6358 9800+
1308ab2a 9801+void au_finfo_fin(struct file *file);
9802+int au_finfo_init(struct file *file);
9803+int au_fi_realloc(struct au_finfo *finfo, int nbr);
9804+
9805+/* ---------------------------------------------------------------------- */
9806+
9807+static inline struct au_finfo *au_fi(struct file *file)
dece6358 9808+{
1308ab2a 9809+ return file->private_data;
dece6358
AM
9810+}
9811+
1308ab2a 9812+/* ---------------------------------------------------------------------- */
9813+
9814+/*
9815+ * fi_read_lock, fi_write_lock,
9816+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
9817+ */
9818+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
9819+
9820+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
9821+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
9822+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
9823+
9824+/* ---------------------------------------------------------------------- */
9825+
9826+/* todo: hard/soft set? */
9827+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 9828+{
1308ab2a 9829+ FiMustAnyLock(file);
9830+ return au_fi(file)->fi_bstart;
dece6358
AM
9831+}
9832+
1308ab2a 9833+static inline aufs_bindex_t au_fbend(struct file *file)
dece6358 9834+{
1308ab2a 9835+ FiMustAnyLock(file);
9836+ return au_fi(file)->fi_bend;
dece6358
AM
9837+}
9838+
1308ab2a 9839+static inline struct au_vdir *au_fvdir_cache(struct file *file)
dece6358 9840+{
1308ab2a 9841+ FiMustAnyLock(file);
9842+ return au_fi(file)->fi_vdir_cache;
dece6358
AM
9843+}
9844+
1308ab2a 9845+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
dece6358 9846+{
1308ab2a 9847+ FiMustWriteLock(file);
9848+ au_fi(file)->fi_bstart = bindex;
dece6358
AM
9849+}
9850+
1308ab2a 9851+static inline void au_set_fbend(struct file *file, aufs_bindex_t bindex)
dece6358 9852+{
1308ab2a 9853+ FiMustWriteLock(file);
9854+ au_fi(file)->fi_bend = bindex;
dece6358
AM
9855+}
9856+
1308ab2a 9857+static inline void au_set_fvdir_cache(struct file *file,
9858+ struct au_vdir *vdir_cache)
dece6358 9859+{
1308ab2a 9860+ FiMustWriteLock(file);
9861+ au_fi(file)->fi_vdir_cache = vdir_cache;
dece6358
AM
9862+}
9863+
1308ab2a 9864+static inline struct file *au_h_fptr(struct file *file, aufs_bindex_t bindex)
dece6358 9865+{
1308ab2a 9866+ FiMustAnyLock(file);
9867+ return au_fi(file)->fi_hfile[0 + bindex].hf_file;
dece6358
AM
9868+}
9869+
1308ab2a 9870+/* todo: memory barrier? */
9871+static inline unsigned int au_figen(struct file *f)
dece6358 9872+{
1308ab2a 9873+ return atomic_read(&au_fi(f)->fi_generation);
dece6358
AM
9874+}
9875+
1308ab2a 9876+static inline int au_test_mmapped(struct file *f)
dece6358 9877+{
1308ab2a 9878+ /* FiMustAnyLock(f); */
9879+ return !!(au_fi(f)->fi_h_vm_ops);
dece6358
AM
9880+}
9881+
1308ab2a 9882+#endif /* __KERNEL__ */
9883+#endif /* __AUFS_FILE_H__ */
9884diff -uprN -x .git linux-2.6.31/fs/aufs/finfo.c aufs2-2.6.git/fs/aufs/finfo.c
9885--- linux-2.6.31/fs/aufs/finfo.c 1970-01-01 00:00:00.000000000 +0000
9886+++ aufs2-2.6.git/fs/aufs/finfo.c 2009-09-21 21:49:23.401607657 +0000
9887@@ -0,0 +1,128 @@
9888+/*
9889+ * Copyright (C) 2005-2009 Junjiro R. Okajima
9890+ *
9891+ * This program, aufs is free software; you can redistribute it and/or modify
9892+ * it under the terms of the GNU General Public License as published by
9893+ * the Free Software Foundation; either version 2 of the License, or
9894+ * (at your option) any later version.
9895+ *
9896+ * This program is distributed in the hope that it will be useful,
9897+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9898+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9899+ * GNU General Public License for more details.
9900+ *
9901+ * You should have received a copy of the GNU General Public License
9902+ * along with this program; if not, write to the Free Software
9903+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
9904+ */
9905+
9906+/*
9907+ * file private data
9908+ */
9909+
9910+#include <linux/file.h>
9911+#include "aufs.h"
9912+
9913+void au_hfput(struct au_hfile *hf, struct file *file)
dece6358 9914+{
1308ab2a 9915+ if (file->f_flags & vfsub_fmode_to_uint(FMODE_EXEC))
9916+ allow_write_access(hf->hf_file);
9917+ fput(hf->hf_file);
9918+ hf->hf_file = NULL;
9919+ atomic_dec_return(&hf->hf_br->br_count);
9920+ hf->hf_br = NULL;
dece6358
AM
9921+}
9922+
1308ab2a 9923+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
dece6358 9924+{
1308ab2a 9925+ struct au_finfo *finfo = au_fi(file);
9926+ struct au_hfile *hf;
9927+
9928+ hf = finfo->fi_hfile + bindex;
9929+ if (hf->hf_file)
9930+ au_hfput(hf, file);
9931+ if (val) {
9932+ hf->hf_file = val;
9933+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
9934+ }
dece6358
AM
9935+}
9936+
1308ab2a 9937+void au_update_figen(struct file *file)
dece6358 9938+{
1308ab2a 9939+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
9940+ /* smp_mb(); */ /* atomic_set */
dece6358
AM
9941+}
9942+
1308ab2a 9943+/* ---------------------------------------------------------------------- */
9944+
9945+void au_finfo_fin(struct file *file)
dece6358 9946+{
1308ab2a 9947+ struct au_finfo *finfo;
9948+ aufs_bindex_t bindex, bend;
9949+
9950+ fi_write_lock(file);
9951+ bend = au_fbend(file);
9952+ bindex = au_fbstart(file);
9953+ if (bindex >= 0)
9954+ /*
9955+ * calls fput() instead of filp_close(),
9956+ * since no dnotify or lock for the lower file.
9957+ */
9958+ for (; bindex <= bend; bindex++)
9959+ au_set_h_fptr(file, bindex, NULL);
9960+
9961+ finfo = au_fi(file);
9962+ au_dbg_verify_hf(finfo);
9963+ kfree(finfo->fi_hfile);
9964+ fi_write_unlock(file);
9965+ AuRwDestroy(&finfo->fi_rwsem);
9966+ au_cache_free_finfo(finfo);
dece6358
AM
9967+}
9968+
1308ab2a 9969+int au_finfo_init(struct file *file)
dece6358 9970+{
1308ab2a 9971+ struct au_finfo *finfo;
9972+ struct dentry *dentry;
9973+
9974+ dentry = file->f_dentry;
9975+ finfo = au_cache_alloc_finfo();
9976+ if (unlikely(!finfo))
9977+ goto out;
9978+
9979+ finfo->fi_hfile = kcalloc(au_sbend(dentry->d_sb) + 1,
9980+ sizeof(*finfo->fi_hfile), GFP_NOFS);
9981+ if (unlikely(!finfo->fi_hfile))
9982+ goto out_finfo;
9983+
9984+ au_rw_init_wlock(&finfo->fi_rwsem);
9985+ finfo->fi_bstart = -1;
9986+ finfo->fi_bend = -1;
9987+ atomic_set(&finfo->fi_generation, au_digen(dentry));
9988+ /* smp_mb(); */ /* atomic_set */
9989+
9990+ file->private_data = finfo;
9991+ return 0; /* success */
9992+
9993+ out_finfo:
9994+ au_cache_free_finfo(finfo);
9995+ out:
9996+ return -ENOMEM;
dece6358
AM
9997+}
9998+
1308ab2a 9999+int au_fi_realloc(struct au_finfo *finfo, int nbr)
dece6358 10000+{
1308ab2a 10001+ int err, sz;
10002+ struct au_hfile *hfp;
10003+
10004+ err = -ENOMEM;
10005+ sz = sizeof(*hfp) * (finfo->fi_bend + 1);
10006+ if (!sz)
10007+ sz = sizeof(*hfp);
10008+ hfp = au_kzrealloc(finfo->fi_hfile, sz, sizeof(*hfp) * nbr, GFP_NOFS);
10009+ if (hfp) {
10010+ finfo->fi_hfile = hfp;
10011+ err = 0;
10012+ }
10013+
10014+ return err;
dece6358 10015+}
1308ab2a 10016diff -uprN -x .git linux-2.6.31/fs/aufs/fstype.h aufs2-2.6.git/fs/aufs/fstype.h
10017--- linux-2.6.31/fs/aufs/fstype.h 1970-01-01 00:00:00.000000000 +0000
10018+++ aufs2-2.6.git/fs/aufs/fstype.h 2009-09-21 21:49:23.401607657 +0000
10019@@ -0,0 +1,485 @@
10020+/*
10021+ * Copyright (C) 2005-2009 Junjiro R. Okajima
10022+ *
10023+ * This program, aufs is free software; you can redistribute it and/or modify
10024+ * it under the terms of the GNU General Public License as published by
10025+ * the Free Software Foundation; either version 2 of the License, or
10026+ * (at your option) any later version.
10027+ *
10028+ * This program is distributed in the hope that it will be useful,
10029+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10030+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10031+ * GNU General Public License for more details.
10032+ *
10033+ * You should have received a copy of the GNU General Public License
10034+ * along with this program; if not, write to the Free Software
10035+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
10036+ */
dece6358 10037+
1308ab2a 10038+/*
10039+ * judging filesystem type
10040+ */
10041+
10042+#ifndef __AUFS_FSTYPE_H__
10043+#define __AUFS_FSTYPE_H__
10044+
10045+#ifdef __KERNEL__
10046+
10047+#include <linux/cramfs_fs.h>
10048+#include <linux/fs.h>
10049+#include <linux/magic.h>
10050+#include <linux/romfs_fs.h>
10051+#include <linux/aufs_type.h>
10052+
10053+static inline int au_test_aufs(struct super_block *sb)
dece6358 10054+{
1308ab2a 10055+ return sb->s_magic == AUFS_SUPER_MAGIC;
10056+}
10057+
10058+static inline const char *au_sbtype(struct super_block *sb)
10059+{
10060+ return sb->s_type->name;
10061+}
10062+
10063+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
10064+{
10065+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
10066+ return sb->s_magic == ISOFS_SUPER_MAGIC; /* iso9660 magic, not romfs */
dece6358
AM
10067+#else
10068+ return 0;
10069+#endif
10070+}
10071+
1308ab2a 10072+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 10073+{
1308ab2a 10074+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
10075+ return sb->s_magic == ROMFS_MAGIC; /* romfs magic, not iso9660 */
dece6358
AM
10076+#else
10077+ return 0;
10078+#endif
10079+}
10080+
1308ab2a 10081+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 10082+{
1308ab2a 10083+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
10084+ return sb->s_magic == CRAMFS_MAGIC;
10085+#endif
10086+ return 0;
10087+}
10088+
10089+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
10090+{
10091+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
10092+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
10093+#else
10094+ return 0;
10095+#endif
10096+}
10097+
1308ab2a 10098+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 10099+{
1308ab2a 10100+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
10101+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
10102+#else
10103+ return 0;
10104+#endif
10105+}
10106+
1308ab2a 10107+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 10108+{
1308ab2a 10109+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
10110+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
10111+#else
10112+ return 0;
10113+#endif
10114+}
10115+
1308ab2a 10116+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 10117+{
1308ab2a 10118+#ifdef CONFIG_TMPFS
10119+ return sb->s_magic == TMPFS_MAGIC;
10120+#else
10121+ return 0;
dece6358 10122+#endif
dece6358
AM
10123+}
10124+
1308ab2a 10125+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 10126+{
1308ab2a 10127+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
10128+ return !strcmp(au_sbtype(sb), "ecryptfs");
10129+#else
10130+ return 0;
10131+#endif
1facf9fc 10132+}
10133+
1308ab2a 10134+static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
1facf9fc 10135+{
1308ab2a 10136+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
10137+ return sb->s_magic == SMB_SUPER_MAGIC;
10138+#else
10139+ return 0;
1facf9fc 10140+#endif
1facf9fc 10141+}
10142+
1308ab2a 10143+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
1facf9fc 10144+{
1308ab2a 10145+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
10146+ return sb->s_magic == OCFS2_SUPER_MAGIC;
10147+#else
10148+ return 0;
10149+#endif
1facf9fc 10150+}
10151+
1308ab2a 10152+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
1facf9fc 10153+{
1308ab2a 10154+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
10155+ return sb->s_magic == DLMFS_MAGIC;
10156+#else
10157+ return 0;
10158+#endif
1facf9fc 10159+}
10160+
1308ab2a 10161+static inline int au_test_coda(struct super_block *sb __maybe_unused)
1facf9fc 10162+{
1308ab2a 10163+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
10164+ return sb->s_magic == CODA_SUPER_MAGIC;
10165+#else
10166+ return 0;
10167+#endif
10168+}
10169+
10170+static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
10171+{
10172+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
10173+ return sb->s_magic == V9FS_MAGIC;
10174+#else
10175+ return 0;
10176+#endif
10177+}
10178+
10179+static inline int au_test_ext4(struct super_block *sb __maybe_unused)
10180+{
10181+#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
10182+ return sb->s_magic == EXT4_SUPER_MAGIC;
10183+#else
10184+ return 0;
10185+#endif
10186+}
10187+
10188+static inline int au_test_sysv(struct super_block *sb __maybe_unused)
10189+{
10190+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
10191+ return !strcmp(au_sbtype(sb), "sysv");
10192+#else
10193+ return 0;
10194+#endif
10195+}
10196+
10197+static inline int au_test_ramfs(struct super_block *sb)
10198+{
10199+ return sb->s_magic == RAMFS_MAGIC;
10200+}
10201+
10202+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
10203+{
10204+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
10205+ return sb->s_magic == UBIFS_SUPER_MAGIC;
10206+#else
10207+ return 0;
10208+#endif
10209+}
10210+
10211+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
10212+{
10213+#ifdef CONFIG_PROC_FS
10214+ return sb->s_magic == PROC_SUPER_MAGIC;
10215+#else
10216+ return 0;
10217+#endif
10218+}
10219+
10220+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
10221+{
10222+#ifdef CONFIG_SYSFS
10223+ return sb->s_magic == SYSFS_MAGIC;
10224+#else
10225+ return 0;
10226+#endif
10227+}
10228+
10229+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
10230+{
10231+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
10232+ return sb->s_magic == CONFIGFS_MAGIC;
10233+#else
10234+ return 0;
10235+#endif
10236+}
10237+
10238+static inline int au_test_minix(struct super_block *sb __maybe_unused)
10239+{
10240+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
10241+ return sb->s_magic == MINIX3_SUPER_MAGIC
10242+ || sb->s_magic == MINIX2_SUPER_MAGIC
10243+ || sb->s_magic == MINIX2_SUPER_MAGIC2
10244+ || sb->s_magic == MINIX_SUPER_MAGIC
10245+ || sb->s_magic == MINIX_SUPER_MAGIC2;
10246+#else
10247+ return 0;
10248+#endif
10249+}
10250+
10251+static inline int au_test_cifs(struct super_block *sb __maybe_unused)
10252+{
10253+#if defined(CONFIG_CIFS_FS) || defined(CONFIG_CIFS_FS_MODULE)
10254+ return sb->s_magic == CIFS_MAGIC_NUMBER; /* NOTE(review): Kconfig symbol may be CONFIG_CIFS; confirm */
10255+#else
10256+ return 0;
10257+#endif
10258+}
10259+
10260+static inline int au_test_fat(struct super_block *sb __maybe_unused)
10261+{
10262+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
10263+ return sb->s_magic == MSDOS_SUPER_MAGIC;
10264+#else
10265+ return 0;
10266+#endif
10267+}
10268+
10269+static inline int au_test_msdos(struct super_block *sb)
10270+{
10271+ return au_test_fat(sb);
10272+}
10273+
10274+static inline int au_test_vfat(struct super_block *sb)
10275+{
10276+ return au_test_fat(sb);
10277+}
10278+
10279+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
10280+{
10281+#ifdef CONFIG_SECURITYFS
10282+ return sb->s_magic == SECURITYFS_MAGIC;
10283+#else
10284+ return 0;
10285+#endif
10286+}
10287+
10288+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
10289+{
10290+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
10291+ return sb->s_magic == SQUASHFS_MAGIC;
10292+#else
10293+ return 0;
10294+#endif
10295+}
10296+
10297+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
10298+{
10299+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
10300+ return sb->s_magic == BTRFS_SUPER_MAGIC;
10301+#else
10302+ return 0;
10303+#endif
10304+}
10305+
10306+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
10307+{
10308+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
10309+ return sb->s_magic == XENFS_SUPER_MAGIC;
10310+#else
10311+ return 0;
10312+#endif
10313+}
10314+
10315+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
10316+{
10317+#ifdef CONFIG_DEBUG_FS
10318+ return sb->s_magic == DEBUGFS_MAGIC;
10319+#else
10320+ return 0;
10321+#endif
10322+}
10323+
10324+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
10325+{
10326+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
10327+ return sb->s_magic == NILFS_SUPER_MAGIC;
10328+#else
10329+ return 0;
10330+#endif
10331+}
10332+
10333+/* ---------------------------------------------------------------------- */
10334+/*
10335+ * they can't be an aufs branch.
10336+ */
10337+static inline int au_test_fs_unsuppoted(struct super_block *sb)
10338+{
10339+ return
10340+#ifndef CONFIG_AUFS_BR_RAMFS
10341+ au_test_ramfs(sb) ||
10342+#endif
10343+ au_test_procfs(sb)
10344+ || au_test_sysfs(sb)
10345+ || au_test_configfs(sb)
10346+ || au_test_debugfs(sb)
10347+ || au_test_securityfs(sb)
10348+ || au_test_xenfs(sb)
10349+ || au_test_ecryptfs(sb)
10350+ /* || !strcmp(au_sbtype(sb), "unionfs") */
10351+ || au_test_aufs(sb); /* will be supported in next version */
10352+}
10353+
10354+/*
10355+ * If the filesystem supports NFS-export, then it has to support NULL as
10356+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
10357+ * We can apply this principle when we handle a lower filesystem.
10358+ */
10359+static inline int au_test_fs_null_nd(struct super_block *sb)
10360+{
10361+ return !!sb->s_export_op;
10362+}
10363+
10364+static inline int au_test_fs_remote(struct super_block *sb)
10365+{
10366+ return !au_test_tmpfs(sb)
10367+#ifdef CONFIG_AUFS_BR_RAMFS
10368+ && !au_test_ramfs(sb)
10369+#endif
10370+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
10371+}
10372+
10373+/* ---------------------------------------------------------------------- */
10374+
10375+/*
10376+ * Note: these functions (below) are created after reading ->getattr() in all
10377+ * filesystems under linux/fs. it means we have to do so in every update...
10378+ */
10379+
10380+/*
10381+ * some filesystems require getattr to refresh the inode attributes before
10382+ * referencing.
10383+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
10384+ * and leave the work for d_revalidate()
10385+ */
10386+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
10387+{
10388+ return au_test_nfs(sb)
10389+ || au_test_fuse(sb)
10390+ /* || au_test_smbfs(sb) */ /* untested */
10391+ /* || au_test_ocfs2(sb) */ /* untested */
10392+ /* || au_test_btrfs(sb) */ /* untested */
10393+ /* || au_test_coda(sb) */ /* untested */
10394+ /* || au_test_v9fs(sb) */ /* untested */
10395+ ;
10396+}
10397+
10398+/*
10399+ * filesystems which don't maintain i_size or i_blocks.
10400+ */
10401+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
10402+{
10403+ return au_test_xfs(sb)
10404+ /* || au_test_ext4(sb) */ /* untested */
10405+ /* || au_test_ocfs2(sb) */ /* untested */
10406+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
10407+ /* || au_test_sysv(sb) */ /* untested */
10408+ /* || au_test_ubifs(sb) */ /* untested */
10409+ /* || au_test_minix(sb) */ /* untested */
10410+ ;
10411+}
10412+
10413+/*
10414+ * filesystems which don't store the correct value in some of their inode
10415+ * attributes.
10416+ */
10417+static inline int au_test_fs_bad_iattr(struct super_block *sb)
10418+{
10419+ return au_test_fs_bad_iattr_size(sb)
10420+ /* || au_test_cifs(sb) */ /* untested */
10421+ || au_test_fat(sb)
10422+ || au_test_msdos(sb)
10423+ || au_test_vfat(sb);
1facf9fc 10424+}
10425+
10426+/* they don't check i_nlink in link(2) */
10427+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
10428+{
10429+ return au_test_tmpfs(sb)
10430+#ifdef CONFIG_AUFS_BR_RAMFS
10431+ || au_test_ramfs(sb)
10432+#endif
dece6358 10433+ || au_test_ubifs(sb);
1facf9fc 10434+}
10435+
10436+/*
10437+ * filesystems which set S_NOATIME and S_NOCMTIME.
10438+ */
10439+static inline int au_test_fs_notime(struct super_block *sb)
10440+{
10441+ return au_test_nfs(sb)
10442+ || au_test_fuse(sb)
dece6358 10443+ || au_test_ubifs(sb)
1facf9fc 10444+ /* || au_test_cifs(sb) */ /* untested */
1facf9fc 10445+ ;
10446+}
10447+
10448+/*
10449+ * filesystems which require replacing i_mapping.
10450+ */
10451+static inline int au_test_fs_bad_mapping(struct super_block *sb)
10452+{
dece6358
AM
10453+ return au_test_fuse(sb)
10454+ || au_test_ubifs(sb);
1facf9fc 10455+}
10456+
10457+/* temporary support for i#1 in cramfs */
10458+static inline int au_test_fs_unique_ino(struct inode *inode)
10459+{
10460+ if (au_test_cramfs(inode->i_sb))
10461+ return inode->i_ino != 1;
10462+ return 1;
10463+}
10464+
10465+/* ---------------------------------------------------------------------- */
10466+
10467+/*
10468+ * the filesystem where the xino files are placed must support i/o after unlink and
10469+ * maintain i_size and i_blocks.
10470+ */
10471+static inline int au_test_fs_bad_xino(struct super_block *sb)
10472+{
10473+ return au_test_fs_remote(sb)
10474+ || au_test_fs_bad_iattr_size(sb)
10475+#ifdef CONFIG_AUFS_BR_RAMFS
10476+ || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
10477+#else
10478+ || !au_test_fs_null_nd(sb) /* to keep xino code simple */
10479+#endif
10480+ /* don't want unnecessary work for xino */
10481+ || au_test_aufs(sb)
1308ab2a 10482+ || au_test_ecryptfs(sb)
10483+ || au_test_nilfs(sb);
1facf9fc 10484+}
10485+
10486+static inline int au_test_fs_trunc_xino(struct super_block *sb)
10487+{
10488+ return au_test_tmpfs(sb)
10489+ || au_test_ramfs(sb);
10490+}
10491+
10492+/*
10493+ * test if the @sb is real-readonly.
10494+ */
10495+static inline int au_test_fs_rr(struct super_block *sb)
10496+{
10497+ return au_test_squashfs(sb)
10498+ || au_test_iso9660(sb)
10499+ || au_test_cramfs(sb)
10500+ || au_test_romfs(sb);
10501+}
10502+
10503+#endif /* __KERNEL__ */
10504+#endif /* __AUFS_FSTYPE_H__ */
1308ab2a 10505diff -uprN -x .git linux-2.6.31/fs/aufs/hinotify.c aufs2-2.6.git/fs/aufs/hinotify.c
10506--- linux-2.6.31/fs/aufs/hinotify.c 1970-01-01 00:00:00.000000000 +0000
10507+++ aufs2-2.6.git/fs/aufs/hinotify.c 2009-09-21 21:49:23.401607657 +0000
dece6358 10508@@ -0,0 +1,755 @@
1facf9fc 10509+/*
10510+ * Copyright (C) 2005-2009 Junjiro R. Okajima
10511+ *
10512+ * This program, aufs is free software; you can redistribute it and/or modify
10513+ * it under the terms of the GNU General Public License as published by
10514+ * the Free Software Foundation; either version 2 of the License, or
10515+ * (at your option) any later version.
dece6358
AM
10516+ *
10517+ * This program is distributed in the hope that it will be useful,
10518+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10519+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10520+ * GNU General Public License for more details.
10521+ *
10522+ * You should have received a copy of the GNU General Public License
10523+ * along with this program; if not, write to the Free Software
10524+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 10525+ */
10526+
10527+/*
10528+ * inotify for the lower directories
10529+ */
10530+
10531+#include "aufs.h"
10532+
10533+static const __u32 AuHinMask = (IN_MOVE | IN_DELETE | IN_CREATE);
10534+static struct inotify_handle *au_hin_handle;
10535+
10536+AuCacheFuncs(hinotify, HINOTIFY);
10537+
10538+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
10539+ struct inode *h_inode)
10540+{
10541+ int err;
10542+ struct au_hinotify *hin;
10543+ s32 wd;
10544+
10545+ err = -ENOMEM;
10546+ hin = au_cache_alloc_hinotify();
10547+ if (hin) {
10548+ AuDebugOn(hinode->hi_notify);
10549+ hinode->hi_notify = hin;
10550+ hin->hin_aufs_inode = inode;
10551+
10552+ inotify_init_watch(&hin->hin_watch);
10553+ wd = inotify_add_watch(au_hin_handle, &hin->hin_watch, h_inode,
10554+ AuHinMask);
10555+ if (wd >= 0)
10556+ return 0; /* success */
10557+
10558+ err = wd;
10559+ put_inotify_watch(&hin->hin_watch);
10560+ au_cache_free_hinotify(hin);
10561+ hinode->hi_notify = NULL;
10562+ }
10563+
10564+ return err;
10565+}
10566+
10567+void au_hin_free(struct au_hinode *hinode)
10568+{
10569+ int err;
10570+ struct au_hinotify *hin;
10571+
10572+ hin = hinode->hi_notify;
10573+ if (hin) {
10574+ err = 0;
10575+ if (atomic_read(&hin->hin_watch.count))
10576+ err = inotify_rm_watch(au_hin_handle, &hin->hin_watch);
10577+ if (unlikely(err))
10578+ /* it means the watch is already removed */
10579+ AuWarn("failed inotify_rm_watch() %d\n", err);
10580+ au_cache_free_hinotify(hin);
10581+ hinode->hi_notify = NULL;
10582+ }
10583+}
10584+
10585+/* ---------------------------------------------------------------------- */
10586+
10587+void au_hin_ctl(struct au_hinode *hinode, int do_set)
10588+{
10589+ struct inode *h_inode;
10590+ struct inotify_watch *watch;
10591+
10592+ if (!hinode->hi_notify)
10593+ return;
10594+
10595+ h_inode = hinode->hi_inode;
10596+ IMustLock(h_inode);
10597+
10598+ /* todo: try inotify_find_update_watch()? */
10599+ watch = &hinode->hi_notify->hin_watch;
10600+ mutex_lock(&h_inode->inotify_mutex);
10601+ /* mutex_lock(&watch->ih->mutex); */
10602+ if (do_set) {
10603+ AuDebugOn(watch->mask & AuHinMask);
10604+ watch->mask |= AuHinMask;
10605+ } else {
10606+ AuDebugOn(!(watch->mask & AuHinMask));
10607+ watch->mask &= ~AuHinMask;
10608+ }
10609+ /* mutex_unlock(&watch->ih->mutex); */
10610+ mutex_unlock(&h_inode->inotify_mutex);
10611+}
10612+
10613+void au_reset_hinotify(struct inode *inode, unsigned int flags)
10614+{
10615+ aufs_bindex_t bindex, bend;
10616+ struct inode *hi;
10617+ struct dentry *iwhdentry;
10618+
10619+ bend = au_ibend(inode);
10620+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
10621+ hi = au_h_iptr(inode, bindex);
10622+ if (!hi)
10623+ continue;
10624+
10625+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
10626+ iwhdentry = au_hi_wh(inode, bindex);
10627+ if (iwhdentry)
10628+ dget(iwhdentry);
10629+ au_igrab(hi);
10630+ au_set_h_iptr(inode, bindex, NULL, 0);
10631+ au_set_h_iptr(inode, bindex, au_igrab(hi),
10632+ flags & ~AuHi_XINO);
10633+ iput(hi);
10634+ dput(iwhdentry);
10635+ /* mutex_unlock(&hi->i_mutex); */
10636+ }
10637+}
10638+
10639+/* ---------------------------------------------------------------------- */
10640+
10641+static int hin_xino(struct inode *inode, struct inode *h_inode)
10642+{
10643+ int err;
10644+ aufs_bindex_t bindex, bend, bfound, bstart;
10645+ struct inode *h_i;
10646+
10647+ err = 0;
10648+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
10649+ AuWarn("branch root dir was changed\n");
10650+ goto out;
10651+ }
10652+
10653+ bfound = -1;
10654+ bend = au_ibend(inode);
10655+ bstart = au_ibstart(inode);
10656+#if 0 /* reserved for future use */
10657+ if (bindex == bend) {
10658+ /* keep this ino in rename case */
10659+ goto out;
10660+ }
10661+#endif
10662+ for (bindex = bstart; bindex <= bend; bindex++) {
10663+ if (au_h_iptr(inode, bindex) == h_inode) {
10664+ bfound = bindex;
10665+ break;
10666+ }
10667+ }
10668+ if (bfound < 0)
10669+ goto out;
10670+
10671+ for (bindex = bstart; bindex <= bend; bindex++) {
10672+ h_i = au_h_iptr(inode, bindex);
10673+ if (!h_i)
10674+ continue;
10675+
1308ab2a 10676+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
1facf9fc 10677+ /* ignore this error */
10678+ /* bad action? */
10679+ }
10680+
10681+ /* children inode number will be broken */
10682+
10683+ out:
10684+ AuTraceErr(err);
10685+ return err;
10686+}
10687+
10688+static int hin_gen_tree(struct dentry *dentry)
10689+{
10690+ int err, i, j, ndentry;
10691+ struct au_dcsub_pages dpages;
10692+ struct au_dpage *dpage;
10693+ struct dentry **dentries;
10694+
10695+ err = au_dpages_init(&dpages, GFP_NOFS);
10696+ if (unlikely(err))
10697+ goto out;
10698+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
10699+ if (unlikely(err))
10700+ goto out_dpages;
10701+
10702+ for (i = 0; i < dpages.ndpage; i++) {
10703+ dpage = dpages.dpages + i;
10704+ dentries = dpage->dentries;
10705+ ndentry = dpage->ndentry;
10706+ for (j = 0; j < ndentry; j++) {
10707+ struct dentry *d;
10708+
10709+ d = dentries[j];
10710+ if (IS_ROOT(d))
10711+ continue;
10712+
10713+ d_drop(d);
10714+ au_digen_dec(d);
10715+ if (d->d_inode)
10716+ /* todo: reset children xino?
10717+ cached children only? */
10718+ au_iigen_dec(d->d_inode);
10719+ }
10720+ }
10721+
10722+ out_dpages:
10723+ au_dpages_free(&dpages);
10724+
10725+ /* discard children */
10726+ dentry_unhash(dentry);
10727+ dput(dentry);
10728+ out:
10729+ return err;
10730+}
10731+
10732+/*
10733+ * return 0 if processed.
10734+ */
10735+static int hin_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
10736+ const unsigned int isdir)
10737+{
10738+ int err;
10739+ struct dentry *d;
10740+ struct qstr *dname;
10741+
10742+ err = 1; /* stays non-zero unless a matching dentry is handled */
10743+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
10744+ AuWarn("branch root dir was changed\n");
10745+ err = 0;
10746+ goto out;
10747+ }
10748+
10749+ if (!isdir) {
10750+ AuDebugOn(!name);
10751+ au_iigen_dec(inode);
10752+ spin_lock(&dcache_lock);
10753+ list_for_each_entry(d, &inode->i_dentry, d_alias) {
10754+ dname = &d->d_name;
10755+ if (dname->len != nlen
10756+ || memcmp(dname->name, name, nlen))
10757+ continue; /* alias has a different name; same test as the dir branch */
10758+ err = 0;
10759+ spin_lock(&d->d_lock);
10760+ __d_drop(d);
10761+ au_digen_dec(d);
10762+ spin_unlock(&d->d_lock);
10763+ break;
10764+ }
10765+ spin_unlock(&dcache_lock);
10766+ } else {
10767+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIRS);
10768+ d = d_find_alias(inode);
10769+ if (!d) {
10770+ au_iigen_dec(inode);
10771+ goto out;
10772+ }
10773+
10774+ dname = &d->d_name;
10775+ if (dname->len == nlen && !memcmp(dname->name, name, nlen))
10776+ err = hin_gen_tree(d);
10777+ dput(d);
10778+ }
10779+
10780+ out:
10781+ AuTraceErr(err);
10782+ return err;
10783+}
10784+/* invalidate by dentry: drop @dentry (non-dir) or its cached tree (dir); returns 0 or hin_gen_tree()'s error */
10785+static int hin_gen_by_name(struct dentry *dentry, const unsigned int isdir)
10786+{
10787+ int err;
10788+ struct inode *inode;
10789+/* a root dentry is never invalidated here: warn and report success */
10790+ inode = dentry->d_inode;
10791+ if (IS_ROOT(dentry)
10792+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
10793+ ) {
10794+ AuWarn("branch root dir was changed\n");
10795+ return 0;
10796+ }
10797+/* non-dir: drop the dentry and decrement both generations; dir: flag the sb and walk the tree if the dentry is positive */
10798+ err = 0;
10799+ if (!isdir) {
10800+ d_drop(dentry);
10801+ au_digen_dec(dentry);
10802+ if (inode)
10803+ au_iigen_dec(inode);
10804+ } else {
10805+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS);
10806+ if (inode)
10807+ err = hin_gen_tree(dentry);
10808+ }
10809+
10810+ AuTraceErr(err);
10811+ return err;
10812+}
10813+
10814+/* ---------------------------------------------------------------------- */
10815+
10816+/* hinotify job flags: one bit per job, combined into a mask; the au_*_hinjob() macros below test, set and clear a bit by its short name */
10817+#define AuHinJob_XINO0 1
10818+#define AuHinJob_GEN (1 << 1)
10819+#define AuHinJob_DIRENT (1 << 2)
10820+#define AuHinJob_ISDIR (1 << 3)
10821+#define AuHinJob_TRYXINO0 (1 << 4)
10822+#define AuHinJob_MNTPNT (1 << 5)
10823+#define au_ftest_hinjob(flags, name) ((flags) & AuHinJob_##name)
10824+#define au_fset_hinjob(flags, name) { (flags) |= AuHinJob_##name; }
10825+#define au_fclr_hinjob(flags, name) { (flags) &= ~AuHinJob_##name; }
10826+/* input of one hin_job() run: flags is a mask of AuHinJob_* bits; hin_job() itself reads flags, inode, h_inode, dentry, h_name and h_nlen */
10827+struct hin_job_args {
10828+ unsigned int flags;
10829+ struct inode *inode, *h_inode, *dir, *h_dir;
10830+ struct dentry *dentry;
10831+ char *h_name;
10832+ int h_nlen;
10833+};
10834+/* run the jobs selected in a->flags; helper errors are deliberately ignored and 0 is always returned */
10835+static int hin_job(struct hin_job_args *a)
10836+{
10837+ const unsigned int isdir = au_ftest_hinjob(a->flags, ISDIR);
10838+
10839+ /* reset xino */
10840+ if (au_ftest_hinjob(a->flags, XINO0) && a->inode)
10841+ hin_xino(a->inode, a->h_inode); /* ignore this error */
10842+/* TRYXINO0: reset xino only if the lower inode has no link left, checked under its i_mutex */
10843+ if (au_ftest_hinjob(a->flags, TRYXINO0)
10844+ && a->inode
10845+ && a->h_inode) {
10846+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
10847+ if (!a->h_inode->i_nlink)
10848+ hin_xino(a->inode, a->h_inode); /* ignore this error */
10849+ mutex_unlock(&a->h_inode->i_mutex);
10850+ }
10851+
10852+ /* make the generation obsolete: try by inode first, fall back to the dentry when that did not return 0 */
10853+ if (au_ftest_hinjob(a->flags, GEN)) {
10854+ int err = -1;
10855+ if (a->inode)
10856+ err = hin_gen_by_inode(a->h_name, a->h_nlen, a->inode,
10857+ isdir);
10858+ if (err && a->dentry)
10859+ hin_gen_by_name(a->dentry, isdir);
10860+ /* ignore this error */
10861+ }
10862+
10863+ /* make dir entries obsolete by zeroing the vdir timestamp, when a vdir exists */
10864+ if (au_ftest_hinjob(a->flags, DIRENT) && a->inode) {
10865+ struct au_vdir *vdir;
10866+
10867+ vdir = au_ivdir(a->inode);
10868+ if (vdir)
10869+ vdir->vd_jiffy = 0;
10870+ /* IMustLock(a->inode); */
10871+ /* a->inode->i_version++; */
10872+ }
10873+
10874+ /* can do nothing but warn */
10875+ if (au_ftest_hinjob(a->flags, MNTPNT)
10876+ && a->dentry
10877+ && d_mountpoint(a->dentry))
10878+ AuWarn("mount-point %.*s is removed or renamed\n",
10879+ AuDLNPair(a->dentry));
10880+
10881+ return 0;
10882+}
10883+
10884+/* ---------------------------------------------------------------------- */
10885+/* debug helper: name of the first IN_* bit (in the order listed) set in @mask, "" if none; always "??" without CONFIG_AUFS_DEBUG */
10886+static char *in_name(u32 mask)
10887+{
10888+#ifdef CONFIG_AUFS_DEBUG
10889+#define test_ret(flag) if (mask & flag) \
10890+ return #flag;
10891+ test_ret(IN_ACCESS);
10892+ test_ret(IN_MODIFY);
10893+ test_ret(IN_ATTRIB);
10894+ test_ret(IN_CLOSE_WRITE);
10895+ test_ret(IN_CLOSE_NOWRITE);
10896+ test_ret(IN_OPEN);
10897+ test_ret(IN_MOVED_FROM);
10898+ test_ret(IN_MOVED_TO);
10899+ test_ret(IN_CREATE);
10900+ test_ret(IN_DELETE);
10901+ test_ret(IN_DELETE_SELF);
10902+ test_ret(IN_MOVE_SELF);
10903+ test_ret(IN_UNMOUNT);
10904+ test_ret(IN_Q_OVERFLOW);
10905+ test_ret(IN_IGNORED);
10906+ return "";
10907+#undef test_ret
10908+#else
10909+ return "??";
10910+#endif
10911+}
10912+/* find the cached child of @dir named @name (@nlen bytes); returns it referenced and di_write_lock_child()'ed, or NULL */
10913+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
10914+ struct inode *dir)
10915+{
10916+ struct dentry *dentry, *d, *parent;
10917+ struct qstr *dname;
10918+
10919+ parent = d_find_alias(dir);
10920+ if (!parent)
10921+ return NULL;
10922+/* scan parent's d_subdirs under dcache_lock for an exact length-and-bytes name match */
10923+ dentry = NULL;
10924+ spin_lock(&dcache_lock);
10925+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
10926+ /* AuDbg("%.*s\n", AuDLNPair(d)); */
10927+ dname = &d->d_name;
10928+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
10929+ continue;
10930+ if (!atomic_read(&d->d_count) || !d->d_fsdata) {
10931+ spin_lock(&d->d_lock);
10932+ __d_drop(d);
10933+ spin_unlock(&d->d_lock);
10934+ continue;
10935+ }
10936+/* a match with zero d_count or no d_fsdata was dropped above and the scan continued; this one is usable */
10937+ dentry = dget(d);
10938+ break;
10939+ }
10940+ spin_unlock(&dcache_lock);
10941+ dput(parent);
10942+/* the di write lock is taken only after dcache_lock has been released */
10943+ if (dentry)
10944+ di_write_lock_child(dentry);
10945+
10946+ return dentry;
10947+}
10948+/* map lower inode number @h_ino on branch @bindex through au_xino_read() and ilookup(); returns the inode ii_write_lock_child()'ed, or NULL */
10949+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
10950+ aufs_bindex_t bindex, ino_t h_ino)
10951+{
10952+ struct inode *inode;
10953+ ino_t ino;
10954+ int err;
10955+/* an au_xino_read() error or a zero ino both end up as "not found" */
10956+ inode = NULL;
10957+ err = au_xino_read(sb, bindex, h_ino, &ino);
10958+ if (!err && ino)
10959+ inode = ilookup(sb, ino);
10960+ if (!inode)
10961+ goto out;
10962+/* the aufs root inode is rejected: warn, drop the ilookup() reference and return NULL */
10963+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
10964+ AuWarn("wrong root branch\n");
10965+ iput(inode);
10966+ inode = NULL;
10967+ goto out;
10968+ }
10969+
10970+ ii_write_lock_child(inode);
10971+
10972+ out:
10973+ return inode;
10974+}
10975+/* CHILD/PARENT index flags[]; postproc_args is filled by aufs_inotify() and consumed (and freed) by postproc(), with the child name stored right after the struct */
10976+enum { CHILD, PARENT };
10977+struct postproc_args {
10978+ struct inode *h_dir, *dir, *h_child_inode;
10979+ u32 mask;
10980+ unsigned int flags[2];
10981+ unsigned int h_child_nlen;
10982+ char h_child_name[];
10983+};
10984+/* deferred half of aufs_inotify(): runs the CHILD then the PARENT jobs, then drops the inode references and frees @_args */
10985+static void postproc(void *_args)
10986+{
10987+ struct postproc_args *a = _args;
10988+ struct super_block *sb;
10989+ aufs_bindex_t bindex, bend, bfound;
10990+ unsigned char xino, try_iput;
10991+ int err;
10992+ struct inode *inode;
10993+ ino_t h_ino;
10994+ struct hin_job_args args;
10995+ struct dentry *dentry;
10996+ struct au_sbinfo *sbinfo;
10997+
10998+ AuDebugOn(!_args);
10999+ AuDebugOn(!a->h_dir);
11000+ AuDebugOn(!a->dir);
11001+ AuDebugOn(!a->mask);
11002+ AuDbg("mask 0x%x %s, i%lu, hi%lu, hci%lu\n",
11003+ a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino,
11004+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
11005+
11006+ inode = NULL;
11007+ dentry = NULL;
11008+ /*
11009+ * do not lock a->dir->i_mutex here
11010+ * because of d_revalidate() may cause a deadlock.
11011+ */
11012+ sb = a->dir->i_sb;
11013+ AuDebugOn(!sb);
11014+ sbinfo = au_sbi(sb);
11015+ AuDebugOn(!sbinfo);
11016+ /* big aufs lock */
11017+ si_noflush_write_lock(sb);
11018+/* find which branch index of a->dir holds the watched lower dir; give up if none does */
11019+ ii_read_lock_parent(a->dir);
11020+ bfound = -1;
11021+ bend = au_ibend(a->dir);
11022+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
11023+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
11024+ bfound = bindex;
11025+ break;
11026+ }
11027+ ii_read_unlock(a->dir);
11028+ if (unlikely(bfound < 0))
11029+ goto out;
11030+
11031+ xino = !!au_opt_test(au_mntflags(sb), XINO);
11032+ h_ino = 0;
11033+ if (a->h_child_inode)
11034+ h_ino = a->h_child_inode->i_ino;
11035+/* locate the aufs child: by name first, then (XINO only) by lower inode number; try_iput marks the by-ino attempt */
11036+ if (a->h_child_nlen
11037+ && (au_ftest_hinjob(a->flags[CHILD], GEN)
11038+ || au_ftest_hinjob(a->flags[CHILD], MNTPNT)))
11039+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
11040+ a->dir);
11041+ try_iput = 0;
11042+ if (dentry)
11043+ inode = dentry->d_inode;
11044+ if (xino && !inode && h_ino
11045+ && (au_ftest_hinjob(a->flags[CHILD], XINO0)
11046+ || au_ftest_hinjob(a->flags[CHILD], TRYXINO0)
11047+ || au_ftest_hinjob(a->flags[CHILD], GEN))) {
11048+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
11049+ try_iput = 1;
11050+ }
11051+/* child jobs; the value stored in err is not examined afterwards */
11052+ args.flags = a->flags[CHILD];
11053+ args.dentry = dentry;
11054+ args.inode = inode;
11055+ args.h_inode = a->h_child_inode;
11056+ args.dir = a->dir;
11057+ args.h_dir = a->h_dir;
11058+ args.h_name = a->h_child_name;
11059+ args.h_nlen = a->h_child_nlen;
11060+ err = hin_job(&args);
11061+ if (dentry) {
11062+ if (dentry->d_fsdata)
11063+ di_write_unlock(dentry);
11064+ dput(dentry);
11065+ }
11066+ if (inode && try_iput) {
11067+ ii_write_unlock(inode);
11068+ iput(inode);
11069+ }
11070+/* parent jobs run on a->dir itself, under its ii write lock, with no dentry and no name */
11071+ ii_write_lock_parent(a->dir);
11072+ args.flags = a->flags[PARENT];
11073+ args.dentry = NULL;
11074+ args.inode = a->dir;
11075+ args.h_inode = a->h_dir;
11076+ args.dir = NULL;
11077+ args.h_dir = NULL;
11078+ args.h_name = NULL;
11079+ args.h_nlen = 0;
11080+ err = hin_job(&args);
11081+ ii_write_unlock(a->dir);
11082+
11083+ out:
11084+ au_nwt_done(&sbinfo->si_nowait);
11085+ si_write_unlock(sb);
11086+/* release the references handed over in the argument block, then the block itself */
11087+ iput(a->h_child_inode);
11088+ iput(a->h_dir);
11089+ iput(a->dir);
11090+ kfree(a);
11091+}
11092+
11093+/* ---------------------------------------------------------------------- */
11094+/* inotify handle_event callback (see aufs_inotify_ops): turns a lower-dir event into job flags and queues postproc() */
11095+static void aufs_inotify(struct inotify_watch *watch, u32 wd __maybe_unused,
11096+ u32 mask, u32 cookie __maybe_unused,
11097+ const char *h_child_name, struct inode *h_child_inode)
11098+{
11099+ struct au_hinotify *hinotify;
11100+ struct postproc_args *args;
11101+ int len, wkq_err;
11102+ unsigned char isdir, isroot, wh;
11103+ char *p;
11104+ struct inode *dir;
11105+ unsigned int flags[2];
11106+
11107+ /* if IN_UNMOUNT happens, there must be another bug */
11108+ AuDebugOn(mask & IN_UNMOUNT);
11109+ if (mask & (IN_IGNORED | IN_UNMOUNT)) {
11110+ put_inotify_watch(watch);
11111+ return;
11112+ }
11113+#ifdef AuDbgHinotify
11114+ au_debug(1);
11115+ if (1 || !h_child_name || strcmp(h_child_name, AUFS_XINO_FNAME)) {
11116+ AuDbg("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s,"
11117+ " hi%lu\n",
11118+ watch->inode->i_ino, wd, mask, in_name(mask), cookie,
11119+ h_child_name ? h_child_name : "",
11120+ h_child_inode ? h_child_inode->i_ino : 0);
11121+ WARN_ON(1);
11122+ }
11123+ au_debug(0);
11124+#endif
11125+
11126+ hinotify = container_of(watch, struct au_hinotify, hin_watch);
11127+ AuDebugOn(!hinotify || !hinotify->hin_aufs_inode);
11128+ dir = igrab(hinotify->hin_aufs_inode);
11129+ if (!dir)
11130+ return;
11131+/* strip a leading AUFS_WH_PFX from the child name and remember that it was there (wh) */
11132+ isroot = (dir->i_ino == AUFS_ROOT_INO);
11133+ len = 0;
11134+ wh = 0;
11135+ if (h_child_name) {
11136+ len = strlen(h_child_name);
11137+ if (!memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
11138+ h_child_name += AUFS_WH_PFX_LEN;
11139+ len -= AUFS_WH_PFX_LEN;
11140+ wh = 1;
11141+ }
11142+ }
11143+/* the parent always carries ISDIR; the child does only when its lower inode is a directory */
11144+ isdir = 0;
11145+ if (h_child_inode)
11146+ isdir = !!S_ISDIR(h_child_inode->i_mode);
11147+ flags[PARENT] = AuHinJob_ISDIR;
11148+ flags[CHILD] = 0;
11149+ if (isdir)
11150+ flags[CHILD] = AuHinJob_ISDIR;
11151+ switch (mask & IN_ALL_EVENTS) {
11152+ case IN_MOVED_FROM:
11153+ case IN_MOVED_TO:
11154+ AuDebugOn(!h_child_name || !h_child_inode);
11155+ au_fset_hinjob(flags[CHILD], GEN);
11156+ au_fset_hinjob(flags[CHILD], XINO0);
11157+ au_fset_hinjob(flags[CHILD], MNTPNT);
11158+ au_fset_hinjob(flags[PARENT], DIRENT);
11159+ break;
11160+
11161+ case IN_CREATE:
11162+ AuDebugOn(!h_child_name || !h_child_inode);
11163+ au_fset_hinjob(flags[PARENT], DIRENT);
11164+ au_fset_hinjob(flags[CHILD], GEN);
11165+ break;
11166+
11167+ case IN_DELETE:
11168+ /*
11169+ * aufs never be able to get this child inode.
11170+ * revalidation should be in d_revalidate()
11171+ * by checking i_nlink, i_generation or d_unhashed().
11172+ */
11173+ AuDebugOn(!h_child_name);
11174+ au_fset_hinjob(flags[PARENT], DIRENT);
11175+ au_fset_hinjob(flags[CHILD], GEN);
11176+ au_fset_hinjob(flags[CHILD], TRYXINO0);
11177+ au_fset_hinjob(flags[CHILD], MNTPNT);
11178+ break;
11179+
11180+ default:
11181+ AuDebugOn(1);
11182+ }
11183+/* for a whiteout-prefixed name the lower child inode is not passed on */
11184+ if (wh)
11185+ h_child_inode = NULL;
11186+
11187+ /* iput() and kfree() will be called in postproc() */
11188+ /*
11189+ * inotify_mutex is already acquired and kmalloc/prune_icache may lock
11190+ * iprune_mutex. strange.
11191+ */
11192+ lockdep_off();
11193+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
11194+ lockdep_on();
11195+ if (unlikely(!args)) {
11196+ AuErr1("no memory\n");
11197+ iput(dir);
11198+ return;
11199+ }
11200+ args->flags[PARENT] = flags[PARENT];
11201+ args->flags[CHILD] = flags[CHILD];
11202+ args->mask = mask;
11203+ args->dir = dir;
11204+ args->h_dir = igrab(watch->inode);
11205+ if (h_child_inode)
11206+ h_child_inode = igrab(h_child_inode); /* can be NULL */
11207+ args->h_child_inode = h_child_inode;
11208+ args->h_child_nlen = len;
11209+ if (len) {
11210+ p = (void *)args;
11211+ p += sizeof(*args);
11212+ memcpy(p, h_child_name, len + 1);
11213+ }
11214+/* NOTE(review): if au_wkq_nowait() fails, args and the grabbed inodes look leaked, since only postproc() releases them -- confirm */
11215+ lockdep_off();
11216+ wkq_err = au_wkq_nowait(postproc, args, dir->i_sb);
11217+ lockdep_on();
11218+ if (unlikely(wkq_err))
11219+ AuErr("wkq %d\n", wkq_err);
11220+}
11221+/* destroy_watch callback (see aufs_inotify_ops): intentionally does nothing */
11222+static void aufs_inotify_destroy(struct inotify_watch *watch __maybe_unused)
11223+{
11224+ return;
11225+}
11226+/* callback table handed to inotify_init() in au_hinotify_init() */
11227+static struct inotify_operations aufs_inotify_ops = {
11228+ .handle_event = aufs_inotify,
11229+ .destroy_watch = aufs_inotify_destroy
11230+};
11231+
11232+/* ---------------------------------------------------------------------- */
11233+/* destroy the AuCache_HINOTIFY slab cache and clear its slot so au_hinotify_fin() will not destroy it again */
11234+static void au_hin_destroy_cache(void)
11235+{
11236+ kmem_cache_destroy(au_cachep[AuCache_HINOTIFY]);
11237+ au_cachep[AuCache_HINOTIFY] = NULL;
11238+}
11239+/* create the hinotify cache and the inotify handle; returns 0, -ENOMEM, or the inotify_init() error (cache undone in that case) */
11240+int __init au_hinotify_init(void)
11241+{
11242+ int err;
11243+
11244+ err = -ENOMEM;
11245+ au_cachep[AuCache_HINOTIFY] = AuCache(au_hinotify);
11246+ if (au_cachep[AuCache_HINOTIFY]) {
11247+ err = 0;
11248+ au_hin_handle = inotify_init(&aufs_inotify_ops);
11249+ if (IS_ERR(au_hin_handle)) {
11250+ err = PTR_ERR(au_hin_handle);
11251+ au_hin_destroy_cache();
11252+ }
11253+ }
11254+ AuTraceErr(err);
11255+ return err;
11256+}
11257+/* teardown counterpart of au_hinotify_init(): destroy the inotify handle, then the cache if it still exists */
11258+void au_hinotify_fin(void)
11259+{
11260+ inotify_destroy(au_hin_handle);
11261+ if (au_cachep[AuCache_HINOTIFY])
11262+ au_hin_destroy_cache();
11263+}
1308ab2a 11264diff -uprN -x .git linux-2.6.31/fs/aufs/i_op.c aufs2-2.6.git/fs/aufs/i_op.c
11265--- linux-2.6.31/fs/aufs/i_op.c 1970-01-01 00:00:00.000000000 +0000
11266+++ aufs2-2.6.git/fs/aufs/i_op.c 2009-09-21 21:49:23.401607657 +0000
11267@@ -0,0 +1,883 @@
1facf9fc 11268+/*
11269+ * Copyright (C) 2005-2009 Junjiro R. Okajima
11270+ *
11271+ * This program, aufs is free software; you can redistribute it and/or modify
11272+ * it under the terms of the GNU General Public License as published by
11273+ * the Free Software Foundation; either version 2 of the License, or
11274+ * (at your option) any later version.
dece6358
AM
11275+ *
11276+ * This program is distributed in the hope that it will be useful,
11277+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11278+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11279+ * GNU General Public License for more details.
11280+ *
11281+ * You should have received a copy of the GNU General Public License
11282+ * along with this program; if not, write to the Free Software
11283+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 11284+ */
11285+
11286+/*
1308ab2a 11287+ * inode operations (except add/del/rename)
1facf9fc 11288+ */
11289+
1308ab2a 11290+#include <linux/device_cgroup.h>
11291+#include <linux/fs_stack.h>
11292+#include <linux/mm.h>
11293+#include <linux/namei.h>
11294+#include <linux/security.h>
11295+#include <linux/uaccess.h>
11296+#include "aufs.h"
1facf9fc 11297+/* permission check on one lower inode @h_inode for @mask; @h_mnt and @brperm describe its branch. returns 0 or a negative error */
1308ab2a 11298+static int h_permission(struct inode *h_inode, int mask,
11299+ struct vfsmount *h_mnt, int brperm)
dece6358 11300+{
1308ab2a 11301+ int err;
11302+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 11303+/* quick -EACCES: write to an immutable inode, or exec of a regular file on a noexec mount or without any x bit */
1308ab2a 11304+ err = -EACCES;
11305+ if ((write_mask && IS_IMMUTABLE(h_inode))
11306+ || ((mask & MAY_EXEC)
11307+ && S_ISREG(h_inode->i_mode)
11308+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
11309+ || !(h_inode->i_mode & S_IXUGO))))
11310+ goto out;
1facf9fc 11311+
1308ab2a 11312+ /*
11313+ * - skip the lower fs test in the case of write to ro branch.
11314+ * - nfs dir permission write check is optimized, but a policy for
11315+ * link/rename requires a real check.
11316+ */
11317+ if ((write_mask && !au_br_writable(brperm))
11318+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
11319+ && write_mask && !(mask & MAY_READ))
11320+ || !h_inode->i_op->permission) {
11321+ /* AuLabel(generic_permission); */
11322+ err = generic_permission(h_inode, mask, NULL);
11323+ } else {
11324+ /* AuLabel(h_inode->permission); */
11325+ err = h_inode->i_op->permission(h_inode, mask);
11326+ AuTraceErr(err);
11327+ }
1facf9fc 11328+/* on success so far, also ask the device cgroup and the security module */
1308ab2a 11329+ if (!err)
11330+ err = devcgroup_inode_permission(h_inode, mask);
11331+ if (!err)
11332+ err = security_inode_permission
11333+ (h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC
11334+ | MAY_APPEND));
1facf9fc 11335+/* the #if 0 block below is disabled and incomplete as written (.dentry has no initializer) */
1308ab2a 11336+#if 0
11337+ if (!err) {
11338+ /* todo: do we need to call ima_path_check()? */
11339+ struct path h_path = {
11340+ .dentry =
11341+ .mnt = h_mnt
11342+ };
11343+ err = ima_path_check(&h_path,
11344+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
11345+ IMA_COUNT_LEAVE);
11346+ }
11347+#endif
11348+
11349+ out:
11350+ return err;
dece6358 11351+}
1facf9fc 11352+/* aufs permission check: non-dir or write access tests the top lower inode only; non-write access to a dir tests every lower dir */
1308ab2a 11353+static int aufs_permission(struct inode *inode, int mask)
dece6358 11354+{
1308ab2a 11355+ int err;
11356+ aufs_bindex_t bindex, bend;
11357+ const unsigned char isdir = !!S_ISDIR(inode->i_mode);
11358+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
11359+ struct inode *h_inode;
11360+ struct super_block *sb;
11361+ struct au_branch *br;
1facf9fc 11362+
1308ab2a 11363+ sb = inode->i_sb;
11364+ si_read_lock(sb, AuLock_FLUSH);
11365+ ii_read_lock_child(inode);
dece6358 11366+/* au_busy_or_stale() is the result when the top lower inode is missing or its file type differs from the aufs inode */
1308ab2a 11367+ if (!isdir || write_mask) {
11368+ err = au_busy_or_stale();
11369+ h_inode = au_h_iptr(inode, au_ibstart(inode));
11370+ if (unlikely(!h_inode
11371+ || (h_inode->i_mode & S_IFMT)
11372+ != (inode->i_mode & S_IFMT)))
11373+ goto out;
dece6358 11374+
1308ab2a 11375+ err = 0;
11376+ bindex = au_ibstart(inode);
dece6358 11377+ br = au_sbr(sb, bindex);
1308ab2a 11378+ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
11379+ if (write_mask && !err) {
11380+ /* test whether the upper writable branch exists */
11381+ err = -EROFS;
11382+ for (; bindex >= 0; bindex--)
11383+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
11384+ err = 0;
11385+ break;
11386+ }
1facf9fc 11387+ }
1308ab2a 11388+ goto out;
11389+ }
1facf9fc 11390+
1308ab2a 11391+ /* non-write to dir: stop at the first error or at a lower inode that is not a dir */
11392+ err = 0;
11393+ bend = au_ibend(inode);
11394+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
11395+ h_inode = au_h_iptr(inode, bindex);
11396+ if (h_inode) {
11397+ err = au_busy_or_stale();
11398+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
11399+ break;
11400+
11401+ br = au_sbr(sb, bindex);
11402+ err = h_permission(h_inode, mask, br->br_mnt,
11403+ br->br_perm);
1facf9fc 11404+ }
11405+ }
dece6358 11406+
1308ab2a 11407+ out:
11408+ ii_read_unlock(inode);
11409+ si_read_unlock(sb);
11410+ return err;
1facf9fc 11411+}
11412+
1308ab2a 11413+/* ---------------------------------------------------------------------- */
1facf9fc 11414+/* lookup for @dentry under @dir: look it up on the branches, build the aufs inode if something positive was found, then splice */
1308ab2a 11415+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
11416+ struct nameidata *nd)
1facf9fc 11417+{
1308ab2a 11418+ struct dentry *ret, *parent;
11419+ struct inode *inode, *h_inode;
11420+ struct mutex *mtx;
11421+ struct super_block *sb;
11422+ int err, npositive;
11423+ aufs_bindex_t bstart;
1facf9fc 11424+
1308ab2a 11425+ IMustLock(dir);
1facf9fc 11426+
1308ab2a 11427+ sb = dir->i_sb;
11428+ si_read_lock(sb, AuLock_FLUSH);
11429+ err = au_alloc_dinfo(dentry);
11430+ ret = ERR_PTR(err);
11431+ if (unlikely(err))
11432+ goto out;
1facf9fc 11433+/* au_lkup_dentry() returns a negative error or a count; non-zero means a positive lower dentry exists */
1308ab2a 11434+ parent = dentry->d_parent; /* dir inode is locked */
11435+ di_read_lock_parent(parent, AuLock_IR);
11436+ npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd);
11437+ di_read_unlock(parent, AuLock_IR);
11438+ err = npositive;
11439+ ret = ERR_PTR(err);
11440+ if (unlikely(err < 0))
11441+ goto out_unlock;
1facf9fc 11442+
1308ab2a 11443+ inode = NULL;
11444+ if (npositive) {
11445+ bstart = au_dbstart(dentry);
11446+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
11447+ if (!S_ISDIR(h_inode->i_mode)) {
11448+ /*
11449+ * stop 'race'-ing between hardlinks under different
11450+ * parents.
11451+ */
11452+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
11453+ mutex_lock(mtx);
11454+ inode = au_new_inode(dentry, /*must_new*/0);
11455+ mutex_unlock(mtx);
11456+ } else
11457+ inode = au_new_inode(dentry, /*must_new*/0);
11458+ ret = (void *)inode;
1facf9fc 11459+ }
1308ab2a 11460+ if (IS_ERR(inode))
11461+ goto out_unlock;
1facf9fc 11462+/* inode is NULL here for a negative lookup; ii_write_unlock() is called only when d_splice_alias() returned an error */
1308ab2a 11463+ ret = d_splice_alias(inode, dentry);
11464+ if (unlikely(IS_ERR(ret) && inode))
11465+ ii_write_unlock(inode);
1facf9fc 11466+
1308ab2a 11467+ out_unlock:
11468+ di_write_unlock(dentry);
11469+ out:
11470+ si_read_unlock(sb);
11471+ return ret;
1facf9fc 11472+}
11473+
11474+/* ---------------------------------------------------------------------- */
11475+/* make @parent exist on branch @bcpup (cpdown when bstart < bcpup, else cpup); returns @bcpup on success or an error */
1308ab2a 11476+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
11477+ const unsigned char add_entry, aufs_bindex_t bcpup,
11478+ aufs_bindex_t bstart)
1facf9fc 11479+{
1308ab2a 11480+ int err;
11481+ struct dentry *h_parent;
11482+ struct inode *h_dir;
1facf9fc 11483+/* with add_entry the parent inode must already be locked by the caller; otherwise take the parent di write lock here */
1308ab2a 11484+ if (add_entry) {
11485+ au_update_dbstart(dentry);
11486+ IMustLock(parent->d_inode);
11487+ } else
11488+ di_write_lock_parent(parent);
1facf9fc 11489+
1308ab2a 11490+ err = 0;
11491+ if (!au_h_dptr(parent, bcpup)) {
11492+ if (bstart < bcpup)
11493+ err = au_cpdown_dirs(dentry, bcpup);
11494+ else
11495+ err = au_cpup_dirs(dentry, bcpup);
1facf9fc 11496+ }
1308ab2a 11497+ if (!err && add_entry) {
11498+ h_parent = au_h_dptr(parent, bcpup);
11499+ h_dir = h_parent->d_inode;
11500+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
11501+ err = au_lkup_neg(dentry, bcpup);
11502+ /* todo: no unlock here */
11503+ mutex_unlock(&h_dir->i_mutex);
11504+ if (bstart < bcpup && au_dbstart(dentry) < 0) {
11505+ au_set_dbstart(dentry, 0);
11506+ au_update_dbrange(dentry, /*do_put_zero*/0);
11507+ }
dece6358 11508+ }
1facf9fc 11509+
1308ab2a 11510+ if (!add_entry)
11511+ di_write_unlock(parent);
11512+ if (!err)
11513+ err = bcpup; /* success */
dece6358 11514+
dece6358
AM
11515+ return err;
11516+}
11517+
1308ab2a 11518+/*
11519+ * decide the branch and the parent dir where we will create a new entry.
11520+ * returns new bindex (>= 0) or a negative error.
11521+ * copyup the parent dir if needed; args->force_btgt >= 0 forces the target branch.
11522+ */
11523+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
11524+ struct au_wr_dir_args *args)
dece6358 11525+{
1308ab2a 11526+ int err;
11527+ aufs_bindex_t bcpup, bstart, src_bstart;
11528+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
11529+ ADD_ENTRY);
1facf9fc 11530+ struct super_block *sb;
1308ab2a 11531+ struct dentry *parent;
11532+ struct au_sbinfo *sbinfo;
1facf9fc 11533+
1308ab2a 11534+ sb = dentry->d_sb;
11535+ sbinfo = au_sbi(sb);
11536+ parent = dget_parent(dentry);
11537+ bstart = au_dbstart(dentry);
11538+ bcpup = bstart;
11539+ if (args->force_btgt < 0) {
11540+ if (src_dentry) {
11541+ src_bstart = au_dbstart(src_dentry);
11542+ if (src_bstart < bstart)
11543+ bcpup = src_bstart;
11544+ } else if (add_entry) {
11545+ err = AuWbrCreate(sbinfo, dentry,
11546+ au_ftest_wrdir(args->flags, ISDIR));
11547+ bcpup = err;
11548+ }
dece6358 11549+/* no usable branch yet (negative, or au_test_ro() says read-only): ask the copy-up policy instead */
1308ab2a 11550+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
11551+ if (add_entry)
11552+ err = AuWbrCopyup(sbinfo, dentry);
11553+ else {
11554+ if (!IS_ROOT(dentry)) {
11555+ di_read_lock_parent(parent, !AuLock_IR);
11556+ err = AuWbrCopyup(sbinfo, dentry);
11557+ di_read_unlock(parent, !AuLock_IR);
11558+ } else
11559+ err = AuWbrCopyup(sbinfo, dentry);
1facf9fc 11560+ }
1308ab2a 11561+ bcpup = err;
11562+ if (unlikely(err < 0))
11563+ goto out;
1facf9fc 11564+ }
1308ab2a 11565+ } else {
11566+ bcpup = args->force_btgt;
11567+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1facf9fc 11568+ }
1308ab2a 11569+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
11570+ if (bstart < bcpup)
11571+ au_update_dbrange(dentry, /*do_put_zero*/1);
1facf9fc 11572+/* the chosen branch is already the dentry's start branch: nothing to copy up */
1308ab2a 11573+ err = bcpup;
11574+ if (bcpup == bstart)
11575+ goto out; /* success */
1facf9fc 11576+
1308ab2a 11577+ /* copyup the new parent into the branch we process */
11578+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
1facf9fc 11579+
1308ab2a 11580+ out:
11581+ dput(parent);
11582+ return err;
11583+}
dece6358 11584+
1308ab2a 11585+/* ---------------------------------------------------------------------- */
11586+/* lower dentry of the pinned parent on the pinned branch, or NULL when @pin is NULL or holds no parent */
11587+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1facf9fc 11588+{
1308ab2a 11589+ if (pin && pin->parent)
11590+ return au_h_dptr(pin->parent, pin->bindex);
11591+ return NULL;
1facf9fc 11592+}
11593+/* undo au_do_pin(): drop the mount write access if MNT_WRITE is set, then release the lower dir and the parent if any are held */
1308ab2a 11594+void au_unpin(struct au_pin *p)
1facf9fc 11595+{
1308ab2a 11596+ if (au_ftest_pin(p->flags, MNT_WRITE))
11597+ mnt_drop_write(p->h_mnt);
11598+ if (!p->hdir)
11599+ return;
11600+/* the parent di lock is released here only when DI_LOCKED is not set */
11601+ au_hin_imtx_unlock(p->hdir);
11602+ if (!au_ftest_pin(p->flags, DI_LOCKED))
11603+ di_read_unlock(p->parent, AuLock_IR);
11604+ iput(p->hdir->hi_inode);
11605+ dput(p->parent);
11606+ p->parent = NULL;
11607+ p->hdir = NULL;
11608+ p->h_mnt = NULL;
1facf9fc 11609+}
11610+/* pin the parent of p->dentry on branch p->bindex as set up by au_pin_init(); returns 0, or au_busy_or_stale() on any failure */
1308ab2a 11611+int au_do_pin(struct au_pin *p)
1facf9fc 11612+{
11613+ int err;
11614+ struct super_block *sb;
1308ab2a 11615+ struct dentry *h_dentry, *h_parent;
11616+ struct au_branch *br;
11617+ struct inode *h_dir;
1facf9fc 11618+/* a root dentry has no parent to pin: only the optional mnt_want_write() is done */
1308ab2a 11619+ err = 0;
11620+ sb = p->dentry->d_sb;
11621+ br = au_sbr(sb, p->bindex);
11622+ if (IS_ROOT(p->dentry)) {
11623+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
11624+ p->h_mnt = br->br_mnt;
11625+ err = mnt_want_write(p->h_mnt);
11626+ if (unlikely(err)) {
11627+ au_fclr_pin(p->flags, MNT_WRITE);
11628+ goto out_err;
11629+ }
11630+ }
1facf9fc 11631+ goto out;
1308ab2a 11632+ }
1facf9fc 11633+
1308ab2a 11634+ h_dentry = NULL;
11635+ if (p->bindex <= au_dbend(p->dentry))
11636+ h_dentry = au_h_dptr(p->dentry, p->bindex);
1facf9fc 11637+/* take a parent reference and, unless the caller already holds it (DI_LOCKED), the parent di read lock */
1308ab2a 11638+ p->parent = dget_parent(p->dentry);
11639+ if (!au_ftest_pin(p->flags, DI_LOCKED))
11640+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
1facf9fc 11641+
1308ab2a 11642+ h_dir = NULL;
11643+ h_parent = au_h_dptr(p->parent, p->bindex);
11644+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
11645+ if (p->hdir)
11646+ h_dir = p->hdir->hi_inode;
11647+
11648+ /* udba case: err is still 0 on this path, so out_err logs "err 0" before converting */
11649+ if (unlikely(!p->hdir || !h_dir)) {
11650+ if (!au_ftest_pin(p->flags, DI_LOCKED))
11651+ di_read_unlock(p->parent, AuLock_IR);
11652+ dput(p->parent);
11653+ p->parent = NULL;
11654+ goto out_err;
11655+ }
11656+
11657+ au_igrab(h_dir);
11658+ au_hin_imtx_lock_nested(p->hdir, p->lsc_hi);
11659+/* with the lower dir mutex held, check that it is still the inode of the parent's lower dentry */
11660+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
11661+ err = -EBUSY;
11662+ goto out_unpin;
11663+ }
11664+ if (h_dentry) {
11665+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
11666+ if (unlikely(err)) {
11667+ au_fclr_pin(p->flags, MNT_WRITE);
11668+ goto out_unpin;
1facf9fc 11669+ }
1308ab2a 11670+ }
1facf9fc 11671+/* MNT_WRITE is cleared on failure so that au_unpin() does not call mnt_drop_write() */
1308ab2a 11672+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
11673+ p->h_mnt = br->br_mnt;
11674+ err = mnt_want_write(p->h_mnt);
11675+ if (unlikely(err)) {
11676+ au_fclr_pin(p->flags, MNT_WRITE);
11677+ goto out_unpin;
1facf9fc 11678+ }
11679+ }
1308ab2a 11680+ goto out; /* success */
1facf9fc 11681+
1308ab2a 11682+ out_unpin:
11683+ au_unpin(p);
11684+ out_err:
11685+ AuErr("err %d\n", err);
11686+ err = au_busy_or_stale();
1facf9fc 11687+ out:
11688+ return err;
11689+}
11690+/* fill @p with the caller's parameters and reset the fields that au_do_pin() sets (parent, hdir, h_mnt) */
1308ab2a 11691+void au_pin_init(struct au_pin *p, struct dentry *dentry,
11692+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
11693+ unsigned int udba, unsigned char flags)
1facf9fc 11694+{
1308ab2a 11695+ p->dentry = dentry;
11696+ p->udba = udba;
11697+ p->lsc_di = lsc_di;
11698+ p->lsc_hi = lsc_hi;
11699+ p->flags = flags;
11700+ p->bindex = bindex;
1facf9fc 11701+
1308ab2a 11702+ p->parent = NULL;
11703+ p->hdir = NULL;
11704+ p->h_mnt = NULL;
11705+}
1facf9fc 11706+/* convenience wrapper: au_pin_init() with AuLsc_DI_PARENT / AuLsc_I_PARENT2, then au_do_pin(); returns au_do_pin()'s result */
1308ab2a 11707+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
11708+ unsigned int udba, unsigned char flags)
11709+{
11710+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
11711+ udba, flags);
11712+ return au_do_pin(pin);
11713+}
1facf9fc 11714+
1308ab2a 11715+/* ---------------------------------------------------------------------- */
1facf9fc 11716+/* au_icpup_args.flags bit and its test/set/clear macros; au_lock_and_icpup() sets DID_CPUP when au_wr_dir() picks a branch other than the dentry's start */
1308ab2a 11717+#define AuIcpup_DID_CPUP 1
11718+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
11719+#define au_fset_icpup(flags, name) { (flags) |= AuIcpup_##name; }
11720+#define au_fclr_icpup(flags, name) { (flags) &= ~AuIcpup_##name; }
11721+/* working state of au_lock_and_icpup(): AuIcpup_* flags, pin flags, target branch, the pin, and the lower path/inode to operate on */
11722+struct au_icpup_args {
11723+ unsigned char flags;
11724+ unsigned char pin_flags;
11725+ aufs_bindex_t btgt;
11726+ struct au_pin pin;
11727+ struct path h_path;
11728+ struct inode *h_inode;
11729+};
11730+/* lock @dentry, choose the target branch, pin the parent and copy the inode up if needed; on success the dentry, pin and a->h_inode->i_mutex stay held */
11731+static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia,
11732+ struct au_icpup_args *a)
11733+{
11734+ int err;
11735+ unsigned int udba;
11736+ loff_t sz;
11737+ aufs_bindex_t bstart;
11738+ struct dentry *hi_wh, *parent;
11739+ struct inode *inode;
11740+ struct au_wr_dir_args wr_dir_args = {
11741+ .force_btgt = -1,
11742+ .flags = 0
11743+ };
11744+
11745+ di_write_lock_child(dentry);
11746+ bstart = au_dbstart(dentry);
11747+ inode = dentry->d_inode;
11748+ if (S_ISDIR(inode->i_mode))
11749+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
11750+ /* plink or hi_wh() case */
11751+ if (bstart != au_ibstart(inode))
11752+ wr_dir_args.force_btgt = au_ibstart(inode);
11753+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11754+ if (unlikely(err < 0))
11755+ goto out_dentry;
11756+ a->btgt = err;
11757+ if (err != bstart)
11758+ au_fset_icpup(a->flags, DID_CPUP);
11759+/* for a non-root dentry the parent is write-locked here and the pin is told so through DI_LOCKED */
11760+ err = 0;
11761+ a->pin_flags = AuPin_MNT_WRITE;
11762+ parent = NULL;
11763+ if (!IS_ROOT(dentry)) {
11764+ au_fset_pin(a->pin_flags, DI_LOCKED);
11765+ parent = dget_parent(dentry);
11766+ di_write_lock_parent(parent);
11767+ }
11768+/* udba is forced to AuOpt_UDBA_NONE for an unhashed dentry or when ATTR_FILE is set */
11769+ udba = au_opt_udba(dentry->d_sb);
11770+ if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE))
11771+ udba = AuOpt_UDBA_NONE;
11772+ err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags);
11773+ if (unlikely(err)) {
11774+ if (parent) {
11775+ di_write_unlock(parent);
11776+ dput(parent);
dece6358 11777+ }
1308ab2a 11778+ goto out_dentry;
dece6358 11779+ }
1308ab2a 11780+ a->h_path.dentry = au_h_dptr(dentry, bstart);
11781+ a->h_inode = a->h_path.dentry->d_inode;
11782+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11783+ sz = -1;
11784+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
11785+ sz = ia->ia_size;
1facf9fc 11786+/* sz is -1 unless ATTR_SIZE asks for a size below the current lower size; it is passed to the copy-up helpers */
1308ab2a 11787+ hi_wh = NULL;
11788+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) {
11789+ hi_wh = au_hi_wh(inode, a->btgt);
11790+ if (!hi_wh) {
11791+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
11792+ if (unlikely(err))
11793+ goto out_unlock;
11794+ hi_wh = au_hi_wh(inode, a->btgt);
11795+ /* todo: revalidate hi_wh? */
11796+ }
11797+ }
1facf9fc 11798+/* NOTE(review): the goto out_unlock above seems to leave 'parent' write-locked and referenced (au_unpin() skips the unlock when DI_LOCKED is set) -- confirm */
1308ab2a 11799+ if (parent) {
11800+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
11801+ di_downgrade_lock(parent, AuLock_IR);
11802+ dput(parent);
11803+ }
11804+ if (!au_ftest_icpup(a->flags, DID_CPUP))
11805+ goto out; /* success */
11806+
11807+ if (!d_unhashed(dentry)) {
11808+ err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME);
11809+ if (!err)
11810+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
11811+ } else if (!hi_wh)
11812+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
11813+ else
11814+ a->h_path.dentry = hi_wh; /* do not dget here */
dece6358 11815+/* swap i_mutex from the old lower inode to the one a->h_path now names; it is re-taken only on success */
1308ab2a 11816+ out_unlock:
11817+ mutex_unlock(&a->h_inode->i_mutex);
11818+ a->h_inode = a->h_path.dentry->d_inode;
11819+ if (!err) {
11820+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11821+ goto out; /* success */
11822+ }
11823+
11824+ au_unpin(&a->pin);
11825+
11826+ out_dentry:
11827+ di_write_unlock(dentry);
dece6358
AM
11828+ out:
11829+ return err;
11830+}
11831+
1308ab2a 11832+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 11833+{
11834+ int err;
1308ab2a 11835+ struct inode *inode;
11836+ struct super_block *sb;
11837+ struct file *file;
11838+ struct au_icpup_args *a;
1facf9fc 11839+
1308ab2a 11840+ err = -ENOMEM;
11841+ a = kzalloc(sizeof(*a), GFP_NOFS);
11842+ if (unlikely(!a))
dece6358 11843+ goto out;
1facf9fc 11844+
1308ab2a 11845+ inode = dentry->d_inode;
11846+ IMustLock(inode);
11847+ sb = dentry->d_sb;
11848+ si_read_lock(sb, AuLock_FLUSH);
1facf9fc 11849+
1308ab2a 11850+ file = NULL;
11851+ if (ia->ia_valid & ATTR_FILE) {
11852+ /* currently ftruncate(2) only */
11853+ file = ia->ia_file;
11854+ fi_write_lock(file);
11855+ ia->ia_file = au_h_fptr(file, au_fbstart(file));
11856+ }
1facf9fc 11857+
1308ab2a 11858+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
11859+ ia->ia_valid &= ~ATTR_MODE;
1facf9fc 11860+
1308ab2a 11861+ err = au_lock_and_icpup(dentry, ia, a);
11862+ if (unlikely(err < 0))
11863+ goto out_si;
11864+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
11865+ ia->ia_file = NULL;
11866+ ia->ia_valid &= ~ATTR_FILE;
11867+ }
1facf9fc 11868+
1308ab2a 11869+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
11870+ if (ia->ia_valid & ATTR_SIZE) {
11871+ struct file *f;
1facf9fc 11872+
1308ab2a 11873+ if (ia->ia_size < i_size_read(inode)) {
11874+ /* unmap only */
11875+ err = vmtruncate(inode, ia->ia_size);
11876+ if (unlikely(err))
11877+ goto out_unlock;
dece6358 11878+ }
1facf9fc 11879+
1308ab2a 11880+ f = NULL;
11881+ if (ia->ia_valid & ATTR_FILE)
11882+ f = ia->ia_file;
11883+ mutex_unlock(&a->h_inode->i_mutex);
11884+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
11885+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11886+ } else
11887+ err = vfsub_notify_change(&a->h_path, ia);
11888+ if (!err)
11889+ au_cpup_attr_changeable(inode);
11890+
11891+ out_unlock:
11892+ mutex_unlock(&a->h_inode->i_mutex);
11893+ au_unpin(&a->pin);
11894+ di_write_unlock(dentry);
11895+ out_si:
11896+ if (file) {
11897+ fi_write_unlock(file);
11898+ ia->ia_file = file;
11899+ ia->ia_valid |= ATTR_FILE;
11900+ }
11901+ si_read_unlock(sb);
11902+ kfree(a);
dece6358
AM
11903+ out:
11904+ return err;
11905+}
1facf9fc 11906+
1308ab2a 11907+static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen)
dece6358 11908+{
1308ab2a 11909+ int err;
dece6358 11910+ struct inode *inode;
1308ab2a 11911+ struct dentry *parent;
dece6358 11912+
1308ab2a 11913+ err = 0;
11914+ inode = dentry->d_inode;
11915+ di_write_lock_child(dentry);
11916+ if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) {
11917+ parent = dget_parent(dentry);
11918+ di_read_lock_parent(parent, AuLock_IR);
11919+ /* returns a number of positive dentries */
11920+ err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
11921+ if (err >= 0)
11922+ err = au_refresh_hinode(inode, dentry);
11923+ di_read_unlock(parent, AuLock_IR);
11924+ dput(parent);
1facf9fc 11925+ }
1308ab2a 11926+ di_downgrade_lock(dentry, AuLock_IR);
11927+ if (unlikely(err))
11928+ di_read_unlock(dentry, AuLock_IR);
1facf9fc 11929+
1308ab2a 11930+ AuTraceErr(err);
11931+ return err;
11932+}
1facf9fc 11933+
1308ab2a 11934+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
11935+ unsigned int nlink)
11936+{
11937+ inode->i_mode = st->mode;
11938+ inode->i_uid = st->uid;
11939+ inode->i_gid = st->gid;
11940+ inode->i_atime = st->atime;
11941+ inode->i_mtime = st->mtime;
11942+ inode->i_ctime = st->ctime;
1facf9fc 11943+
1308ab2a 11944+ au_cpup_attr_nlink(inode, /*force*/0);
11945+ if (S_ISDIR(inode->i_mode)) {
11946+ inode->i_nlink -= nlink;
11947+ inode->i_nlink += st->nlink;
1facf9fc 11948+ }
dece6358 11949+
1308ab2a 11950+ spin_lock(&inode->i_lock);
11951+ inode->i_blocks = st->blocks;
11952+ i_size_write(inode, st->size);
11953+ spin_unlock(&inode->i_lock);
1facf9fc 11954+}
11955+
1308ab2a 11956+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
11957+ struct dentry *dentry, struct kstat *st)
1facf9fc 11958+{
11959+ int err;
1308ab2a 11960+ unsigned int mnt_flags;
11961+ aufs_bindex_t bindex;
11962+ unsigned char udba_none, positive;
11963+ struct super_block *sb, *h_sb;
11964+ struct inode *inode;
11965+ struct vfsmount *h_mnt;
11966+ struct dentry *h_dentry;
1facf9fc 11967+
1308ab2a 11968+ err = 0;
11969+ sb = dentry->d_sb;
11970+ inode = dentry->d_inode;
11971+ si_read_lock(sb, AuLock_FLUSH);
11972+ mnt_flags = au_mntflags(sb);
11973+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
dece6358 11974+
1308ab2a 11975+ /* support fstat(2) */
11976+ if (!d_unhashed(dentry) && !udba_none) {
11977+ unsigned int sigen = au_sigen(sb);
11978+ if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
11979+ di_read_lock_child(dentry, AuLock_IR);
11980+ else {
11981+ AuDebugOn(IS_ROOT(dentry));
11982+ err = au_getattr_lock_reval(dentry, sigen);
11983+ if (unlikely(err))
11984+ goto out;
11985+ }
11986+ } else
11987+ di_read_lock_child(dentry, AuLock_IR);
11988+
11989+ bindex = au_ibstart(inode);
11990+ h_mnt = au_sbr_mnt(sb, bindex);
11991+ h_sb = h_mnt->mnt_sb;
11992+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
11993+ goto out_fill; /* success */
11994+
11995+ h_dentry = NULL;
11996+ if (au_dbstart(dentry) == bindex)
11997+ h_dentry = dget(au_h_dptr(dentry, bindex));
11998+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
11999+ h_dentry = au_plink_lkup(inode, bindex);
12000+ if (IS_ERR(h_dentry))
12001+ goto out_fill; /* pretending success */
12002+ }
12003+ /* illegally overlapped or something */
12004+ if (unlikely(!h_dentry))
12005+ goto out_fill; /* pretending success */
12006+
12007+ positive = !!h_dentry->d_inode;
12008+ if (positive)
12009+ err = vfs_getattr(h_mnt, h_dentry, st);
12010+ dput(h_dentry);
12011+ if (!err) {
12012+ if (positive)
12013+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
12014+ goto out_fill; /* success */
1facf9fc 12015+ }
1308ab2a 12016+ goto out_unlock;
1facf9fc 12017+
1308ab2a 12018+ out_fill:
12019+ generic_fillattr(inode, st);
12020+ out_unlock:
12021+ di_read_unlock(dentry, AuLock_IR);
12022+ out:
12023+ si_read_unlock(sb);
1facf9fc 12024+ return err;
12025+}
12026+
1308ab2a 12027+/* ---------------------------------------------------------------------- */
12028+
12029+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
12030+ int bufsiz)
1facf9fc 12031+{
1308ab2a 12032+ int err;
12033+ struct super_block *sb;
12034+ struct dentry *h_dentry;
12035+
12036+ err = -EINVAL;
12037+ h_dentry = au_h_dptr(dentry, bindex);
12038+ if (unlikely(/* !h_dentry
12039+ || !h_dentry->d_inode
12040+ || !h_dentry->d_inode->i_op
12041+ || */ !h_dentry->d_inode->i_op->readlink))
12042+ goto out;
12043+
12044+ err = security_inode_readlink(h_dentry);
12045+ if (unlikely(err))
12046+ goto out;
12047+
12048+ sb = dentry->d_sb;
12049+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
12050+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
12051+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
12052+ }
12053+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
12054+
12055+ out:
12056+ return err;
1facf9fc 12057+}
12058+
1308ab2a 12059+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
1facf9fc 12060+{
1308ab2a 12061+ int err;
12062+
12063+ aufs_read_lock(dentry, AuLock_IR);
12064+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
12065+ aufs_read_unlock(dentry, AuLock_IR);
12066+
12067+ return err;
12068+}
12069+
12070+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
12071+{
12072+ int err;
12073+ char *buf;
12074+ mm_segment_t old_fs;
12075+
12076+ err = -ENOMEM;
12077+ buf = __getname();
12078+ if (unlikely(!buf))
12079+ goto out;
12080+
12081+ aufs_read_lock(dentry, AuLock_IR);
12082+ old_fs = get_fs();
12083+ set_fs(KERNEL_DS);
12084+ err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf,
12085+ PATH_MAX);
12086+ set_fs(old_fs);
12087+ aufs_read_unlock(dentry, AuLock_IR);
12088+
12089+ if (err >= 0) {
12090+ buf[err] = 0;
12091+ /* will be freed by put_link */
12092+ nd_set_link(nd, buf);
12093+ return NULL; /* success */
12094+ }
12095+ __putname(buf);
12096+
12097+ out:
12098+ path_put(&nd->path);
12099+ AuTraceErr(err);
12100+ return ERR_PTR(err);
12101+}
12102+
12103+static void aufs_put_link(struct dentry *dentry __maybe_unused,
12104+ struct nameidata *nd, void *cookie __maybe_unused)
12105+{
12106+ __putname(nd_get_link(nd));
12107+}
12108+
12109+/* ---------------------------------------------------------------------- */
12110+
12111+static void aufs_truncate_range(struct inode *inode __maybe_unused,
12112+ loff_t start __maybe_unused,
12113+ loff_t end __maybe_unused)
12114+{
12115+ AuUnsupport();
dece6358 12116+}
1308ab2a 12117+
12118+/* ---------------------------------------------------------------------- */
12119+
12120+struct inode_operations aufs_symlink_iop = {
12121+ .permission = aufs_permission,
12122+ .setattr = aufs_setattr,
12123+ .getattr = aufs_getattr,
12124+ .readlink = aufs_readlink,
12125+ .follow_link = aufs_follow_link,
12126+ .put_link = aufs_put_link
12127+};
12128+
12129+struct inode_operations aufs_dir_iop = {
12130+ .create = aufs_create,
12131+ .lookup = aufs_lookup,
12132+ .link = aufs_link,
12133+ .unlink = aufs_unlink,
12134+ .symlink = aufs_symlink,
12135+ .mkdir = aufs_mkdir,
12136+ .rmdir = aufs_rmdir,
12137+ .mknod = aufs_mknod,
12138+ .rename = aufs_rename,
12139+
12140+ .permission = aufs_permission,
12141+ .setattr = aufs_setattr,
12142+ .getattr = aufs_getattr
12143+};
12144+
12145+struct inode_operations aufs_iop = {
12146+ .permission = aufs_permission,
12147+ .setattr = aufs_setattr,
12148+ .getattr = aufs_getattr,
12149+ .truncate_range = aufs_truncate_range
12150+};
12151diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_add.c aufs2-2.6.git/fs/aufs/i_op_add.c
12152--- linux-2.6.31/fs/aufs/i_op_add.c 1970-01-01 00:00:00.000000000 +0000
12153+++ aufs2-2.6.git/fs/aufs/i_op_add.c 2009-09-21 21:49:23.401607657 +0000
12154@@ -0,0 +1,649 @@
dece6358
AM
12155+/*
12156+ * Copyright (C) 2005-2009 Junjiro R. Okajima
12157+ *
12158+ * This program, aufs is free software; you can redistribute it and/or modify
12159+ * it under the terms of the GNU General Public License as published by
12160+ * the Free Software Foundation; either version 2 of the License, or
12161+ * (at your option) any later version.
12162+ *
12163+ * This program is distributed in the hope that it will be useful,
12164+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12165+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12166+ * GNU General Public License for more details.
12167+ *
12168+ * You should have received a copy of the GNU General Public License
12169+ * along with this program; if not, write to the Free Software
12170+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12171+ */
1facf9fc 12172+
dece6358 12173+/*
1308ab2a 12174+ * inode operations (add entry)
dece6358 12175+ */
1facf9fc 12176+
1308ab2a 12177+#include "aufs.h"
1facf9fc 12178+
1308ab2a 12179+/*
12180+ * final procedure of adding a new entry, except link(2).
12181+ * remove whiteout, instantiate, copyup the parent dir's times and size
12182+ * and update version.
12183+ * if it failed, re-create the removed whiteout.
12184+ */
12185+static int epilog(struct inode *dir, aufs_bindex_t bindex,
12186+ struct dentry *wh_dentry, struct dentry *dentry)
12187+{
12188+ int err, rerr;
12189+ aufs_bindex_t bwh;
12190+ struct path h_path;
12191+ struct inode *inode, *h_dir;
12192+ struct dentry *wh;
1facf9fc 12193+
1308ab2a 12194+ bwh = -1;
12195+ if (wh_dentry) {
12196+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
12197+ IMustLock(h_dir);
12198+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
12199+ bwh = au_dbwh(dentry);
12200+ h_path.dentry = wh_dentry;
12201+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
12202+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
12203+ dentry);
12204+ if (unlikely(err))
12205+ goto out;
12206+ }
1facf9fc 12207+
1308ab2a 12208+ inode = au_new_inode(dentry, /*must_new*/1);
12209+ if (!IS_ERR(inode)) {
12210+ d_instantiate(dentry, inode);
12211+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
12212+ IMustLock(dir);
12213+ if (au_ibstart(dir) == au_dbstart(dentry))
12214+ au_cpup_attr_timesizes(dir);
12215+ dir->i_version++;
12216+ return 0; /* success */
12217+ }
1facf9fc 12218+
1308ab2a 12219+ err = PTR_ERR(inode);
12220+ if (!wh_dentry)
12221+ goto out;
1facf9fc 12222+
1308ab2a 12223+ /* revert */
12224+ /* dir inode is locked */
12225+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
12226+ rerr = PTR_ERR(wh);
12227+ if (IS_ERR(wh)) {
12228+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
12229+ AuDLNPair(dentry), err, rerr);
12230+ err = -EIO;
12231+ } else
12232+ dput(wh);
1facf9fc 12233+
1308ab2a 12234+ out:
12235+ return err;
1facf9fc 12236+}
12237+
1308ab2a 12238+/*
12239+ * simple tests for the adding inode operations.
12240+ * following the checks in vfs, plus the parent-child relationship.
12241+ */
dece6358 12242+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
1308ab2a 12243+ struct dentry *h_parent, int isdir)
12244+{
12245+ int err;
12246+ umode_t h_mode;
12247+ struct dentry *h_dentry;
12248+ struct inode *h_inode;
1facf9fc 12249+
1308ab2a 12250+ h_dentry = au_h_dptr(dentry, bindex);
12251+ h_inode = h_dentry->d_inode;
12252+ if (!dentry->d_inode) {
12253+ err = -EEXIST;
12254+ if (unlikely(h_inode))
12255+ goto out;
12256+ } else {
12257+ /* rename(2) case */
12258+ err = -EIO;
12259+ if (unlikely(!h_inode || !h_inode->i_nlink))
12260+ goto out;
dece6358 12261+
1308ab2a 12262+ h_mode = h_inode->i_mode;
12263+ if (!isdir) {
12264+ err = -EISDIR;
12265+ if (unlikely(S_ISDIR(h_mode)))
12266+ goto out;
12267+ } else if (unlikely(!S_ISDIR(h_mode))) {
12268+ err = -ENOTDIR;
12269+ goto out;
12270+ }
12271+ }
dece6358 12272+
1308ab2a 12273+ err = -EIO;
12274+ /* expected parent dir is locked */
12275+ if (unlikely(h_parent != h_dentry->d_parent))
12276+ goto out;
12277+ err = 0;
dece6358 12278+
1308ab2a 12279+ out:
12280+ return err;
1facf9fc 12281+}
12282+
12283+/*
1308ab2a 12284+ * initial procedure of adding a new entry.
12285+ * prepare writable branch and the parent dir, lock it,
12286+ * and lookup whiteout for the new entry.
1facf9fc 12287+ */
1308ab2a 12288+static struct dentry*
12289+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
12290+ struct dentry *src_dentry, struct au_pin *pin,
12291+ struct au_wr_dir_args *wr_dir_args)
12292+{
12293+ struct dentry *wh_dentry, *h_parent;
12294+ struct super_block *sb;
12295+ struct au_branch *br;
12296+ int err;
12297+ unsigned int udba;
12298+ aufs_bindex_t bcpup;
1facf9fc 12299+
1308ab2a 12300+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
12301+ bcpup = err;
12302+ wh_dentry = ERR_PTR(err);
12303+ if (unlikely(err < 0))
12304+ goto out;
1facf9fc 12305+
1308ab2a 12306+ sb = dentry->d_sb;
12307+ udba = au_opt_udba(sb);
12308+ err = au_pin(pin, dentry, bcpup, udba,
12309+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12310+ wh_dentry = ERR_PTR(err);
12311+ if (unlikely(err))
12312+ goto out;
1facf9fc 12313+
1308ab2a 12314+ h_parent = au_pinned_h_parent(pin);
12315+ if (udba != AuOpt_UDBA_NONE
12316+ && au_dbstart(dentry) == bcpup) {
12317+ err = au_may_add(dentry, bcpup, h_parent,
12318+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
12319+ wh_dentry = ERR_PTR(err);
12320+ if (unlikely(err))
12321+ goto out_unpin;
12322+ }
1facf9fc 12323+
1308ab2a 12324+ br = au_sbr(sb, bcpup);
12325+ if (dt) {
12326+ struct path tmp = {
12327+ .dentry = h_parent,
12328+ .mnt = br->br_mnt
12329+ };
12330+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
12331+ }
1facf9fc 12332+
1308ab2a 12333+ wh_dentry = NULL;
12334+ if (bcpup != au_dbwh(dentry))
12335+ goto out; /* success */
1facf9fc 12336+
1308ab2a 12337+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
12338+
12339+ out_unpin:
12340+ if (IS_ERR(wh_dentry))
12341+ au_unpin(pin);
12342+ out:
12343+ return wh_dentry;
12344+}
1facf9fc 12345+
dece6358 12346+/* ---------------------------------------------------------------------- */
1facf9fc 12347+
1308ab2a 12348+enum { Mknod, Symlink, Creat };
12349+struct simple_arg {
12350+ int type;
12351+ union {
12352+ struct {
12353+ int mode;
12354+ struct nameidata *nd;
12355+ } c;
12356+ struct {
12357+ const char *symname;
12358+ } s;
12359+ struct {
12360+ int mode;
12361+ dev_t dev;
12362+ } m;
12363+ } u;
12364+};
1facf9fc 12365+
1308ab2a 12366+static int add_simple(struct inode *dir, struct dentry *dentry,
12367+ struct simple_arg *arg)
dece6358 12368+{
1308ab2a 12369+ int err;
12370+ aufs_bindex_t bstart;
12371+ unsigned char created;
12372+ struct au_dtime dt;
12373+ struct au_pin pin;
12374+ struct path h_path;
12375+ struct dentry *wh_dentry, *parent;
12376+ struct inode *h_dir;
12377+ struct au_wr_dir_args wr_dir_args = {
12378+ .force_btgt = -1,
12379+ .flags = AuWrDir_ADD_ENTRY
12380+ };
1facf9fc 12381+
1308ab2a 12382+ IMustLock(dir);
1facf9fc 12383+
1308ab2a 12384+ parent = dentry->d_parent; /* dir inode is locked */
12385+ aufs_read_lock(dentry, AuLock_DW);
12386+ di_write_lock_parent(parent);
12387+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
12388+ &wr_dir_args);
12389+ err = PTR_ERR(wh_dentry);
12390+ if (IS_ERR(wh_dentry))
12391+ goto out;
1facf9fc 12392+
1308ab2a 12393+ bstart = au_dbstart(dentry);
12394+ h_path.dentry = au_h_dptr(dentry, bstart);
12395+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
12396+ h_dir = au_pinned_h_dir(&pin);
12397+ switch (arg->type) {
12398+ case Creat:
12399+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
12400+ break;
12401+ case Symlink:
12402+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
12403+ break;
12404+ case Mknod:
12405+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
12406+ break;
12407+ default:
12408+ BUG();
12409+ }
12410+ created = !err;
12411+ if (!err)
12412+ err = epilog(dir, bstart, wh_dentry, dentry);
1facf9fc 12413+
1308ab2a 12414+ /* revert */
12415+ if (unlikely(created && err && h_path.dentry->d_inode)) {
12416+ int rerr;
12417+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
12418+ if (rerr) {
12419+ AuIOErr("%.*s revert failure(%d, %d)\n",
12420+ AuDLNPair(dentry), err, rerr);
12421+ err = -EIO;
12422+ }
12423+ au_dtime_revert(&dt);
12424+ d_drop(dentry);
12425+ }
1facf9fc 12426+
1308ab2a 12427+ au_unpin(&pin);
12428+ dput(wh_dentry);
1facf9fc 12429+
1308ab2a 12430+ out:
12431+ if (unlikely(err)) {
12432+ au_update_dbstart(dentry);
12433+ d_drop(dentry);
12434+ }
12435+ di_write_unlock(parent);
12436+ aufs_read_unlock(dentry, AuLock_DW);
12437+ return err;
1facf9fc 12438+}
12439+
1308ab2a 12440+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1facf9fc 12441+{
1308ab2a 12442+ struct simple_arg arg = {
12443+ .type = Mknod,
12444+ .u.m = {
12445+ .mode = mode,
12446+ .dev = dev
12447+ }
12448+ };
12449+ return add_simple(dir, dentry, &arg);
1facf9fc 12450+}
12451+
1308ab2a 12452+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1facf9fc 12453+{
1308ab2a 12454+ struct simple_arg arg = {
12455+ .type = Symlink,
12456+ .u.s.symname = symname
12457+ };
12458+ return add_simple(dir, dentry, &arg);
1facf9fc 12459+}
12460+
1308ab2a 12461+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
12462+ struct nameidata *nd)
1facf9fc 12463+{
1308ab2a 12464+ struct simple_arg arg = {
12465+ .type = Creat,
12466+ .u.c = {
12467+ .mode = mode,
12468+ .nd = nd
12469+ }
12470+ };
12471+ return add_simple(dir, dentry, &arg);
1facf9fc 12472+}
12473+
12474+/* ---------------------------------------------------------------------- */
12475+
1308ab2a 12476+struct au_link_args {
12477+ aufs_bindex_t bdst, bsrc;
12478+ struct au_pin pin;
12479+ struct path h_path;
12480+ struct dentry *src_parent, *parent;
12481+};
12482+
12483+static int au_cpup_before_link(struct dentry *src_dentry,
12484+ struct au_link_args *a)
1facf9fc 12485+{
1308ab2a 12486+ int err;
12487+ struct dentry *h_src_dentry;
12488+ struct mutex *h_mtx;
12489+
12490+ di_read_lock_parent(a->src_parent, AuLock_IR);
12491+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
12492+ if (unlikely(err))
12493+ goto out;
12494+
12495+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
12496+ h_mtx = &h_src_dentry->d_inode->i_mutex;
12497+ err = au_pin(&a->pin, src_dentry, a->bdst,
12498+ au_opt_udba(src_dentry->d_sb),
12499+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12500+ if (unlikely(err))
12501+ goto out;
12502+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12503+ err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
12504+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
12505+ mutex_unlock(h_mtx);
12506+ au_unpin(&a->pin);
1facf9fc 12507+
1308ab2a 12508+ out:
12509+ di_read_unlock(a->src_parent, AuLock_IR);
12510+ return err;
dece6358 12511+}
1facf9fc 12512+
1308ab2a 12513+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
dece6358 12514+{
1308ab2a 12515+ int err;
12516+ unsigned char plink;
12517+ struct inode *h_inode, *inode;
12518+ struct dentry *h_src_dentry;
12519+ struct super_block *sb;
1facf9fc 12520+
1308ab2a 12521+ plink = 0;
12522+ h_inode = NULL;
12523+ sb = src_dentry->d_sb;
12524+ inode = src_dentry->d_inode;
12525+ if (au_ibstart(inode) <= a->bdst)
12526+ h_inode = au_h_iptr(inode, a->bdst);
12527+ if (!h_inode || !h_inode->i_nlink) {
12528+ /* copyup src_dentry as the name of dentry. */
12529+ au_set_dbstart(src_dentry, a->bdst);
12530+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
12531+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
12532+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
12533+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1,
12534+ AuCpup_KEEPLINO, a->parent);
12535+ mutex_unlock(&h_inode->i_mutex);
12536+ au_set_h_dptr(src_dentry, a->bdst, NULL);
12537+ au_set_dbstart(src_dentry, a->bsrc);
12538+ } else {
12539+ /* the inode of src_dentry already exists on a.bdst branch */
12540+ h_src_dentry = d_find_alias(h_inode);
12541+ if (!h_src_dentry && au_plink_test(inode)) {
12542+ plink = 1;
12543+ h_src_dentry = au_plink_lkup(inode, a->bdst);
12544+ err = PTR_ERR(h_src_dentry);
12545+ if (IS_ERR(h_src_dentry))
12546+ goto out;
1facf9fc 12547+
1308ab2a 12548+ if (unlikely(!h_src_dentry->d_inode)) {
12549+ dput(h_src_dentry);
12550+ h_src_dentry = NULL;
12551+ }
12552+
12553+ }
12554+ if (h_src_dentry) {
12555+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
12556+ &a->h_path);
12557+ dput(h_src_dentry);
1facf9fc 12558+ } else {
1308ab2a 12559+ AuIOErr("no dentry found for hi%lu on b%d\n",
12560+ h_inode->i_ino, a->bdst);
12561+ err = -EIO;
1facf9fc 12562+ }
12563+ }
1facf9fc 12564+
1308ab2a 12565+ if (!err && !plink)
12566+ au_plink_append(inode, a->bdst, a->h_path.dentry);
1facf9fc 12567+
1308ab2a 12568+out:
12569+ return err;
dece6358 12570+}
1facf9fc 12571+
1308ab2a 12572+int aufs_link(struct dentry *src_dentry, struct inode *dir,
12573+ struct dentry *dentry)
dece6358 12574+{
1308ab2a 12575+ int err, rerr;
12576+ struct au_dtime dt;
12577+ struct au_link_args *a;
12578+ struct dentry *wh_dentry, *h_src_dentry;
12579+ struct inode *inode;
12580+ struct super_block *sb;
12581+ struct au_wr_dir_args wr_dir_args = {
12582+ /* .force_btgt = -1, */
12583+ .flags = AuWrDir_ADD_ENTRY
12584+ };
1facf9fc 12585+
1308ab2a 12586+ IMustLock(dir);
12587+ inode = src_dentry->d_inode;
12588+ IMustLock(inode);
1facf9fc 12589+
1308ab2a 12590+ err = -ENOENT;
12591+ if (unlikely(!inode->i_nlink))
12592+ goto out;
1facf9fc 12593+
1308ab2a 12594+ err = -ENOMEM;
12595+ a = kzalloc(sizeof(*a), GFP_NOFS);
12596+ if (unlikely(!a))
12597+ goto out;
1facf9fc 12598+
1308ab2a 12599+ a->parent = dentry->d_parent; /* dir inode is locked */
12600+ aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0);
12601+ a->src_parent = dget_parent(src_dentry);
12602+ wr_dir_args.force_btgt = au_dbstart(src_dentry);
1facf9fc 12603+
1308ab2a 12604+ di_write_lock_parent(a->parent);
12605+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
12606+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
12607+ &wr_dir_args);
12608+ err = PTR_ERR(wh_dentry);
12609+ if (IS_ERR(wh_dentry))
12610+ goto out_unlock;
1facf9fc 12611+
1308ab2a 12612+ err = 0;
12613+ sb = dentry->d_sb;
12614+ a->bdst = au_dbstart(dentry);
12615+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
12616+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
12617+ a->bsrc = au_dbstart(src_dentry);
12618+ if (au_opt_test(au_mntflags(sb), PLINK)) {
12619+ if (a->bdst < a->bsrc
12620+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
12621+ err = au_cpup_or_link(src_dentry, a);
12622+ else {
12623+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
12624+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
12625+ &a->h_path);
12626+ }
12627+ } else {
12628+ /*
12629+ * copyup src_dentry to the branch we process,
12630+ * and then link(2) to it.
12631+ */
12632+ if (a->bdst < a->bsrc
12633+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
12634+ au_unpin(&a->pin);
12635+ di_write_unlock(a->parent);
12636+ err = au_cpup_before_link(src_dentry, a);
12637+ di_write_lock_parent(a->parent);
12638+ if (!err)
12639+ err = au_pin(&a->pin, dentry, a->bdst,
12640+ au_opt_udba(sb),
12641+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12642+ if (unlikely(err))
12643+ goto out_wh;
12644+ }
12645+ if (!err) {
12646+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
12647+ err = -ENOENT;
12648+ if (h_src_dentry && h_src_dentry->d_inode)
12649+ err = vfsub_link(h_src_dentry,
12650+ au_pinned_h_dir(&a->pin),
12651+ &a->h_path);
12652+ }
12653+ }
12654+ if (unlikely(err))
12655+ goto out_unpin;
1facf9fc 12656+
1308ab2a 12657+ if (wh_dentry) {
12658+ a->h_path.dentry = wh_dentry;
12659+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
12660+ dentry);
12661+ if (unlikely(err))
12662+ goto out_revert;
12663+ }
1facf9fc 12664+
1308ab2a 12665+ dir->i_version++;
12666+ if (au_ibstart(dir) == au_dbstart(dentry))
12667+ au_cpup_attr_timesizes(dir);
12668+ inc_nlink(inode);
12669+ inode->i_ctime = dir->i_ctime;
12670+ if (!d_unhashed(a->h_path.dentry))
12671+ d_instantiate(dentry, au_igrab(inode));
12672+ else
12673+ /* some filesystem calls d_drop() */
12674+ d_drop(dentry);
12675+ goto out_unpin; /* success */
1facf9fc 12676+
1308ab2a 12677+ out_revert:
12678+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
12679+ if (!rerr)
12680+ goto out_dt;
12681+ AuIOErr("%.*s reverting failed(%d, %d)\n",
12682+ AuDLNPair(dentry), err, rerr);
12683+ err = -EIO;
12684+ out_dt:
12685+ d_drop(dentry);
12686+ au_dtime_revert(&dt);
12687+ out_unpin:
12688+ au_unpin(&a->pin);
12689+ out_wh:
12690+ dput(wh_dentry);
12691+ out_unlock:
12692+ if (unlikely(err)) {
12693+ au_update_dbstart(dentry);
12694+ d_drop(dentry);
12695+ }
12696+ di_write_unlock(a->parent);
12697+ dput(a->src_parent);
12698+ aufs_read_and_write_unlock2(dentry, src_dentry);
12699+ kfree(a);
12700+ out:
12701+ return err;
dece6358
AM
12702+}
12703+
1308ab2a 12704+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
dece6358 12705+{
1308ab2a 12706+ int err, rerr;
12707+ aufs_bindex_t bindex;
12708+ unsigned char diropq;
12709+ struct path h_path;
12710+ struct dentry *wh_dentry, *parent, *opq_dentry;
12711+ struct mutex *h_mtx;
12712+ struct super_block *sb;
12713+ struct {
12714+ struct au_pin pin;
12715+ struct au_dtime dt;
12716+ } *a; /* reduce the stack usage */
12717+ struct au_wr_dir_args wr_dir_args = {
12718+ .force_btgt = -1,
12719+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
12720+ };
dece6358 12721+
1308ab2a 12722+ IMustLock(dir);
dece6358 12723+
1308ab2a 12724+ err = -ENOMEM;
12725+ a = kmalloc(sizeof(*a), GFP_NOFS);
12726+ if (unlikely(!a))
12727+ goto out;
dece6358 12728+
1308ab2a 12729+ aufs_read_lock(dentry, AuLock_DW);
12730+ parent = dentry->d_parent; /* dir inode is locked */
12731+ di_write_lock_parent(parent);
12732+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
12733+ &a->pin, &wr_dir_args);
12734+ err = PTR_ERR(wh_dentry);
12735+ if (IS_ERR(wh_dentry))
12736+ goto out_free;
dece6358 12737+
1308ab2a 12738+ sb = dentry->d_sb;
12739+ bindex = au_dbstart(dentry);
12740+ h_path.dentry = au_h_dptr(dentry, bindex);
12741+ h_path.mnt = au_sbr_mnt(sb, bindex);
12742+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
12743+ if (unlikely(err))
12744+ goto out_unlock;
dece6358 12745+
1308ab2a 12746+ /* make the dir opaque */
12747+ diropq = 0;
12748+ h_mtx = &h_path.dentry->d_inode->i_mutex;
12749+ if (wh_dentry
12750+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
12751+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12752+ opq_dentry = au_diropq_create(dentry, bindex);
12753+ mutex_unlock(h_mtx);
12754+ err = PTR_ERR(opq_dentry);
12755+ if (IS_ERR(opq_dentry))
12756+ goto out_dir;
12757+ dput(opq_dentry);
12758+ diropq = 1;
12759+ }
dece6358 12760+
1308ab2a 12761+ err = epilog(dir, bindex, wh_dentry, dentry);
12762+ if (!err) {
12763+ inc_nlink(dir);
12764+ goto out_unlock; /* success */
12765+ }
dece6358 12766+
1308ab2a 12767+ /* revert */
12768+ if (diropq) {
12769+ AuLabel(revert opq);
12770+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12771+ rerr = au_diropq_remove(dentry, bindex);
12772+ mutex_unlock(h_mtx);
12773+ if (rerr) {
12774+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
12775+ AuDLNPair(dentry), err, rerr);
12776+ err = -EIO;
12777+ }
1facf9fc 12778+ }
dece6358 12779+
1308ab2a 12780+ out_dir:
12781+ AuLabel(revert dir);
12782+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
12783+ if (rerr) {
12784+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
12785+ AuDLNPair(dentry), err, rerr);
12786+ err = -EIO;
12787+ }
12788+ d_drop(dentry);
12789+ au_dtime_revert(&a->dt);
12790+ out_unlock:
12791+ au_unpin(&a->pin);
12792+ dput(wh_dentry);
12793+ out_free:
12794+ if (unlikely(err)) {
12795+ au_update_dbstart(dentry);
12796+ d_drop(dentry);
12797+ }
12798+ di_write_unlock(parent);
12799+ aufs_read_unlock(dentry, AuLock_DW);
12800+ kfree(a);
1facf9fc 12801+ out:
1facf9fc 12802+ return err;
12803+}
1308ab2a 12804diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_del.c aufs2-2.6.git/fs/aufs/i_op_del.c
12805--- linux-2.6.31/fs/aufs/i_op_del.c 1970-01-01 00:00:00.000000000 +0000
12806+++ aufs2-2.6.git/fs/aufs/i_op_del.c 2009-09-21 21:49:23.401607657 +0000
12807@@ -0,0 +1,468 @@
1facf9fc 12808+/*
12809+ * Copyright (C) 2005-2009 Junjiro R. Okajima
12810+ *
12811+ * This program, aufs is free software; you can redistribute it and/or modify
12812+ * it under the terms of the GNU General Public License as published by
12813+ * the Free Software Foundation; either version 2 of the License, or
12814+ * (at your option) any later version.
dece6358
AM
12815+ *
12816+ * This program is distributed in the hope that it will be useful,
12817+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12818+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12819+ * GNU General Public License for more details.
12820+ *
12821+ * You should have received a copy of the GNU General Public License
12822+ * along with this program; if not, write to the Free Software
12823+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 12824+ */
12825+
12826+/*
1308ab2a 12827+ * inode operations (del entry)
1facf9fc 12828+ */
12829+
12830+#include "aufs.h"
12831+
12832+/*
1308ab2a 12833+ * decide if a new whiteout for @dentry is necessary or not.
12834+ * when it is necessary, prepare the parent dir for the upper branch whose
12835+ * branch index is @bcpup for creation. the actual creation of the whiteout will
12836+ * be done by caller.
12837+ * return value:
12838+ * 0: wh is unnecessary
12839+ * plus: wh is necessary
12840+ * minus: error
1facf9fc 12841+ */
1308ab2a 12842+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1facf9fc 12843+{
1308ab2a 12844+ int need_wh, err;
12845+ aufs_bindex_t bstart;
12846+ struct super_block *sb;
1facf9fc 12847+
1308ab2a 12848+ sb = dentry->d_sb;
12849+ bstart = au_dbstart(dentry);
12850+ if (*bcpup < 0) {
12851+ *bcpup = bstart;
12852+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
12853+ err = AuWbrCopyup(au_sbi(sb), dentry);
12854+ *bcpup = err;
12855+ if (unlikely(err < 0))
12856+ goto out;
12857+ }
12858+ } else
12859+ AuDebugOn(bstart < *bcpup
12860+ || au_test_ro(sb, *bcpup, dentry->d_inode));
12861+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
12862+
12863+ if (*bcpup != bstart) {
12864+ err = au_cpup_dirs(dentry, *bcpup);
1facf9fc 12865+ if (unlikely(err))
12866+ goto out;
1308ab2a 12867+ need_wh = 1;
12868+ } else {
12869+ aufs_bindex_t old_bend, new_bend, bdiropq = -1;
dece6358 12870+
1308ab2a 12871+ old_bend = au_dbend(dentry);
12872+ if (isdir) {
12873+ bdiropq = au_dbdiropq(dentry);
12874+ au_set_dbdiropq(dentry, -1);
12875+ }
12876+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
12877+ /*nd*/NULL);
12878+ err = need_wh;
12879+ if (isdir)
12880+ au_set_dbdiropq(dentry, bdiropq);
12881+ if (unlikely(err < 0))
12882+ goto out;
12883+ new_bend = au_dbend(dentry);
12884+ if (!need_wh && old_bend != new_bend) {
12885+ au_set_h_dptr(dentry, new_bend, NULL);
12886+ au_set_dbend(dentry, old_bend);
12887+ }
12888+ }
12889+ AuDbg("need_wh %d\n", need_wh);
12890+ err = need_wh;
1facf9fc 12891+
12892+ out:
12893+ return err;
12894+}
12895+
12896+/*
1308ab2a 12897+ * simple tests for the del-entry operations.
1facf9fc 12898+ * following the checks in vfs, plus the parent-child relationship.
12899+ */
1308ab2a 12900+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
1facf9fc 12901+ struct dentry *h_parent, int isdir)
12902+{
12903+ int err;
12904+ umode_t h_mode;
1308ab2a 12905+ struct dentry *h_dentry, *h_latest;
1facf9fc 12906+ struct inode *h_inode;
12907+
12908+ h_dentry = au_h_dptr(dentry, bindex);
12909+ h_inode = h_dentry->d_inode;
1308ab2a 12910+ if (dentry->d_inode) {
12911+ err = -ENOENT;
1facf9fc 12912+ if (unlikely(!h_inode || !h_inode->i_nlink))
12913+ goto out;
12914+
12915+ h_mode = h_inode->i_mode;
12916+ if (!isdir) {
12917+ err = -EISDIR;
12918+ if (unlikely(S_ISDIR(h_mode)))
dece6358
AM
12919+ goto out;
12920+ } else if (unlikely(!S_ISDIR(h_mode))) {
12921+ err = -ENOTDIR;
12922+ goto out;
12923+ }
1308ab2a 12924+ } else {
12925+ /* rename(2) case */
12926+ err = -EIO;
12927+ if (unlikely(h_inode))
12928+ goto out;
dece6358 12929+ }
1facf9fc 12930+
1308ab2a 12931+ err = -ENOENT;
dece6358
AM
12932+ /* expected parent dir is locked */
12933+ if (unlikely(h_parent != h_dentry->d_parent))
1facf9fc 12934+ goto out;
dece6358 12935+ err = 0;
1facf9fc 12936+
1308ab2a 12937+ /*
12938+ * rmdir a dir may break the consistency on some filesystem.
12939+ * let's try heavy test.
12940+ */
12941+ err = -EACCES;
12942+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
12943+ goto out;
12944+
12945+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
12946+ au_sbr(dentry->d_sb, bindex));
12947+ err = -EIO;
12948+ if (IS_ERR(h_latest))
12949+ goto out;
12950+ if (h_latest == h_dentry)
12951+ err = 0;
12952+ dput(h_latest);
12953+
1facf9fc 12954+ out:
12955+ return err;
12956+}
12957+
12958+/*
1308ab2a 12959+ * decide the branch where we operate for @dentry. the branch index will be set
12960+ * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
12961+ * dir for reverting.
12962+ * when a new whiteout is necessary, create it.
1facf9fc 12963+ */
12964+static struct dentry*
1308ab2a 12965+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
12966+ struct au_dtime *dt, struct au_pin *pin)
1facf9fc 12967+{
1308ab2a 12968+ struct dentry *wh_dentry;
1facf9fc 12969+ struct super_block *sb;
1308ab2a 12970+ struct path h_path;
12971+ int err, need_wh;
1facf9fc 12972+ unsigned int udba;
12973+ aufs_bindex_t bcpup;
12974+
1308ab2a 12975+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
12976+ wh_dentry = ERR_PTR(need_wh);
12977+ if (unlikely(need_wh < 0))
1facf9fc 12978+ goto out;
12979+
12980+ sb = dentry->d_sb;
12981+ udba = au_opt_udba(sb);
1308ab2a 12982+ bcpup = *rbcpup;
1facf9fc 12983+ err = au_pin(pin, dentry, bcpup, udba,
12984+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12985+ wh_dentry = ERR_PTR(err);
12986+ if (unlikely(err))
12987+ goto out;
12988+
1308ab2a 12989+ h_path.dentry = au_pinned_h_parent(pin);
1facf9fc 12990+ if (udba != AuOpt_UDBA_NONE
12991+ && au_dbstart(dentry) == bcpup) {
1308ab2a 12992+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
1facf9fc 12993+ wh_dentry = ERR_PTR(err);
12994+ if (unlikely(err))
12995+ goto out_unpin;
12996+ }
12997+
1308ab2a 12998+ h_path.mnt = au_sbr_mnt(sb, bcpup);
12999+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
dece6358 13000+ wh_dentry = NULL;
1308ab2a 13001+ if (!need_wh)
13002+ goto out; /* success, no need to create whiteout */
dece6358 13003+
1308ab2a 13004+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
13005+ if (!IS_ERR(wh_dentry))
13006+ goto out; /* success */
13007+ /* returns with the parent is locked and wh_dentry is dget-ed */
1facf9fc 13008+
13009+ out_unpin:
1308ab2a 13010+ au_unpin(pin);
1facf9fc 13011+ out:
13012+ return wh_dentry;
13013+}
13014+
1308ab2a 13015+/*
13016+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
13017+ * in order to be revertible and save time for removing many child whiteouts
13018+ * under the dir.
13019+ * returns 1 when there are too many child whiteouts and caller should remove
13020+ * them asynchronously. returns 0 when the number of children is small enough to
13021+ * remove now or the branch fs is a remote fs.
13022+ * otherwise return an error.
13023+ */
13024+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
13025+ struct au_nhash *whlist, struct inode *dir)
dece6358 13026+{
1308ab2a 13027+ int rmdir_later, err, dirwh;
13028+ struct dentry *h_dentry;
13029+ struct super_block *sb;
dece6358 13030+
1308ab2a 13031+ sb = dentry->d_sb;
13032+ SiMustAnyLock(sb);
13033+ h_dentry = au_h_dptr(dentry, bindex);
13034+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
dece6358
AM
13035+ if (unlikely(err))
13036+ goto out;
13037+
1308ab2a 13038+ /* stop monitoring */
13039+ au_hin_free(au_hi(dentry->d_inode, bindex));
13040+
13041+ if (!au_test_fs_remote(h_dentry->d_sb)) {
13042+ dirwh = au_sbi(sb)->si_dirwh;
13043+ rmdir_later = (dirwh <= 1);
13044+ if (!rmdir_later)
13045+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
13046+ dirwh);
13047+ if (rmdir_later)
13048+ return rmdir_later;
13049+ }
13050+
13051+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
13052+ if (unlikely(err)) {
13053+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
13054+ AuDLNPair(h_dentry), bindex, err);
13055+ err = 0;
13056+ }
1facf9fc 13057+
dece6358 13058+ out:
dece6358 13059+ return err;
1facf9fc 13060+}
13061+
1308ab2a 13062+/*
13063+ * final procedure for deleting an entry.
13064+ * maintain dentry and iattr.
13065+ */
13066+static void epilog(struct inode *dir, struct dentry *dentry,
13067+ aufs_bindex_t bindex)
1facf9fc 13068+{
1308ab2a 13069+ struct inode *inode;
dece6358 13070+
1308ab2a 13071+ inode = dentry->d_inode;
13072+ d_drop(dentry);
13073+ inode->i_ctime = dir->i_ctime;
dece6358 13074+
1308ab2a 13075+ if (atomic_read(&dentry->d_count) == 1) {
13076+ au_set_h_dptr(dentry, au_dbstart(dentry), NULL);
13077+ au_update_dbstart(dentry);
1facf9fc 13078+ }
1308ab2a 13079+ if (au_ibstart(dir) == bindex)
13080+ au_cpup_attr_timesizes(dir);
13081+ dir->i_version++;
13082+}
1facf9fc 13083+
1308ab2a 13084+/*
13085+ * when an error happened, remove the created whiteout and revert everything.
13086+ */
13087+static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh,
13088+ struct dentry *wh_dentry, struct dentry *dentry,
13089+ struct au_dtime *dt)
13090+{
13091+ int rerr;
13092+ struct path h_path = {
13093+ .dentry = wh_dentry,
13094+ .mnt = au_sbr_mnt(dir->i_sb, bwh)
13095+ };
1facf9fc 13096+
1308ab2a 13097+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry);
13098+ if (!rerr) {
13099+ au_set_dbwh(dentry, bwh);
13100+ au_dtime_revert(dt);
13101+ return 0;
13102+ }
13103+
13104+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
13105+ AuDLNPair(dentry), err, rerr);
13106+ return -EIO;
dece6358 13107+}
1facf9fc 13108+
1308ab2a 13109+/* ---------------------------------------------------------------------- */
13110+
13111+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1facf9fc 13112+{
1308ab2a 13113+ int err;
13114+ aufs_bindex_t bwh, bindex, bstart;
1facf9fc 13115+ struct au_dtime dt;
1308ab2a 13116+ struct au_pin pin;
13117+ struct path h_path;
13118+ struct inode *inode, *h_dir;
13119+ struct dentry *parent, *wh_dentry;
1facf9fc 13120+
13121+ IMustLock(dir);
1308ab2a 13122+ inode = dentry->d_inode;
13123+ if (unlikely(!inode))
13124+ return -ENOENT; /* possible? */
1facf9fc 13125+ IMustLock(inode);
13126+
1308ab2a 13127+ aufs_read_lock(dentry, AuLock_DW);
13128+ parent = dentry->d_parent; /* dir inode is locked */
13129+ di_write_lock_parent(parent);
1facf9fc 13130+
1308ab2a 13131+ bstart = au_dbstart(dentry);
13132+ bwh = au_dbwh(dentry);
13133+ bindex = -1;
13134+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
dece6358
AM
13135+ err = PTR_ERR(wh_dentry);
13136+ if (IS_ERR(wh_dentry))
1308ab2a 13137+ goto out;
1facf9fc 13138+
1308ab2a 13139+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
13140+ h_path.dentry = au_h_dptr(dentry, bstart);
13141+ dget(h_path.dentry);
13142+ if (bindex == bstart) {
13143+ h_dir = au_pinned_h_dir(&pin);
13144+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
dece6358 13145+ } else {
1308ab2a 13146+ /* dir inode is locked */
13147+ h_dir = wh_dentry->d_parent->d_inode;
13148+ IMustLock(h_dir);
13149+ err = 0;
1facf9fc 13150+ }
13151+
1308ab2a 13152+ if (!err) {
13153+ drop_nlink(inode);
13154+ epilog(dir, dentry, bindex);
13155+
13156+ /* update target timestamps */
13157+ if (bindex == bstart) {
13158+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
13159+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
13160+ } else
13161+ /* todo: this timestamp may be reverted later */
13162+ inode->i_ctime = h_dir->i_ctime;
13163+ goto out_unlock; /* success */
1facf9fc 13164+ }
13165+
1308ab2a 13166+ /* revert */
13167+ if (wh_dentry) {
13168+ int rerr;
13169+
13170+ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13171+ if (rerr)
13172+ err = rerr;
13173+ }
dece6358 13174+
dece6358 13175+ out_unlock:
1308ab2a 13176+ au_unpin(&pin);
13177+ dput(wh_dentry);
13178+ dput(h_path.dentry);
1facf9fc 13179+ out:
1308ab2a 13180+ di_write_unlock(parent);
13181+ aufs_read_unlock(dentry, AuLock_DW);
1facf9fc 13182+ return err;
13183+}
13184+
1308ab2a 13185+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
dece6358 13186+{
1308ab2a 13187+ int err, rmdir_later;
13188+ aufs_bindex_t bwh, bindex, bstart;
dece6358 13189+ struct au_dtime dt;
1308ab2a 13190+ struct au_pin pin;
13191+ struct inode *inode;
13192+ struct dentry *parent, *wh_dentry, *h_dentry;
13193+ struct au_whtmp_rmdir *args;
1facf9fc 13194+
13195+ IMustLock(dir);
1308ab2a 13196+ inode = dentry->d_inode;
13197+ err = -ENOENT; /* possible? */
13198+ if (unlikely(!inode))
13199+ goto out;
13200+ IMustLock(inode);
13201+
13202+ aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH);
13203+ err = -ENOMEM;
13204+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
13205+ if (unlikely(!args))
13206+ goto out_unlock;
1facf9fc 13207+
1facf9fc 13208+ parent = dentry->d_parent; /* dir inode is locked */
13209+ di_write_lock_parent(parent);
1308ab2a 13210+ err = au_test_empty(dentry, &args->whlist);
13211+ if (unlikely(err))
13212+ goto out_args;
13213+
13214+ bstart = au_dbstart(dentry);
13215+ bwh = au_dbwh(dentry);
13216+ bindex = -1;
13217+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
1facf9fc 13218+ err = PTR_ERR(wh_dentry);
13219+ if (IS_ERR(wh_dentry))
1308ab2a 13220+ goto out_args;
1facf9fc 13221+
1308ab2a 13222+ h_dentry = au_h_dptr(dentry, bstart);
13223+ dget(h_dentry);
13224+ rmdir_later = 0;
13225+ if (bindex == bstart) {
13226+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
13227+ if (err > 0) {
13228+ rmdir_later = err;
13229+ err = 0;
13230+ }
13231+ } else {
13232+ /* stop monitoring */
13233+ au_hin_free(au_hi(inode, bstart));
1facf9fc 13234+
1308ab2a 13235+ /* dir inode is locked */
13236+ IMustLock(wh_dentry->d_parent->d_inode);
13237+ err = 0;
1facf9fc 13238+ }
13239+
13240+ if (!err) {
1308ab2a 13241+ clear_nlink(inode);
13242+ au_set_dbdiropq(dentry, -1);
13243+ epilog(dir, dentry, bindex);
1facf9fc 13244+
1308ab2a 13245+ if (rmdir_later) {
13246+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
13247+ args = NULL;
dece6358 13248+ }
1308ab2a 13249+
13250+ goto out_unpin; /* success */
1facf9fc 13251+ }
13252+
1308ab2a 13253+ /* revert */
13254+ AuLabel(revert);
13255+ if (wh_dentry) {
13256+ int rerr;
13257+
13258+ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13259+ if (rerr)
13260+ err = rerr;
dece6358 13261+ }
1308ab2a 13262+
13263+ out_unpin:
1facf9fc 13264+ au_unpin(&pin);
13265+ dput(wh_dentry);
1308ab2a 13266+ dput(h_dentry);
13267+ out_args:
1facf9fc 13268+ di_write_unlock(parent);
1308ab2a 13269+ if (args)
13270+ au_whtmp_rmdir_free(args);
13271+ out_unlock:
1facf9fc 13272+ aufs_read_unlock(dentry, AuLock_DW);
1308ab2a 13273+ out:
1facf9fc 13274+ return err;
13275+}
1308ab2a 13276diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_ren.c aufs2-2.6.git/fs/aufs/i_op_ren.c
13277--- linux-2.6.31/fs/aufs/i_op_ren.c 1970-01-01 00:00:00.000000000 +0000
13278+++ aufs2-2.6.git/fs/aufs/i_op_ren.c 2009-09-21 21:49:23.404940801 +0000
13279@@ -0,0 +1,957 @@
1facf9fc 13280+/*
13281+ * Copyright (C) 2005-2009 Junjiro R. Okajima
13282+ *
13283+ * This program, aufs is free software; you can redistribute it and/or modify
13284+ * it under the terms of the GNU General Public License as published by
13285+ * the Free Software Foundation; either version 2 of the License, or
13286+ * (at your option) any later version.
dece6358
AM
13287+ *
13288+ * This program is distributed in the hope that it will be useful,
13289+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13290+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13291+ * GNU General Public License for more details.
13292+ *
13293+ * You should have received a copy of the GNU General Public License
13294+ * along with this program; if not, write to the Free Software
13295+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 13296+ */
13297+
13298+/*
1308ab2a 13299+ * inode operation (rename entry)
13300+ * todo: this is crazy monster
1facf9fc 13301+ */
13302+
dece6358 13303+#include "aufs.h"
1facf9fc 13304+
1308ab2a 13305+enum { AuSRC, AuDST, AuSrcDst };
13306+enum { AuPARENT, AuCHILD, AuParentChild };
13307+
13308+#define AuRen_ISDIR 1
13309+#define AuRen_ISSAMEDIR (1 << 1)
13310+#define AuRen_WHSRC (1 << 2)
13311+#define AuRen_WHDST (1 << 3)
13312+#define AuRen_MNT_WRITE (1 << 4)
13313+#define AuRen_DT_DSTDIR (1 << 5)
13314+#define AuRen_DIROPQ (1 << 6)
13315+#define AuRen_CPUP (1 << 7)
13316+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
13317+#define au_fset_ren(flags, name) { (flags) |= AuRen_##name; }
13318+#define au_fclr_ren(flags, name) { (flags) &= ~AuRen_##name; }
13319+
13320+struct au_ren_args {
13321+ struct {
13322+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
13323+ *wh_dentry;
13324+ struct inode *dir, *inode;
13325+ struct au_hinode *hdir;
13326+ struct au_dtime dt[AuParentChild];
13327+ aufs_bindex_t bstart;
13328+ } sd[AuSrcDst];
13329+
13330+#define src_dentry sd[AuSRC].dentry
13331+#define src_dir sd[AuSRC].dir
13332+#define src_inode sd[AuSRC].inode
13333+#define src_h_dentry sd[AuSRC].h_dentry
13334+#define src_parent sd[AuSRC].parent
13335+#define src_h_parent sd[AuSRC].h_parent
13336+#define src_wh_dentry sd[AuSRC].wh_dentry
13337+#define src_hdir sd[AuSRC].hdir
13338+#define src_h_dir sd[AuSRC].hdir->hi_inode
13339+#define src_dt sd[AuSRC].dt
13340+#define src_bstart sd[AuSRC].bstart
13341+
13342+#define dst_dentry sd[AuDST].dentry
13343+#define dst_dir sd[AuDST].dir
13344+#define dst_inode sd[AuDST].inode
13345+#define dst_h_dentry sd[AuDST].h_dentry
13346+#define dst_parent sd[AuDST].parent
13347+#define dst_h_parent sd[AuDST].h_parent
13348+#define dst_wh_dentry sd[AuDST].wh_dentry
13349+#define dst_hdir sd[AuDST].hdir
13350+#define dst_h_dir sd[AuDST].hdir->hi_inode
13351+#define dst_dt sd[AuDST].dt
13352+#define dst_bstart sd[AuDST].bstart
13353+
13354+ struct dentry *h_trap;
13355+ struct au_branch *br;
13356+ struct au_hinode *src_hinode;
13357+ struct path h_path;
13358+ struct au_nhash whlist;
13359+ aufs_bindex_t btgt;
13360+
13361+ unsigned int flags;
13362+
13363+ struct au_whtmp_rmdir *thargs;
13364+ struct dentry *h_dst;
13365+};
13366+
13367+/* ---------------------------------------------------------------------- */
13368+
13369+/*
13370+ * functions for reverting.
13371+ * when an error happened in a single rename systemcall, we should revert
13372+ * everything as if nothing happened.
13373+ * we don't need to revert the copied-up/down the parent dir since they are
13374+ * harmless.
13375+ */
13376+
13377+#define RevertFailure(fmt, args...) do { \
13378+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
13379+ ##args, err, rerr); \
13380+ err = -EIO; \
13381+} while (0)
13382+
13383+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
13384+{
13385+ int rerr;
13386+
13387+ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
13388+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
13389+ au_hin_imtx_unlock(a->src_hinode);
13390+ if (rerr)
13391+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
13392+}
13393+
1facf9fc 13394+
1308ab2a 13395+static void au_ren_rev_rename(int err, struct au_ren_args *a)
13396+{
13397+ int rerr;
dece6358 13398+
1308ab2a 13399+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
13400+ a->br, /*nd*/NULL);
13401+ rerr = PTR_ERR(a->h_path.dentry);
13402+ if (IS_ERR(a->h_path.dentry)) {
13403+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
13404+ return;
1facf9fc 13405+ }
13406+
1308ab2a 13407+ rerr = vfsub_rename(a->dst_h_dir,
13408+ au_h_dptr(a->src_dentry, a->btgt),
13409+ a->src_h_dir, &a->h_path);
13410+ d_drop(a->h_path.dentry);
13411+ dput(a->h_path.dentry);
13412+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
13413+ if (rerr)
13414+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
1facf9fc 13415+}
13416+
1308ab2a 13417+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
1facf9fc 13418+{
1308ab2a 13419+ int rerr;
1facf9fc 13420+
1308ab2a 13421+ a->h_path.dentry = a->dst_h_dentry;
13422+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
13423+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
13424+ au_set_dbstart(a->src_dentry, a->src_bstart);
13425+ if (rerr)
13426+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
13427+}
1facf9fc 13428+
1facf9fc 13429+
1308ab2a 13430+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
13431+{
13432+ int rerr;
13433+
13434+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
13435+ a->br, /*nd*/NULL);
13436+ rerr = PTR_ERR(a->h_path.dentry);
13437+ if (IS_ERR(a->h_path.dentry)) {
13438+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
13439+ return;
13440+ }
13441+ if (a->h_path.dentry->d_inode) {
13442+ d_drop(a->h_path.dentry);
13443+ dput(a->h_path.dentry);
13444+ return;
1facf9fc 13445+ }
13446+
1308ab2a 13447+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
13448+ d_drop(a->h_path.dentry);
13449+ dput(a->h_path.dentry);
13450+ if (!rerr) {
13451+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
13452+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
13453+ } else
13454+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
13455+}
13456+
13457+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
13458+{
13459+ int rerr;
13460+
13461+ a->h_path.dentry = a->src_wh_dentry;
13462+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
13463+ if (rerr)
13464+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
13465+}
13466+
13467+static void au_ren_rev_drop(struct au_ren_args *a)
13468+{
13469+ struct dentry *d, *h_d;
13470+ int i;
13471+ aufs_bindex_t bend, bindex;
13472+
13473+ for (i = 0; i < AuSrcDst; i++) {
13474+ d = a->sd[i].dentry;
13475+ d_drop(d);
13476+ bend = au_dbend(d);
13477+ for (bindex = au_dbstart(d); bindex <= bend; bindex++) {
13478+ h_d = au_h_dptr(d, bindex);
13479+ if (h_d)
13480+ d_drop(h_d);
dece6358
AM
13481+ }
13482+ }
13483+
1308ab2a 13484+ au_update_dbstart(a->dst_dentry);
13485+ if (a->thargs)
13486+ d_drop(a->h_dst);
1facf9fc 13487+}
1308ab2a 13488+#undef RevertFailure
1facf9fc 13489+
13490+/* ---------------------------------------------------------------------- */
13491+
1308ab2a 13492+/*
13493+ * when we have to copyup the renaming entry, do it with the rename-target name
13494+ * in order to minimize the cost (the later actual rename is unnecessary).
13495+ * otherwise rename it on the target branch.
13496+ */
13497+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 13498+{
1308ab2a 13499+ int err;
13500+ struct dentry *d;
1facf9fc 13501+
1308ab2a 13502+ d = a->src_dentry;
13503+ if (au_dbstart(d) == a->btgt) {
13504+ a->h_path.dentry = a->dst_h_dentry;
13505+ if (au_ftest_ren(a->flags, DIROPQ)
13506+ && au_dbdiropq(d) == a->btgt)
13507+ au_fclr_ren(a->flags, DIROPQ);
13508+ AuDebugOn(au_dbstart(d) != a->btgt);
13509+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
13510+ a->dst_h_dir, &a->h_path);
13511+ } else {
13512+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
1facf9fc 13513+
1308ab2a 13514+ au_fset_ren(a->flags, CPUP);
13515+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13516+ au_set_dbstart(d, a->btgt);
13517+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
13518+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
13519+ !AuCpup_DTIME, a->dst_parent);
13520+ if (unlikely(err)) {
13521+ au_set_h_dptr(d, a->btgt, NULL);
13522+ au_set_dbstart(d, a->src_bstart);
13523+ }
13524+ mutex_unlock(h_mtx);
13525+ }
dece6358 13526+
1308ab2a 13527+ return err;
13528+}
dece6358 13529+
1308ab2a 13530+/* cf. aufs_rmdir() */
13531+static int au_ren_del_whtmp(struct au_ren_args *a)
13532+{
13533+ int err;
13534+ struct inode *dir;
1facf9fc 13535+
1308ab2a 13536+ dir = a->dst_dir;
13537+ SiMustAnyLock(dir->i_sb);
13538+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
13539+ au_sbi(dir->i_sb)->si_dirwh)
13540+ || au_test_fs_remote(a->h_dst->d_sb)) {
13541+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
13542+ if (unlikely(err))
13543+ AuWarn("failed removing whtmp dir %.*s (%d), "
13544+ "ignored.\n", AuDLNPair(a->h_dst), err);
13545+ } else {
13546+ au_nhash_wh_free(&a->thargs->whlist);
13547+ a->thargs->whlist = a->whlist;
13548+ a->whlist.nh_num = 0;
13549+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
13550+ dput(a->h_dst);
13551+ a->thargs = NULL;
13552+ }
dece6358 13553+
1308ab2a 13554+ return 0;
1facf9fc 13555+}
13556+
1308ab2a 13557+/* make it 'opaque' dir. */
13558+static int au_ren_diropq(struct au_ren_args *a)
13559+{
13560+ int err;
13561+ struct dentry *diropq;
dece6358 13562+
1308ab2a 13563+ err = 0;
13564+ a->src_hinode = au_hi(a->src_inode, a->btgt);
13565+ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
13566+ diropq = au_diropq_create(a->src_dentry, a->btgt);
13567+ au_hin_imtx_unlock(a->src_hinode);
13568+ if (IS_ERR(diropq))
13569+ err = PTR_ERR(diropq);
13570+ dput(diropq);
13571+
13572+ return err;
13573+}
13574+
13575+static int do_rename(struct au_ren_args *a)
1facf9fc 13576+{
13577+ int err;
1308ab2a 13578+ struct dentry *d, *h_d;
1facf9fc 13579+
1308ab2a 13580+ /* prepare workqueue args for asynchronous rmdir */
13581+ h_d = a->dst_h_dentry;
13582+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
13583+ err = -ENOMEM;
13584+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
13585+ if (unlikely(!a->thargs))
13586+ goto out;
13587+ a->h_dst = dget(h_d);
13588+ }
dece6358 13589+
1308ab2a 13590+ /* create whiteout for src_dentry */
13591+ if (au_ftest_ren(a->flags, WHSRC)) {
13592+ a->src_wh_dentry
13593+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
13594+ err = PTR_ERR(a->src_wh_dentry);
13595+ if (IS_ERR(a->src_wh_dentry))
13596+ goto out_thargs;
13597+ }
13598+
13599+ /* lookup whiteout for dentry */
13600+ if (au_ftest_ren(a->flags, WHDST)) {
13601+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
13602+ a->br);
13603+ err = PTR_ERR(h_d);
13604+ if (IS_ERR(h_d))
13605+ goto out_whsrc;
13606+ if (!h_d->d_inode)
13607+ dput(h_d);
dece6358 13608+ else
1308ab2a 13609+ a->dst_wh_dentry = h_d;
dece6358 13610+ }
1308ab2a 13611+
13612+ /* rename dentry to tmpwh */
13613+ if (a->thargs) {
13614+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
13615+ if (unlikely(err))
13616+ goto out_whdst;
13617+
13618+ d = a->dst_dentry;
13619+ au_set_h_dptr(d, a->btgt, NULL);
13620+ err = au_lkup_neg(d, a->btgt);
13621+ if (unlikely(err))
13622+ goto out_whtmp;
13623+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
1facf9fc 13624+ }
13625+
1308ab2a 13626+ /* cpup src */
13627+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
13628+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
1facf9fc 13629+
1308ab2a 13630+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13631+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
13632+ !AuCpup_DTIME);
13633+ mutex_unlock(h_mtx);
13634+ if (unlikely(err))
13635+ goto out_whtmp;
13636+ }
1facf9fc 13637+
1308ab2a 13638+ /* rename by vfs_rename or cpup */
13639+ d = a->dst_dentry;
13640+ if (au_ftest_ren(a->flags, ISDIR)
13641+ && (a->dst_wh_dentry
13642+ || au_dbdiropq(d) == a->btgt
13643+ /* hide the lower to keep xino */
13644+ || a->btgt < au_dbend(d)
13645+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
13646+ au_fset_ren(a->flags, DIROPQ);
13647+ err = au_ren_or_cpup(a);
13648+ if (unlikely(err))
13649+ /* leave the copied-up one */
13650+ goto out_whtmp;
1facf9fc 13651+
1308ab2a 13652+ /* make dir opaque */
13653+ if (au_ftest_ren(a->flags, DIROPQ)) {
13654+ err = au_ren_diropq(a);
13655+ if (unlikely(err))
13656+ goto out_rename;
13657+ }
1facf9fc 13658+
1308ab2a 13659+ /* update target timestamps */
13660+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
13661+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
13662+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
13663+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
13664+
13665+ /* remove whiteout for dentry */
13666+ if (a->dst_wh_dentry) {
13667+ a->h_path.dentry = a->dst_wh_dentry;
13668+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
13669+ a->dst_dentry);
13670+ if (unlikely(err))
13671+ goto out_diropq;
1facf9fc 13672+ }
13673+
1308ab2a 13674+ /* remove whtmp */
13675+ if (a->thargs)
13676+ au_ren_del_whtmp(a); /* ignore this error */
1facf9fc 13677+
1308ab2a 13678+ err = 0;
13679+ goto out_success;
1facf9fc 13680+
1308ab2a 13681+ out_diropq:
13682+ if (au_ftest_ren(a->flags, DIROPQ))
13683+ au_ren_rev_diropq(err, a);
13684+ out_rename:
13685+ if (!au_ftest_ren(a->flags, CPUP))
13686+ au_ren_rev_rename(err, a);
13687+ else
13688+ au_ren_rev_cpup(err, a);
13689+ out_whtmp:
13690+ if (a->thargs)
13691+ au_ren_rev_whtmp(err, a);
13692+ out_whdst:
13693+ dput(a->dst_wh_dentry);
13694+ a->dst_wh_dentry = NULL;
13695+ out_whsrc:
13696+ if (a->src_wh_dentry)
13697+ au_ren_rev_whsrc(err, a);
13698+ au_ren_rev_drop(a);
13699+ out_success:
13700+ dput(a->src_wh_dentry);
13701+ dput(a->dst_wh_dentry);
13702+ out_thargs:
13703+ if (a->thargs) {
13704+ dput(a->h_dst);
13705+ au_whtmp_rmdir_free(a->thargs);
13706+ a->thargs = NULL;
13707+ }
dece6358 13708+ out:
dece6358
AM
13709+ return err;
13710+}
1facf9fc 13711+
1308ab2a 13712+/* ---------------------------------------------------------------------- */
13713+
13714+/*
13715+ * test if @dentry dir can be rename destination or not.
13716+ * success means, it is a logically empty dir.
13717+ */
13718+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
13719+{
13720+ return au_test_empty(dentry, whlist);
dece6358 13721+}
1facf9fc 13722+
1308ab2a 13723+/*
13724+ * test if @dentry dir can be rename source or not.
13725+ * if it can, return 0 and @children is filled.
13726+ * success means,
13727+ * - it is a logically empty dir.
13728+ * - or, it exists on writable branch and has no children including whiteouts
13729+ * on the lower branch.
13730+ */
13731+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
dece6358
AM
13732+{
13733+ int err;
1308ab2a 13734+ unsigned int rdhash;
13735+ aufs_bindex_t bstart;
1facf9fc 13736+
1308ab2a 13737+ bstart = au_dbstart(dentry);
13738+ if (bstart != btgt) {
13739+ struct au_nhash whlist;
13740+
13741+ SiMustAnyLock(dentry->d_sb);
13742+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
13743+ if (!rdhash)
13744+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
13745+ dentry));
13746+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
13747+ if (unlikely(err))
13748+ goto out;
13749+ err = au_test_empty(dentry, &whlist);
13750+ au_nhash_wh_free(&whlist);
dece6358 13751+ goto out;
1facf9fc 13752+ }
13753+
1308ab2a 13754+ if (bstart == au_dbtaildir(dentry))
13755+ return 0; /* success */
1facf9fc 13756+
1308ab2a 13757+ err = au_test_empty_lower(dentry);
dece6358 13758+
1308ab2a 13759+ out:
13760+ if (err == -ENOTEMPTY) {
13761+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
13762+ " is not supported\n");
13763+ err = -EXDEV;
dece6358 13764+ }
1308ab2a 13765+ return err;
13766+}
dece6358 13767+
1308ab2a 13768+/* side effect: sets whlist and h_dentry */
13769+static int au_ren_may_dir(struct au_ren_args *a)
13770+{
13771+ int err;
13772+ unsigned int rdhash;
13773+ struct dentry *d;
dece6358 13774+
1308ab2a 13775+ d = a->dst_dentry;
13776+ SiMustAnyLock(d->d_sb);
13777+
13778+ err = 0;
13779+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
13780+ rdhash = au_sbi(d->d_sb)->si_rdhash;
13781+ if (!rdhash)
13782+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
13783+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
13784+ if (unlikely(err))
13785+ goto out;
13786+
13787+ au_set_dbstart(d, a->dst_bstart);
13788+ err = may_rename_dstdir(d, &a->whlist);
13789+ au_set_dbstart(d, a->btgt);
1facf9fc 13790+ }
1308ab2a 13791+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
13792+ if (unlikely(err))
13793+ goto out;
dece6358 13794+
1308ab2a 13795+ d = a->src_dentry;
13796+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
13797+ if (au_ftest_ren(a->flags, ISDIR)) {
13798+ err = may_rename_srcdir(d, a->btgt);
dece6358 13799+ if (unlikely(err)) {
1308ab2a 13800+ au_nhash_wh_free(&a->whlist);
13801+ a->whlist.nh_num = 0;
dece6358
AM
13802+ }
13803+ }
1facf9fc 13804+ out:
13805+ return err;
13806+}
13807+
dece6358
AM
13808+/* ---------------------------------------------------------------------- */
13809+
1308ab2a 13810+/*
13811+ * simple tests for rename.
13812+ * following the checks in vfs, plus the parent-child relationship.
13813+ */
13814+static int au_may_ren(struct au_ren_args *a)
1facf9fc 13815+{
1308ab2a 13816+ int err, isdir;
13817+ struct inode *h_inode;
1facf9fc 13818+
1308ab2a 13819+ if (a->src_bstart == a->btgt) {
13820+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
13821+ au_ftest_ren(a->flags, ISDIR));
13822+ if (unlikely(err))
13823+ goto out;
13824+ err = -EINVAL;
13825+ if (unlikely(a->src_h_dentry == a->h_trap))
13826+ goto out;
1facf9fc 13827+ }
13828+
1308ab2a 13829+ err = 0;
13830+ if (a->dst_bstart != a->btgt)
13831+ goto out;
1facf9fc 13832+
1308ab2a 13833+ err = -EIO;
13834+ h_inode = a->dst_h_dentry->d_inode;
13835+ isdir = !!au_ftest_ren(a->flags, ISDIR);
13836+ if (!a->dst_dentry->d_inode) {
13837+ if (unlikely(h_inode))
13838+ goto out;
13839+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
13840+ isdir);
13841+ } else {
13842+ if (unlikely(!h_inode || !h_inode->i_nlink))
13843+ goto out;
13844+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
13845+ isdir);
13846+ if (unlikely(err))
13847+ goto out;
13848+ err = -ENOTEMPTY;
13849+ if (unlikely(a->dst_h_dentry == a->h_trap))
13850+ goto out;
13851+ err = 0;
dece6358 13852+ }
1facf9fc 13853+
1308ab2a 13854+ out:
13855+ if (unlikely(err == -ENOENT || err == -EEXIST))
13856+ err = -EIO;
13857+ return err;
13858+}
dece6358 13859+
1308ab2a 13860+/* ---------------------------------------------------------------------- */
dece6358 13861+
1308ab2a 13862+/*
13863+ * locking order
13864+ * (VFS)
13865+ * - src_dir and dir by lock_rename()
13866+ * - inode if exists
13867+ * (aufs)
13868+ * - lock all
13869+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
13870+ * + si_read_lock
13871+ * + di_write_lock2_child()
13872+ * + di_write_lock_child()
13873+ * + ii_write_lock_child()
13874+ * + di_write_lock_child2()
13875+ * + ii_write_lock_child2()
13876+ * + src_parent and parent
13877+ * + di_write_lock_parent()
13878+ * + ii_write_lock_parent()
13879+ * + di_write_lock_parent2()
13880+ * + ii_write_lock_parent2()
13881+ * + lower src_dir and dir by vfsub_lock_rename()
13882+ * + verify every relationship between child and parent. if any
13883+ * of them fails, unlock all and return -EBUSY.
13884+ */
13885+static void au_ren_unlock(struct au_ren_args *a)
13886+{
13887+ struct super_block *sb;
13888+
13889+ sb = a->dst_dentry->d_sb;
13890+ if (au_ftest_ren(a->flags, MNT_WRITE))
13891+ mnt_drop_write(a->br->br_mnt);
13892+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
13893+ a->dst_h_parent, a->dst_hdir);
13894+}
13895+
13896+static int au_ren_lock(struct au_ren_args *a)
13897+{
13898+ int err;
13899+ unsigned int udba;
13900+
13901+ err = 0;
13902+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
13903+ a->src_hdir = au_hi(a->src_dir, a->btgt);
13904+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
13905+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
13906+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
13907+ a->dst_h_parent, a->dst_hdir);
13908+ udba = au_opt_udba(a->src_dentry->d_sb);
13909+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
13910+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
13911+ err = au_busy_or_stale();
13912+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
13913+ err = au_h_verify(a->src_h_dentry, udba,
13914+ a->src_h_parent->d_inode, a->src_h_parent,
13915+ a->br);
13916+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
13917+ err = au_h_verify(a->dst_h_dentry, udba,
13918+ a->dst_h_parent->d_inode, a->dst_h_parent,
13919+ a->br);
dece6358 13920+ if (!err) {
1308ab2a 13921+ err = mnt_want_write(a->br->br_mnt);
13922+ if (unlikely(err))
13923+ goto out_unlock;
13924+ au_fset_ren(a->flags, MNT_WRITE);
dece6358 13925+ goto out; /* success */
1facf9fc 13926+ }
dece6358 13927+
1308ab2a 13928+ err = au_busy_or_stale();
dece6358 13929+
1308ab2a 13930+ out_unlock:
13931+ au_ren_unlock(a);
dece6358 13932+ out:
1facf9fc 13933+ return err;
13934+}
13935+
1308ab2a 13936+/* ---------------------------------------------------------------------- */
13937+
13938+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 13939+{
1308ab2a 13940+ struct inode *dir;
1facf9fc 13941+
1308ab2a 13942+ dir = a->dst_dir;
13943+ dir->i_version++;
13944+ if (au_ftest_ren(a->flags, ISDIR)) {
13945+ /* is this updating defined in POSIX? */
13946+ au_cpup_attr_timesizes(a->src_inode);
13947+ au_cpup_attr_nlink(dir, /*force*/1);
13948+ if (a->dst_inode) {
13949+ clear_nlink(a->dst_inode);
13950+ au_cpup_attr_timesizes(a->dst_inode);
13951+ }
13952+ }
13953+ if (au_ibstart(dir) == a->btgt)
13954+ au_cpup_attr_timesizes(dir);
1facf9fc 13955+
1308ab2a 13956+ if (au_ftest_ren(a->flags, ISSAMEDIR))
13957+ return;
dece6358 13958+
1308ab2a 13959+ dir = a->src_dir;
13960+ dir->i_version++;
13961+ if (au_ftest_ren(a->flags, ISDIR))
13962+ au_cpup_attr_nlink(dir, /*force*/1);
13963+ if (au_ibstart(dir) == a->btgt)
13964+ au_cpup_attr_timesizes(dir);
13965+}
1facf9fc 13966+
1308ab2a 13967+static void au_ren_refresh(struct au_ren_args *a)
13968+{
13969+ aufs_bindex_t bend, bindex;
13970+ struct dentry *d, *h_d;
13971+ struct inode *i, *h_i;
13972+ struct super_block *sb;
dece6358 13973+
1308ab2a 13974+ d = a->src_dentry;
13975+ au_set_dbwh(d, -1);
13976+ bend = au_dbend(d);
13977+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
13978+ h_d = au_h_dptr(d, bindex);
13979+ if (h_d)
13980+ au_set_h_dptr(d, bindex, NULL);
dece6358 13981+ }
1308ab2a 13982+ au_set_dbend(d, a->btgt);
dece6358 13983+
1308ab2a 13984+ sb = d->d_sb;
13985+ i = a->src_inode;
13986+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
13987+ return; /* success */
dece6358 13988+
1308ab2a 13989+ bend = au_ibend(i);
13990+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
13991+ h_i = au_h_iptr(i, bindex);
13992+ if (h_i) {
13993+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
13994+ /* ignore this error */
13995+ au_set_h_iptr(i, bindex, NULL, 0);
1facf9fc 13996+ }
1308ab2a 13997+ }
13998+ au_set_ibend(i, a->btgt);
13999+}
dece6358 14000+
1308ab2a 14001+/* ---------------------------------------------------------------------- */
dece6358 14002+
1308ab2a 14003+/* mainly for link(2) and rename(2) */
14004+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
14005+{
14006+ aufs_bindex_t bdiropq, bwh;
14007+ struct dentry *parent;
14008+ struct au_branch *br;
14009+
14010+ parent = dentry->d_parent;
14011+ IMustLock(parent->d_inode); /* dir is locked */
14012+
14013+ bdiropq = au_dbdiropq(parent);
14014+ bwh = au_dbwh(dentry);
14015+ br = au_sbr(dentry->d_sb, btgt);
14016+ if (au_br_rdonly(br)
14017+ || (0 <= bdiropq && bdiropq < btgt)
14018+ || (0 <= bwh && bwh < btgt))
14019+ btgt = -1;
14020+
14021+ AuDbg("btgt %d\n", btgt);
14022+ return btgt;
1facf9fc 14023+}
14024+
1308ab2a 14025+/* sets src_bstart, dst_bstart and btgt */
14026+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 14027+{
dece6358 14028+ int err;
1308ab2a 14029+ struct au_wr_dir_args wr_dir_args = {
14030+ /* .force_btgt = -1, */
14031+ .flags = AuWrDir_ADD_ENTRY
14032+ };
1facf9fc 14033+
1308ab2a 14034+ a->src_bstart = au_dbstart(a->src_dentry);
14035+ a->dst_bstart = au_dbstart(a->dst_dentry);
14036+ if (au_ftest_ren(a->flags, ISDIR))
14037+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
14038+ wr_dir_args.force_btgt = a->src_bstart;
14039+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
14040+ wr_dir_args.force_btgt = a->dst_bstart;
14041+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
14042+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
14043+ a->btgt = err;
1facf9fc 14044+
1facf9fc 14045+ return err;
14046+}
14047+
1308ab2a 14048+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 14049+{
1308ab2a 14050+ a->h_path.dentry = a->src_h_parent;
14051+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
14052+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
14053+ a->h_path.dentry = a->dst_h_parent;
14054+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
dece6358
AM
14055+ }
14056+
1308ab2a 14057+ au_fclr_ren(a->flags, DT_DSTDIR);
14058+ if (!au_ftest_ren(a->flags, ISDIR))
14059+ return;
14060+
14061+ a->h_path.dentry = a->src_h_dentry;
14062+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
14063+ if (a->dst_h_dentry->d_inode) {
14064+ au_fset_ren(a->flags, DT_DSTDIR);
14065+ a->h_path.dentry = a->dst_h_dentry;
14066+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
14067+ }
1facf9fc 14068+}
14069+
1308ab2a 14070+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1facf9fc 14071+{
1308ab2a 14072+ struct dentry *h_d;
14073+ struct mutex *h_mtx;
1facf9fc 14074+
1308ab2a 14075+ au_dtime_revert(a->src_dt + AuPARENT);
14076+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
14077+ au_dtime_revert(a->dst_dt + AuPARENT);
1facf9fc 14078+
1308ab2a 14079+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
14080+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
14081+ h_mtx = &h_d->d_inode->i_mutex;
14082+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14083+ au_dtime_revert(a->src_dt + AuCHILD);
14084+ mutex_unlock(h_mtx);
1facf9fc 14085+
1308ab2a 14086+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
14087+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
14088+ h_mtx = &h_d->d_inode->i_mutex;
14089+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14090+ au_dtime_revert(a->dst_dt + AuCHILD);
14091+ mutex_unlock(h_mtx);
1facf9fc 14092+ }
1facf9fc 14093+ }
1facf9fc 14094+}
14095+
14096+/* ---------------------------------------------------------------------- */
14097+
1308ab2a 14098+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
14099+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 14100+{
14101+ int err;
1308ab2a 14102+ /* reduce stack space */
14103+ struct au_ren_args *a;
1facf9fc 14104+
1308ab2a 14105+ IMustLock(_src_dir);
14106+ IMustLock(_dst_dir);
1facf9fc 14107+
1308ab2a 14108+ err = -ENOMEM;
14109+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
14110+ a = kzalloc(sizeof(*a), GFP_NOFS);
14111+ if (unlikely(!a))
dece6358 14112+ goto out;
1facf9fc 14113+
1308ab2a 14114+ a->src_dir = _src_dir;
14115+ a->src_dentry = _src_dentry;
14116+ a->src_inode = a->src_dentry->d_inode;
14117+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
14118+ a->dst_dir = _dst_dir;
14119+ a->dst_dentry = _dst_dentry;
14120+ a->dst_inode = a->dst_dentry->d_inode;
14121+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
14122+ if (a->dst_inode) {
14123+ IMustLock(a->dst_inode);
14124+ au_igrab(a->dst_inode);
1facf9fc 14125+ }
14126+
1308ab2a 14127+ err = -ENOTDIR;
14128+ if (S_ISDIR(a->src_inode->i_mode)) {
14129+ au_fset_ren(a->flags, ISDIR);
14130+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
14131+ goto out_free;
14132+ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14133+ AuLock_DIR | AuLock_FLUSH);
14134+ } else
14135+ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14136+ AuLock_FLUSH);
1facf9fc 14137+
1308ab2a 14138+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
14139+ di_write_lock_parent(a->dst_parent);
1facf9fc 14140+
1308ab2a 14141+ /* which branch we process */
14142+ err = au_ren_wbr(a);
14143+ if (unlikely(err < 0))
14144+ goto out_unlock;
14145+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
14146+ a->h_path.mnt = a->br->br_mnt;
1facf9fc 14147+
1308ab2a 14148+ /* are they available to be renamed */
14149+ err = au_ren_may_dir(a);
14150+ if (unlikely(err))
14151+ goto out_children;
1facf9fc 14152+
1308ab2a 14153+ /* prepare the writable parent dir on the same branch */
14154+ if (a->dst_bstart == a->btgt) {
14155+ au_fset_ren(a->flags, WHDST);
14156+ } else {
14157+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
14158+ if (unlikely(err))
14159+ goto out_children;
1facf9fc 14160+ }
1facf9fc 14161+
1308ab2a 14162+ if (a->src_dir != a->dst_dir) {
14163+ /*
14164+ * this temporary unlock is safe,
14165+ * because both dir->i_mutex are locked.
14166+ */
14167+ di_write_unlock(a->dst_parent);
14168+ di_write_lock_parent(a->src_parent);
14169+ err = au_wr_dir_need_wh(a->src_dentry,
14170+ au_ftest_ren(a->flags, ISDIR),
14171+ &a->btgt);
14172+ di_write_unlock(a->src_parent);
14173+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
14174+ au_fclr_ren(a->flags, ISSAMEDIR);
14175+ } else
14176+ err = au_wr_dir_need_wh(a->src_dentry,
14177+ au_ftest_ren(a->flags, ISDIR),
14178+ &a->btgt);
14179+ if (unlikely(err < 0))
14180+ goto out_children;
14181+ if (err)
14182+ au_fset_ren(a->flags, WHSRC);
1facf9fc 14183+
1308ab2a 14184+ /* lock them all */
14185+ err = au_ren_lock(a);
14186+ if (unlikely(err))
14187+ goto out_children;
1facf9fc 14188+
1308ab2a 14189+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) {
14190+ err = au_may_ren(a);
14191+ if (unlikely(err))
14192+ goto out_hdir;
14193+ }
1facf9fc 14194+
1308ab2a 14195+ /* store timestamps to be revertible */
14196+ au_ren_dt(a);
1facf9fc 14197+
1308ab2a 14198+ /* here we go */
14199+ err = do_rename(a);
14200+ if (unlikely(err))
14201+ goto out_dt;
1facf9fc 14202+
1308ab2a 14203+ /* update dir attributes */
14204+ au_ren_refresh_dir(a);
1facf9fc 14205+
1308ab2a 14206+ /* dput/iput all lower dentries */
14207+ au_ren_refresh(a);
1facf9fc 14208+
1308ab2a 14209+ goto out_hdir; /* success */
14210+
14211+ out_dt:
14212+ au_ren_rev_dt(err, a);
14213+ out_hdir:
14214+ au_ren_unlock(a);
14215+ out_children:
14216+ au_nhash_wh_free(&a->whlist);
14217+ out_unlock:
14218+ if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) {
14219+ au_update_dbstart(a->dst_dentry);
14220+ d_drop(a->dst_dentry);
14221+ }
14222+ if (!err)
14223+ d_move(a->src_dentry, a->dst_dentry);
14224+ if (au_ftest_ren(a->flags, ISSAMEDIR))
14225+ di_write_unlock(a->dst_parent);
14226+ else
14227+ di_write_unlock2(a->src_parent, a->dst_parent);
14228+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
14229+ out_free:
14230+ iput(a->dst_inode);
14231+ if (a->thargs)
14232+ au_whtmp_rmdir_free(a->thargs);
14233+ kfree(a);
14234+ out:
14235+ return err;
14236+}
14237diff -uprN -x .git linux-2.6.31/fs/aufs/iinfo.c aufs2-2.6.git/fs/aufs/iinfo.c
14238--- linux-2.6.31/fs/aufs/iinfo.c 1970-01-01 00:00:00.000000000 +0000
14239+++ aufs2-2.6.git/fs/aufs/iinfo.c 2009-09-21 21:49:23.404940801 +0000
14240@@ -0,0 +1,283 @@
1facf9fc 14241+/*
14242+ * Copyright (C) 2005-2009 Junjiro R. Okajima
14243+ *
14244+ * This program, aufs is free software; you can redistribute it and/or modify
14245+ * it under the terms of the GNU General Public License as published by
14246+ * the Free Software Foundation; either version 2 of the License, or
14247+ * (at your option) any later version.
dece6358
AM
14248+ *
14249+ * This program is distributed in the hope that it will be useful,
14250+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14251+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14252+ * GNU General Public License for more details.
14253+ *
14254+ * You should have received a copy of the GNU General Public License
14255+ * along with this program; if not, write to the Free Software
14256+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 14257+ */
14258+
14259+/*
1308ab2a 14260+ * inode private data
14261+ */
dece6358 14262+
1308ab2a 14263+#include "aufs.h"
dece6358 14264+
1308ab2a 14265+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
14266+{
14267+ struct inode *h_inode;
dece6358 14268+
1308ab2a 14269+ IiMustAnyLock(inode);
14270+
14271+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
14272+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
14273+ return h_inode;
1facf9fc 14274+}
14275+
1308ab2a 14276+/* todo: hard/soft set? */
14277+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1facf9fc 14278+{
1308ab2a 14279+ struct au_iinfo *iinfo = au_ii(inode);
14280+ struct inode *h_inode;
1facf9fc 14281+
1308ab2a 14282+ IiMustWriteLock(inode);
1facf9fc 14283+
1308ab2a 14284+ iinfo->ii_bstart = bindex;
14285+ h_inode = iinfo->ii_hinode[bindex + 0].hi_inode;
14286+ if (h_inode)
14287+ au_cpup_igen(inode, h_inode);
14288+}
1facf9fc 14289+
1308ab2a 14290+void au_hiput(struct au_hinode *hinode)
14291+{
14292+ au_hin_free(hinode);
14293+ dput(hinode->hi_whdentry);
14294+ iput(hinode->hi_inode);
14295+}
1facf9fc 14296+
1308ab2a 14297+unsigned int au_hi_flags(struct inode *inode, int isdir)
14298+{
14299+ unsigned int flags;
14300+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
dece6358 14301+
1308ab2a 14302+ flags = 0;
14303+ if (au_opt_test(mnt_flags, XINO))
14304+ au_fset_hi(flags, XINO);
14305+ if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY))
14306+ au_fset_hi(flags, HINOTIFY);
14307+ return flags;
1facf9fc 14308+}
14309+
1308ab2a 14310+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14311+ struct inode *h_inode, unsigned int flags)
1facf9fc 14312+{
1308ab2a 14313+ struct au_hinode *hinode;
14314+ struct inode *hi;
14315+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 14316+
1308ab2a 14317+ IiMustWriteLock(inode);
dece6358 14318+
1308ab2a 14319+ hinode = iinfo->ii_hinode + bindex;
14320+ hi = hinode->hi_inode;
14321+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
14322+ AuDebugOn(h_inode && hi);
14323+
14324+ if (hi)
14325+ au_hiput(hinode);
14326+ hinode->hi_inode = h_inode;
14327+ if (h_inode) {
14328+ int err;
14329+ struct super_block *sb = inode->i_sb;
14330+ struct au_branch *br;
14331+
14332+ if (bindex == iinfo->ii_bstart)
14333+ au_cpup_igen(inode, h_inode);
14334+ br = au_sbr(sb, bindex);
14335+ hinode->hi_id = br->br_id;
14336+ if (au_ftest_hi(flags, XINO)) {
14337+ err = au_xino_write(sb, bindex, h_inode->i_ino,
14338+ inode->i_ino);
14339+ if (unlikely(err))
14340+ AuIOErr1("failed au_xino_write() %d\n", err);
14341+ }
14342+
14343+ if (au_ftest_hi(flags, HINOTIFY)
14344+ && au_br_hinotifyable(br->br_perm)) {
14345+ err = au_hin_alloc(hinode, inode, h_inode);
14346+ if (unlikely(err))
14347+ AuIOErr1("au_hin_alloc() %d\n", err);
14348+ }
dece6358 14349+ }
1facf9fc 14350+}
14351+
1308ab2a 14352+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14353+ struct dentry *h_wh)
1facf9fc 14354+{
1308ab2a 14355+ struct au_hinode *hinode;
dece6358 14356+
1308ab2a 14357+ IiMustWriteLock(inode);
dece6358 14358+
1308ab2a 14359+ hinode = au_ii(inode)->ii_hinode + bindex;
14360+ AuDebugOn(hinode->hi_whdentry);
14361+ hinode->hi_whdentry = h_wh;
1facf9fc 14362+}
14363+
1308ab2a 14364+void au_update_iigen(struct inode *inode)
1facf9fc 14365+{
1308ab2a 14366+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
14367+ /* smp_mb(); */ /* atomic_set */
14368+}
1facf9fc 14369+
1308ab2a 14370+/* it may be called at remount time, too */
14371+void au_update_brange(struct inode *inode, int do_put_zero)
14372+{
14373+ struct au_iinfo *iinfo;
1facf9fc 14374+
1308ab2a 14375+ iinfo = au_ii(inode);
14376+ if (!iinfo || iinfo->ii_bstart < 0)
14377+ return;
1facf9fc 14378+
1308ab2a 14379+ IiMustWriteLock(inode);
1facf9fc 14380+
1308ab2a 14381+ if (do_put_zero) {
14382+ aufs_bindex_t bindex;
1facf9fc 14383+
1308ab2a 14384+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
14385+ bindex++) {
14386+ struct inode *h_i;
dece6358 14387+
1308ab2a 14388+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
14389+ if (h_i && !h_i->i_nlink)
14390+ au_set_h_iptr(inode, bindex, NULL, 0);
14391+ }
1facf9fc 14392+ }
14393+
1308ab2a 14394+ iinfo->ii_bstart = -1;
14395+ while (++iinfo->ii_bstart <= iinfo->ii_bend)
14396+ if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode)
14397+ break;
14398+ if (iinfo->ii_bstart > iinfo->ii_bend) {
14399+ iinfo->ii_bstart = -1;
14400+ iinfo->ii_bend = -1;
14401+ return;
dece6358 14402+ }
1facf9fc 14403+
1308ab2a 14404+ iinfo->ii_bend++;
14405+ while (0 <= --iinfo->ii_bend)
14406+ if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode)
14407+ break;
14408+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0);
dece6358
AM
14409+}
14410+
1308ab2a 14411+/* ---------------------------------------------------------------------- */
1facf9fc 14412+
1308ab2a 14413+int au_iinfo_init(struct inode *inode)
14414+{
14415+ struct au_iinfo *iinfo;
14416+ struct super_block *sb;
14417+ int nbr, i;
1facf9fc 14418+
1308ab2a 14419+ sb = inode->i_sb;
14420+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14421+ nbr = au_sbend(sb) + 1;
14422+ if (unlikely(nbr <= 0))
14423+ nbr = 1;
14424+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
14425+ if (iinfo->ii_hinode) {
14426+ for (i = 0; i < nbr; i++)
14427+ iinfo->ii_hinode[i].hi_id = -1;
dece6358 14428+
1308ab2a 14429+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
14430+ /* smp_mb(); */ /* atomic_set */
14431+ au_rw_init(&iinfo->ii_rwsem);
14432+ iinfo->ii_bstart = -1;
14433+ iinfo->ii_bend = -1;
14434+ iinfo->ii_vdir = NULL;
14435+ return 0;
14436+ }
14437+ return -ENOMEM;
14438+}
dece6358 14439+
1308ab2a 14440+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
14441+{
14442+ int err, sz;
14443+ struct au_hinode *hip;
1facf9fc 14444+
1308ab2a 14445+ AuRwMustWriteLock(&iinfo->ii_rwsem);
1facf9fc 14446+
1308ab2a 14447+ err = -ENOMEM;
14448+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
14449+ if (!sz)
14450+ sz = sizeof(*hip);
14451+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
14452+ if (hip) {
14453+ iinfo->ii_hinode = hip;
1facf9fc 14454+ err = 0;
14455+ }
14456+
1308ab2a 14457+ return err;
14458+}
1facf9fc 14459+
1308ab2a 14460+static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode,
14461+ ino_t ino)
14462+{
14463+ int err;
14464+ aufs_bindex_t bindex;
14465+ unsigned char locked;
1facf9fc 14466+
1308ab2a 14467+ err = 0;
14468+ locked = !!si_noflush_read_trylock(sb);
14469+ bindex = au_br_index(sb, hinode->hi_id);
14470+ if (bindex >= 0)
14471+ err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino);
14472+ /* error action? */
14473+ if (locked)
14474+ si_read_unlock(sb);
14475+ return err;
14476+}
14477+
14478+void au_iinfo_fin(struct inode *inode)
14479+{
14480+ ino_t ino;
14481+ aufs_bindex_t bend;
14482+ unsigned char unlinked = !inode->i_nlink;
14483+ struct au_iinfo *iinfo;
14484+ struct au_hinode *hi;
14485+ struct super_block *sb;
14486+
14487+ if (unlinked) {
14488+ int err = au_xigen_inc(inode);
14489+ if (unlikely(err))
14490+ AuWarn1("failed resetting i_generation, %d\n", err);
1facf9fc 14491+ }
14492+
1308ab2a 14493+ iinfo = au_ii(inode);
14494+ /* bad_inode case */
14495+ if (!iinfo)
14496+ return;
1facf9fc 14497+
1308ab2a 14498+ if (iinfo->ii_vdir)
14499+ au_vdir_free(iinfo->ii_vdir);
14500+
14501+ if (iinfo->ii_bstart >= 0) {
14502+ sb = inode->i_sb;
14503+ ino = 0;
14504+ if (unlinked)
14505+ ino = inode->i_ino;
14506+ hi = iinfo->ii_hinode + iinfo->ii_bstart;
14507+ bend = iinfo->ii_bend;
14508+ while (iinfo->ii_bstart++ <= bend) {
14509+ if (hi->hi_inode) {
14510+ if (unlinked || !hi->hi_inode->i_nlink) {
14511+ au_iinfo_write0(sb, hi, ino);
14512+ /* ignore this error */
14513+ ino = 0;
14514+ }
14515+ au_hiput(hi);
14516+ }
14517+ hi++;
14518+ }
1facf9fc 14519+ }
14520+
1308ab2a 14521+ kfree(iinfo->ii_hinode);
14522+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 14523+}
1308ab2a 14524diff -uprN -x .git linux-2.6.31/fs/aufs/inode.c aufs2-2.6.git/fs/aufs/inode.c
14525--- linux-2.6.31/fs/aufs/inode.c 1970-01-01 00:00:00.000000000 +0000
14526+++ aufs2-2.6.git/fs/aufs/inode.c 2009-09-21 21:49:23.404940801 +0000
14527@@ -0,0 +1,414 @@
1facf9fc 14528+/*
14529+ * Copyright (C) 2005-2009 Junjiro R. Okajima
14530+ *
14531+ * This program, aufs is free software; you can redistribute it and/or modify
14532+ * it under the terms of the GNU General Public License as published by
14533+ * the Free Software Foundation; either version 2 of the License, or
14534+ * (at your option) any later version.
dece6358
AM
14535+ *
14536+ * This program is distributed in the hope that it will be useful,
14537+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14538+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14539+ * GNU General Public License for more details.
14540+ *
14541+ * You should have received a copy of the GNU General Public License
14542+ * along with this program; if not, write to the Free Software
14543+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 14544+ */
14545+
14546+/*
1308ab2a 14547+ * inode functions
1facf9fc 14548+ */
14549+
14550+#include "aufs.h"
14551+
1308ab2a 14552+struct inode *au_igrab(struct inode *inode)
14553+{
14554+ if (inode) {
14555+ AuDebugOn(!atomic_read(&inode->i_count));
14556+ atomic_inc_return(&inode->i_count);
14557+ }
14558+ return inode;
14559+}
1facf9fc 14560+
1308ab2a 14561+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
14562+{
14563+ au_cpup_attr_all(inode, /*force*/0);
14564+ au_update_iigen(inode);
14565+ if (do_version)
14566+ inode->i_version++;
14567+}
1facf9fc 14568+
1308ab2a 14569+int au_refresh_hinode_self(struct inode *inode, int do_attr)
14570+{
14571+ int err;
14572+ aufs_bindex_t bindex, new_bindex;
14573+ unsigned char update;
14574+ struct inode *first;
14575+ struct au_hinode *p, *q, tmp;
14576+ struct super_block *sb;
14577+ struct au_iinfo *iinfo;
1facf9fc 14578+
1308ab2a 14579+ IiMustWriteLock(inode);
1facf9fc 14580+
1308ab2a 14581+ update = 0;
14582+ sb = inode->i_sb;
14583+ iinfo = au_ii(inode);
14584+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
14585+ if (unlikely(err))
14586+ goto out;
1facf9fc 14587+
1308ab2a 14588+ p = iinfo->ii_hinode + iinfo->ii_bstart;
14589+ first = p->hi_inode;
14590+ err = 0;
14591+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
14592+ bindex++, p++) {
14593+ if (!p->hi_inode)
14594+ continue;
1facf9fc 14595+
1308ab2a 14596+ new_bindex = au_br_index(sb, p->hi_id);
14597+ if (new_bindex == bindex)
14598+ continue;
1facf9fc 14599+
1308ab2a 14600+ if (new_bindex < 0) {
14601+ update++;
14602+ au_hiput(p);
14603+ p->hi_inode = NULL;
14604+ continue;
14605+ }
1facf9fc 14606+
1308ab2a 14607+ if (new_bindex < iinfo->ii_bstart)
14608+ iinfo->ii_bstart = new_bindex;
14609+ if (iinfo->ii_bend < new_bindex)
14610+ iinfo->ii_bend = new_bindex;
14611+ /* swap two lower inode, and loop again */
14612+ q = iinfo->ii_hinode + new_bindex;
14613+ tmp = *q;
14614+ *q = *p;
14615+ *p = tmp;
14616+ if (tmp.hi_inode) {
14617+ bindex--;
14618+ p--;
14619+ }
14620+ }
14621+ au_update_brange(inode, /*do_put_zero*/0);
14622+ if (do_attr)
14623+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
1facf9fc 14624+
1308ab2a 14625+ out:
14626+ return err;
14627+}
1facf9fc 14628+
1308ab2a 14629+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
dece6358 14630+{
1308ab2a 14631+ int err, update;
14632+ unsigned int flags;
14633+ aufs_bindex_t bindex, bend;
14634+ unsigned char isdir;
14635+ struct inode *first;
14636+ struct au_hinode *p;
14637+ struct au_iinfo *iinfo;
dece6358 14638+
1308ab2a 14639+ err = au_refresh_hinode_self(inode, /*do_attr*/0);
14640+ if (unlikely(err))
14641+ goto out;
1facf9fc 14642+
1308ab2a 14643+ update = 0;
14644+ iinfo = au_ii(inode);
14645+ p = iinfo->ii_hinode + iinfo->ii_bstart;
14646+ first = p->hi_inode;
14647+ isdir = S_ISDIR(inode->i_mode);
14648+ flags = au_hi_flags(inode, isdir);
14649+ bend = au_dbend(dentry);
14650+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
14651+ struct inode *h_i;
14652+ struct dentry *h_d;
1facf9fc 14653+
1308ab2a 14654+ h_d = au_h_dptr(dentry, bindex);
14655+ if (!h_d || !h_d->d_inode)
14656+ continue;
1facf9fc 14657+
1308ab2a 14658+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
14659+ h_i = au_h_iptr(inode, bindex);
14660+ if (h_i) {
14661+ if (h_i == h_d->d_inode)
14662+ continue;
14663+ err = -EIO;
14664+ break;
14665+ }
14666+ }
14667+ if (bindex < iinfo->ii_bstart)
14668+ iinfo->ii_bstart = bindex;
14669+ if (iinfo->ii_bend < bindex)
14670+ iinfo->ii_bend = bindex;
14671+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
14672+ update = 1;
1facf9fc 14673+ }
1308ab2a 14674+ au_update_brange(inode, /*do_put_zero*/0);
1facf9fc 14675+
1308ab2a 14676+ if (unlikely(err))
14677+ goto out;
1facf9fc 14678+
1308ab2a 14679+ au_refresh_hinode_attr(inode, update && isdir);
1facf9fc 14680+
1308ab2a 14681+ out:
14682+ AuTraceErr(err);
14683+ return err;
dece6358 14684+}
1facf9fc 14685+
1308ab2a 14686+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 14687+{
1308ab2a 14688+ int err;
14689+ unsigned int flags;
14690+ umode_t mode;
14691+ aufs_bindex_t bindex, bstart, btail;
14692+ unsigned char isdir;
14693+ struct dentry *h_dentry;
14694+ struct inode *h_inode;
14695+ struct au_iinfo *iinfo;
1facf9fc 14696+
1308ab2a 14697+ IiMustWriteLock(inode);
14698+
14699+ err = 0;
14700+ isdir = 0;
14701+ bstart = au_dbstart(dentry);
14702+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
14703+ mode = h_inode->i_mode;
14704+ switch (mode & S_IFMT) {
14705+ case S_IFREG:
14706+ btail = au_dbtail(dentry);
14707+ inode->i_op = &aufs_iop;
14708+ inode->i_fop = &aufs_file_fop;
14709+ inode->i_mapping->a_ops = &aufs_aop;
14710+ break;
14711+ case S_IFDIR:
14712+ isdir = 1;
14713+ btail = au_dbtaildir(dentry);
14714+ inode->i_op = &aufs_dir_iop;
14715+ inode->i_fop = &aufs_dir_fop;
14716+ break;
14717+ case S_IFLNK:
14718+ btail = au_dbtail(dentry);
14719+ inode->i_op = &aufs_symlink_iop;
14720+ break;
14721+ case S_IFBLK:
14722+ case S_IFCHR:
14723+ case S_IFIFO:
14724+ case S_IFSOCK:
14725+ btail = au_dbtail(dentry);
14726+ inode->i_op = &aufs_iop;
14727+ init_special_inode(inode, mode, h_inode->i_rdev);
14728+ break;
14729+ default:
14730+ AuIOErr("Unknown file type 0%o\n", mode);
14731+ err = -EIO;
14732+ goto out;
dece6358 14733+ }
1308ab2a 14734+
14735+ /* do not set inotify for whiteouted dirs (SHWH mode) */
14736+ flags = au_hi_flags(inode, isdir);
14737+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14738+ && au_ftest_hi(flags, HINOTIFY)
14739+ && dentry->d_name.len > AUFS_WH_PFX_LEN
14740+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14741+ au_fclr_hi(flags, HINOTIFY);
14742+ iinfo = au_ii(inode);
14743+ iinfo->ii_bstart = bstart;
14744+ iinfo->ii_bend = btail;
14745+ for (bindex = bstart; bindex <= btail; bindex++) {
14746+ h_dentry = au_h_dptr(dentry, bindex);
14747+ if (h_dentry)
14748+ au_set_h_iptr(inode, bindex,
14749+ au_igrab(h_dentry->d_inode), flags);
1facf9fc 14750+ }
1308ab2a 14751+ au_cpup_attr_all(inode, /*force*/1);
1facf9fc 14752+
1308ab2a 14753+ out:
14754+ return err;
1facf9fc 14755+}
14756+
1308ab2a 14757+/* revalidate @inode for @dentry; success returns with iinfo write_locked */
14758+static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched)
1facf9fc 14759+{
1308ab2a 14760+ int err;
14761+ aufs_bindex_t bindex, bend;
14762+ struct inode *h_inode, *h_dinode;
1facf9fc 14763+
1308ab2a 14764+ *matched = 0; /* becomes 1 if @inode holds the lower inode at au_dbstart(dentry) */
1facf9fc 14765+
1308ab2a 14766+ /*
14767+ * before this function is called, aufs may hold at most one iinfo
14768+ * lock, that of the parent dir; fail with -EIO if @inode is that parent.
14769+ * this can happen with UDBA and an obsolete inode number.
14770+ */
14771+ err = -EIO;
14772+ if (unlikely(inode->i_ino == parent_ino(dentry)))
14773+ goto out;
1facf9fc 14774+
1308ab2a 14775+ err = 0;
14776+ ii_write_lock_new_child(inode);
14777+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14778+ bend = au_ibend(inode);
14779+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14780+ h_inode = au_h_iptr(inode, bindex);
14781+ if (h_inode && h_inode == h_dinode) {
14782+ *matched = 1;
14783+ err = 0;
14784+ if (au_iigen(inode) != au_digen(dentry)) /* generations differ: refresh */
14785+ err = au_refresh_hinode(inode, dentry);
14786+ break;
dece6358 14787+ }
1facf9fc 14788+ }
14789+
1308ab2a 14790+ if (unlikely(err)) /* the lock is kept only on success */
14791+ ii_write_unlock(inode);
14792+ out:
14793+ return err;
1facf9fc 14794+}
14795+
1308ab2a 14796+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14797+ unsigned int d_type, ino_t *ino)
1facf9fc 14798+{
14799+ int err;
1308ab2a 14800+ struct mutex *mtx;
14801+ const int isdir = (d_type == DT_DIR);
1facf9fc 14802+
1308ab2a 14803+ /* prevent hardlinks from race condition */
14804+ mtx = NULL;
14805+ if (!isdir) {
14806+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14807+ mutex_lock(mtx);
14808+ }
14809+ err = au_xino_read(sb, bindex, h_ino, ino);
14810+ if (unlikely(err))
14811+ goto out;
1facf9fc 14812+
1308ab2a 14813+ if (!*ino) {
14814+ err = -EIO;
14815+ *ino = au_xino_new_ino(sb);
14816+ if (unlikely(!*ino))
14817+ goto out;
14818+ err = au_xino_write(sb, bindex, h_ino, *ino);
14819+ if (unlikely(err))
14820+ goto out;
1facf9fc 14821+ }
14822+
1308ab2a 14823+ out:
14824+ if (!isdir)
14825+ mutex_unlock(mtx);
1facf9fc 14826+ return err;
14827+}
14828+
1308ab2a 14829+/* find or create the aufs inode for @dentry; returns ERR_PTR on failure */
14830+/* success returns with iinfo write_locked. todo: return with unlocked? */
14831+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 14832+{
1308ab2a 14833+ struct inode *inode;
14834+ struct dentry *h_dentry;
14835+ struct super_block *sb;
14836+ ino_t h_ino, ino;
14837+ int err, match;
14838+ aufs_bindex_t bstart;
dece6358 14839+
1308ab2a 14840+ sb = dentry->d_sb;
14841+ bstart = au_dbstart(dentry);
14842+ h_dentry = au_h_dptr(dentry, bstart);
14843+ h_ino = h_dentry->d_inode->i_ino;
14844+ err = au_xino_read(sb, bstart, h_ino, &ino);
14845+ inode = ERR_PTR(err);
14846+ if (unlikely(err))
14847+ goto out;
14848+ new_ino:
14849+ if (!ino) { /* no aufs inode number assigned yet (or it was reset below) */
14850+ ino = au_xino_new_ino(sb);
14851+ if (unlikely(!ino)) {
14852+ inode = ERR_PTR(-EIO);
14853+ goto out;
14854+ }
dece6358
AM
14855+ }
14856+
1308ab2a 14857+ AuDbg("i%lu\n", (unsigned long)ino);
14858+ inode = au_iget_locked(sb, ino);
14859+ err = PTR_ERR(inode);
14860+ if (IS_ERR(inode))
14861+ goto out;
14862+
14863+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14864+ if (inode->i_state & I_NEW) {
14865+ ii_write_lock_new_child(inode);
14866+ err = set_inode(inode, dentry);
14867+ if (!err) {
14868+ unlock_new_inode(inode); /* clear I_NEW only once, on success */
14869+ goto out; /* success */
14870+ }
14871+ atomic_inc(&inode->i_count); /* iget_failed() unlocks and iput()s */
14872+ iget_failed(inode);
14873+ ii_write_unlock(inode); /* still valid thanks to the extra ref */
14874+ goto out_iput;
14875+ } else if (!must_new) {
14876+ err = reval_inode(inode, dentry, &match);
14877+ if (!err)
14878+ goto out; /* success */
14879+ else if (match)
14880+ goto out_iput;
14881+ }
14882+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14883+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14884+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
14885+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14886+ (unsigned long)h_ino, (unsigned long)ino);
14887+ ino = 0; /* drop the stale number and retry with a fresh one */
14888+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
14889+ if (!err) {
14890+ iput(inode);
14891+ goto new_ino;
14892+ }
14893+
14894+ out_iput:
14895+ iput(inode);
14896+ inode = ERR_PTR(err);
14897+ out:
14898+ return inode;
1facf9fc 14899+}
14900+
1308ab2a 14901+/* ---------------------------------------------------------------------- */
14902+
14903+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14904+ struct inode *inode)
1facf9fc 14905+{
dece6358 14906+ int err;
1facf9fc 14907+
1308ab2a 14908+ err = au_br_rdonly(au_sbr(sb, bindex));
14909+
14910+ /* pseudo-link after flushed may happen out of bounds */
14911+ if (!err
14912+ && inode
14913+ && au_ibstart(inode) <= bindex
14914+ && bindex <= au_ibend(inode)) {
14915+ /*
14916+ * permission check is unnecessary since vfsub routine
14917+ * will be called later
14918+ */
14919+ struct inode *hi = au_h_iptr(inode, bindex);
14920+ if (hi)
14921+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
14922+ }
1facf9fc 14923+
dece6358
AM
14924+ return err;
14925+}
1facf9fc 14926+
1308ab2a 14927+int au_test_h_perm(struct inode *h_inode, int mask)
dece6358 14928+{
1308ab2a 14929+ if (!current_fsuid())
14930+ return 0;
14931+ return inode_permission(h_inode, mask);
14932+}
1facf9fc 14933+
1308ab2a 14934+int au_test_h_perm_sio(struct inode *h_inode, int mask)
14935+{
14936+ if (au_test_nfs(h_inode->i_sb)
14937+ && (mask & MAY_WRITE)
14938+ && S_ISDIR(h_inode->i_mode))
14939+ mask |= MAY_READ; /* force permission check */
14940+ return au_test_h_perm(h_inode, mask);
14941+}
14942diff -uprN -x .git linux-2.6.31/fs/aufs/inode.h aufs2-2.6.git/fs/aufs/inode.h
14943--- linux-2.6.31/fs/aufs/inode.h 1970-01-01 00:00:00.000000000 +0000
14944+++ aufs2-2.6.git/fs/aufs/inode.h 2009-09-21 21:49:23.404940801 +0000
14945@@ -0,0 +1,497 @@
14946+/*
14947+ * Copyright (C) 2005-2009 Junjiro R. Okajima
14948+ *
14949+ * This program, aufs is free software; you can redistribute it and/or modify
14950+ * it under the terms of the GNU General Public License as published by
14951+ * the Free Software Foundation; either version 2 of the License, or
14952+ * (at your option) any later version.
14953+ *
14954+ * This program is distributed in the hope that it will be useful,
14955+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14956+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14957+ * GNU General Public License for more details.
14958+ *
14959+ * You should have received a copy of the GNU General Public License
14960+ * along with this program; if not, write to the Free Software
14961+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
14962+ */
1facf9fc 14963+
1308ab2a 14964+/*
14965+ * inode operations
14966+ */
1facf9fc 14967+
1308ab2a 14968+#ifndef __AUFS_INODE_H__
14969+#define __AUFS_INODE_H__
1facf9fc 14970+
1308ab2a 14971+#ifdef __KERNEL__
1facf9fc 14972+
1308ab2a 14973+#include <linux/fs.h>
14974+#include <linux/inotify.h>
14975+#include <linux/aufs_type.h>
14976+#include "rwsem.h"
1facf9fc 14977+
1308ab2a 14978+struct vfsmount;
1facf9fc 14979+
1308ab2a 14980+struct au_hinotify {
14981+#ifdef CONFIG_AUFS_HINOTIFY
14982+ struct inotify_watch hin_watch;
14983+ struct inode *hin_aufs_inode; /* no get/put */
14984+#endif
14985+};
1facf9fc 14986+
1308ab2a 14987+struct au_hinode {
14988+ struct inode *hi_inode;
14989+ aufs_bindex_t hi_id;
14990+#ifdef CONFIG_AUFS_HINOTIFY
14991+ struct au_hinotify *hi_notify;
14992+#endif
1facf9fc 14993+
1308ab2a 14994+ /* reference to the copied-up whiteout with get/put */
14995+ struct dentry *hi_whdentry;
14996+};
dece6358 14997+
1308ab2a 14998+struct au_vdir;
14999+struct au_iinfo {
15000+ atomic_t ii_generation;
15001+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 15002+
1308ab2a 15003+ struct au_rwsem ii_rwsem;
15004+ aufs_bindex_t ii_bstart, ii_bend;
15005+ __u32 ii_higen;
15006+ struct au_hinode *ii_hinode;
15007+ struct au_vdir *ii_vdir;
15008+};
1facf9fc 15009+
1308ab2a 15010+struct au_icntnr {
15011+ struct au_iinfo iinfo;
15012+ struct inode vfs_inode;
15013+};
1facf9fc 15014+
1308ab2a 15015+/* au_pin flags */
15016+#define AuPin_DI_LOCKED 1
15017+#define AuPin_MNT_WRITE (1 << 1)
15018+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
15019+#define au_fset_pin(flags, name) { (flags) |= AuPin_##name; }
15020+#define au_fclr_pin(flags, name) { (flags) &= ~AuPin_##name; }
15021+
15022+struct au_pin {
15023+ /* input */
15024+ struct dentry *dentry;
15025+ unsigned int udba;
15026+ unsigned char lsc_di, lsc_hi, flags;
15027+ aufs_bindex_t bindex;
15028+
15029+ /* output */
15030+ struct dentry *parent;
15031+ struct au_hinode *hdir;
15032+ struct vfsmount *h_mnt;
15033+};
1facf9fc 15034+
15035+/* ---------------------------------------------------------------------- */
15036+
1308ab2a 15037+static inline struct au_iinfo *au_ii(struct inode *inode)
dece6358 15038+{
1308ab2a 15039+ struct au_iinfo *iinfo;
15040+
15041+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15042+ if (iinfo->ii_hinode)
15043+ return iinfo;
15044+ return NULL; /* debugging bad_inode case */
dece6358 15045+}
1facf9fc 15046+
1308ab2a 15047+/* ---------------------------------------------------------------------- */
1facf9fc 15048+
1308ab2a 15049+/* inode.c */
15050+struct inode *au_igrab(struct inode *inode);
15051+int au_refresh_hinode_self(struct inode *inode, int do_attr);
15052+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
15053+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
15054+ unsigned int d_type, ino_t *ino);
15055+struct inode *au_new_inode(struct dentry *dentry, int must_new);
15056+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
15057+ struct inode *inode);
15058+int au_test_h_perm(struct inode *h_inode, int mask);
15059+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 15060+
1308ab2a 15061+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
15062+ ino_t h_ino, unsigned int d_type, ino_t *ino)
15063+{
15064+#ifdef CONFIG_AUFS_SHWH
15065+ return au_ino(sb, bindex, h_ino, d_type, ino);
15066+#else
15067+ return 0;
15068+#endif
15069+}
1facf9fc 15070+
1308ab2a 15071+/* i_op.c */
15072+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
1facf9fc 15073+
1308ab2a 15074+/* au_wr_dir flags */
15075+#define AuWrDir_ADD_ENTRY 1
15076+#define AuWrDir_ISDIR (1 << 1)
15077+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
15078+#define au_fset_wrdir(flags, name) { (flags) |= AuWrDir_##name; }
15079+#define au_fclr_wrdir(flags, name) { (flags) &= ~AuWrDir_##name; }
1facf9fc 15080+
1308ab2a 15081+struct au_wr_dir_args {
15082+ aufs_bindex_t force_btgt;
15083+ unsigned char flags;
15084+};
15085+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
15086+ struct au_wr_dir_args *args);
1facf9fc 15087+
1308ab2a 15088+struct dentry *au_pinned_h_parent(struct au_pin *pin);
15089+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
15090+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
15091+ unsigned int udba, unsigned char flags);
15092+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
15093+ unsigned int udba, unsigned char flags) __must_check;
15094+int au_do_pin(struct au_pin *pin) __must_check;
15095+void au_unpin(struct au_pin *pin);
1facf9fc 15096+
1308ab2a 15097+/* i_op_add.c */
15098+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15099+ struct dentry *h_parent, int isdir);
15100+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
15101+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
15102+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15103+ struct nameidata *nd);
15104+int aufs_link(struct dentry *src_dentry, struct inode *dir,
15105+ struct dentry *dentry);
15106+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
1facf9fc 15107+
1308ab2a 15108+/* i_op_del.c */
15109+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
15110+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
15111+ struct dentry *h_parent, int isdir);
15112+int aufs_unlink(struct inode *dir, struct dentry *dentry);
15113+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1facf9fc 15114+
1308ab2a 15115+/* i_op_ren.c */
15116+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
15117+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
15118+ struct inode *dir, struct dentry *dentry);
1facf9fc 15119+
1308ab2a 15120+/* iinfo.c */
15121+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
15122+void au_hiput(struct au_hinode *hinode);
15123+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex);
15124+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15125+ struct dentry *h_wh);
15126+unsigned int au_hi_flags(struct inode *inode, int isdir);
1facf9fc 15127+
1308ab2a 15128+/* hinode flags */
15129+#define AuHi_XINO 1
15130+#define AuHi_HINOTIFY (1 << 1)
15131+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
15132+#define au_fset_hi(flags, name) { (flags) |= AuHi_##name; }
15133+#define au_fclr_hi(flags, name) { (flags) &= ~AuHi_##name; }
dece6358 15134+
1308ab2a 15135+#ifndef CONFIG_AUFS_HINOTIFY
15136+#undef AuHi_HINOTIFY
15137+#define AuHi_HINOTIFY 0
15138+#endif
dece6358 15139+
1308ab2a 15140+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15141+ struct inode *h_inode, unsigned int flags);
dece6358 15142+
1308ab2a 15143+void au_update_iigen(struct inode *inode);
15144+void au_update_brange(struct inode *inode, int do_put_zero);
1facf9fc 15145+
1308ab2a 15146+int au_iinfo_init(struct inode *inode);
15147+void au_iinfo_fin(struct inode *inode);
15148+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
15149+
15150+/* plink.c */
15151+void au_plink_block_maintain(struct super_block *sb);
15152+#ifdef CONFIG_AUFS_DEBUG
15153+void au_plink_list(struct super_block *sb);
15154+#else
15155+static inline void au_plink_list(struct super_block *sb)
15156+{
15157+ /* nothing */
1facf9fc 15158+}
1308ab2a 15159+#endif
15160+int au_plink_test(struct inode *inode);
15161+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
15162+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
15163+ struct dentry *h_dentry);
15164+void au_plink_put(struct super_block *sb);
15165+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
15166+long au_plink_ioctl(struct file *file, unsigned int cmd);
1facf9fc 15167+
dece6358 15168+/* ---------------------------------------------------------------------- */
1facf9fc 15169+
1308ab2a 15170+/* lock subclass for iinfo */
15171+enum {
15172+ AuLsc_II_CHILD, /* child first */
15173+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hinotify */
15174+ AuLsc_II_CHILD3, /* copyup dirs */
15175+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
15176+ AuLsc_II_PARENT2,
15177+ AuLsc_II_PARENT3, /* copyup dirs */
15178+ AuLsc_II_NEW_CHILD
15179+};
15180+
1facf9fc 15181+/*
1308ab2a 15182+ * ii_read_lock_child, ii_write_lock_child,
15183+ * ii_read_lock_child2, ii_write_lock_child2,
15184+ * ii_read_lock_child3, ii_write_lock_child3,
15185+ * ii_read_lock_parent, ii_write_lock_parent,
15186+ * ii_read_lock_parent2, ii_write_lock_parent2,
15187+ * ii_read_lock_parent3, ii_write_lock_parent3,
15188+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 15189+ */
1308ab2a 15190+#define AuReadLockFunc(name, lsc) \
15191+static inline void ii_read_lock_##name(struct inode *i) \
15192+{ \
15193+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
1facf9fc 15194+}
15195+
1308ab2a 15196+#define AuWriteLockFunc(name, lsc) \
15197+static inline void ii_write_lock_##name(struct inode *i) \
15198+{ \
15199+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
1facf9fc 15200+}
15201+
1308ab2a 15202+#define AuRWLockFuncs(name, lsc) \
15203+ AuReadLockFunc(name, lsc) \
15204+ AuWriteLockFunc(name, lsc)
1facf9fc 15205+
1308ab2a 15206+AuRWLockFuncs(child, CHILD);
15207+AuRWLockFuncs(child2, CHILD2);
15208+AuRWLockFuncs(child3, CHILD3);
15209+AuRWLockFuncs(parent, PARENT);
15210+AuRWLockFuncs(parent2, PARENT2);
15211+AuRWLockFuncs(parent3, PARENT3);
15212+AuRWLockFuncs(new_child, NEW_CHILD);
1facf9fc 15213+
1308ab2a 15214+#undef AuReadLockFunc
15215+#undef AuWriteLockFunc
15216+#undef AuRWLockFuncs
1facf9fc 15217+
1308ab2a 15218+/*
15219+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
15220+ */
15221+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 15222+
1308ab2a 15223+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
15224+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
15225+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 15226+
1308ab2a 15227+/* ---------------------------------------------------------------------- */
1facf9fc 15228+
1308ab2a 15229+static inline unsigned int au_iigen(struct inode *inode)
15230+{
15231+ return atomic_read(&au_ii(inode)->ii_generation);
15232+}
15233+
15234+/* tiny test for inode number */
15235+/* tmpfs generation is too rough */
15236+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
15237+{
15238+ struct au_iinfo *iinfo;
15239+
15240+ iinfo = au_ii(inode);
15241+ AuRwMustAnyLock(&iinfo->ii_rwsem);
15242+ return !(iinfo->ii_hsb1 == h_inode->i_sb
15243+ && iinfo->ii_higen == h_inode->i_generation);
1facf9fc 15244+}
15245+
15246+/* ---------------------------------------------------------------------- */
15247+
1308ab2a 15248+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
15249+ aufs_bindex_t bindex)
1facf9fc 15250+{
1308ab2a 15251+ IiMustAnyLock(inode);
15252+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
15253+}
dece6358 15254+
1308ab2a 15255+static inline aufs_bindex_t au_ibstart(struct inode *inode)
15256+{
15257+ IiMustAnyLock(inode);
15258+ return au_ii(inode)->ii_bstart;
15259+}
dece6358 15260+
1308ab2a 15261+static inline aufs_bindex_t au_ibend(struct inode *inode)
15262+{
15263+ IiMustAnyLock(inode);
15264+ return au_ii(inode)->ii_bend;
15265+}
dece6358 15266+
1308ab2a 15267+static inline struct au_vdir *au_ivdir(struct inode *inode)
15268+{
15269+ IiMustAnyLock(inode);
15270+ return au_ii(inode)->ii_vdir;
1facf9fc 15271+}
15272+
1308ab2a 15273+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
1facf9fc 15274+{
1308ab2a 15275+ IiMustAnyLock(inode);
15276+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
15277+}
dece6358 15278+
1308ab2a 15279+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
15280+{
15281+ IiMustWriteLock(inode);
15282+ au_ii(inode)->ii_bend = bindex;
15283+}
dece6358 15284+
1308ab2a 15285+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
15286+{
15287+ IiMustWriteLock(inode);
15288+ au_ii(inode)->ii_vdir = vdir;
1facf9fc 15289+}
15290+
1308ab2a 15291+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1facf9fc 15292+{
1308ab2a 15293+ IiMustAnyLock(inode);
15294+ return au_ii(inode)->ii_hinode + bindex;
15295+}
dece6358 15296+
1308ab2a 15297+/* ---------------------------------------------------------------------- */
dece6358 15298+
1308ab2a 15299+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
15300+{
15301+ if (pin)
15302+ return pin->parent;
15303+ return NULL;
1facf9fc 15304+}
15305+
1308ab2a 15306+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 15307+{
1308ab2a 15308+ if (pin && pin->hdir)
15309+ return pin->hdir->hi_inode;
15310+ return NULL;
15311+}
1facf9fc 15312+
1308ab2a 15313+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
15314+{
15315+ if (pin)
15316+ return pin->hdir;
15317+ return NULL;
15318+}
dece6358 15319+
1308ab2a 15320+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
15321+{
15322+ if (pin)
15323+ pin->dentry = dentry;
15324+}
dece6358 15325+
1308ab2a 15326+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
15327+ unsigned char lflag)
15328+{
15329+ if (pin) {
15330+ /* dirty macros require brackets */
15331+ if (lflag) {
15332+ au_fset_pin(pin->flags, DI_LOCKED);
15333+ } else {
15334+ au_fclr_pin(pin->flags, DI_LOCKED);
1facf9fc 15335+ }
15336+ }
15337+}
15338+
1308ab2a 15339+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
1facf9fc 15340+{
1308ab2a 15341+ if (pin) {
15342+ dput(pin->parent);
15343+ pin->parent = dget(parent);
1facf9fc 15344+ }
1308ab2a 15345+}
1facf9fc 15346+
1308ab2a 15347+/* ---------------------------------------------------------------------- */
1facf9fc 15348+
1308ab2a 15349+#ifdef CONFIG_AUFS_HINOTIFY
15350+/* hinotify.c */
15351+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
15352+ struct inode *h_inode);
15353+void au_hin_free(struct au_hinode *hinode);
15354+void au_hin_ctl(struct au_hinode *hinode, int do_set);
15355+void au_reset_hinotify(struct inode *inode, unsigned int flags);
1facf9fc 15356+
1308ab2a 15357+int __init au_hinotify_init(void);
15358+void au_hinotify_fin(void);
1facf9fc 15359+
1308ab2a 15360+static inline
15361+void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val)
15362+{
15363+ hinode->hi_notify = val;
15364+}
1facf9fc 15365+
1308ab2a 15366+static inline void au_iigen_dec(struct inode *inode)
15367+{
15368+ atomic_dec_return(&au_ii(inode)->ii_generation);
15369+}
1facf9fc 15370+
1308ab2a 15371+#else
15372+static inline
15373+int au_hin_alloc(struct au_hinode *hinode __maybe_unused,
15374+ struct inode *inode __maybe_unused,
15375+ struct inode *h_inode __maybe_unused)
15376+{
15377+ return -EOPNOTSUPP;
15378+}
1facf9fc 15379+
1308ab2a 15380+static inline void au_hin_free(struct au_hinode *hinode __maybe_unused)
15381+{
15382+ /* nothing */
15383+}
1facf9fc 15384+
1308ab2a 15385+static inline void au_hin_ctl(struct au_hinode *hinode __maybe_unused,
15386+ int do_set __maybe_unused)
15387+{
15388+ /* nothing */
15389+}
1facf9fc 15390+
1308ab2a 15391+static inline void au_reset_hinotify(struct inode *inode __maybe_unused,
15392+ unsigned int flags __maybe_unused)
15393+{
15394+ /* nothing */
15395+}
1facf9fc 15396+
1308ab2a 15397+static inline int au_hinotify_init(void)
15398+{
15399+ return 0;
15400+}
1facf9fc 15401+
1308ab2a 15402+#define au_hinotify_fin() do {} while (0)
1facf9fc 15403+
1308ab2a 15404+static inline
15405+void au_hin_init(struct au_hinode *hinode __maybe_unused,
15406+ struct au_hinotify *val __maybe_unused)
15407+{
15408+ /* empty */
15409+}
15410+#endif /* CONFIG_AUFS_HINOTIFY */
dece6358 15411+
1308ab2a 15412+static inline void au_hin_suspend(struct au_hinode *hdir)
15413+{
15414+ au_hin_ctl(hdir, /*do_set*/0);
1facf9fc 15415+}
dece6358 15416+
1308ab2a 15417+static inline void au_hin_resume(struct au_hinode *hdir)
15418+{
15419+ au_hin_ctl(hdir, /*do_set*/1);
15420+}
1facf9fc 15421+
1308ab2a 15422+static inline void au_hin_imtx_lock(struct au_hinode *hdir)
15423+{
15424+ mutex_lock(&hdir->hi_inode->i_mutex);
15425+ au_hin_suspend(hdir);
15426+}
1facf9fc 15427+
1308ab2a 15428+static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir,
15429+ unsigned int sc __maybe_unused)
15430+{
15431+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
15432+ au_hin_suspend(hdir);
15433+}
1facf9fc 15434+
1308ab2a 15435+static inline void au_hin_imtx_unlock(struct au_hinode *hdir)
15436+{
15437+ au_hin_resume(hdir);
15438+ mutex_unlock(&hdir->hi_inode->i_mutex);
15439+}
1facf9fc 15440+
1308ab2a 15441+#endif /* __KERNEL__ */
15442+#endif /* __AUFS_INODE_H__ */
15443diff -uprN -x .git linux-2.6.31/fs/aufs/ioctl.c aufs2-2.6.git/fs/aufs/ioctl.c
15444--- linux-2.6.31/fs/aufs/ioctl.c 1970-01-01 00:00:00.000000000 +0000
15445+++ aufs2-2.6.git/fs/aufs/ioctl.c 2009-09-21 21:49:23.404940801 +0000
15446@@ -0,0 +1,47 @@
15447+/*
15448+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15449+ *
15450+ * This program, aufs is free software; you can redistribute it and/or modify
15451+ * it under the terms of the GNU General Public License as published by
15452+ * the Free Software Foundation; either version 2 of the License, or
15453+ * (at your option) any later version.
15454+ *
15455+ * This program is distributed in the hope that it will be useful,
15456+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15457+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15458+ * GNU General Public License for more details.
15459+ *
15460+ * You should have received a copy of the GNU General Public License
15461+ * along with this program; if not, write to the Free Software
15462+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
15463+ */
1facf9fc 15464+
1308ab2a 15465+/*
15466+ * ioctl
15467+ * plink-management and readdir in userspace.
15468+ */
1facf9fc 15469+
1308ab2a 15470+#include "aufs.h"
1facf9fc 15471+
1308ab2a 15472+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
15473+{
15474+ long err;
1facf9fc 15475+
1308ab2a 15476+ switch (cmd) {
15477+ case AUFS_CTL_PLINK_MAINT:
15478+ case AUFS_CTL_PLINK_CLEAN:
15479+ err = au_plink_ioctl(file, cmd);
15480+ break;
1facf9fc 15481+
1308ab2a 15482+ case AUFS_CTL_RDU:
15483+ case AUFS_CTL_RDU_INO:
15484+ err = au_rdu_ioctl(file, cmd, arg);
15485+ break;
1facf9fc 15486+
1308ab2a 15487+ default:
15488+ err = -EINVAL;
15489+ }
15490+
15491+ AuTraceErr(err);
15492+ return err;
15493+}
15494diff -uprN -x .git linux-2.6.31/fs/aufs/loop.c aufs2-2.6.git/fs/aufs/loop.c
15495--- linux-2.6.31/fs/aufs/loop.c 1970-01-01 00:00:00.000000000 +0000
15496+++ aufs2-2.6.git/fs/aufs/loop.c 2009-09-21 21:49:23.404940801 +0000
dece6358 15497@@ -0,0 +1,55 @@
1facf9fc 15498+/*
15499+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15500+ *
15501+ * This program, aufs is free software; you can redistribute it and/or modify
15502+ * it under the terms of the GNU General Public License as published by
15503+ * the Free Software Foundation; either version 2 of the License, or
15504+ * (at your option) any later version.
dece6358
AM
15505+ *
15506+ * This program is distributed in the hope that it will be useful,
15507+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15508+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15509+ * GNU General Public License for more details.
15510+ *
15511+ * You should have received a copy of the GNU General Public License
15512+ * along with this program; if not, write to the Free Software
15513+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 15514+ */
15515+
15516+/*
15517+ * support for loopback block device as a branch
15518+ */
15519+
15520+#include <linux/loop.h>
15521+#include "aufs.h"
15522+
15523+/*
15524+ * test if two lower dentries have overlapping branches.
15525+ */
15526+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15527+ struct dentry *h_d2)
15528+{
15529+ struct inode *h_inode;
15530+ struct loop_device *l;
15531+
15532+ h_inode = h_d1->d_inode;
15533+ if (MAJOR(h_inode->i_sb->s_dev) != LOOP_MAJOR)
15534+ return 0;
15535+
15536+ l = h_inode->i_sb->s_bdev->bd_disk->private_data;
15537+ h_d1 = l->lo_backing_file->f_dentry;
15538+ /* h_d1 can be local NFS. in this case aufs cannot detect the loop */
15539+ if (unlikely(h_d1->d_sb == sb))
15540+ return 1;
15541+ return !!au_test_subdir(h_d1, h_d2);
15542+}
15543+
15544+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
15545+int au_test_loopback_kthread(void)
15546+{
15547+ const char c = current->comm[4];
15548+
15549+ return current->mm == NULL
15550+ && '0' <= c && c <= '9'
15551+ && strncmp(current->comm, "loop", 4) == 0;
15552+}
1308ab2a 15553diff -uprN -x .git linux-2.6.31/fs/aufs/loop.h aufs2-2.6.git/fs/aufs/loop.h
15554--- linux-2.6.31/fs/aufs/loop.h 1970-01-01 00:00:00.000000000 +0000
15555+++ aufs2-2.6.git/fs/aufs/loop.h 2009-09-21 21:49:23.404940801 +0000
dece6358 15556@@ -0,0 +1,51 @@
1facf9fc 15557+/*
15558+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15559+ *
15560+ * This program, aufs is free software; you can redistribute it and/or modify
15561+ * it under the terms of the GNU General Public License as published by
15562+ * the Free Software Foundation; either version 2 of the License, or
15563+ * (at your option) any later version.
dece6358
AM
15564+ *
15565+ * This program is distributed in the hope that it will be useful,
15566+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15567+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15568+ * GNU General Public License for more details.
15569+ *
15570+ * You should have received a copy of the GNU General Public License
15571+ * along with this program; if not, write to the Free Software
15572+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 15573+ */
15574+
15575+/*
15576+ * support for loopback mount as a branch
15577+ */
15578+
15579+#ifndef __AUFS_LOOP_H__
15580+#define __AUFS_LOOP_H__
15581+
15582+#ifdef __KERNEL__
15583+
dece6358
AM
15584+struct dentry;
15585+struct super_block;
1facf9fc 15586+
15587+#ifdef CONFIG_AUFS_BDEV_LOOP
15588+/* loop.c */
15589+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15590+ struct dentry *h_d2);
15591+int au_test_loopback_kthread(void);
15592+#else
15593+static inline
15594+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15595+ struct dentry *h_d2)
15596+{
15597+ return 0;
15598+}
15599+
15600+static inline int au_test_loopback_kthread(void)
15601+{
15602+ return 0;
15603+}
15604+#endif /* CONFIG_AUFS_BDEV_LOOP */
15605+
15606+#endif /* __KERNEL__ */
15607+#endif /* __AUFS_LOOP_H__ */
1308ab2a 15608diff -uprN -x .git linux-2.6.31/fs/aufs/magic.mk aufs2-2.6.git/fs/aufs/magic.mk
15609--- linux-2.6.31/fs/aufs/magic.mk 1970-01-01 00:00:00.000000000 +0000
15610+++ aufs2-2.6.git/fs/aufs/magic.mk 2009-09-21 21:49:23.404940801 +0000
dece6358 15611@@ -0,0 +1,52 @@
1facf9fc 15612+
15613+# defined in ${srctree}/fs/fuse/inode.c
15614+# tristate
15615+ifdef CONFIG_FUSE_FS
15616+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
15617+endif
15618+
15619+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
15620+# tristate
15621+ifdef CONFIG_OCFS2_FS
15622+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
15623+endif
15624+
15625+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
15626+# tristate
15627+ifdef CONFIG_OCFS2_FS_O2CB
15628+ccflags-y += -DDLMFS_MAGIC=0x76a9f425
15629+endif
15630+
15631+# defined in ${srctree}/fs/ramfs/inode.c
15632+# always true
15633+ccflags-y += -DRAMFS_MAGIC=0x858458f6
15634+
15635+# defined in ${srctree}/fs/cifs/cifsfs.c
15636+# tristate
15637+ifdef CONFIG_CIFS_FS
15638+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
15639+endif
15640+
15641+# defined in ${srctree}/fs/xfs/xfs_sb.h
15642+# tristate
15643+ifdef CONFIG_XFS_FS
15644+ccflags-y += -DXFS_SB_MAGIC=0x58465342
15645+endif
15646+
15647+# defined in ${srctree}/fs/configfs/mount.c
15648+# tristate
15649+ifdef CONFIG_CONFIGFS_FS
15650+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
15651+endif
15652+
15653+# defined in ${srctree}/fs/9p/v9fs.h
15654+# tristate
15655+ifdef CONFIG_9P_FS
15656+ccflags-y += -DV9FS_MAGIC=0x01021997
15657+endif
15658+
15659+# defined in ${srctree}/fs/ubifs/ubifs.h
15660+# tristate
15661+ifdef CONFIG_UBIFS_FS
15662+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
15663+endif
1308ab2a 15664diff -uprN -x .git linux-2.6.31/fs/aufs/module.c aufs2-2.6.git/fs/aufs/module.c
15665--- linux-2.6.31/fs/aufs/module.c 1970-01-01 00:00:00.000000000 +0000
15666+++ aufs2-2.6.git/fs/aufs/module.c 2009-09-21 21:49:23.404940801 +0000
dece6358 15667@@ -0,0 +1,173 @@
1facf9fc 15668+/*
15669+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15670+ *
15671+ * This program, aufs is free software; you can redistribute it and/or modify
15672+ * it under the terms of the GNU General Public License as published by
15673+ * the Free Software Foundation; either version 2 of the License, or
15674+ * (at your option) any later version.
dece6358
AM
15675+ *
15676+ * This program is distributed in the hope that it will be useful,
15677+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15678+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15679+ * GNU General Public License for more details.
15680+ *
15681+ * You should have received a copy of the GNU General Public License
15682+ * along with this program; if not, write to the Free Software
15683+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 15684+ */
15685+
15686+/*
15687+ * module global variables and operations
15688+ */
15689+
15690+#include <linux/module.h>
15691+#include <linux/seq_file.h>
15692+#include "aufs.h"
15693+
15694+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
15695+{
15696+ if (new_sz <= nused)
15697+ return p;
15698+
15699+ p = krealloc(p, new_sz, gfp);
15700+ if (p)
15701+ memset(p + nused, 0, new_sz - nused);
15702+ return p;
15703+}
15704+
15705+/* ---------------------------------------------------------------------- */
15706+
15707+/*
15708+ * aufs caches
15709+ */
15710+struct kmem_cache *au_cachep[AuCache_Last];
15711+static int __init au_cache_init(void)
15712+{
15713+ au_cachep[AuCache_DINFO] = AuCache(au_dinfo);
15714+ if (au_cachep[AuCache_DINFO])
15715+ au_cachep[AuCache_ICNTNR] = AuCache(au_icntnr);
15716+ if (au_cachep[AuCache_ICNTNR])
15717+ au_cachep[AuCache_FINFO] = AuCache(au_finfo);
15718+ if (au_cachep[AuCache_FINFO])
15719+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
15720+ if (au_cachep[AuCache_VDIR])
15721+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
15722+ if (au_cachep[AuCache_DEHSTR])
15723+ return 0;
15724+
15725+ return -ENOMEM;
15726+}
15727+
15728+static void au_cache_fin(void)
15729+{
15730+ int i;
15731+ for (i = 0; i < AuCache_Last; i++)
15732+ if (au_cachep[i]) {
15733+ kmem_cache_destroy(au_cachep[i]);
15734+ au_cachep[i] = NULL;
15735+ }
15736+}
15737+
15738+/* ---------------------------------------------------------------------- */
15739+
15740+int au_dir_roflags;
15741+
15742+/*
15743+ * functions for module interface.
15744+ */
15745+MODULE_LICENSE("GPL");
15746+/* MODULE_LICENSE("GPL v2"); */
dece6358 15747+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 15748+MODULE_DESCRIPTION(AUFS_NAME
15749+ " -- Advanced multi layered unification filesystem");
15750+MODULE_VERSION(AUFS_VERSION);
15751+
15752+/* it should be 'byte', but param_set_byte() prints it by "%c" */
15753+short aufs_nwkq = AUFS_NWKQ_DEF;
15754+MODULE_PARM_DESC(nwkq, "the number of workqueue thread, " AUFS_WKQ_NAME);
15755+module_param_named(nwkq, aufs_nwkq, short, S_IRUGO);
15756+
15757+/* this module parameter has no meaning when SYSFS is disabled */
15758+int sysaufs_brs = 1;
15759+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
15760+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
15761+
15762+/* ---------------------------------------------------------------------- */
15763+
/*
 * escape set handed to seq_path(): 0x01-0x20, backslash, DEL and the
 * terminating NUL. It is filled in by aufs_init().
 */
static char au_esc_chars[0x20 + 3];

/* seq_path() wrapper which always passes au_esc_chars as the escape set */
int au_seq_path(struct seq_file *seq, struct path *path)
{
	char *esc = au_esc_chars;

	return seq_path(seq, path, esc);
}
15770+
15771+/* ---------------------------------------------------------------------- */
15772+
15773+static int __init aufs_init(void)
15774+{
15775+ int err, i;
15776+ char *p;
15777+
15778+ p = au_esc_chars;
15779+ for (i = 1; i <= ' '; i++)
15780+ *p++ = i;
15781+ *p++ = '\\';
15782+ *p++ = '\x7f';
15783+ *p = 0;
15784+
15785+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
15786+
15787+ sysaufs_brs_init();
15788+ au_debug_init();
15789+
15790+ err = -EINVAL;
15791+ if (unlikely(aufs_nwkq <= 0))
15792+ goto out;
15793+
15794+ err = sysaufs_init();
15795+ if (unlikely(err))
15796+ goto out;
15797+ err = au_wkq_init();
15798+ if (unlikely(err))
15799+ goto out_sysaufs;
15800+ err = au_hinotify_init();
15801+ if (unlikely(err))
15802+ goto out_wkq;
15803+ err = au_sysrq_init();
15804+ if (unlikely(err))
15805+ goto out_hin;
15806+ err = au_cache_init();
15807+ if (unlikely(err))
15808+ goto out_sysrq;
15809+ err = register_filesystem(&aufs_fs_type);
15810+ if (unlikely(err))
15811+ goto out_cache;
15812+ pr_info(AUFS_NAME " " AUFS_VERSION "\n");
15813+ goto out; /* success */
15814+
15815+ out_cache:
15816+ au_cache_fin();
15817+ out_sysrq:
15818+ au_sysrq_fin();
15819+ out_hin:
15820+ au_hinotify_fin();
15821+ out_wkq:
15822+ au_wkq_fin();
15823+ out_sysaufs:
15824+ sysaufs_fin();
15825+ out:
15826+ return err;
15827+}
15828+
15829+static void __exit aufs_exit(void)
15830+{
15831+ unregister_filesystem(&aufs_fs_type);
15832+ au_cache_fin();
15833+ au_sysrq_fin();
15834+ au_hinotify_fin();
15835+ au_wkq_fin();
15836+ sysaufs_fin();
15837+}
15838+
15839+module_init(aufs_init);
15840+module_exit(aufs_exit);
1308ab2a 15841diff -uprN -x .git linux-2.6.31/fs/aufs/module.h aufs2-2.6.git/fs/aufs/module.h
15842--- linux-2.6.31/fs/aufs/module.h 1970-01-01 00:00:00.000000000 +0000
15843+++ aufs2-2.6.git/fs/aufs/module.h 2009-09-21 21:49:23.404940801 +0000
dece6358 15844@@ -0,0 +1,78 @@
1facf9fc 15845+/*
15846+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15847+ *
15848+ * This program, aufs is free software; you can redistribute it and/or modify
15849+ * it under the terms of the GNU General Public License as published by
15850+ * the Free Software Foundation; either version 2 of the License, or
15851+ * (at your option) any later version.
dece6358
AM
15852+ *
15853+ * This program is distributed in the hope that it will be useful,
15854+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15855+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15856+ * GNU General Public License for more details.
15857+ *
15858+ * You should have received a copy of the GNU General Public License
15859+ * along with this program; if not, write to the Free Software
15860+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 15861+ */
15862+
15863+/*
15864+ * module initialization and module-global
15865+ */
15866+
15867+#ifndef __AUFS_MODULE_H__
15868+#define __AUFS_MODULE_H__
15869+
15870+#ifdef __KERNEL__
15871+
15872+#include <linux/slab.h>
15873+
dece6358
AM
15874+struct path;
15875+struct seq_file;
15876+
1facf9fc 15877+/* module parameters */
15878+extern short aufs_nwkq;
15879+extern int sysaufs_brs;
15880+
15881+/* ---------------------------------------------------------------------- */
15882+
15883+extern int au_dir_roflags;
15884+
15885+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
15886+int au_seq_path(struct seq_file *seq, struct path *path);
15887+
15888+/* ---------------------------------------------------------------------- */
15889+
15890+/* kmem cache */
/* indices into au_cachep[]; AuCache_Last is the number of slots */
enum {
	AuCache_DINFO,		/* au_dinfo */
	AuCache_ICNTNR,		/* au_icntnr */
	AuCache_FINFO,		/* au_finfo */
	AuCache_VDIR,		/* au_vdir */
	AuCache_DEHSTR,		/* au_vdir_dehstr */
#ifdef CONFIG_AUFS_HINOTIFY
	AuCache_HINOTIFY,	/* exists only with CONFIG_AUFS_HINOTIFY */
#endif
	AuCache_Last
};
15902+
15903+#define AuCache(type) KMEM_CACHE(type, SLAB_RECLAIM_ACCOUNT)
15904+
15905+extern struct kmem_cache *au_cachep[];
15906+
15907+#define AuCacheFuncs(name, index) \
15908+static inline void *au_cache_alloc_##name(void) \
15909+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
15910+static inline void au_cache_free_##name(void *p) \
15911+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
15912+
15913+AuCacheFuncs(dinfo, DINFO);
15914+AuCacheFuncs(icntnr, ICNTNR);
15915+AuCacheFuncs(finfo, FINFO);
15916+AuCacheFuncs(vdir, VDIR);
15917+AuCacheFuncs(dehstr, DEHSTR);
15918+
15919+/* ---------------------------------------------------------------------- */
15920+
15921+#endif /* __KERNEL__ */
15922+#endif /* __AUFS_MODULE_H__ */
1308ab2a 15923diff -uprN -x .git linux-2.6.31/fs/aufs/opts.c aufs2-2.6.git/fs/aufs/opts.c
15924--- linux-2.6.31/fs/aufs/opts.c 1970-01-01 00:00:00.000000000 +0000
15925+++ aufs2-2.6.git/fs/aufs/opts.c 2009-09-21 21:49:23.404940801 +0000
15926@@ -0,0 +1,1546 @@
1facf9fc 15927+/*
15928+ * Copyright (C) 2005-2009 Junjiro R. Okajima
15929+ *
15930+ * This program, aufs is free software; you can redistribute it and/or modify
15931+ * it under the terms of the GNU General Public License as published by
15932+ * the Free Software Foundation; either version 2 of the License, or
15933+ * (at your option) any later version.
dece6358
AM
15934+ *
15935+ * This program is distributed in the hope that it will be useful,
15936+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15937+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15938+ * GNU General Public License for more details.
15939+ *
15940+ * You should have received a copy of the GNU General Public License
15941+ * along with this program; if not, write to the Free Software
15942+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 15943+ */
15944+
15945+/*
15946+ * mount options/flags
15947+ */
15948+
dece6358
AM
15949+#include <linux/file.h>
15950+#include <linux/namei.h>
1facf9fc 15951+#include <linux/types.h> /* a distribution requires */
15952+#include <linux/parser.h>
15953+#include "aufs.h"
15954+
15955+/* ---------------------------------------------------------------------- */
15956+
/*
 * tokens for the mount option parser.
 * Opt_tail terminates an array of struct au_opt, Opt_err is the token of the
 * terminating entry of the options table.
 */
enum {
	/* branch list */
	Opt_br,

	/* branch management */
	Opt_add,
	Opt_del,
	Opt_mod,
	Opt_reorder,
	Opt_append,
	Opt_prepend,
	Opt_idel,
	Opt_imod,
	Opt_ireorder,

	/* integer tunables */
	Opt_dirwh,
	Opt_rdcache,
	Opt_rdblk,
	Opt_rdhash,
	Opt_rendir,
	Opt_rdblk_def,
	Opt_rdhash_def,

	/* xino/xib */
	Opt_xino,
	Opt_zxino,
	Opt_noxino,
	Opt_trunc_xino,
	Opt_trunc_xino_v,
	Opt_notrunc_xino,
	Opt_trunc_xino_path,
	Opt_itrunc_xino,
	Opt_trunc_xib,
	Opt_notrunc_xib,

	Opt_shwh,
	Opt_noshwh,

	Opt_plink,
	Opt_noplink,
	Opt_list_plink,

	Opt_udba,
	/* Opt_lock, Opt_unlock, */
	Opt_cmd,
	Opt_cmd_args,

	Opt_diropq_a,
	Opt_diropq_w,

	Opt_warn_perm,
	Opt_nowarn_perm,

	Opt_wbr_copyup,
	Opt_wbr_create,

	Opt_refrof,
	Opt_norefrof,

	Opt_verbose,
	Opt_noverbose,

	Opt_sum,
	Opt_nosum,
	Opt_wsum,

	Opt_tail,
	Opt_ignore,
	Opt_ignore_silent,
	Opt_err
};
15980+
15981+static match_table_t options = {
15982+ {Opt_br, "br=%s"},
15983+ {Opt_br, "br:%s"},
15984+
15985+ {Opt_add, "add=%d:%s"},
15986+ {Opt_add, "add:%d:%s"},
15987+ {Opt_add, "ins=%d:%s"},
15988+ {Opt_add, "ins:%d:%s"},
15989+ {Opt_append, "append=%s"},
15990+ {Opt_append, "append:%s"},
15991+ {Opt_prepend, "prepend=%s"},
15992+ {Opt_prepend, "prepend:%s"},
15993+
15994+ {Opt_del, "del=%s"},
15995+ {Opt_del, "del:%s"},
15996+ /* {Opt_idel, "idel:%d"}, */
15997+ {Opt_mod, "mod=%s"},
15998+ {Opt_mod, "mod:%s"},
15999+ /* {Opt_imod, "imod:%d:%s"}, */
16000+
16001+ {Opt_dirwh, "dirwh=%d"},
16002+
16003+ {Opt_xino, "xino=%s"},
16004+ {Opt_noxino, "noxino"},
16005+ {Opt_trunc_xino, "trunc_xino"},
16006+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
16007+ {Opt_notrunc_xino, "notrunc_xino"},
16008+ {Opt_trunc_xino_path, "trunc_xino=%s"},
16009+ {Opt_itrunc_xino, "itrunc_xino=%d"},
16010+ /* {Opt_zxino, "zxino=%s"}, */
16011+ {Opt_trunc_xib, "trunc_xib"},
16012+ {Opt_notrunc_xib, "notrunc_xib"},
16013+
16014+ {Opt_plink, "plink"},
16015+ {Opt_noplink, "noplink"},
16016+#ifdef CONFIG_AUFS_DEBUG
16017+ {Opt_list_plink, "list_plink"},
16018+#endif
16019+
16020+ {Opt_udba, "udba=%s"},
16021+
16022+ {Opt_diropq_a, "diropq=always"},
16023+ {Opt_diropq_a, "diropq=a"},
16024+ {Opt_diropq_w, "diropq=whiteouted"},
16025+ {Opt_diropq_w, "diropq=w"},
16026+
16027+ {Opt_warn_perm, "warn_perm"},
16028+ {Opt_nowarn_perm, "nowarn_perm"},
16029+
16030+ /* keep them temporary */
16031+ {Opt_ignore_silent, "coo=%s"},
16032+ {Opt_ignore_silent, "nodlgt"},
16033+ {Opt_ignore_silent, "nodirperm1"},
1facf9fc 16034+ {Opt_ignore_silent, "clean_plink"},
16035+
dece6358
AM
16036+#ifdef CONFIG_AUFS_SHWH
16037+ {Opt_shwh, "shwh"},
16038+#endif
16039+ {Opt_noshwh, "noshwh"},
16040+
1facf9fc 16041+ {Opt_rendir, "rendir=%d"},
16042+
16043+ {Opt_refrof, "refrof"},
16044+ {Opt_norefrof, "norefrof"},
16045+
16046+ {Opt_verbose, "verbose"},
16047+ {Opt_verbose, "v"},
16048+ {Opt_noverbose, "noverbose"},
16049+ {Opt_noverbose, "quiet"},
16050+ {Opt_noverbose, "q"},
16051+ {Opt_noverbose, "silent"},
16052+
16053+ {Opt_sum, "sum"},
16054+ {Opt_nosum, "nosum"},
16055+ {Opt_wsum, "wsum"},
16056+
16057+ {Opt_rdcache, "rdcache=%d"},
16058+ {Opt_rdblk, "rdblk=%d"},
dece6358 16059+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 16060+ {Opt_rdhash, "rdhash=%d"},
dece6358 16061+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 16062+
16063+ {Opt_wbr_create, "create=%s"},
16064+ {Opt_wbr_create, "create_policy=%s"},
16065+ {Opt_wbr_copyup, "cpup=%s"},
16066+ {Opt_wbr_copyup, "copyup=%s"},
16067+ {Opt_wbr_copyup, "copyup_policy=%s"},
16068+
16069+ /* internal use for the scripts */
16070+ {Opt_ignore_silent, "si=%s"},
16071+
16072+ {Opt_br, "dirs=%s"},
16073+ {Opt_ignore, "debug=%d"},
16074+ {Opt_ignore, "delete=whiteout"},
16075+ {Opt_ignore, "delete=all"},
16076+ {Opt_ignore, "imap=%s"},
16077+
1308ab2a 16078+ /* temporary workaround, due to old mount(8)? */
16079+ {Opt_ignore_silent, "relatime"},
16080+
1facf9fc 16081+ {Opt_err, NULL}
16082+};
16083+
16084+/* ---------------------------------------------------------------------- */
16085+
16086+static const char *au_parser_pattern(int val, struct match_token *token)
16087+{
16088+ while (token->pattern) {
16089+ if (token->token == val)
16090+ return token->pattern;
16091+ token++;
16092+ }
16093+ BUG();
16094+ return "??";
16095+}
16096+
16097+/* ---------------------------------------------------------------------- */
16098+
16099+static match_table_t brperms = {
16100+ {AuBrPerm_RO, AUFS_BRPERM_RO},
16101+ {AuBrPerm_RR, AUFS_BRPERM_RR},
16102+ {AuBrPerm_RW, AUFS_BRPERM_RW},
16103+
16104+ {AuBrPerm_ROWH, AUFS_BRPERM_ROWH},
16105+ {AuBrPerm_RRWH, AUFS_BRPERM_RRWH},
16106+ {AuBrPerm_RWNoLinkWH, AUFS_BRPERM_RWNLWH},
16107+
16108+ {AuBrPerm_ROWH, "nfsro"},
16109+ {AuBrPerm_RO, NULL}
16110+};
16111+
16112+static int br_perm_val(char *perm)
16113+{
16114+ int val;
16115+ substring_t args[MAX_OPT_ARGS];
16116+
16117+ val = match_token(perm, brperms, args);
16118+ return val;
16119+}
16120+
16121+const char *au_optstr_br_perm(int brperm)
16122+{
16123+ return au_parser_pattern(brperm, (void *)brperms);
16124+}
16125+
16126+/* ---------------------------------------------------------------------- */
16127+
16128+static match_table_t udbalevel = {
16129+ {AuOpt_UDBA_REVAL, "reval"},
16130+ {AuOpt_UDBA_NONE, "none"},
16131+#ifdef CONFIG_AUFS_HINOTIFY
16132+ {AuOpt_UDBA_HINOTIFY, "inotify"},
16133+#endif
16134+ {-1, NULL}
16135+};
16136+
16137+static int udba_val(char *str)
16138+{
16139+ substring_t args[MAX_OPT_ARGS];
16140+
16141+ return match_token(str, udbalevel, args);
16142+}
16143+
16144+const char *au_optstr_udba(int udba)
16145+{
16146+ return au_parser_pattern(udba, (void *)udbalevel);
16147+}
16148+
16149+/* ---------------------------------------------------------------------- */
16150+
16151+static match_table_t au_wbr_create_policy = {
16152+ {AuWbrCreate_TDP, "tdp"},
16153+ {AuWbrCreate_TDP, "top-down-parent"},
16154+ {AuWbrCreate_RR, "rr"},
16155+ {AuWbrCreate_RR, "round-robin"},
16156+ {AuWbrCreate_MFS, "mfs"},
16157+ {AuWbrCreate_MFS, "most-free-space"},
16158+ {AuWbrCreate_MFSV, "mfs:%d"},
16159+ {AuWbrCreate_MFSV, "most-free-space:%d"},
16160+
16161+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
16162+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
16163+ {AuWbrCreate_PMFS, "pmfs"},
16164+ {AuWbrCreate_PMFSV, "pmfs:%d"},
16165+
16166+ {-1, NULL}
16167+};
16168+
dece6358
AM
16169+/*
16170+ * cf. linux/lib/parser.c and cmdline.c
16171+ * gave up calling memparse() since it uses simple_strtoull() instead of
16172+ * strict_...().
16173+ */
1facf9fc 16174+static int au_match_ull(substring_t *s, unsigned long long *result)
16175+{
16176+ int err;
16177+ unsigned int len;
16178+ char a[32];
16179+
16180+ err = -ERANGE;
16181+ len = s->to - s->from;
16182+ if (len + 1 <= sizeof(a)) {
16183+ memcpy(a, s->from, len);
16184+ a[len] = '\0';
16185+ err = strict_strtoull(a, 0, result);
16186+ }
16187+ return err;
16188+}
16189+
16190+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
16191+ struct au_opt_wbr_create *create)
16192+{
16193+ int err;
16194+ unsigned long long ull;
16195+
16196+ err = 0;
16197+ if (!au_match_ull(arg, &ull))
16198+ create->mfsrr_watermark = ull;
16199+ else {
16200+ AuErr("bad integer in %s\n", str);
16201+ err = -EINVAL;
16202+ }
16203+
16204+ return err;
16205+}
16206+
16207+static int au_wbr_mfs_sec(substring_t *arg, char *str,
16208+ struct au_opt_wbr_create *create)
16209+{
16210+ int n, err;
16211+
16212+ err = 0;
16213+ if (!match_int(arg, &n) && 0 <= n)
16214+ create->mfs_second = n;
16215+ else {
16216+ AuErr("bad integer in %s\n", str);
16217+ err = -EINVAL;
16218+ }
16219+
16220+ return err;
16221+}
16222+
16223+static int au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
16224+{
16225+ int err, e;
16226+ substring_t args[MAX_OPT_ARGS];
16227+
16228+ err = match_token(str, au_wbr_create_policy, args);
16229+ create->wbr_create = err;
16230+ switch (err) {
16231+ case AuWbrCreate_MFSRRV:
16232+ e = au_wbr_mfs_wmark(&args[0], str, create);
16233+ if (!e)
16234+ e = au_wbr_mfs_sec(&args[1], str, create);
16235+ if (unlikely(e))
16236+ err = e;
16237+ break;
16238+ case AuWbrCreate_MFSRR:
16239+ e = au_wbr_mfs_wmark(&args[0], str, create);
16240+ if (unlikely(e)) {
16241+ err = e;
16242+ break;
16243+ }
16244+ /*FALLTHROUGH*/
16245+ case AuWbrCreate_MFS:
16246+ case AuWbrCreate_PMFS:
16247+ create->mfs_second = AUFS_MFS_SECOND_DEF;
16248+ break;
16249+ case AuWbrCreate_MFSV:
16250+ case AuWbrCreate_PMFSV:
16251+ e = au_wbr_mfs_sec(&args[0], str, create);
16252+ if (unlikely(e))
16253+ err = e;
16254+ break;
16255+ }
16256+
16257+ return err;
16258+}
16259+
16260+const char *au_optstr_wbr_create(int wbr_create)
16261+{
16262+ return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
16263+}
16264+
16265+static match_table_t au_wbr_copyup_policy = {
16266+ {AuWbrCopyup_TDP, "tdp"},
16267+ {AuWbrCopyup_TDP, "top-down-parent"},
16268+ {AuWbrCopyup_BUP, "bup"},
16269+ {AuWbrCopyup_BUP, "bottom-up-parent"},
16270+ {AuWbrCopyup_BU, "bu"},
16271+ {AuWbrCopyup_BU, "bottom-up"},
16272+ {-1, NULL}
16273+};
16274+
16275+static int au_wbr_copyup_val(char *str)
16276+{
16277+ substring_t args[MAX_OPT_ARGS];
16278+
16279+ return match_token(str, au_wbr_copyup_policy, args);
16280+}
16281+
16282+const char *au_optstr_wbr_copyup(int wbr_copyup)
16283+{
16284+ return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
16285+}
16286+
16287+/* ---------------------------------------------------------------------- */
16288+
16289+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
16290+
/*
 * Debug aid: print every parsed option in @opts, up to the Opt_tail entry.
 * The body is compiled only with CONFIG_AUFS_DEBUG; an option type which is
 * not handled below is treated as a bug.
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch management */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* integer tunables */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino/xib */
		case Opt_xino:
			AuDbg("xino {%s %.*s}\n",
			      opt->xino.path,
			      AuDLNPair(opt->xino.file->f_dentry));
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* flags */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* policies */
		case Opt_wbr_create:
			AuDbg("create %d, %s\n", opt->wbr_create.wbr_create,
			      au_optstr_wbr_create(opt->wbr_create.wbr_create));
			/* print the arguments which the policy takes */
			switch (opt->wbr_create.wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", opt->wbr_create.mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      opt->wbr_create.mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      opt->wbr_create.mfsrr_watermark,
				      opt->wbr_create.mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		default:
			BUG();
		}
	}
#endif
}
16466+
16467+void au_opts_free(struct au_opts *opts)
16468+{
16469+ struct au_opt *opt;
16470+
16471+ opt = opts->opt;
16472+ while (opt->type != Opt_tail) {
16473+ switch (opt->type) {
16474+ case Opt_add:
16475+ case Opt_append:
16476+ case Opt_prepend:
16477+ path_put(&opt->add.path);
16478+ break;
16479+ case Opt_del:
16480+ case Opt_idel:
16481+ path_put(&opt->del.h_path);
16482+ break;
16483+ case Opt_mod:
16484+ case Opt_imod:
16485+ dput(opt->mod.h_root);
16486+ break;
16487+ case Opt_xino:
16488+ fput(opt->xino.file);
16489+ break;
16490+ }
16491+ opt++;
16492+ }
16493+}
16494+
16495+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
16496+ aufs_bindex_t bindex)
16497+{
16498+ int err;
16499+ struct au_opt_add *add = &opt->add;
16500+ char *p;
16501+
16502+ add->bindex = bindex;
16503+ add->perm = AuBrPerm_Last;
16504+ add->pathname = opt_str;
16505+ p = strchr(opt_str, '=');
16506+ if (p) {
16507+ *p++ = 0;
16508+ if (*p)
16509+ add->perm = br_perm_val(p);
16510+ }
16511+
16512+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
16513+ if (!err) {
16514+ if (!p) {
16515+ add->perm = AuBrPerm_RO;
16516+ if (au_test_fs_rr(add->path.dentry->d_sb))
16517+ add->perm = AuBrPerm_RR;
16518+ else if (!bindex && !(sb_flags & MS_RDONLY))
16519+ add->perm = AuBrPerm_RW;
16520+ }
16521+ opt->type = Opt_add;
16522+ goto out;
16523+ }
16524+ AuErr("lookup failed %s (%d)\n", add->pathname, err);
16525+ err = -EINVAL;
16526+
16527+ out:
16528+ return err;
16529+}
16530+
16531+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
16532+{
16533+ int err;
16534+
16535+ del->pathname = args[0].from;
16536+ AuDbg("del path %s\n", del->pathname);
16537+
16538+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
16539+ if (unlikely(err))
16540+ AuErr("lookup failed %s (%d)\n", del->pathname, err);
16541+
16542+ return err;
16543+}
16544+
16545+#if 0 /* reserved for future use */
16546+static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
16547+ struct au_opt_del *del, substring_t args[])
16548+{
16549+ int err;
16550+ struct dentry *root;
16551+
16552+ err = -EINVAL;
16553+ root = sb->s_root;
16554+ aufs_read_lock(root, AuLock_FLUSH);
16555+ if (bindex < 0 || au_sbend(sb) < bindex) {
16556+ AuErr("out of bounds, %d\n", bindex);
16557+ goto out;
16558+ }
16559+
16560+ err = 0;
16561+ del->h_path.dentry = dget(au_h_dptr(root, bindex));
16562+ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
16563+
16564+ out:
16565+ aufs_read_unlock(root, !AuLock_IR);
16566+ return err;
16567+}
16568+#endif
16569+
16570+static int au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
16571+{
16572+ int err;
16573+ struct path path;
16574+ char *p;
16575+
16576+ err = -EINVAL;
16577+ mod->path = args[0].from;
16578+ p = strchr(mod->path, '=');
16579+ if (unlikely(!p)) {
16580+ AuErr("no permssion %s\n", args[0].from);
16581+ goto out;
16582+ }
16583+
16584+ *p++ = 0;
16585+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
16586+ if (unlikely(err)) {
16587+ AuErr("lookup failed %s (%d)\n", mod->path, err);
16588+ goto out;
16589+ }
16590+
16591+ mod->perm = br_perm_val(p);
16592+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
16593+ mod->h_root = dget(path.dentry);
16594+ path_put(&path);
16595+
16596+ out:
16597+ return err;
16598+}
16599+
16600+#if 0 /* reserved for future use */
16601+static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
16602+ struct au_opt_mod *mod, substring_t args[])
16603+{
16604+ int err;
16605+ struct dentry *root;
16606+
16607+ err = -EINVAL;
16608+ root = sb->s_root;
16609+ aufs_read_lock(root, AuLock_FLUSH);
16610+ if (bindex < 0 || au_sbend(sb) < bindex) {
16611+ AuErr("out of bounds, %d\n", bindex);
16612+ goto out;
16613+ }
16614+
16615+ err = 0;
16616+ mod->perm = br_perm_val(args[1].from);
16617+ AuDbg("mod path %s, perm 0x%x, %s\n",
16618+ mod->path, mod->perm, args[1].from);
16619+ mod->h_root = dget(au_h_dptr(root, bindex));
16620+
16621+ out:
16622+ aufs_read_unlock(root, !AuLock_IR);
16623+ return err;
16624+}
16625+#endif
16626+
16627+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
16628+ substring_t args[])
16629+{
16630+ int err;
16631+ struct file *file;
16632+
16633+ file = au_xino_create(sb, args[0].from, /*silent*/0);
16634+ err = PTR_ERR(file);
16635+ if (IS_ERR(file))
16636+ goto out;
16637+
16638+ err = -EINVAL;
16639+ if (unlikely(file->f_dentry->d_sb == sb)) {
16640+ fput(file);
16641+ AuErr("%s must be outside\n", args[0].from);
16642+ goto out;
16643+ }
16644+
16645+ err = 0;
16646+ xino->file = file;
16647+ xino->path = args[0].from;
16648+
16649+ out:
16650+ return err;
16651+}
16652+
16653+static
16654+int au_opts_parse_xino_itrunc_path(struct super_block *sb,
16655+ struct au_opt_xino_itrunc *xino_itrunc,
16656+ substring_t args[])
16657+{
16658+ int err;
16659+ aufs_bindex_t bend, bindex;
16660+ struct path path;
16661+ struct dentry *root;
16662+
16663+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
16664+ if (unlikely(err)) {
16665+ AuErr("lookup failed %s (%d)\n", args[0].from, err);
16666+ goto out;
16667+ }
16668+
16669+ xino_itrunc->bindex = -1;
16670+ root = sb->s_root;
16671+ aufs_read_lock(root, AuLock_FLUSH);
16672+ bend = au_sbend(sb);
16673+ for (bindex = 0; bindex <= bend; bindex++) {
16674+ if (au_h_dptr(root, bindex) == path.dentry) {
16675+ xino_itrunc->bindex = bindex;
16676+ break;
16677+ }
16678+ }
16679+ aufs_read_unlock(root, !AuLock_IR);
16680+ path_put(&path);
16681+
16682+ if (unlikely(xino_itrunc->bindex < 0)) {
16683+ AuErr("no such branch %s\n", args[0].from);
16684+ err = -EINVAL;
16685+ }
16686+
16687+ out:
16688+ return err;
16689+}
16690+
16691+/* called without aufs lock */
16692+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
16693+{
16694+ int err, n, token;
16695+ aufs_bindex_t bindex;
16696+ unsigned char skipped;
16697+ struct dentry *root;
16698+ struct au_opt *opt, *opt_tail;
16699+ char *opt_str;
16700+ /* reduce the stack space */
16701+ union {
16702+ struct au_opt_xino_itrunc *xino_itrunc;
16703+ struct au_opt_wbr_create *create;
16704+ } u;
16705+ struct {
16706+ substring_t args[MAX_OPT_ARGS];
16707+ } *a;
16708+
16709+ err = -ENOMEM;
16710+ a = kmalloc(sizeof(*a), GFP_NOFS);
16711+ if (unlikely(!a))
16712+ goto out;
16713+
16714+ root = sb->s_root;
16715+ err = 0;
16716+ bindex = 0;
16717+ opt = opts->opt;
16718+ opt_tail = opt + opts->max_opt - 1;
16719+ opt->type = Opt_tail;
16720+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
16721+ err = -EINVAL;
16722+ skipped = 0;
16723+ token = match_token(opt_str, options, a->args);
16724+ switch (token) {
16725+ case Opt_br:
16726+ err = 0;
16727+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
16728+ && *opt_str) {
16729+ err = opt_add(opt, opt_str, opts->sb_flags,
16730+ bindex++);
16731+ if (unlikely(!err && ++opt > opt_tail)) {
16732+ err = -E2BIG;
16733+ break;
16734+ }
16735+ opt->type = Opt_tail;
16736+ skipped = 1;
16737+ }
16738+ break;
16739+ case Opt_add:
16740+ if (unlikely(match_int(&a->args[0], &n))) {
16741+ AuErr("bad integer in %s\n", opt_str);
16742+ break;
16743+ }
16744+ bindex = n;
16745+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
16746+ bindex);
16747+ if (!err)
16748+ opt->type = token;
16749+ break;
16750+ case Opt_append:
16751+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
16752+ /*dummy bindex*/1);
16753+ if (!err)
16754+ opt->type = token;
16755+ break;
16756+ case Opt_prepend:
16757+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
16758+ /*bindex*/0);
16759+ if (!err)
16760+ opt->type = token;
16761+ break;
16762+ case Opt_del:
16763+ err = au_opts_parse_del(&opt->del, a->args);
16764+ if (!err)
16765+ opt->type = token;
16766+ break;
16767+#if 0 /* reserved for future use */
16768+ case Opt_idel:
16769+ del->pathname = "(indexed)";
16770+ if (unlikely(match_int(&args[0], &n))) {
16771+ AuErr("bad integer in %s\n", opt_str);
16772+ break;
16773+ }
16774+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
16775+ if (!err)
16776+ opt->type = token;
16777+ break;
16778+#endif
16779+ case Opt_mod:
16780+ err = au_opts_parse_mod(&opt->mod, a->args);
16781+ if (!err)
16782+ opt->type = token;
16783+ break;
16784+#ifdef IMOD /* reserved for future use */
16785+ case Opt_imod:
16786+ u.mod->path = "(indexed)";
16787+ if (unlikely(match_int(&a->args[0], &n))) {
16788+ AuErr("bad integer in %s\n", opt_str);
16789+ break;
16790+ }
16791+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
16792+ if (!err)
16793+ opt->type = token;
16794+ break;
16795+#endif
16796+ case Opt_xino:
16797+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
16798+ if (!err)
16799+ opt->type = token;
16800+ break;
16801+
16802+ case Opt_trunc_xino_path:
16803+ err = au_opts_parse_xino_itrunc_path
16804+ (sb, &opt->xino_itrunc, a->args);
16805+ if (!err)
16806+ opt->type = token;
16807+ break;
16808+
16809+ case Opt_itrunc_xino:
16810+ u.xino_itrunc = &opt->xino_itrunc;
16811+ if (unlikely(match_int(&a->args[0], &n))) {
16812+ AuErr("bad integer in %s\n", opt_str);
16813+ break;
16814+ }
16815+ u.xino_itrunc->bindex = n;
16816+ aufs_read_lock(root, AuLock_FLUSH);
16817+ if (n < 0 || au_sbend(sb) < n) {
16818+ AuErr("out of bounds, %d\n", n);
16819+ aufs_read_unlock(root, !AuLock_IR);
16820+ break;
16821+ }
16822+ aufs_read_unlock(root, !AuLock_IR);
16823+ err = 0;
16824+ opt->type = token;
16825+ break;
16826+
16827+ case Opt_dirwh:
16828+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
16829+ break;
16830+ err = 0;
16831+ opt->type = token;
16832+ break;
16833+
16834+ case Opt_rdcache:
16835+ if (unlikely(match_int(&a->args[0], &opt->rdcache)))
16836+ break;
16837+ err = 0;
16838+ opt->type = token;
16839+ break;
16840+ case Opt_rdblk:
16841+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 16842+ || n < 0
1facf9fc 16843+ || n > KMALLOC_MAX_SIZE)) {
16844+ AuErr("bad integer in %s\n", opt_str);
16845+ break;
16846+ }
1308ab2a 16847+ if (unlikely(n && n < NAME_MAX)) {
1facf9fc 16848+ AuErr("rdblk must be larger than %d\n",
16849+ NAME_MAX);
16850+ break;
16851+ }
16852+ opt->rdblk = n;
16853+ err = 0;
16854+ opt->type = token;
16855+ break;
16856+ case Opt_rdhash:
16857+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 16858+ || n < 0
1facf9fc 16859+ || n * sizeof(struct hlist_head)
16860+ > KMALLOC_MAX_SIZE)) {
16861+ AuErr("bad integer in %s\n", opt_str);
16862+ break;
16863+ }
16864+ opt->rdhash = n;
16865+ err = 0;
16866+ opt->type = token;
16867+ break;
16868+
16869+ case Opt_trunc_xino:
16870+ case Opt_notrunc_xino:
16871+ case Opt_noxino:
16872+ case Opt_trunc_xib:
16873+ case Opt_notrunc_xib:
dece6358
AM
16874+ case Opt_shwh:
16875+ case Opt_noshwh:
1facf9fc 16876+ case Opt_plink:
16877+ case Opt_noplink:
16878+ case Opt_list_plink:
16879+ case Opt_diropq_a:
16880+ case Opt_diropq_w:
16881+ case Opt_warn_perm:
16882+ case Opt_nowarn_perm:
16883+ case Opt_refrof:
16884+ case Opt_norefrof:
16885+ case Opt_verbose:
16886+ case Opt_noverbose:
16887+ case Opt_sum:
16888+ case Opt_nosum:
16889+ case Opt_wsum:
dece6358
AM
16890+ case Opt_rdblk_def:
16891+ case Opt_rdhash_def:
1facf9fc 16892+ err = 0;
16893+ opt->type = token;
16894+ break;
16895+
16896+ case Opt_udba:
16897+ opt->udba = udba_val(a->args[0].from);
16898+ if (opt->udba >= 0) {
16899+ err = 0;
16900+ opt->type = token;
16901+ } else
16902+ AuErr("wrong value, %s\n", opt_str);
16903+ break;
16904+
16905+ case Opt_wbr_create:
16906+ u.create = &opt->wbr_create;
16907+ u.create->wbr_create
16908+ = au_wbr_create_val(a->args[0].from, u.create);
16909+ if (u.create->wbr_create >= 0) {
16910+ err = 0;
16911+ opt->type = token;
16912+ } else
16913+ AuErr("wrong value, %s\n", opt_str);
16914+ break;
16915+ case Opt_wbr_copyup:
16916+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
16917+ if (opt->wbr_copyup >= 0) {
16918+ err = 0;
16919+ opt->type = token;
16920+ } else
16921+ AuErr("wrong value, %s\n", opt_str);
16922+ break;
16923+
16924+ case Opt_ignore:
16925+ AuWarn("ignored %s\n", opt_str);
16926+ /*FALLTHROUGH*/
16927+ case Opt_ignore_silent:
16928+ skipped = 1;
16929+ err = 0;
16930+ break;
16931+ case Opt_err:
16932+ AuErr("unknown option %s\n", opt_str);
16933+ break;
16934+ }
16935+
16936+ if (!err && !skipped) {
16937+ if (unlikely(++opt > opt_tail)) {
16938+ err = -E2BIG;
16939+ opt--;
16940+ opt->type = Opt_tail;
16941+ break;
16942+ }
16943+ opt->type = Opt_tail;
16944+ }
16945+ }
16946+
16947+ kfree(a);
16948+ dump_opts(opts);
16949+ if (unlikely(err))
16950+ au_opts_free(opts);
16951+
16952+ out:
16953+ return err;
16954+}
16955+
16956+static int au_opt_wbr_create(struct super_block *sb,
16957+ struct au_opt_wbr_create *create)
16958+{
16959+ int err;
16960+ struct au_sbinfo *sbinfo;
16961+
dece6358
AM
16962+ SiMustWriteLock(sb);
16963+
1facf9fc 16964+ err = 1; /* handled */
16965+ sbinfo = au_sbi(sb);
16966+ if (sbinfo->si_wbr_create_ops->fin) {
16967+ err = sbinfo->si_wbr_create_ops->fin(sb);
16968+ if (!err)
16969+ err = 1;
16970+ }
16971+
16972+ sbinfo->si_wbr_create = create->wbr_create;
16973+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
16974+ switch (create->wbr_create) {
16975+ case AuWbrCreate_MFSRRV:
16976+ case AuWbrCreate_MFSRR:
16977+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
16978+ /*FALLTHROUGH*/
16979+ case AuWbrCreate_MFS:
16980+ case AuWbrCreate_MFSV:
16981+ case AuWbrCreate_PMFS:
16982+ case AuWbrCreate_PMFSV:
16983+ sbinfo->si_wbr_mfs.mfs_expire = create->mfs_second * HZ;
16984+ break;
16985+ }
16986+
16987+ if (sbinfo->si_wbr_create_ops->init)
16988+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
16989+
16990+ return err;
16991+}
16992+
16993+/*
16994+ * returns,
16995+ * plus: processed without an error
16996+ * zero: unprocessed
16997+ */
16998+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
16999+ struct au_opts *opts)
17000+{
17001+ int err;
17002+ struct au_sbinfo *sbinfo;
17003+
dece6358
AM
17004+ SiMustWriteLock(sb);
17005+
1facf9fc 17006+ err = 1; /* handled */
17007+ sbinfo = au_sbi(sb);
17008+ switch (opt->type) {
17009+ case Opt_udba:
17010+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
17011+ sbinfo->si_mntflags |= opt->udba;
17012+ opts->given_udba |= opt->udba;
17013+ break;
17014+
17015+ case Opt_plink:
17016+ au_opt_set(sbinfo->si_mntflags, PLINK);
17017+ break;
17018+ case Opt_noplink:
17019+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
17020+ au_plink_put(sb);
17021+ au_opt_clr(sbinfo->si_mntflags, PLINK);
17022+ break;
17023+ case Opt_list_plink:
17024+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
17025+ au_plink_list(sb);
17026+ break;
17027+
17028+ case Opt_diropq_a:
17029+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17030+ break;
17031+ case Opt_diropq_w:
17032+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17033+ break;
17034+
17035+ case Opt_warn_perm:
17036+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
17037+ break;
17038+ case Opt_nowarn_perm:
17039+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
17040+ break;
17041+
17042+ case Opt_refrof:
17043+ au_opt_set(sbinfo->si_mntflags, REFROF);
17044+ break;
17045+ case Opt_norefrof:
17046+ au_opt_clr(sbinfo->si_mntflags, REFROF);
17047+ break;
17048+
17049+ case Opt_verbose:
17050+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
17051+ break;
17052+ case Opt_noverbose:
17053+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
17054+ break;
17055+
17056+ case Opt_sum:
17057+ au_opt_set(sbinfo->si_mntflags, SUM);
17058+ break;
17059+ case Opt_wsum:
17060+ au_opt_clr(sbinfo->si_mntflags, SUM);
17061+ au_opt_set(sbinfo->si_mntflags, SUM_W);
17062+ case Opt_nosum:
17063+ au_opt_clr(sbinfo->si_mntflags, SUM);
17064+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
17065+ break;
17066+
17067+ case Opt_wbr_create:
17068+ err = au_opt_wbr_create(sb, &opt->wbr_create);
17069+ break;
17070+ case Opt_wbr_copyup:
17071+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
17072+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
17073+ break;
17074+
17075+ case Opt_dirwh:
17076+ sbinfo->si_dirwh = opt->dirwh;
17077+ break;
17078+
17079+ case Opt_rdcache:
17080+ sbinfo->si_rdcache = opt->rdcache * HZ;
17081+ break;
17082+ case Opt_rdblk:
17083+ sbinfo->si_rdblk = opt->rdblk;
17084+ break;
dece6358
AM
17085+ case Opt_rdblk_def:
17086+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
17087+ break;
1facf9fc 17088+ case Opt_rdhash:
17089+ sbinfo->si_rdhash = opt->rdhash;
17090+ break;
dece6358
AM
17091+ case Opt_rdhash_def:
17092+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
17093+ break;
17094+
17095+ case Opt_shwh:
17096+ au_opt_set(sbinfo->si_mntflags, SHWH);
17097+ break;
17098+ case Opt_noshwh:
17099+ au_opt_clr(sbinfo->si_mntflags, SHWH);
17100+ break;
1facf9fc 17101+
17102+ case Opt_trunc_xino:
17103+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
17104+ break;
17105+ case Opt_notrunc_xino:
17106+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
17107+ break;
17108+
17109+ case Opt_trunc_xino_path:
17110+ case Opt_itrunc_xino:
17111+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
17112+ if (!err)
17113+ err = 1;
17114+ break;
17115+
17116+ case Opt_trunc_xib:
17117+ au_fset_opts(opts->flags, TRUNC_XIB);
17118+ break;
17119+ case Opt_notrunc_xib:
17120+ au_fclr_opts(opts->flags, TRUNC_XIB);
17121+ break;
17122+
17123+ default:
17124+ err = 0;
17125+ break;
17126+ }
17127+
17128+ return err;
17129+}
17130+
17131+/*
17132+ * returns tri-state.
17133+ * plus: processed without an error
17134+ * zero: unprocessed
17135+ * minus: error
17136+ */
17137+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
17138+ struct au_opts *opts)
17139+{
17140+ int err, do_refresh;
17141+
17142+ err = 0;
17143+ switch (opt->type) {
17144+ case Opt_append:
17145+ opt->add.bindex = au_sbend(sb) + 1;
17146+ if (opt->add.bindex < 0)
17147+ opt->add.bindex = 0;
17148+ goto add;
17149+ case Opt_prepend:
17150+ opt->add.bindex = 0;
17151+ add:
17152+ case Opt_add:
17153+ err = au_br_add(sb, &opt->add,
17154+ au_ftest_opts(opts->flags, REMOUNT));
17155+ if (!err) {
17156+ err = 1;
17157+ au_fset_opts(opts->flags, REFRESH_DIR);
17158+ if (au_br_whable(opt->add.perm))
17159+ au_fset_opts(opts->flags, REFRESH_NONDIR);
17160+ }
17161+ break;
17162+
17163+ case Opt_del:
17164+ case Opt_idel:
17165+ err = au_br_del(sb, &opt->del,
17166+ au_ftest_opts(opts->flags, REMOUNT));
17167+ if (!err) {
17168+ err = 1;
17169+ au_fset_opts(opts->flags, TRUNC_XIB);
17170+ au_fset_opts(opts->flags, REFRESH_DIR);
17171+ au_fset_opts(opts->flags, REFRESH_NONDIR);
17172+ }
17173+ break;
17174+
17175+ case Opt_mod:
17176+ case Opt_imod:
17177+ err = au_br_mod(sb, &opt->mod,
17178+ au_ftest_opts(opts->flags, REMOUNT),
17179+ &do_refresh);
17180+ if (!err) {
17181+ err = 1;
17182+ if (do_refresh) {
17183+ au_fset_opts(opts->flags, REFRESH_DIR);
17184+ au_fset_opts(opts->flags, REFRESH_NONDIR);
17185+ }
17186+ }
17187+ break;
17188+ }
17189+
17190+ return err;
17191+}
17192+
17193+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
17194+ struct au_opt_xino **opt_xino,
17195+ struct au_opts *opts)
17196+{
17197+ int err;
17198+ aufs_bindex_t bend, bindex;
17199+ struct dentry *root, *parent, *h_root;
17200+
17201+ err = 0;
17202+ switch (opt->type) {
17203+ case Opt_xino:
17204+ err = au_xino_set(sb, &opt->xino,
17205+ !!au_ftest_opts(opts->flags, REMOUNT));
17206+ if (unlikely(err))
17207+ break;
17208+
17209+ *opt_xino = &opt->xino;
17210+ au_xino_brid_set(sb, -1);
17211+
17212+ /* safe d_parent access */
17213+ parent = opt->xino.file->f_dentry->d_parent;
17214+ root = sb->s_root;
17215+ bend = au_sbend(sb);
17216+ for (bindex = 0; bindex <= bend; bindex++) {
17217+ h_root = au_h_dptr(root, bindex);
17218+ if (h_root == parent) {
17219+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
17220+ break;
17221+ }
17222+ }
17223+ break;
17224+
17225+ case Opt_noxino:
17226+ au_xino_clr(sb);
17227+ au_xino_brid_set(sb, -1);
17228+ *opt_xino = (void *)-1;
17229+ break;
17230+ }
17231+
17232+ return err;
17233+}
17234+
17235+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
17236+ unsigned int pending)
17237+{
17238+ int err;
17239+ aufs_bindex_t bindex, bend;
17240+ unsigned char do_plink, skip, do_free;
17241+ struct au_branch *br;
17242+ struct au_wbr *wbr;
17243+ struct dentry *root;
17244+ struct inode *dir, *h_dir;
17245+ struct au_sbinfo *sbinfo;
17246+ struct au_hinode *hdir;
17247+
dece6358
AM
17248+ SiMustAnyLock(sb);
17249+
1facf9fc 17250+ sbinfo = au_sbi(sb);
17251+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
17252+
dece6358
AM
17253+ if (!(sb_flags & MS_RDONLY)) {
17254+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
17255+ AuWarn("first branch should be rw\n");
17256+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
17257+ AuWarn("shwh should be used with ro\n");
17258+ }
1facf9fc 17259+
17260+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HINOTIFY)
17261+ && !au_opt_test(sbinfo->si_mntflags, XINO))
17262+ AuWarn("udba=inotify requires xino\n");
17263+
17264+ err = 0;
17265+ root = sb->s_root;
17266+ dir = sb->s_root->d_inode;
17267+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
17268+ bend = au_sbend(sb);
17269+ for (bindex = 0; !err && bindex <= bend; bindex++) {
17270+ skip = 0;
17271+ h_dir = au_h_iptr(dir, bindex);
17272+ br = au_sbr(sb, bindex);
17273+ do_free = 0;
17274+
17275+ wbr = br->br_wbr;
17276+ if (wbr)
17277+ wbr_wh_read_lock(wbr);
17278+
17279+ switch (br->br_perm) {
17280+ case AuBrPerm_RO:
17281+ case AuBrPerm_ROWH:
17282+ case AuBrPerm_RR:
17283+ case AuBrPerm_RRWH:
17284+ do_free = !!wbr;
17285+ skip = (!wbr
17286+ || (!wbr->wbr_whbase
17287+ && !wbr->wbr_plink
17288+ && !wbr->wbr_orph));
17289+ break;
17290+
17291+ case AuBrPerm_RWNoLinkWH:
17292+ /* skip = (!br->br_whbase && !br->br_orph); */
17293+ skip = (!wbr || !wbr->wbr_whbase);
17294+ if (skip && wbr) {
17295+ if (do_plink)
17296+ skip = !!wbr->wbr_plink;
17297+ else
17298+ skip = !wbr->wbr_plink;
17299+ }
17300+ break;
17301+
17302+ case AuBrPerm_RW:
17303+ /* skip = (br->br_whbase && br->br_ohph); */
17304+ skip = (wbr && wbr->wbr_whbase);
17305+ if (skip) {
17306+ if (do_plink)
17307+ skip = !!wbr->wbr_plink;
17308+ else
17309+ skip = !wbr->wbr_plink;
17310+ }
17311+ break;
17312+
17313+ default:
17314+ BUG();
17315+ }
17316+ if (wbr)
17317+ wbr_wh_read_unlock(wbr);
17318+
17319+ if (skip)
17320+ continue;
17321+
17322+ hdir = au_hi(dir, bindex);
17323+ au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
17324+ if (wbr)
17325+ wbr_wh_write_lock(wbr);
17326+ err = au_wh_init(au_h_dptr(root, bindex), br, sb);
17327+ if (wbr)
17328+ wbr_wh_write_unlock(wbr);
17329+ au_hin_imtx_unlock(hdir);
17330+
17331+ if (!err && do_free) {
17332+ kfree(wbr);
17333+ br->br_wbr = NULL;
17334+ }
17335+ }
17336+
17337+ return err;
17338+}
17339+
17340+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
17341+{
17342+ int err;
17343+ unsigned int tmp;
17344+ aufs_bindex_t bend;
17345+ struct au_opt *opt;
17346+ struct au_opt_xino *opt_xino, xino;
17347+ struct au_sbinfo *sbinfo;
17348+
dece6358
AM
17349+ SiMustWriteLock(sb);
17350+
1facf9fc 17351+ err = 0;
17352+ opt_xino = NULL;
17353+ opt = opts->opt;
17354+ while (err >= 0 && opt->type != Opt_tail)
17355+ err = au_opt_simple(sb, opt++, opts);
17356+ if (err > 0)
17357+ err = 0;
17358+ else if (unlikely(err < 0))
17359+ goto out;
17360+
17361+ /* disable xino and udba temporary */
17362+ sbinfo = au_sbi(sb);
17363+ tmp = sbinfo->si_mntflags;
17364+ au_opt_clr(sbinfo->si_mntflags, XINO);
17365+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
17366+
17367+ opt = opts->opt;
17368+ while (err >= 0 && opt->type != Opt_tail)
17369+ err = au_opt_br(sb, opt++, opts);
17370+ if (err > 0)
17371+ err = 0;
17372+ else if (unlikely(err < 0))
17373+ goto out;
17374+
17375+ bend = au_sbend(sb);
17376+ if (unlikely(bend < 0)) {
17377+ err = -EINVAL;
17378+ AuErr("no branches\n");
17379+ goto out;
17380+ }
17381+
17382+ if (au_opt_test(tmp, XINO))
17383+ au_opt_set(sbinfo->si_mntflags, XINO);
17384+ opt = opts->opt;
17385+ while (!err && opt->type != Opt_tail)
17386+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
17387+ if (unlikely(err))
17388+ goto out;
17389+
17390+ err = au_opts_verify(sb, sb->s_flags, tmp);
17391+ if (unlikely(err))
17392+ goto out;
17393+
17394+ /* restore xino */
17395+ if (au_opt_test(tmp, XINO) && !opt_xino) {
17396+ xino.file = au_xino_def(sb);
17397+ err = PTR_ERR(xino.file);
17398+ if (IS_ERR(xino.file))
17399+ goto out;
17400+
17401+ err = au_xino_set(sb, &xino, /*remount*/0);
17402+ fput(xino.file);
17403+ if (unlikely(err))
17404+ goto out;
17405+ }
17406+
17407+ /* restore udba */
17408+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
17409+ sbinfo->si_mntflags |= (tmp & AuOptMask_UDBA);
17410+ if (au_opt_test(tmp, UDBA_HINOTIFY)) {
17411+ struct inode *dir = sb->s_root->d_inode;
17412+ au_reset_hinotify(dir,
17413+ au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
17414+ }
17415+
17416+ out:
17417+ return err;
17418+}
17419+
17420+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
17421+{
17422+ int err, rerr;
17423+ struct inode *dir;
17424+ struct au_opt_xino *opt_xino;
17425+ struct au_opt *opt;
17426+ struct au_sbinfo *sbinfo;
17427+
dece6358
AM
17428+ SiMustWriteLock(sb);
17429+
1facf9fc 17430+ dir = sb->s_root->d_inode;
17431+ sbinfo = au_sbi(sb);
17432+ err = 0;
17433+ opt_xino = NULL;
17434+ opt = opts->opt;
17435+ while (err >= 0 && opt->type != Opt_tail) {
17436+ err = au_opt_simple(sb, opt, opts);
17437+ if (!err)
17438+ err = au_opt_br(sb, opt, opts);
17439+ if (!err)
17440+ err = au_opt_xino(sb, opt, &opt_xino, opts);
17441+ opt++;
17442+ }
17443+ if (err > 0)
17444+ err = 0;
17445+ AuTraceErr(err);
17446+ /* go on even err */
17447+
17448+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
17449+ if (unlikely(rerr && !err))
17450+ err = rerr;
17451+
17452+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
17453+ rerr = au_xib_trunc(sb);
17454+ if (unlikely(rerr && !err))
17455+ err = rerr;
17456+ }
17457+
17458+ /* will be handled by the caller */
17459+ if (!au_ftest_opts(opts->flags, REFRESH_DIR)
17460+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
17461+ au_fset_opts(opts->flags, REFRESH_DIR);
17462+
17463+ AuDbg("status 0x%x\n", opts->flags);
17464+ return err;
17465+}
17466+
17467+/* ---------------------------------------------------------------------- */
17468+
17469+unsigned int au_opt_udba(struct super_block *sb)
17470+{
17471+ return au_mntflags(sb) & AuOptMask_UDBA;
17472+}
1308ab2a 17473diff -uprN -x .git linux-2.6.31/fs/aufs/opts.h aufs2-2.6.git/fs/aufs/opts.h
17474--- linux-2.6.31/fs/aufs/opts.h 1970-01-01 00:00:00.000000000 +0000
17475+++ aufs2-2.6.git/fs/aufs/opts.h 2009-09-21 21:49:23.408274204 +0000
dece6358 17476@@ -0,0 +1,196 @@
1facf9fc 17477+/*
17478+ * Copyright (C) 2005-2009 Junjiro R. Okajima
17479+ *
17480+ * This program, aufs is free software; you can redistribute it and/or modify
17481+ * it under the terms of the GNU General Public License as published by
17482+ * the Free Software Foundation; either version 2 of the License, or
17483+ * (at your option) any later version.
dece6358
AM
17484+ *
17485+ * This program is distributed in the hope that it will be useful,
17486+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17487+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17488+ * GNU General Public License for more details.
17489+ *
17490+ * You should have received a copy of the GNU General Public License
17491+ * along with this program; if not, write to the Free Software
17492+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17493+ */
17494+
17495+/*
17496+ * mount options/flags
17497+ */
17498+
17499+#ifndef __AUFS_OPTS_H__
17500+#define __AUFS_OPTS_H__
17501+
17502+#ifdef __KERNEL__
17503+
dece6358 17504+#include <linux/path.h>
1facf9fc 17505+#include <linux/aufs_type.h>
17506+
dece6358
AM
17507+struct file;
17508+struct super_block;
17509+
1facf9fc 17510+/* ---------------------------------------------------------------------- */
17511+
/* mount flags */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HINOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* unimplemented */
#define AuOpt_REFROF		(1 << 8)	/* unimplemented */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */

/*
 * features which are configured out get a zero flag value, so that testing
 * them is always false and setting/clearing them is a no-op.
 */
#ifndef CONFIG_AUFS_HINOTIFY
#undef AuOpt_UDBA_HINOTIFY
#define AuOpt_UDBA_HINOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* the flags a fresh mount starts with */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all UDBA modes; at most one of these bits is meant to be set */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HINOTIFY)

/*
 * accessors, @name is the flag name without the AuOpt_ prefix.
 * - au_opt_test() parenthesizes @flags so that an expression such as
 *   "a | b" is tested as a whole.
 * - the statement macros are wrapped in do { } while (0) so that they behave
 *   as a single statement, e.g. in front of an "else".
 * - au_opt_set() refuses UDBA flags at build time, they have to go through
 *   au_opt_set_udba() which clears the other UDBA bits first.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
17557+
17558+/* ---------------------------------------------------------------------- */
17559+
/*
 * policies to select one among multiple writable branches when creating.
 * the values are used as an index (see au_wbr_create_ops), keep them dense.
 */
enum {
	AuWbrCreate_TDP = 0,	/* top down parent */
	AuWbrCreate_RR = 1,	/* round robin */
	AuWbrCreate_MFS = 2,	/* most free space */
	AuWbrCreate_MFSV = 3,	/* mfs with seconds */
	AuWbrCreate_MFSRR = 4,	/* mfs then rr */
	AuWbrCreate_MFSRRV = 5,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS = 6,	/* parent and mfs */
	AuWbrCreate_PMFSV = 7,	/* parent and mfs with seconds */

	AuWbrCreate_Def = AuWbrCreate_TDP
};

/* policies to select the writable branch for a copy-up */
enum {
	AuWbrCopyup_TDP = 0,	/* top down parent */
	AuWbrCopyup_BUP = 1,	/* bottom up parent */
	AuWbrCopyup_BU = 2,	/* bottom up */

	AuWbrCopyup_Def = AuWbrCopyup_TDP
};
17581+
17582+/* ---------------------------------------------------------------------- */
17583+
17584+struct au_opt_add {
17585+ aufs_bindex_t bindex;
17586+ char *pathname;
17587+ int perm;
17588+ struct path path;
17589+};
17590+
17591+struct au_opt_del {
17592+ char *pathname;
17593+ struct path h_path;
17594+};
17595+
17596+struct au_opt_mod {
17597+ char *path;
17598+ int perm;
17599+ struct dentry *h_root;
17600+};
17601+
17602+struct au_opt_xino {
17603+ char *path;
17604+ struct file *file;
17605+};
17606+
17607+struct au_opt_xino_itrunc {
17608+ aufs_bindex_t bindex;
17609+};
17610+
17611+struct au_opt_wbr_create {
17612+ int wbr_create;
17613+ int mfs_second;
17614+ unsigned long long mfsrr_watermark;
17615+};
17616+
17617+struct au_opt {
17618+ int type;
17619+ union {
17620+ struct au_opt_xino xino;
17621+ struct au_opt_xino_itrunc xino_itrunc;
17622+ struct au_opt_add add;
17623+ struct au_opt_del del;
17624+ struct au_opt_mod mod;
17625+ int dirwh;
17626+ int rdcache;
17627+ unsigned int rdblk;
17628+ unsigned int rdhash;
17629+ int udba;
17630+ struct au_opt_wbr_create wbr_create;
17631+ int wbr_copyup;
17632+ };
17633+};
17634+
/*
 * opts flags, kept in au_opts.flags.
 * @name is the flag name without the AuOpts_ prefix.  The set/clear macros
 * are wrapped in do { } while (0) so that they behave as a single statement,
 * e.g. in front of an "else".
 */
#define AuOpts_REMOUNT		1
#define AuOpts_REFRESH_DIR	(1 << 1)
#define AuOpts_REFRESH_NONDIR	(1 << 2)
#define AuOpts_TRUNC_XIB	(1 << 3)
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) do { \
	(flags) |= AuOpts_##name; \
} while (0)
#define au_fclr_opts(flags, name) do { \
	(flags) &= ~AuOpts_##name; \
} while (0)
17643+
/* the whole set of options handed to a mount or remount */
struct au_opts {
	struct au_opt	*opt;		/* array terminated by Opt_tail */
	int		max_opt;

	unsigned int	given_udba;	/* udba bits given by the options */
	unsigned int	flags;		/* AuOpts_* */
	unsigned long	sb_flags;	/* super_block flags, e.g. MS_RDONLY */
};
17652+
17653+/* ---------------------------------------------------------------------- */
17654+
17655+const char *au_optstr_br_perm(int brperm);
17656+const char *au_optstr_udba(int udba);
17657+const char *au_optstr_wbr_copyup(int wbr_copyup);
17658+const char *au_optstr_wbr_create(int wbr_create);
17659+
17660+void au_opts_free(struct au_opts *opts);
17661+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
17662+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
17663+ unsigned int pending);
17664+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
17665+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
17666+
17667+unsigned int au_opt_udba(struct super_block *sb);
17668+
17669+/* ---------------------------------------------------------------------- */
17670+
17671+#endif /* __KERNEL__ */
17672+#endif /* __AUFS_OPTS_H__ */
1308ab2a 17673diff -uprN -x .git linux-2.6.31/fs/aufs/plink.c aufs2-2.6.git/fs/aufs/plink.c
17674--- linux-2.6.31/fs/aufs/plink.c 1970-01-01 00:00:00.000000000 +0000
17675+++ aufs2-2.6.git/fs/aufs/plink.c 2009-09-21 21:49:23.408274204 +0000
17676@@ -0,0 +1,396 @@
1facf9fc 17677+/*
17678+ * Copyright (C) 2005-2009 Junjiro R. Okajima
17679+ *
17680+ * This program, aufs is free software; you can redistribute it and/or modify
17681+ * it under the terms of the GNU General Public License as published by
17682+ * the Free Software Foundation; either version 2 of the License, or
17683+ * (at your option) any later version.
dece6358
AM
17684+ *
17685+ * This program is distributed in the hope that it will be useful,
17686+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17687+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17688+ * GNU General Public License for more details.
17689+ *
17690+ * You should have received a copy of the GNU General Public License
17691+ * along with this program; if not, write to the Free Software
17692+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 17693+ */
17694+
17695+/*
17696+ * pseudo-link
17697+ */
17698+
17699+#include "aufs.h"
17700+
17701+/*
17702+ * during a user process maintains the pseudo-links,
17703+ * prohibit adding a new plink and branch manipulation.
17704+ */
17705+void au_plink_block_maintain(struct super_block *sb)
17706+{
17707+ struct au_sbinfo *sbi = au_sbi(sb);
dece6358
AM
17708+
17709+ SiMustAnyLock(sb);
17710+
1facf9fc 17711+ /* gave up wake_up_bit() */
17712+ wait_event(sbi->si_plink_wq, !au_ftest_si(sbi, MAINTAIN_PLINK));
17713+}
17714+
17715+/* ---------------------------------------------------------------------- */
17716+
17717+struct pseudo_link {
17718+ struct list_head list;
17719+ struct inode *inode;
17720+};
17721+
#ifdef CONFIG_AUFS_DEBUG
/* debug aid: print the inode number of every pseudo-linked inode */
void au_plink_list(struct super_block *sb)
{
	struct au_sbinfo *sbi;
	struct pseudo_link *entry;

	SiMustAnyLock(sb);

	sbi = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));

	spin_lock(&sbi->si_plink.spin);
	list_for_each_entry(entry, &sbi->si_plink.head, list) {
		AuDbg("%lu\n", entry->inode->i_ino);
	}
	spin_unlock(&sbi->si_plink.spin);
}
#endif
17741+
17742+/* is the inode pseudo-linked? */
17743+int au_plink_test(struct inode *inode)
17744+{
17745+ int found;
17746+ struct au_sbinfo *sbinfo;
17747+ struct list_head *plink_list;
17748+ struct pseudo_link *plink;
17749+
17750+ sbinfo = au_sbi(inode->i_sb);
dece6358 17751+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 17752+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
17753+
17754+ found = 0;
17755+ plink_list = &sbinfo->si_plink.head;
17756+ spin_lock(&sbinfo->si_plink.spin);
17757+ list_for_each_entry(plink, plink_list, list)
17758+ if (plink->inode == inode) {
17759+ found = 1;
17760+ break;
17761+ }
17762+ spin_unlock(&sbinfo->si_plink.spin);
17763+ return found;
17764+}
17765+
17766+/* ---------------------------------------------------------------------- */
17767+
17768+/*
17769+ * generate a name for plink.
17770+ * the file will be stored under AUFS_WH_PLINKDIR.
17771+ */
17772+/* 20 is max digits length of ulong 64 */
17773+#define PLINK_NAME_LEN ((20 + 1) * 2)
17774+
17775+static int plink_name(char *name, int len, struct inode *inode,
17776+ aufs_bindex_t bindex)
17777+{
17778+ int rlen;
17779+ struct inode *h_inode;
17780+
17781+ h_inode = au_h_iptr(inode, bindex);
17782+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
17783+ return rlen;
17784+}
17785+
17786+/* lookup the plink-ed @inode under the branch at @bindex */
17787+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
17788+{
17789+ struct dentry *h_dentry, *h_parent;
17790+ struct au_branch *br;
17791+ struct inode *h_dir;
17792+ char a[PLINK_NAME_LEN];
17793+ struct qstr tgtname = {
17794+ .name = a
17795+ };
17796+
17797+ br = au_sbr(inode->i_sb, bindex);
17798+ h_parent = br->br_wbr->wbr_plink;
17799+ h_dir = h_parent->d_inode;
17800+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
17801+
17802+ /* always superio. */
17803+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
17804+ h_dentry = au_sio_lkup_one(&tgtname, h_parent, br);
17805+ mutex_unlock(&h_dir->i_mutex);
17806+ return h_dentry;
17807+}
17808+
17809+/* create a pseudo-link */
17810+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
17811+ struct dentry *h_dentry, struct au_branch *br)
17812+{
17813+ int err;
17814+ struct path h_path = {
17815+ .mnt = br->br_mnt
17816+ };
17817+ struct inode *h_dir;
17818+
17819+ h_dir = h_parent->d_inode;
17820+ again:
17821+ h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
17822+ err = PTR_ERR(h_path.dentry);
17823+ if (IS_ERR(h_path.dentry))
17824+ goto out;
17825+
17826+ err = 0;
17827+ /* wh.plink dir is not monitored */
17828+ if (h_path.dentry->d_inode
17829+ && h_path.dentry->d_inode != h_dentry->d_inode) {
17830+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17831+ dput(h_path.dentry);
17832+ h_path.dentry = NULL;
17833+ if (!err)
17834+ goto again;
17835+ }
17836+ if (!err && !h_path.dentry->d_inode)
17837+ err = vfsub_link(h_dentry, h_dir, &h_path);
17838+ dput(h_path.dentry);
17839+
17840+ out:
17841+ return err;
17842+}
17843+
/* arguments of do_whplink() packed for the workqueue callback */
struct do_whplink_args {
	int *errp;		/* where the result of do_whplink() is stored */
	struct qstr *tgt;
	struct dentry *h_parent;
	struct dentry *h_dentry;
	struct au_branch *br;
};

/* workqueue callback, unpacks @args and runs do_whplink() */
static void call_do_whplink(void *args)
{
	struct do_whplink_args *p = args;
	int ret;

	ret = do_whplink(p->tgt, p->h_parent, p->h_dentry, p->br);
	*p->errp = ret;
}
17857+
17858+static int whplink(struct dentry *h_dentry, struct inode *inode,
17859+ aufs_bindex_t bindex, struct au_branch *br)
17860+{
17861+ int err, wkq_err;
17862+ struct au_wbr *wbr;
17863+ struct dentry *h_parent;
17864+ struct inode *h_dir;
17865+ char a[PLINK_NAME_LEN];
17866+ struct qstr tgtname = {
17867+ .name = a
17868+ };
17869+
17870+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
17871+ h_parent = wbr->wbr_plink;
17872+ h_dir = h_parent->d_inode;
17873+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
17874+
17875+ /* always superio. */
17876+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
17877+ if (!au_test_wkq(current)) {
17878+ struct do_whplink_args args = {
17879+ .errp = &err,
17880+ .tgt = &tgtname,
17881+ .h_parent = h_parent,
17882+ .h_dentry = h_dentry,
17883+ .br = br
17884+ };
17885+ wkq_err = au_wkq_wait(call_do_whplink, &args);
17886+ if (unlikely(wkq_err))
17887+ err = wkq_err;
17888+ } else
17889+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
17890+ mutex_unlock(&h_dir->i_mutex);
17891+
17892+ return err;
17893+}
17894+
17895+/* free a single plink */
17896+static void do_put_plink(struct pseudo_link *plink, int do_del)
17897+{
17898+ iput(plink->inode);
17899+ if (do_del)
17900+ list_del(&plink->list);
17901+ kfree(plink);
17902+}
17903+
17904+/*
17905+ * create a new pseudo-link for @h_dentry on @bindex.
17906+ * the linked inode is held in aufs @inode.
17907+ */
17908+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
17909+ struct dentry *h_dentry)
17910+{
17911+ struct super_block *sb;
17912+ struct au_sbinfo *sbinfo;
17913+ struct list_head *plink_list;
17914+ struct pseudo_link *plink;
17915+ int found, err, cnt;
17916+
17917+ sb = inode->i_sb;
17918+ sbinfo = au_sbi(sb);
17919+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17920+
17921+ err = 0;
17922+ cnt = 0;
17923+ found = 0;
17924+ plink_list = &sbinfo->si_plink.head;
17925+ spin_lock(&sbinfo->si_plink.spin);
17926+ list_for_each_entry(plink, plink_list, list) {
17927+ cnt++;
17928+ if (plink->inode == inode) {
17929+ found = 1;
17930+ break;
17931+ }
17932+ }
17933+ if (found) {
17934+ spin_unlock(&sbinfo->si_plink.spin);
17935+ return;
17936+ }
17937+
17938+ plink = NULL;
17939+ if (!found) {
17940+ plink = kmalloc(sizeof(*plink), GFP_ATOMIC);
17941+ if (plink) {
17942+ plink->inode = au_igrab(inode);
17943+ list_add(&plink->list, plink_list);
17944+ cnt++;
17945+ } else
17946+ err = -ENOMEM;
17947+ }
17948+ spin_unlock(&sbinfo->si_plink.spin);
17949+
17950+ if (!err) {
17951+ au_plink_block_maintain(sb);
17952+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
17953+ }
17954+
17955+ if (unlikely(cnt > AUFS_PLINK_WARN))
17956+ AuWarn1("unexpectedly many pseudo links, %d\n", cnt);
17957+ if (unlikely(err)) {
17958+ AuWarn("err %d, damaged pseudo link.\n", err);
17959+ if (!found && plink)
17960+ do_put_plink(plink, /*do_del*/1);
17961+ }
17962+}
17963+
17964+/* free all plinks */
17965+void au_plink_put(struct super_block *sb)
17966+{
17967+ struct au_sbinfo *sbinfo;
17968+ struct list_head *plink_list;
17969+ struct pseudo_link *plink, *tmp;
17970+
dece6358
AM
17971+ SiMustWriteLock(sb);
17972+
1facf9fc 17973+ sbinfo = au_sbi(sb);
17974+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17975+
17976+ plink_list = &sbinfo->si_plink.head;
17977+ /* no spin_lock since sbinfo is write-locked */
17978+ list_for_each_entry_safe(plink, tmp, plink_list, list)
17979+ do_put_plink(plink, 0);
17980+ INIT_LIST_HEAD(plink_list);
17981+}
17982+
17983+/* free the plinks on a branch specified by @br_id */
17984+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
17985+{
17986+ struct au_sbinfo *sbinfo;
17987+ struct list_head *plink_list;
17988+ struct pseudo_link *plink, *tmp;
17989+ struct inode *inode;
17990+ aufs_bindex_t bstart, bend, bindex;
17991+ unsigned char do_put;
17992+
dece6358
AM
17993+ SiMustWriteLock(sb);
17994+
1facf9fc 17995+ sbinfo = au_sbi(sb);
17996+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17997+
17998+ plink_list = &sbinfo->si_plink.head;
17999+ /* no spin_lock since sbinfo is write-locked */
18000+ list_for_each_entry_safe(plink, tmp, plink_list, list) {
18001+ do_put = 0;
18002+ inode = au_igrab(plink->inode);
18003+ ii_write_lock_child(inode);
18004+ bstart = au_ibstart(inode);
18005+ bend = au_ibend(inode);
18006+ if (bstart >= 0) {
18007+ for (bindex = bstart; bindex <= bend; bindex++) {
18008+ if (!au_h_iptr(inode, bindex)
18009+ || au_ii_br_id(inode, bindex) != br_id)
18010+ continue;
18011+ au_set_h_iptr(inode, bindex, NULL, 0);
18012+ do_put = 1;
18013+ break;
18014+ }
18015+ } else
18016+ do_put_plink(plink, 1);
18017+
dece6358
AM
18018+ if (do_put) {
18019+ for (bindex = bstart; bindex <= bend; bindex++)
18020+ if (au_h_iptr(inode, bindex)) {
18021+ do_put = 0;
18022+ break;
18023+ }
18024+ if (do_put)
18025+ do_put_plink(plink, 1);
18026+ }
18027+ ii_write_unlock(inode);
18028+ iput(inode);
18029+ }
18030+}
1308ab2a 18031+
18032+/* ---------------------------------------------------------------------- */
18033+
18034+long au_plink_ioctl(struct file *file, unsigned int cmd)
18035+{
18036+ long err;
18037+ struct super_block *sb;
18038+ struct au_sbinfo *sbinfo;
18039+
18040+ err = -EACCES;
18041+ if (!capable(CAP_SYS_ADMIN))
18042+ goto out;
18043+
18044+ err = 0;
18045+ sb = file->f_dentry->d_sb;
18046+ sbinfo = au_sbi(sb);
18047+ switch (cmd) {
18048+ case AUFS_CTL_PLINK_MAINT:
18049+ /*
18050+ * pseudo-link maintenance mode,
18051+ * cleared by aufs_release_dir()
18052+ */
18053+ si_write_lock(sb);
18054+ if (!au_ftest_si(sbinfo, MAINTAIN_PLINK)) {
18055+ au_fset_si(sbinfo, MAINTAIN_PLINK);
18056+ au_fi(file)->fi_maintain_plink = 1;
18057+ } else
18058+ err = -EBUSY;
18059+ si_write_unlock(sb);
18060+ break;
18061+ case AUFS_CTL_PLINK_CLEAN:
18062+ aufs_write_lock(sb->s_root);
18063+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
18064+ au_plink_put(sb);
18065+ aufs_write_unlock(sb->s_root);
18066+ break;
18067+ default:
18068+ err = -EINVAL;
18069+ }
18070+ out:
18071+ return err;
18072+}
18073diff -uprN -x .git linux-2.6.31/fs/aufs/poll.c aufs2-2.6.git/fs/aufs/poll.c
18074--- linux-2.6.31/fs/aufs/poll.c 1970-01-01 00:00:00.000000000 +0000
18075+++ aufs2-2.6.git/fs/aufs/poll.c 2009-09-21 21:49:23.408274204 +0000
dece6358
AM
18076@@ -0,0 +1,56 @@
18077+/*
18078+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18079+ *
18080+ * This program, aufs is free software; you can redistribute it and/or modify
18081+ * it under the terms of the GNU General Public License as published by
18082+ * the Free Software Foundation; either version 2 of the License, or
18083+ * (at your option) any later version.
18084+ *
18085+ * This program is distributed in the hope that it will be useful,
18086+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18087+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18088+ * GNU General Public License for more details.
18089+ *
18090+ * You should have received a copy of the GNU General Public License
18091+ * along with this program; if not, write to the Free Software
18092+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18093+ */
18094+
1308ab2a 18095+/*
18096+ * poll operation
18097+ * There is only one filesystem which implements ->poll operation, currently.
18098+ */
18099+
18100+#include "aufs.h"
18101+
18102+unsigned int aufs_poll(struct file *file, poll_table *wait)
18103+{
18104+ unsigned int mask;
18105+ int err;
18106+ struct file *h_file;
18107+ struct dentry *dentry;
18108+ struct super_block *sb;
18109+
18110+ /* We should pretend an error happened. */
18111+ mask = POLLERR /* | POLLIN | POLLOUT */;
18112+ dentry = file->f_dentry;
18113+ sb = dentry->d_sb;
18114+ si_read_lock(sb, AuLock_FLUSH);
18115+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
18116+ if (unlikely(err))
18117+ goto out;
18118+
18119+ /* it is not an error if h_file has no operation */
18120+ mask = DEFAULT_POLLMASK;
18121+ h_file = au_h_fptr(file, au_fbstart(file));
18122+ if (h_file->f_op && h_file->f_op->poll)
18123+ mask = h_file->f_op->poll(h_file, wait);
18124+
18125+ di_read_unlock(dentry, AuLock_IR);
18126+ fi_read_unlock(file);
18127+
18128+ out:
18129+ si_read_unlock(sb);
18130+ AuTraceErr((int)mask);
18131+ return mask;
18132+}
18133diff -uprN -x .git linux-2.6.31/fs/aufs/rdu.c aufs2-2.6.git/fs/aufs/rdu.c
18134--- linux-2.6.31/fs/aufs/rdu.c 1970-01-01 00:00:00.000000000 +0000
18135+++ aufs2-2.6.git/fs/aufs/rdu.c 2009-09-21 21:49:23.408274204 +0000
18136@@ -0,0 +1,331 @@
18137+/*
18138+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18139+ *
18140+ * This program, aufs is free software; you can redistribute it and/or modify
18141+ * it under the terms of the GNU General Public License as published by
18142+ * the Free Software Foundation; either version 2 of the License, or
18143+ * (at your option) any later version.
18144+ *
18145+ * This program is distributed in the hope that it will be useful,
18146+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18147+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18148+ * GNU General Public License for more details.
18149+ *
18150+ * You should have received a copy of the GNU General Public License
18151+ * along with this program; if not, write to the Free Software
18152+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18153+ */
18154+
18155+/*
18156+ * readdir in userspace.
18157+ */
18158+
18159+#include <linux/security.h>
18160+#include <linux/uaccess.h>
18161+#include <linux/aufs_type.h>
18162+#include "aufs.h"
18163+
18164+/* bits for struct aufs_rdu.flags */
18165+#define AuRdu_CALLED 1
18166+#define AuRdu_CONT (1 << 1)
18167+#define AuRdu_FULL (1 << 2)
18168+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
18169+#define au_fset_rdu(flags, name) { (flags) |= AuRdu_##name; }
18170+#define au_fclr_rdu(flags, name) { (flags) &= ~AuRdu_##name; }
18171+
18172+struct au_rdu_arg {
18173+ struct aufs_rdu *rdu;
18174+ union au_rdu_ent_ul ent;
18175+ unsigned long end;
18176+
18177+ struct super_block *sb;
18178+ int err;
18179+};
18180+
18181+static int au_rdu_fill(void *__arg, const char *name, int nlen,
18182+ loff_t offset, u64 h_ino, unsigned int d_type)
18183+{
18184+ int err, len;
18185+ struct au_rdu_arg *arg = __arg;
18186+ struct aufs_rdu *rdu = arg->rdu;
18187+ struct au_rdu_ent ent;
18188+
18189+ err = 0;
18190+ arg->err = 0;
18191+ au_fset_rdu(rdu->cookie.flags, CALLED);
18192+ len = au_rdu_len(nlen);
18193+ if (arg->ent.ul + len < arg->end) {
18194+ ent.ino = h_ino;
18195+ ent.bindex = rdu->cookie.bindex;
18196+ ent.type = d_type;
18197+ ent.nlen = nlen;
18198+
18199+ err = -EFAULT;
18200+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
18201+ goto out;
18202+ if (copy_to_user(arg->ent.e->name, name, nlen))
18203+ goto out;
18204+ /* the terminating NULL */
18205+ if (__put_user(0, arg->ent.e->name + nlen))
18206+ goto out;
18207+ err = 0;
18208+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
18209+ arg->ent.ul += len;
18210+ rdu->rent++;
18211+ } else {
18212+ err = -EFAULT;
18213+ au_fset_rdu(rdu->cookie.flags, FULL);
18214+ rdu->full = 1;
18215+ rdu->tail = arg->ent;
18216+ }
18217+
18218+ out:
18219+ /* AuTraceErr(err); */
18220+ return err;
18221+}
18222+
18223+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
18224+{
18225+ int err;
18226+ loff_t offset;
18227+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
18228+
18229+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
18230+ err = offset;
18231+ if (unlikely(offset != cookie->h_pos))
18232+ goto out;
18233+
18234+ err = 0;
18235+ do {
18236+ arg->err = 0;
18237+ au_fclr_rdu(cookie->flags, CALLED);
18238+ /* smp_mb(); */
18239+ err = vfsub_readdir(h_file, au_rdu_fill, arg);
18240+ if (err >= 0)
18241+ err = arg->err;
18242+ } while (!err
18243+ && au_ftest_rdu(cookie->flags, CALLED)
18244+ && !au_ftest_rdu(cookie->flags, FULL));
18245+ cookie->h_pos = h_file->f_pos;
18246+
18247+ out:
18248+ AuTraceErr(err);
18249+ return err;
18250+}
18251+
18252+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
18253+{
18254+ int err;
18255+ aufs_bindex_t bend;
18256+ struct au_rdu_arg arg;
18257+ struct dentry *dentry;
18258+ struct inode *inode;
18259+ struct file *h_file;
18260+ struct au_rdu_cookie *cookie = &rdu->cookie;
18261+
18262+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
18263+ if (unlikely(err)) {
18264+ err = -EFAULT;
18265+ AuTraceErr(err);
18266+ goto out;
18267+ }
18268+ rdu->rent = 0;
18269+ rdu->tail = rdu->ent;
18270+ rdu->full = 0;
18271+ arg.rdu = rdu;
18272+ arg.ent = rdu->ent;
18273+ arg.end = arg.ent.ul;
18274+ arg.end += rdu->sz;
18275+
18276+ err = -ENOTDIR;
18277+ if (unlikely(!file->f_op || !file->f_op->readdir))
18278+ goto out;
18279+
18280+ err = security_file_permission(file, MAY_READ);
18281+ AuTraceErr(err);
18282+ if (unlikely(err))
18283+ goto out;
18284+
18285+ dentry = file->f_dentry;
18286+ inode = dentry->d_inode;
18287+#if 1
18288+ mutex_lock(&inode->i_mutex);
18289+#else
18290+ err = mutex_lock_killable(&inode->i_mutex);
18291+ AuTraceErr(err);
18292+ if (unlikely(err))
18293+ goto out;
18294+#endif
18295+ err = -ENOENT;
18296+ if (unlikely(IS_DEADDIR(inode)))
18297+ goto out_mtx;
18298+
18299+ arg.sb = inode->i_sb;
18300+ si_read_lock(arg.sb, AuLock_FLUSH);
18301+ fi_read_lock(file);
18302+
18303+ err = -EAGAIN;
18304+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
18305+ && cookie->generation != au_figen(file)))
18306+ goto out_unlock;
18307+
18308+ err = 0;
18309+ if (!rdu->blk) {
18310+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
18311+ if (!rdu->blk)
18312+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
18313+ }
18314+ bend = au_fbstart(file);
18315+ if (cookie->bindex < bend)
18316+ cookie->bindex = bend;
18317+ bend = au_fbend(file);
18318+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
18319+ for (; !err && cookie->bindex <= bend;
18320+ cookie->bindex++, cookie->h_pos = 0) {
18321+ h_file = au_h_fptr(file, cookie->bindex);
18322+ if (!h_file)
18323+ continue;
18324+
18325+ au_fclr_rdu(cookie->flags, FULL);
18326+ err = au_rdu_do(h_file, &arg);
18327+ AuTraceErr(err);
18328+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
18329+ break;
18330+ }
18331+ AuDbg("rent %llu\n", rdu->rent);
18332+
18333+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
18334+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
18335+ au_fset_rdu(cookie->flags, CONT);
18336+ cookie->generation = au_figen(file);
18337+ }
18338+
18339+ ii_read_lock_child(inode);
18340+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
18341+ ii_read_unlock(inode);
18342+
18343+ out_unlock:
18344+ fi_read_unlock(file);
18345+ si_read_unlock(arg.sb);
18346+ out_mtx:
18347+ mutex_unlock(&inode->i_mutex);
18348+ out:
18349+ AuTraceErr(err);
18350+ return err;
18351+}
18352+
18353+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
18354+{
18355+ int err;
18356+ ino_t ino;
18357+ unsigned long long nent;
18358+ union au_rdu_ent_ul *u;
18359+ struct au_rdu_ent ent;
18360+ struct super_block *sb;
18361+
18362+ err = 0;
18363+ nent = rdu->nent;
18364+ u = &rdu->ent;
18365+ sb = file->f_dentry->d_sb;
18366+ si_read_lock(sb, AuLock_FLUSH);
18367+ while (nent-- > 0) {
18368+ err = !access_ok(VERIFY_WRITE, u->e, sizeof(ent));
18369+ if (unlikely(err)) {
18370+ err = -EFAULT;
18371+ AuTraceErr(err);
18372+ break;
18373+ }
18374+
18375+ err = copy_from_user(&ent, u->e, sizeof(ent));
18376+ if (unlikely(err)) {
18377+ err = -EFAULT;
18378+ AuTraceErr(err);
18379+ break;
18380+ }
18381+
18382+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
18383+ if (!ent.wh)
18384+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
18385+ else
18386+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
18387+ &ino);
18388+ if (unlikely(err)) {
18389+ AuTraceErr(err);
18390+ break;
18391+ }
18392+
18393+ err = __put_user(ino, &u->e->ino);
18394+ if (unlikely(err)) {
18395+ err = -EFAULT;
18396+ AuTraceErr(err);
18397+ break;
18398+ }
18399+ u->ul += au_rdu_len(ent.nlen);
18400+ }
18401+ si_read_unlock(sb);
18402+
18403+ return err;
18404+}
18405+
18406+/* ---------------------------------------------------------------------- */
18407+
18408+static int au_rdu_verify(struct aufs_rdu *rdu)
18409+{
18410+ AuDbg("rdu{%llu, %p, (%u, %u) | %u | %llu, %u, %u | "
18411+ "%llu, b%d, 0x%x, g%u}\n",
18412+ rdu->sz, rdu->ent.e, rdu->verify[0], rdu->verify[1],
18413+ rdu->blk,
18414+ rdu->rent, rdu->shwh, rdu->full,
18415+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
18416+ rdu->cookie.generation);
dece6358 18417+
1308ab2a 18418+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)
18419+ && rdu->verify[AufsCtlRduV_SZ_PTR] == sizeof(rdu))
18420+ return 0;
dece6358 18421+
1308ab2a 18422+ AuDbg("%u:%u, %u:%u\n",
18423+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu),
18424+ rdu->verify[AufsCtlRduV_SZ_PTR], (unsigned int)sizeof(rdu));
18425+ return -EINVAL;
18426+}
18427+
18428+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 18429+{
1308ab2a 18430+ long err, e;
18431+ struct aufs_rdu rdu;
18432+ void __user *p = (void __user *)arg;
dece6358 18433+
1308ab2a 18434+ err = copy_from_user(&rdu, p, sizeof(rdu));
18435+ if (unlikely(err)) {
18436+ err = -EFAULT;
18437+ AuTraceErr(err);
18438+ goto out;
18439+ }
18440+ err = au_rdu_verify(&rdu);
dece6358
AM
18441+ if (unlikely(err))
18442+ goto out;
18443+
1308ab2a 18444+ switch (cmd) {
18445+ case AUFS_CTL_RDU:
18446+ err = au_rdu(file, &rdu);
18447+ if (unlikely(err))
18448+ break;
dece6358 18449+
1308ab2a 18450+ e = copy_to_user(p, &rdu, sizeof(rdu));
18451+ if (unlikely(e)) {
18452+ err = -EFAULT;
18453+ AuTraceErr(err);
18454+ }
18455+ break;
18456+ case AUFS_CTL_RDU_INO:
18457+ err = au_rdu_ino(file, &rdu);
18458+ break;
18459+
18460+ default:
18461+ err = -EINVAL;
18462+ }
dece6358
AM
18463+
18464+ out:
1308ab2a 18465+ AuTraceErr(err);
18466+ return err;
1facf9fc 18467+}
1308ab2a 18468diff -uprN -x .git linux-2.6.31/fs/aufs/rwsem.h aufs2-2.6.git/fs/aufs/rwsem.h
18469--- linux-2.6.31/fs/aufs/rwsem.h 1970-01-01 00:00:00.000000000 +0000
18470+++ aufs2-2.6.git/fs/aufs/rwsem.h 2009-09-21 21:49:23.408274204 +0000
dece6358 18471@@ -0,0 +1,186 @@
1facf9fc 18472+/*
18473+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18474+ *
18475+ * This program, aufs is free software; you can redistribute it and/or modify
18476+ * it under the terms of the GNU General Public License as published by
18477+ * the Free Software Foundation; either version 2 of the License, or
18478+ * (at your option) any later version.
dece6358
AM
18479+ *
18480+ * This program is distributed in the hope that it will be useful,
18481+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18482+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18483+ * GNU General Public License for more details.
18484+ *
18485+ * You should have received a copy of the GNU General Public License
18486+ * along with this program; if not, write to the Free Software
18487+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18488+ */
18489+
18490+/*
18491+ * simple read-write semaphore wrappers
18492+ */
18493+
18494+#ifndef __AUFS_RWSEM_H__
18495+#define __AUFS_RWSEM_H__
18496+
18497+#ifdef __KERNEL__
18498+
dece6358
AM
18499+#include <linux/rwsem.h>
18500+
18501+struct au_rwsem {
18502+ struct rw_semaphore rwsem;
18503+#ifdef CONFIG_AUFS_DEBUG
18504+ /* just for debugging, not almighty counter */
18505+ atomic_t rcnt, wcnt;
18506+#endif
18507+};
18508+
18509+#ifdef CONFIG_AUFS_DEBUG
18510+#define AuDbgCntInit(rw) do { \
18511+ atomic_set(&(rw)->rcnt, 0); \
18512+ atomic_set(&(rw)->wcnt, 0); \
18513+ smp_mb(); /* atomic set */ \
18514+} while (0)
18515+
18516+#define AuDbgRcntInc(rw) atomic_inc_return(&(rw)->rcnt)
18517+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
18518+#define AuDbgWcntInc(rw) WARN_ON(atomic_inc_return(&(rw)->wcnt) > 1)
18519+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
18520+#else
18521+#define AuDbgCntInit(rw) do {} while (0)
18522+#define AuDbgRcntInc(rw) do {} while (0)
18523+#define AuDbgRcntDec(rw) do {} while (0)
18524+#define AuDbgWcntInc(rw) do {} while (0)
18525+#define AuDbgWcntDec(rw) do {} while (0)
18526+#endif /* CONFIG_AUFS_DEBUG */
18527+
18528+/* to debug easier, do not make them inlined functions */
18529+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
18530+/* rwsem_is_locked() is unusable */
18531+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
18532+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
18533+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
18534+ && atomic_read(&(rw)->wcnt) <= 0)
18535+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
18536+ || atomic_read(&(rw)->wcnt))
18537+
18538+static inline void au_rw_init(struct au_rwsem *rw)
18539+{
18540+ AuDbgCntInit(rw);
18541+ init_rwsem(&rw->rwsem);
18542+}
18543+
18544+static inline void au_rw_init_wlock(struct au_rwsem *rw)
18545+{
18546+ au_rw_init(rw);
18547+ down_write(&rw->rwsem);
18548+ AuDbgWcntInc(rw);
18549+}
18550+
18551+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
18552+ unsigned int lsc)
18553+{
18554+ au_rw_init(rw);
18555+ down_write_nested(&rw->rwsem, lsc);
18556+ AuDbgWcntInc(rw);
18557+}
18558+
18559+static inline void au_rw_read_lock(struct au_rwsem *rw)
18560+{
18561+ down_read(&rw->rwsem);
18562+ AuDbgRcntInc(rw);
18563+}
18564+
18565+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
18566+{
18567+ down_read_nested(&rw->rwsem, lsc);
18568+ AuDbgRcntInc(rw);
18569+}
18570+
18571+static inline void au_rw_read_unlock(struct au_rwsem *rw)
18572+{
18573+ AuRwMustReadLock(rw);
18574+ AuDbgRcntDec(rw);
18575+ up_read(&rw->rwsem);
18576+}
18577+
18578+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
18579+{
18580+ AuRwMustWriteLock(rw);
18581+ AuDbgRcntInc(rw);
18582+ AuDbgWcntDec(rw);
18583+ downgrade_write(&rw->rwsem);
18584+}
18585+
18586+static inline void au_rw_write_lock(struct au_rwsem *rw)
18587+{
18588+ down_write(&rw->rwsem);
18589+ AuDbgWcntInc(rw);
18590+}
18591+
18592+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
18593+ unsigned int lsc)
18594+{
18595+ down_write_nested(&rw->rwsem, lsc);
18596+ AuDbgWcntInc(rw);
18597+}
1facf9fc 18598+
dece6358
AM
18599+static inline void au_rw_write_unlock(struct au_rwsem *rw)
18600+{
18601+ AuRwMustWriteLock(rw);
18602+ AuDbgWcntDec(rw);
18603+ up_write(&rw->rwsem);
18604+}
18605+
18606+/* why is not _nested version defined */
18607+static inline int au_rw_read_trylock(struct au_rwsem *rw)
18608+{
18609+ int ret = down_read_trylock(&rw->rwsem);
18610+ if (ret)
18611+ AuDbgRcntInc(rw);
18612+ return ret;
18613+}
18614+
18615+static inline int au_rw_write_trylock(struct au_rwsem *rw)
18616+{
18617+ int ret = down_write_trylock(&rw->rwsem);
18618+ if (ret)
18619+ AuDbgWcntInc(rw);
18620+ return ret;
18621+}
18622+
18623+#undef AuDbgCntInit
18624+#undef AuDbgRcntInc
18625+#undef AuDbgRcntDec
18626+#undef AuDbgWcntInc
18627+#undef AuDbgWcntDec
1facf9fc 18628+
18629+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
18630+static inline void prefix##_read_lock(param) \
dece6358 18631+{ au_rw_read_lock(rwsem); } \
1facf9fc 18632+static inline void prefix##_write_lock(param) \
dece6358 18633+{ au_rw_write_lock(rwsem); } \
1facf9fc 18634+static inline int prefix##_read_trylock(param) \
dece6358 18635+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 18636+static inline int prefix##_write_trylock(param) \
dece6358 18637+{ return au_rw_write_trylock(rwsem); }
1facf9fc 18638+/* why is not _nested version defined */
18639+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 18640+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 18641+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 18642+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 18643+
18644+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
18645+static inline void prefix##_read_unlock(param) \
dece6358 18646+{ au_rw_read_unlock(rwsem); } \
1facf9fc 18647+static inline void prefix##_write_unlock(param) \
dece6358 18648+{ au_rw_write_unlock(rwsem); } \
1facf9fc 18649+static inline void prefix##_downgrade_lock(param) \
dece6358 18650+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 18651+
18652+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
18653+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
18654+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
18655+
18656+#endif /* __KERNEL__ */
18657+#endif /* __AUFS_RWSEM_H__ */
1308ab2a 18658diff -uprN -x .git linux-2.6.31/fs/aufs/sbinfo.c aufs2-2.6.git/fs/aufs/sbinfo.c
18659--- linux-2.6.31/fs/aufs/sbinfo.c 1970-01-01 00:00:00.000000000 +0000
18660+++ aufs2-2.6.git/fs/aufs/sbinfo.c 2009-09-21 21:49:23.408274204 +0000
dece6358 18661@@ -0,0 +1,208 @@
1facf9fc 18662+/*
18663+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18664+ *
18665+ * This program, aufs is free software; you can redistribute it and/or modify
18666+ * it under the terms of the GNU General Public License as published by
18667+ * the Free Software Foundation; either version 2 of the License, or
18668+ * (at your option) any later version.
dece6358
AM
18669+ *
18670+ * This program is distributed in the hope that it will be useful,
18671+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18672+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18673+ * GNU General Public License for more details.
18674+ *
18675+ * You should have received a copy of the GNU General Public License
18676+ * along with this program; if not, write to the Free Software
18677+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18678+ */
18679+
18680+/*
18681+ * superblock private data
18682+ */
18683+
18684+#include "aufs.h"
18685+
18686+/*
18687+ * they are necessary regardless sysfs is disabled.
18688+ */
18689+void au_si_free(struct kobject *kobj)
18690+{
18691+ struct au_sbinfo *sbinfo;
18692+ struct super_block *sb;
18693+
18694+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
18695+ AuDebugOn(!list_empty(&sbinfo->si_plink.head));
18696+
18697+ sb = sbinfo->si_sb;
18698+ si_write_lock(sb);
18699+ au_xino_clr(sb);
18700+ au_br_free(sbinfo);
18701+ kfree(sbinfo->si_branch);
18702+ mutex_destroy(&sbinfo->si_xib_mtx);
18703+ si_write_unlock(sb);
dece6358 18704+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 18705+
18706+ kfree(sbinfo);
18707+}
18708+
18709+int au_si_alloc(struct super_block *sb)
18710+{
18711+ int err;
18712+ struct au_sbinfo *sbinfo;
18713+
18714+ err = -ENOMEM;
18715+ sbinfo = kmalloc(sizeof(*sbinfo), GFP_NOFS);
18716+ if (unlikely(!sbinfo))
18717+ goto out;
18718+
18719+ /* will be reallocated separately */
18720+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
18721+ if (unlikely(!sbinfo->si_branch))
18722+ goto out_sbinfo;
18723+
18724+ memset(&sbinfo->si_kobj, 0, sizeof(sbinfo->si_kobj));
18725+ err = sysaufs_si_init(sbinfo);
18726+ if (unlikely(err))
18727+ goto out_br;
18728+
18729+ au_nwt_init(&sbinfo->si_nowait);
dece6358 18730+ au_rw_init_wlock(&sbinfo->si_rwsem);
1facf9fc 18731+ sbinfo->si_generation = 0;
18732+ sbinfo->au_si_status = 0;
18733+ sbinfo->si_bend = -1;
18734+ sbinfo->si_last_br_id = 0;
18735+
18736+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
18737+ sbinfo->si_wbr_create = AuWbrCreate_Def;
18738+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + AuWbrCopyup_Def;
18739+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + AuWbrCreate_Def;
18740+
18741+ sbinfo->si_mntflags = AuOpt_Def;
18742+
18743+ sbinfo->si_xread = NULL;
18744+ sbinfo->si_xwrite = NULL;
18745+ sbinfo->si_xib = NULL;
18746+ mutex_init(&sbinfo->si_xib_mtx);
18747+ sbinfo->si_xib_buf = NULL;
18748+ sbinfo->si_xino_brid = -1;
18749+ /* leave si_xib_last_pindex and si_xib_next_bit */
18750+
18751+ sbinfo->si_rdcache = AUFS_RDCACHE_DEF * HZ;
18752+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
18753+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
18754+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
18755+
18756+ au_spl_init(&sbinfo->si_plink);
18757+ init_waitqueue_head(&sbinfo->si_plink_wq);
18758+
18759+ /* leave other members for sysaufs and si_mnt. */
18760+ sbinfo->si_sb = sb;
18761+ sb->s_fs_info = sbinfo;
18762+ au_debug_sbinfo_init(sbinfo);
18763+ return 0; /* success */
18764+
18765+ out_br:
18766+ kfree(sbinfo->si_branch);
18767+ out_sbinfo:
18768+ kfree(sbinfo);
18769+ out:
18770+ return err;
18771+}
18772+
18773+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
18774+{
18775+ int err, sz;
18776+ struct au_branch **brp;
18777+
dece6358
AM
18778+ AuRwMustWriteLock(&sbinfo->si_rwsem);
18779+
1facf9fc 18780+ err = -ENOMEM;
18781+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
18782+ if (unlikely(!sz))
18783+ sz = sizeof(*brp);
18784+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
18785+ if (brp) {
18786+ sbinfo->si_branch = brp;
18787+ err = 0;
18788+ }
18789+
18790+ return err;
18791+}
18792+
18793+/* ---------------------------------------------------------------------- */
18794+
18795+unsigned int au_sigen_inc(struct super_block *sb)
18796+{
18797+ unsigned int gen;
18798+
dece6358
AM
18799+ SiMustWriteLock(sb);
18800+
1facf9fc 18801+ gen = ++au_sbi(sb)->si_generation;
18802+ au_update_digen(sb->s_root);
18803+ au_update_iigen(sb->s_root->d_inode);
18804+ sb->s_root->d_inode->i_version++;
18805+ return gen;
18806+}
18807+
18808+aufs_bindex_t au_new_br_id(struct super_block *sb)
18809+{
18810+ aufs_bindex_t br_id;
18811+ int i;
18812+ struct au_sbinfo *sbinfo;
18813+
dece6358
AM
18814+ SiMustWriteLock(sb);
18815+
1facf9fc 18816+ sbinfo = au_sbi(sb);
18817+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
18818+ br_id = ++sbinfo->si_last_br_id;
18819+ if (br_id && au_br_index(sb, br_id) < 0)
18820+ return br_id;
18821+ }
18822+
18823+ return -1;
18824+}
18825+
18826+/* ---------------------------------------------------------------------- */
18827+
18828+/* dentry and super_block lock. call at entry point */
18829+void aufs_read_lock(struct dentry *dentry, int flags)
18830+{
18831+ si_read_lock(dentry->d_sb, flags);
18832+ if (au_ftest_lock(flags, DW))
18833+ di_write_lock_child(dentry);
18834+ else
18835+ di_read_lock_child(dentry, flags);
18836+}
18837+
18838+void aufs_read_unlock(struct dentry *dentry, int flags)
18839+{
18840+ if (au_ftest_lock(flags, DW))
18841+ di_write_unlock(dentry);
18842+ else
18843+ di_read_unlock(dentry, flags);
18844+ si_read_unlock(dentry->d_sb);
18845+}
18846+
18847+void aufs_write_lock(struct dentry *dentry)
18848+{
18849+ si_write_lock(dentry->d_sb);
18850+ di_write_lock_child(dentry);
18851+}
18852+
18853+void aufs_write_unlock(struct dentry *dentry)
18854+{
18855+ di_write_unlock(dentry);
18856+ si_write_unlock(dentry->d_sb);
18857+}
18858+
18859+void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
18860+{
18861+ si_read_lock(d1->d_sb, flags);
18862+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
18863+}
18864+
18865+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
18866+{
18867+ di_write_unlock2(d1, d2);
18868+ si_read_unlock(d1->d_sb);
18869+}
1308ab2a 18870diff -uprN -x .git linux-2.6.31/fs/aufs/spl.h aufs2-2.6.git/fs/aufs/spl.h
18871--- linux-2.6.31/fs/aufs/spl.h 1970-01-01 00:00:00.000000000 +0000
18872+++ aufs2-2.6.git/fs/aufs/spl.h 2009-09-21 21:49:23.408274204 +0000
dece6358 18873@@ -0,0 +1,57 @@
1facf9fc 18874+/*
18875+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18876+ *
18877+ * This program, aufs is free software; you can redistribute it and/or modify
18878+ * it under the terms of the GNU General Public License as published by
18879+ * the Free Software Foundation; either version 2 of the License, or
18880+ * (at your option) any later version.
dece6358
AM
18881+ *
18882+ * This program is distributed in the hope that it will be useful,
18883+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18884+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18885+ * GNU General Public License for more details.
18886+ *
18887+ * You should have received a copy of the GNU General Public License
18888+ * along with this program; if not, write to the Free Software
18889+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18890+ */
18891+
18892+/*
18893+ * simple list protected by a spinlock
18894+ */
18895+
18896+#ifndef __AUFS_SPL_H__
18897+#define __AUFS_SPL_H__
18898+
18899+#ifdef __KERNEL__
18900+
dece6358
AM
18901+#include <linux/spinlock.h>
18902+#include <linux/list.h>
1facf9fc 18903+
18904+struct au_splhead {
18905+ spinlock_t spin;
18906+ struct list_head head;
18907+};
18908+
18909+static inline void au_spl_init(struct au_splhead *spl)
18910+{
18911+ spin_lock_init(&spl->spin);
18912+ INIT_LIST_HEAD(&spl->head);
18913+}
18914+
18915+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
18916+{
18917+ spin_lock(&spl->spin);
18918+ list_add(list, &spl->head);
18919+ spin_unlock(&spl->spin);
18920+}
18921+
18922+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
18923+{
18924+ spin_lock(&spl->spin);
18925+ list_del(list);
18926+ spin_unlock(&spl->spin);
18927+}
18928+
18929+#endif /* __KERNEL__ */
18930+#endif /* __AUFS_SPL_H__ */
1308ab2a 18931diff -uprN -x .git linux-2.6.31/fs/aufs/super.c aufs2-2.6.git/fs/aufs/super.c
18932--- linux-2.6.31/fs/aufs/super.c 1970-01-01 00:00:00.000000000 +0000
18933+++ aufs2-2.6.git/fs/aufs/super.c 2009-09-21 21:49:23.408274204 +0000
dece6358 18934@@ -0,0 +1,874 @@
1facf9fc 18935+/*
18936+ * Copyright (C) 2005-2009 Junjiro R. Okajima
18937+ *
18938+ * This program, aufs is free software; you can redistribute it and/or modify
18939+ * it under the terms of the GNU General Public License as published by
18940+ * the Free Software Foundation; either version 2 of the License, or
18941+ * (at your option) any later version.
dece6358
AM
18942+ *
18943+ * This program is distributed in the hope that it will be useful,
18944+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18945+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18946+ * GNU General Public License for more details.
18947+ *
18948+ * You should have received a copy of the GNU General Public License
18949+ * along with this program; if not, write to the Free Software
18950+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 18951+ */
18952+
18953+/*
18954+ * mount and super_block operations
18955+ */
18956+
18957+#include <linux/buffer_head.h>
dece6358 18958+#include <linux/module.h>
1facf9fc 18959+#include <linux/seq_file.h>
18960+#include <linux/statfs.h>
18961+#include "aufs.h"
18962+
18963+/*
18964+ * super_operations
18965+ */
18966+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
18967+{
18968+ struct au_icntnr *c;
18969+
18970+ c = au_cache_alloc_icntnr();
18971+ if (c) {
18972+ inode_init_once(&c->vfs_inode);
18973+ c->vfs_inode.i_version = 1; /* sigen(sb); */
18974+ c->iinfo.ii_hinode = NULL;
18975+ return &c->vfs_inode;
18976+ }
18977+ return NULL;
18978+}
18979+
18980+static void aufs_destroy_inode(struct inode *inode)
18981+{
18982+ au_iinfo_fin(inode);
18983+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
18984+}
18985+
18986+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
18987+{
18988+ struct inode *inode;
18989+ int err;
18990+
18991+ inode = iget_locked(sb, ino);
18992+ if (unlikely(!inode)) {
18993+ inode = ERR_PTR(-ENOMEM);
18994+ goto out;
18995+ }
18996+ if (!(inode->i_state & I_NEW))
18997+ goto out;
18998+
18999+ err = au_xigen_new(inode);
19000+ if (!err)
19001+ err = au_iinfo_init(inode);
19002+ if (!err)
19003+ inode->i_version++;
19004+ else {
19005+ iget_failed(inode);
19006+ inode = ERR_PTR(err);
19007+ }
19008+
19009+ out:
19010+ /* never return NULL */
19011+ AuDebugOn(!inode);
19012+ AuTraceErrPtr(inode);
19013+ return inode;
19014+}
19015+
19016+/* lock free root dinfo */
19017+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
19018+{
19019+ int err;
19020+ aufs_bindex_t bindex, bend;
19021+ struct path path;
19022+ struct au_hdentry *hd;
19023+ struct au_branch *br;
19024+
19025+ err = 0;
19026+ bend = au_sbend(sb);
19027+ hd = au_di(sb->s_root)->di_hdentry;
19028+ for (bindex = 0; !err && bindex <= bend; bindex++) {
19029+ br = au_sbr(sb, bindex);
19030+ path.mnt = br->br_mnt;
19031+ path.dentry = hd[bindex].hd_dentry;
19032+ err = au_seq_path(seq, &path);
19033+ if (err > 0)
19034+ err = seq_printf(seq, "=%s",
19035+ au_optstr_br_perm(br->br_perm));
19036+ if (!err && bindex != bend)
19037+ err = seq_putc(seq, ':');
19038+ }
19039+
19040+ return err;
19041+}
19042+
19043+static void au_show_wbr_create(struct seq_file *m, int v,
19044+ struct au_sbinfo *sbinfo)
19045+{
19046+ const char *pat;
19047+
dece6358
AM
19048+ AuRwMustAnyLock(&sbinfo->si_rwsem);
19049+
1facf9fc 19050+ seq_printf(m, ",create=");
19051+ pat = au_optstr_wbr_create(v);
19052+ switch (v) {
19053+ case AuWbrCreate_TDP:
19054+ case AuWbrCreate_RR:
19055+ case AuWbrCreate_MFS:
19056+ case AuWbrCreate_PMFS:
19057+ seq_printf(m, pat);
19058+ break;
19059+ case AuWbrCreate_MFSV:
19060+ seq_printf(m, /*pat*/"mfs:%lu",
19061+ sbinfo->si_wbr_mfs.mfs_expire / HZ);
19062+ break;
19063+ case AuWbrCreate_PMFSV:
19064+ seq_printf(m, /*pat*/"pmfs:%lu",
19065+ sbinfo->si_wbr_mfs.mfs_expire / HZ);
19066+ break;
19067+ case AuWbrCreate_MFSRR:
19068+ seq_printf(m, /*pat*/"mfsrr:%llu",
19069+ sbinfo->si_wbr_mfs.mfsrr_watermark);
19070+ break;
19071+ case AuWbrCreate_MFSRRV:
19072+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
19073+ sbinfo->si_wbr_mfs.mfsrr_watermark,
19074+ sbinfo->si_wbr_mfs.mfs_expire / HZ);
19075+ break;
19076+ }
19077+}
19078+
19079+static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
19080+{
19081+#ifdef CONFIG_SYSFS
19082+ return 0;
19083+#else
19084+ int err;
19085+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
19086+ aufs_bindex_t bindex, brid;
19087+ struct super_block *sb;
19088+ struct qstr *name;
19089+ struct file *f;
19090+ struct dentry *d, *h_root;
19091+
dece6358
AM
19092+ AuRwMustAnyLock(&sbinfo->si_rwsem);
19093+
1facf9fc 19094+ err = 0;
19095+ sb = mnt->mnt_sb;
19096+ f = au_sbi(sb)->si_xib;
19097+ if (!f)
19098+ goto out;
19099+
19100+ /* stop printing the default xino path on the first writable branch */
19101+ h_root = NULL;
19102+ brid = au_xino_brid(sb);
19103+ if (brid >= 0) {
19104+ bindex = au_br_index(sb, brid);
19105+ h_root = au_di(sb->s_root)->di_hdentry[0 + bindex].hd_dentry;
19106+ }
19107+ d = f->f_dentry;
19108+ name = &d->d_name;
19109+ /* safe ->d_parent because the file is unlinked */
19110+ if (d->d_parent == h_root
19111+ && name->len == len
19112+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
19113+ goto out;
19114+
19115+ seq_puts(seq, ",xino=");
19116+ err = au_xino_path(seq, f);
19117+
19118+ out:
19119+ return err;
19120+#endif
19121+}
19122+
19123+/* seq_file will re-call me in case of too long string */
19124+static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
19125+{
19126+ int err, n;
19127+ unsigned int mnt_flags, v;
19128+ struct super_block *sb;
19129+ struct au_sbinfo *sbinfo;
19130+
19131+#define AuBool(name, str) do { \
19132+ v = au_opt_test(mnt_flags, name); \
19133+ if (v != au_opt_test(AuOpt_Def, name)) \
19134+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
19135+} while (0)
19136+
19137+#define AuStr(name, str) do { \
19138+ v = mnt_flags & AuOptMask_##name; \
19139+ if (v != (AuOpt_Def & AuOptMask_##name)) \
19140+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
19141+} while (0)
19142+
19143+#define AuUInt(name, str, val) do { \
19144+ if (val != AUFS_##name##_DEF) \
19145+ seq_printf(m, "," #str "=%u", val); \
19146+} while (0)
19147+
19148+ /* lock free root dinfo */
19149+ sb = mnt->mnt_sb;
19150+ si_noflush_read_lock(sb);
19151+ sbinfo = au_sbi(sb);
19152+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
19153+
19154+ mnt_flags = au_mntflags(sb);
19155+ if (au_opt_test(mnt_flags, XINO)) {
19156+ err = au_show_xino(m, mnt);
19157+ if (unlikely(err))
19158+ goto out;
19159+ } else
19160+ seq_puts(m, ",noxino");
19161+
19162+ AuBool(TRUNC_XINO, trunc_xino);
19163+ AuStr(UDBA, udba);
dece6358 19164+ AuBool(SHWH, shwh);
1facf9fc 19165+ AuBool(PLINK, plink);
19166+ /* AuBool(DIRPERM1, dirperm1); */
19167+ /* AuBool(REFROF, refrof); */
19168+
19169+ v = sbinfo->si_wbr_create;
19170+ if (v != AuWbrCreate_Def)
19171+ au_show_wbr_create(m, v, sbinfo);
19172+
19173+ v = sbinfo->si_wbr_copyup;
19174+ if (v != AuWbrCopyup_Def)
19175+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
19176+
19177+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
19178+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
19179+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
19180+
19181+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
19182+
19183+ n = sbinfo->si_rdcache / HZ;
19184+ AuUInt(RDCACHE, rdcache, n);
19185+
19186+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
19187+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
19188+
19189+ AuBool(SUM, sum);
19190+ /* AuBool(SUM_W, wsum); */
19191+ AuBool(WARN_PERM, warn_perm);
19192+ AuBool(VERBOSE, verbose);
19193+
19194+ out:
19195+ /* be sure to print "br:" last */
19196+ if (!sysaufs_brs) {
19197+ seq_puts(m, ",br:");
19198+ au_show_brs(m, sb);
19199+ }
19200+ si_read_unlock(sb);
19201+ return 0;
19202+
19203+#undef Deleted
19204+#undef AuBool
19205+#undef AuStr
19206+}
19207+
19208+/* ---------------------------------------------------------------------- */
19209+
19210+/* sum mode which returns the summation for statfs(2) */
19211+
/*
 * add @b to @a, and return ULLONG_MAX instead of a wrapped value when the
 * sum overflows.
 */
static u64 au_add_till_max(u64 a, u64 b)
{
	u64 sum;

	sum = a + b;
	/*
	 * the sum is smaller than @a only after a wrap-around.
	 * an equal sum (@b is zero) is not an overflow.
	 */
	if (sum >= a)
		return sum;
	return ULLONG_MAX;
}
19222+
19223+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
19224+{
19225+ int err;
19226+ u64 blocks, bfree, bavail, files, ffree;
19227+ aufs_bindex_t bend, bindex, i;
19228+ unsigned char shared;
19229+ struct vfsmount *h_mnt;
19230+ struct super_block *h_sb;
19231+
19232+ blocks = 0;
19233+ bfree = 0;
19234+ bavail = 0;
19235+ files = 0;
19236+ ffree = 0;
19237+
19238+ err = 0;
19239+ bend = au_sbend(sb);
19240+ for (bindex = bend; bindex >= 0; bindex--) {
19241+ h_mnt = au_sbr_mnt(sb, bindex);
19242+ h_sb = h_mnt->mnt_sb;
19243+ shared = 0;
19244+ for (i = bindex + 1; !shared && i <= bend; i++)
19245+ shared = (au_sbr_sb(sb, i) == h_sb);
19246+ if (shared)
19247+ continue;
19248+
19249+ /* sb->s_root for NFS is unreliable */
19250+ err = vfs_statfs(h_mnt->mnt_root, buf);
19251+ if (unlikely(err))
19252+ goto out;
19253+
19254+ blocks = au_add_till_max(blocks, buf->f_blocks);
19255+ bfree = au_add_till_max(bfree, buf->f_bfree);
19256+ bavail = au_add_till_max(bavail, buf->f_bavail);
19257+ files = au_add_till_max(files, buf->f_files);
19258+ ffree = au_add_till_max(ffree, buf->f_ffree);
19259+ }
19260+
19261+ buf->f_blocks = blocks;
19262+ buf->f_bfree = bfree;
19263+ buf->f_bavail = bavail;
19264+ buf->f_files = files;
19265+ buf->f_ffree = ffree;
19266+
19267+ out:
19268+ return err;
19269+}
19270+
19271+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
19272+{
19273+ int err;
19274+ struct super_block *sb;
19275+
19276+ /* lock free root dinfo */
19277+ sb = dentry->d_sb;
19278+ si_noflush_read_lock(sb);
19279+ if (!au_opt_test(au_mntflags(sb), SUM))
19280+ /* sb->s_root for NFS is unreliable */
19281+ err = vfs_statfs(au_sbr_mnt(sb, 0)->mnt_root, buf);
19282+ else
19283+ err = au_statfs_sum(sb, buf);
19284+ si_read_unlock(sb);
19285+
19286+ if (!err) {
19287+ buf->f_type = AUFS_SUPER_MAGIC;
19288+ buf->f_namelen -= AUFS_WH_PFX_LEN;
19289+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
19290+ }
19291+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
19292+
19293+ return err;
19294+}
19295+
19296+/* ---------------------------------------------------------------------- */
19297+
19298+/* try flushing the lower fs at aufs remount/unmount time */
19299+
19300+static void au_fsync_br(struct super_block *sb)
19301+{
19302+ aufs_bindex_t bend, bindex;
19303+ int brperm;
19304+ struct au_branch *br;
19305+ struct super_block *h_sb;
19306+
19307+ bend = au_sbend(sb);
19308+ for (bindex = 0; bindex < bend; bindex++) {
19309+ br = au_sbr(sb, bindex);
19310+ brperm = br->br_perm;
19311+ if (brperm == AuBrPerm_RR || brperm == AuBrPerm_RRWH)
19312+ continue;
19313+ h_sb = br->br_mnt->mnt_sb;
19314+ if (bdev_read_only(h_sb->s_bdev))
19315+ continue;
19316+
19317+ lockdep_off();
19318+ down_write(&h_sb->s_umount);
19319+ shrink_dcache_sb(h_sb);
1308ab2a 19320+ sync_filesystem(h_sb);
1facf9fc 19321+ up_write(&h_sb->s_umount);
19322+ lockdep_on();
19323+ }
19324+}
19325+
19326+/*
19327+ * this IS NOT for super_operations.
19328+ * I guess it will be reverted someday.
19329+ */
19330+static void aufs_umount_begin(struct super_block *sb)
19331+{
19332+ struct au_sbinfo *sbinfo;
19333+
19334+ sbinfo = au_sbi(sb);
19335+ if (!sbinfo)
19336+ return;
19337+
19338+ si_write_lock(sb);
19339+ au_fsync_br(sb);
19340+ if (au_opt_test(au_mntflags(sb), PLINK))
19341+ au_plink_put(sb);
19342+ if (sbinfo->si_wbr_create_ops->fin)
19343+ sbinfo->si_wbr_create_ops->fin(sb);
19344+ si_write_unlock(sb);
19345+}
19346+
19347+/* final actions when unmounting a file system */
19348+static void aufs_put_super(struct super_block *sb)
19349+{
19350+ struct au_sbinfo *sbinfo;
19351+
19352+ sbinfo = au_sbi(sb);
19353+ if (!sbinfo)
19354+ return;
19355+
19356+ aufs_umount_begin(sb);
19357+ dbgaufs_si_fin(sbinfo);
19358+ kobject_put(&sbinfo->si_kobj);
19359+}
19360+
19361+/* ---------------------------------------------------------------------- */
19362+
19363+/*
19364+ * refresh dentry and inode at remount time.
19365+ */
19366+static int do_refresh(struct dentry *dentry, mode_t type,
19367+ unsigned int dir_flags)
19368+{
19369+ int err;
19370+ struct dentry *parent;
19371+
19372+ di_write_lock_child(dentry);
19373+ parent = dget_parent(dentry);
19374+ di_read_lock_parent(parent, AuLock_IR);
19375+
19376+ /* returns the number of positive dentries */
19377+ err = au_refresh_hdentry(dentry, type);
19378+ if (err >= 0) {
19379+ struct inode *inode = dentry->d_inode;
19380+ err = au_refresh_hinode(inode, dentry);
19381+ if (!err && type == S_IFDIR)
19382+ au_reset_hinotify(inode, dir_flags);
19383+ }
19384+ if (unlikely(err))
19385+ AuErr("unrecoverable error %d, %.*s\n", err, AuDLNPair(dentry));
19386+
19387+ di_read_unlock(parent, AuLock_IR);
19388+ dput(parent);
19389+ di_write_unlock(dentry);
19390+
19391+ return err;
19392+}
19393+
19394+static int test_dir(struct dentry *dentry, void *arg __maybe_unused)
19395+{
19396+ return S_ISDIR(dentry->d_inode->i_mode);
19397+}
19398+
19399+/* gave up consolidating with refresh_nondir() */
19400+static int refresh_dir(struct dentry *root, unsigned int sigen)
19401+{
19402+ int err, i, j, ndentry, e;
19403+ struct au_dcsub_pages dpages;
19404+ struct au_dpage *dpage;
19405+ struct dentry **dentries;
19406+ struct inode *inode;
19407+ const unsigned int flags = au_hi_flags(root->d_inode, /*isdir*/1);
19408+
19409+ err = 0;
19410+ list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
19411+ if (S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) {
19412+ ii_write_lock_child(inode);
19413+ e = au_refresh_hinode_self(inode, /*do_attr*/1);
19414+ ii_write_unlock(inode);
19415+ if (unlikely(e)) {
19416+ AuDbg("e %d, i%lu\n", e, inode->i_ino);
19417+ if (!err)
19418+ err = e;
19419+ /* go on even if err */
19420+ }
19421+ }
19422+
19423+ e = au_dpages_init(&dpages, GFP_NOFS);
19424+ if (unlikely(e)) {
19425+ if (!err)
19426+ err = e;
19427+ goto out;
19428+ }
19429+ e = au_dcsub_pages(&dpages, root, test_dir, NULL);
19430+ if (unlikely(e)) {
19431+ if (!err)
19432+ err = e;
19433+ goto out_dpages;
19434+ }
19435+
19436+ for (i = 0; !e && i < dpages.ndpage; i++) {
19437+ dpage = dpages.dpages + i;
19438+ dentries = dpage->dentries;
19439+ ndentry = dpage->ndentry;
19440+ for (j = 0; !e && j < ndentry; j++) {
19441+ struct dentry *d;
19442+
19443+ d = dentries[j];
19444+ au_dbg_verify_dir_parent(d, sigen);
19445+ if (au_digen(d) != sigen) {
19446+ e = do_refresh(d, S_IFDIR, flags);
19447+ if (unlikely(e && !err))
19448+ err = e;
19449+ /* break on err */
19450+ }
19451+ }
19452+ }
19453+
19454+ out_dpages:
19455+ au_dpages_free(&dpages);
19456+ out:
19457+ return err;
19458+}
19459+
19460+static int test_nondir(struct dentry *dentry, void *arg __maybe_unused)
19461+{
19462+ return !S_ISDIR(dentry->d_inode->i_mode);
19463+}
19464+
19465+static int refresh_nondir(struct dentry *root, unsigned int sigen,
19466+ int do_dentry)
19467+{
19468+ int err, i, j, ndentry, e;
19469+ struct au_dcsub_pages dpages;
19470+ struct au_dpage *dpage;
19471+ struct dentry **dentries;
19472+ struct inode *inode;
19473+
19474+ err = 0;
19475+ list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
19476+ if (!S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) {
19477+ ii_write_lock_child(inode);
19478+ e = au_refresh_hinode_self(inode, /*do_attr*/1);
19479+ ii_write_unlock(inode);
19480+ if (unlikely(e)) {
19481+ AuDbg("e %d, i%lu\n", e, inode->i_ino);
19482+ if (!err)
19483+ err = e;
19484+ /* go on even if err */
19485+ }
19486+ }
19487+
19488+ if (!do_dentry)
19489+ goto out;
19490+
19491+ e = au_dpages_init(&dpages, GFP_NOFS);
19492+ if (unlikely(e)) {
19493+ if (!err)
19494+ err = e;
19495+ goto out;
19496+ }
19497+ e = au_dcsub_pages(&dpages, root, test_nondir, NULL);
19498+ if (unlikely(e)) {
19499+ if (!err)
19500+ err = e;
19501+ goto out_dpages;
19502+ }
19503+
19504+ for (i = 0; i < dpages.ndpage; i++) {
19505+ dpage = dpages.dpages + i;
19506+ dentries = dpage->dentries;
19507+ ndentry = dpage->ndentry;
19508+ for (j = 0; j < ndentry; j++) {
19509+ struct dentry *d;
19510+
19511+ d = dentries[j];
19512+ au_dbg_verify_nondir_parent(d, sigen);
19513+ inode = d->d_inode;
19514+ if (inode && au_digen(d) != sigen) {
19515+ e = do_refresh(d, inode->i_mode & S_IFMT,
19516+ /*dir_flags*/0);
19517+ if (unlikely(e && !err))
19518+ err = e;
19519+ /* go on even err */
19520+ }
19521+ }
19522+ }
19523+
19524+ out_dpages:
19525+ au_dpages_free(&dpages);
19526+ out:
19527+ return err;
19528+}
19529+
19530+static void au_remount_refresh(struct super_block *sb, unsigned int flags)
19531+{
19532+ int err;
19533+ unsigned int sigen;
19534+ struct au_sbinfo *sbinfo;
19535+ struct dentry *root;
19536+ struct inode *inode;
19537+
19538+ au_sigen_inc(sb);
19539+ sigen = au_sigen(sb);
19540+ sbinfo = au_sbi(sb);
19541+ au_fclr_si(sbinfo, FAILED_REFRESH_DIRS);
19542+
19543+ root = sb->s_root;
19544+ DiMustNoWaiters(root);
19545+ inode = root->d_inode;
19546+ IiMustNoWaiters(inode);
19547+ au_reset_hinotify(inode, au_hi_flags(inode, /*isdir*/1));
19548+ di_write_unlock(root);
19549+
19550+ err = refresh_dir(root, sigen);
19551+ if (unlikely(err)) {
19552+ au_fset_si(sbinfo, FAILED_REFRESH_DIRS);
19553+ AuWarn("Refreshing directories failed, ignored (%d)\n", err);
19554+ }
19555+
19556+ if (au_ftest_opts(flags, REFRESH_NONDIR)) {
19557+ err = refresh_nondir(root, sigen, !err);
19558+ if (unlikely(err))
19559+ AuWarn("Refreshing non-directories failed, ignored"
19560+ "(%d)\n", err);
19561+ }
19562+
19563+ /* aufs_write_lock() calls ..._child() */
19564+ di_write_lock_child(root);
19565+ au_cpup_attr_all(root->d_inode, /*force*/1);
19566+}
19567+
/*
 * stop extra interpretation of errno in mount(8), and strange error messages.
 * -ENOENT, -ENOTDIR, -EEXIST and -EIO are all converted to -EINVAL, any other
 * value is returned unchanged.
 */
static int cvt_err(int err)
{
	AuTraceErr(err);

	if (err == -ENOENT
	    || err == -ENOTDIR
	    || err == -EEXIST
	    || err == -EIO)
		return -EINVAL;
	return err;
}
19582+
19583+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
19584+{
19585+ int err;
19586+ struct au_opts opts;
19587+ struct dentry *root;
19588+ struct inode *inode;
19589+ struct au_sbinfo *sbinfo;
19590+
19591+ err = 0;
19592+ root = sb->s_root;
19593+ if (!data || !*data) {
19594+ aufs_write_lock(root);
19595+ err = au_opts_verify(sb, *flags, /*pending*/0);
19596+ if (!err)
19597+ au_fsync_br(sb);
19598+ aufs_write_unlock(root);
19599+ goto out;
19600+ }
19601+
19602+ err = -ENOMEM;
19603+ memset(&opts, 0, sizeof(opts));
19604+ opts.opt = (void *)__get_free_page(GFP_NOFS);
19605+ if (unlikely(!opts.opt))
19606+ goto out;
19607+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
19608+ opts.flags = AuOpts_REMOUNT;
19609+ opts.sb_flags = *flags;
19610+
19611+ /* parse it before aufs lock */
19612+ err = au_opts_parse(sb, data, &opts);
19613+ if (unlikely(err))
19614+ goto out_opts;
19615+
19616+ sbinfo = au_sbi(sb);
19617+ inode = root->d_inode;
19618+ mutex_lock(&inode->i_mutex);
19619+ aufs_write_lock(root);
19620+ au_fsync_br(sb);
19621+
19622+ /* au_opts_remount() may return an error */
19623+ err = au_opts_remount(sb, &opts);
19624+ au_opts_free(&opts);
19625+
19626+ if (au_ftest_opts(opts.flags, REFRESH_DIR)
19627+ || au_ftest_opts(opts.flags, REFRESH_NONDIR))
19628+ au_remount_refresh(sb, opts.flags);
19629+
19630+ aufs_write_unlock(root);
19631+ mutex_unlock(&inode->i_mutex);
19632+
19633+ out_opts:
19634+ free_page((unsigned long)opts.opt);
19635+ out:
19636+ err = cvt_err(err);
19637+ AuTraceErr(err);
19638+ return err;
19639+}
19640+
19641+static struct super_operations aufs_sop = {
19642+ .alloc_inode = aufs_alloc_inode,
19643+ .destroy_inode = aufs_destroy_inode,
19644+ .drop_inode = generic_delete_inode,
19645+ .show_options = aufs_show_options,
19646+ .statfs = aufs_statfs,
19647+ .put_super = aufs_put_super,
19648+ .remount_fs = aufs_remount_fs
19649+};
19650+
19651+/* ---------------------------------------------------------------------- */
19652+
19653+static int alloc_root(struct super_block *sb)
19654+{
19655+ int err;
19656+ struct inode *inode;
19657+ struct dentry *root;
19658+
19659+ err = -ENOMEM;
19660+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
19661+ err = PTR_ERR(inode);
19662+ if (IS_ERR(inode))
19663+ goto out;
19664+
19665+ inode->i_op = &aufs_dir_iop;
19666+ inode->i_fop = &aufs_dir_fop;
19667+ inode->i_mode = S_IFDIR;
19668+ inode->i_nlink = 2;
19669+ unlock_new_inode(inode);
19670+
19671+ root = d_alloc_root(inode);
19672+ if (unlikely(!root))
19673+ goto out_iput;
19674+ err = PTR_ERR(root);
19675+ if (IS_ERR(root))
19676+ goto out_iput;
19677+
19678+ err = au_alloc_dinfo(root);
19679+ if (!err) {
19680+ sb->s_root = root;
19681+ return 0; /* success */
19682+ }
19683+ dput(root);
19684+ goto out; /* do not iput */
19685+
19686+ out_iput:
19687+ iget_failed(inode);
19688+ iput(inode);
19689+ out:
19690+ return err;
19691+
19692+}
19693+
19694+static int aufs_fill_super(struct super_block *sb, void *raw_data,
19695+ int silent __maybe_unused)
19696+{
19697+ int err;
19698+ struct au_opts opts;
19699+ struct dentry *root;
19700+ struct inode *inode;
19701+ char *arg = raw_data;
19702+
19703+ if (unlikely(!arg || !*arg)) {
19704+ err = -EINVAL;
19705+ AuErr("no arg\n");
19706+ goto out;
19707+ }
19708+
19709+ err = -ENOMEM;
19710+ memset(&opts, 0, sizeof(opts));
19711+ opts.opt = (void *)__get_free_page(GFP_NOFS);
19712+ if (unlikely(!opts.opt))
19713+ goto out;
19714+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
19715+ opts.sb_flags = sb->s_flags;
19716+
19717+ err = au_si_alloc(sb);
19718+ if (unlikely(err))
19719+ goto out_opts;
19720+
19721+ /* all timestamps always follow the ones on the branch */
19722+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
19723+ sb->s_op = &aufs_sop;
19724+ sb->s_magic = AUFS_SUPER_MAGIC;
19725+ sb->s_maxbytes = 0;
19726+ au_export_init(sb);
19727+
19728+ err = alloc_root(sb);
19729+ if (unlikely(err)) {
19730+ si_write_unlock(sb);
19731+ goto out_info;
19732+ }
19733+ root = sb->s_root;
19734+ inode = root->d_inode;
19735+
19736+ /*
19737+ * actually we can parse options regardless aufs lock here.
19738+ * but at remount time, parsing must be done before aufs lock.
19739+ * so we follow the same rule.
19740+ */
19741+ ii_write_lock_parent(inode);
19742+ aufs_write_unlock(root);
19743+ err = au_opts_parse(sb, arg, &opts);
19744+ if (unlikely(err))
19745+ goto out_root;
19746+
19747+ /* lock vfs_inode first, then aufs. */
19748+ mutex_lock(&inode->i_mutex);
19749+ inode->i_op = &aufs_dir_iop;
19750+ inode->i_fop = &aufs_dir_fop;
19751+ aufs_write_lock(root);
19752+ err = au_opts_mount(sb, &opts);
19753+ au_opts_free(&opts);
19754+ if (unlikely(err))
19755+ goto out_unlock;
19756+ aufs_write_unlock(root);
19757+ mutex_unlock(&inode->i_mutex);
19758+ goto out_opts; /* success */
19759+
19760+ out_unlock:
19761+ aufs_write_unlock(root);
19762+ mutex_unlock(&inode->i_mutex);
19763+ out_root:
19764+ dput(root);
19765+ sb->s_root = NULL;
19766+ out_info:
19767+ kobject_put(&au_sbi(sb)->si_kobj);
19768+ sb->s_fs_info = NULL;
19769+ out_opts:
19770+ free_page((unsigned long)opts.opt);
19771+ out:
19772+ AuTraceErr(err);
19773+ err = cvt_err(err);
19774+ AuTraceErr(err);
19775+ return err;
19776+}
19777+
19778+/* ---------------------------------------------------------------------- */
19779+
19780+static int aufs_get_sb(struct file_system_type *fs_type, int flags,
19781+ const char *dev_name __maybe_unused, void *raw_data,
19782+ struct vfsmount *mnt)
19783+{
19784+ int err;
19785+ struct super_block *sb;
19786+
19787+ /* all timestamps always follow the ones on the branch */
19788+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
19789+ err = get_sb_nodev(fs_type, flags, raw_data, aufs_fill_super, mnt);
19790+ if (!err) {
19791+ sb = mnt->mnt_sb;
19792+ si_write_lock(sb);
19793+ sysaufs_brs_add(sb, 0);
19794+ si_write_unlock(sb);
19795+ }
19796+ return err;
19797+}
19798+
19799+struct file_system_type aufs_fs_type = {
19800+ .name = AUFS_FSTYPE,
19801+ .fs_flags =
19802+ FS_RENAME_DOES_D_MOVE /* a race between rename and others */
19803+ | FS_REVAL_DOT, /* for NFS branch and udba */
19804+ .get_sb = aufs_get_sb,
19805+ .kill_sb = generic_shutdown_super,
19806+ /* no need to __module_get() and module_put(). */
19807+ .owner = THIS_MODULE,
19808+};
1308ab2a 19809diff -uprN -x .git linux-2.6.31/fs/aufs/super.h aufs2-2.6.git/fs/aufs/super.h
19810--- linux-2.6.31/fs/aufs/super.h 1970-01-01 00:00:00.000000000 +0000
19811+++ aufs2-2.6.git/fs/aufs/super.h 2009-09-21 21:49:23.411607814 +0000
dece6358 19812@@ -0,0 +1,384 @@
1facf9fc 19813+/*
19814+ * Copyright (C) 2005-2009 Junjiro R. Okajima
19815+ *
19816+ * This program, aufs is free software; you can redistribute it and/or modify
19817+ * it under the terms of the GNU General Public License as published by
19818+ * the Free Software Foundation; either version 2 of the License, or
19819+ * (at your option) any later version.
dece6358
AM
19820+ *
19821+ * This program is distributed in the hope that it will be useful,
19822+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19823+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19824+ * GNU General Public License for more details.
19825+ *
19826+ * You should have received a copy of the GNU General Public License
19827+ * along with this program; if not, write to the Free Software
19828+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 19829+ */
19830+
19831+/*
19832+ * super_block operations
19833+ */
19834+
19835+#ifndef __AUFS_SUPER_H__
19836+#define __AUFS_SUPER_H__
19837+
19838+#ifdef __KERNEL__
19839+
19840+#include <linux/fs.h>
1facf9fc 19841+#include <linux/aufs_type.h>
19842+#include "rwsem.h"
19843+#include "spl.h"
19844+#include "wkq.h"
19845+
19846+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
19847+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
19848+ loff_t *);
19849+
/* policies to select one among multiple writable branches */
/* the copy-up policy */
struct au_wbr_copyup_operations {
	int	(*copyup)(struct dentry *dentry);
};
19854+
/* the create policy; ->fin may be NULL (it is tested before being called) */
struct au_wbr_create_operations {
	int	(*create)(struct dentry *dentry, int isdir);
	int	(*init)(struct super_block *sb);
	int	(*fin)(struct super_block *sb);
};
19860+
19861+struct au_wbr_mfs {
19862+ struct mutex mfs_lock; /* protect this structure */
19863+ unsigned long mfs_jiffy;
19864+ unsigned long mfs_expire;
19865+ aufs_bindex_t mfs_bindex;
19866+
19867+ unsigned long long mfsrr_bytes;
19868+ unsigned long long mfsrr_watermark;
19869+};
19870+
1facf9fc 19871+struct au_branch;
19872+struct au_sbinfo {
19873+ /* nowait tasks in the system-wide workqueue */
19874+ struct au_nowait_tasks si_nowait;
19875+
dece6358 19876+ struct au_rwsem si_rwsem;
1facf9fc 19877+
19878+ /* branch management */
19879+ unsigned int si_generation;
19880+
19881+ /* see above flags */
19882+ unsigned char au_si_status;
19883+
19884+ aufs_bindex_t si_bend;
19885+ aufs_bindex_t si_last_br_id;
19886+ struct au_branch **si_branch;
19887+
19888+ /* policy to select a writable branch */
19889+ unsigned char si_wbr_copyup;
19890+ unsigned char si_wbr_create;
19891+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
19892+ struct au_wbr_create_operations *si_wbr_create_ops;
19893+
19894+ /* round robin */
19895+ atomic_t si_wbr_rr_next;
19896+
19897+ /* most free space */
19898+ struct au_wbr_mfs si_wbr_mfs;
19899+
19900+ /* mount flags */
19901+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
19902+ unsigned int si_mntflags;
19903+
19904+ /* external inode number (bitmap and translation table) */
19905+ au_readf_t si_xread;
19906+ au_writef_t si_xwrite;
19907+ struct file *si_xib;
19908+ struct mutex si_xib_mtx; /* protect xib members */
19909+ unsigned long *si_xib_buf;
19910+ unsigned long si_xib_last_pindex;
19911+ int si_xib_next_bit;
19912+ aufs_bindex_t si_xino_brid;
19913+ /* reserved for future use */
19914+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
19915+
19916+#ifdef CONFIG_AUFS_EXPORT
19917+ /* i_generation */
19918+ struct file *si_xigen;
19919+ atomic_t si_xigen_next;
19920+#endif
19921+
19922+ /* vdir parameters */
19923+ unsigned long si_rdcache; /* max cache time in HZ */
19924+ unsigned int si_rdblk; /* deblk size */
19925+ unsigned int si_rdhash; /* hash size */
19926+
19927+ /*
19928+ * If the number of whiteouts are larger than si_dirwh, leave all of
19929+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
19930+ * future fsck.aufs or kernel thread will remove them later.
19931+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
19932+ */
19933+ unsigned int si_dirwh;
19934+
19935+ /*
19936+ * rename(2) a directory with all children.
19937+ */
19938+ /* reserved for future use */
19939+ /* int si_rendir; */
19940+
19941+ /* pseudo_link list */
19942+ struct au_splhead si_plink;
19943+ wait_queue_head_t si_plink_wq;
19944+
19945+ /*
19946+ * sysfs and lifetime management.
19947+ * this is not a small structure and it may be a waste of memory in case
19948+ * of sysfs is disabled, particulary when many aufs-es are mounted.
19949+ * but using sysfs is majority.
19950+ */
19951+ struct kobject si_kobj;
19952+#ifdef CONFIG_DEBUG_FS
19953+ struct dentry *si_dbgaufs, *si_dbgaufs_xib;
19954+#ifdef CONFIG_AUFS_EXPORT
19955+ struct dentry *si_dbgaufs_xigen;
19956+#endif
19957+#endif
19958+
19959+ /* dirty, necessary for unmounting, sysfs and sysrq */
19960+ struct super_block *si_sb;
19961+};
19962+
dece6358
AM
19963+/* sbinfo status flags */
19964+/*
19965+ * set true when refresh_dirs() failed at remount time.
19966+ * then try refreshing dirs at access time again.
19967+ * if it is false, refreshing dirs at access time is unnecesary
19968+ */
19969+#define AuSi_FAILED_REFRESH_DIRS 1
19970+#define AuSi_MAINTAIN_PLINK (1 << 1) /* ioctl */
19971+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
19972+ unsigned int flag)
19973+{
19974+ AuRwMustAnyLock(&sbi->si_rwsem);
19975+ return sbi->au_si_status & flag;
19976+}
19977+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
19978+#define au_fset_si(sbinfo, name) do { \
19979+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
19980+ (sbinfo)->au_si_status |= AuSi_##name; \
19981+} while (0)
19982+#define au_fclr_si(sbinfo, name) do { \
19983+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
19984+ (sbinfo)->au_si_status &= ~AuSi_##name; \
19985+} while (0)
19986+
1facf9fc 19987+/* ---------------------------------------------------------------------- */
19988+
/* call the current policy to select one among writable branches */
#define AuWbrCopyup(sbinfo, args...) \
	((sbinfo)->si_wbr_copyup_ops->copyup(args))
#define AuWbrCreate(sbinfo, args...) \
	((sbinfo)->si_wbr_create_ops->create(args))

/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
#define AuLock_DW		1		/* write-lock dentry */
#define AuLock_IR		(1 << 1)	/* read-lock inode */
#define AuLock_IW		(1 << 2)	/* write-lock inode */
#define AuLock_FLUSH		(1 << 3)	/* wait for 'nowait' tasks */
#define AuLock_DIR		(1 << 4)	/* target is a dir */
/* test, set and clear by the flag name without its AuLock_ prefix */
#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
#define au_fset_lock(flags, name)	{ (flags) |= AuLock_##name; }
#define au_fclr_lock(flags, name)	{ (flags) &= ~AuLock_##name; }
20004+
20005+/* ---------------------------------------------------------------------- */
20006+
20007+/* super.c */
20008+extern struct file_system_type aufs_fs_type;
20009+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
20010+
20011+/* sbinfo.c */
20012+void au_si_free(struct kobject *kobj);
20013+int au_si_alloc(struct super_block *sb);
20014+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
20015+
20016+unsigned int au_sigen_inc(struct super_block *sb);
20017+aufs_bindex_t au_new_br_id(struct super_block *sb);
20018+
20019+void aufs_read_lock(struct dentry *dentry, int flags);
20020+void aufs_read_unlock(struct dentry *dentry, int flags);
20021+void aufs_write_lock(struct dentry *dentry);
20022+void aufs_write_unlock(struct dentry *dentry);
20023+void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int isdir);
20024+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
20025+
20026+/* wbr_policy.c */
20027+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
20028+extern struct au_wbr_create_operations au_wbr_create_ops[];
20029+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
20030+
20031+/* ---------------------------------------------------------------------- */
20032+
20033+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
20034+{
20035+ return sb->s_fs_info;
20036+}
20037+
20038+/* ---------------------------------------------------------------------- */
20039+
20040+#ifdef CONFIG_AUFS_EXPORT
20041+void au_export_init(struct super_block *sb);
20042+
20043+static inline int au_test_nfsd(struct task_struct *tsk)
20044+{
20045+ return !tsk->mm && !strcmp(tsk->comm, "nfsd");
20046+}
20047+
20048+int au_xigen_inc(struct inode *inode);
20049+int au_xigen_new(struct inode *inode);
20050+int au_xigen_set(struct super_block *sb, struct file *base);
20051+void au_xigen_clr(struct super_block *sb);
20052+
20053+static inline int au_busy_or_stale(void)
20054+{
20055+ if (!au_test_nfsd(current))
20056+ return -EBUSY;
20057+ return -ESTALE;
20058+}
20059+#else
20060+static inline void au_export_init(struct super_block *sb)
20061+{
20062+ /* nothing */
20063+}
20064+
20065+static inline int au_test_nfsd(struct task_struct *tsk)
20066+{
20067+ return 0;
20068+}
20069+
20070+static inline int au_xigen_inc(struct inode *inode)
20071+{
20072+ return 0;
20073+}
20074+
20075+static inline int au_xigen_new(struct inode *inode)
20076+{
20077+ return 0;
20078+}
20079+
20080+static inline int au_xigen_set(struct super_block *sb, struct file *base)
20081+{
20082+ return 0;
20083+}
20084+
20085+static inline void au_xigen_clr(struct super_block *sb)
20086+{
20087+ /* empty */
20088+}
20089+
20090+static inline int au_busy_or_stale(void)
20091+{
20092+ return -EBUSY;
20093+}
20094+#endif /* CONFIG_AUFS_EXPORT */
20095+
20096+/* ---------------------------------------------------------------------- */
20097+
20098+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
20099+{
dece6358
AM
20100+ /*
20101+ * This function is a dynamic '__init' function actually,
20102+ * so the tiny check for si_rwsem is unnecessary.
20103+ */
20104+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 20105+#ifdef CONFIG_DEBUG_FS
20106+ sbinfo->si_dbgaufs = NULL;
20107+ sbinfo->si_dbgaufs_xib = NULL;
20108+#ifdef CONFIG_AUFS_EXPORT
20109+ sbinfo->si_dbgaufs_xigen = NULL;
20110+#endif
20111+#endif
20112+}
20113+
20114+/* ---------------------------------------------------------------------- */
20115+
20116+/* lock superblock. mainly for entry point functions */
20117+/*
20118+ * si_noflush_read_lock, si_noflush_write_lock,
20119+ * si_read_unlock, si_write_unlock, si_downgrade_lock
20120+ */
20121+AuSimpleLockRwsemFuncs(si_noflush, struct super_block *sb,
20122+ &au_sbi(sb)->si_rwsem);
20123+AuSimpleUnlockRwsemFuncs(si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
20124+
dece6358
AM
20125+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
20126+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
20127+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
20128+
1facf9fc 20129+static inline void si_read_lock(struct super_block *sb, int flags)
20130+{
20131+ if (au_ftest_lock(flags, FLUSH))
20132+ au_nwt_flush(&au_sbi(sb)->si_nowait);
20133+ si_noflush_read_lock(sb);
20134+}
20135+
20136+static inline void si_write_lock(struct super_block *sb)
20137+{
20138+ au_nwt_flush(&au_sbi(sb)->si_nowait);
20139+ si_noflush_write_lock(sb);
20140+}
20141+
20142+static inline int si_read_trylock(struct super_block *sb, int flags)
20143+{
20144+ if (au_ftest_lock(flags, FLUSH))
20145+ au_nwt_flush(&au_sbi(sb)->si_nowait);
20146+ return si_noflush_read_trylock(sb);
20147+}
20148+
20149+static inline int si_write_trylock(struct super_block *sb, int flags)
20150+{
20151+ if (au_ftest_lock(flags, FLUSH))
20152+ au_nwt_flush(&au_sbi(sb)->si_nowait);
20153+ return si_noflush_write_trylock(sb);
20154+}
20155+
20156+/* ---------------------------------------------------------------------- */
20157+
20158+static inline aufs_bindex_t au_sbend(struct super_block *sb)
20159+{
dece6358 20160+ SiMustAnyLock(sb);
1facf9fc 20161+ return au_sbi(sb)->si_bend;
20162+}
20163+
20164+static inline unsigned int au_mntflags(struct super_block *sb)
20165+{
dece6358 20166+ SiMustAnyLock(sb);
1facf9fc 20167+ return au_sbi(sb)->si_mntflags;
20168+}
20169+
20170+static inline unsigned int au_sigen(struct super_block *sb)
20171+{
dece6358 20172+ SiMustAnyLock(sb);
1facf9fc 20173+ return au_sbi(sb)->si_generation;
20174+}
20175+
20176+static inline struct au_branch *au_sbr(struct super_block *sb,
20177+ aufs_bindex_t bindex)
20178+{
dece6358 20179+ SiMustAnyLock(sb);
1facf9fc 20180+ return au_sbi(sb)->si_branch[0 + bindex];
20181+}
20182+
20183+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
20184+{
dece6358 20185+ SiMustWriteLock(sb);
1facf9fc 20186+ au_sbi(sb)->si_xino_brid = brid;
20187+}
20188+
20189+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
20190+{
dece6358 20191+ SiMustAnyLock(sb);
1facf9fc 20192+ return au_sbi(sb)->si_xino_brid;
20193+}
20194+
20195+#endif /* __KERNEL__ */
20196+#endif /* __AUFS_SUPER_H__ */
1308ab2a 20197diff -uprN -x .git linux-2.6.31/fs/aufs/sysaufs.c aufs2-2.6.git/fs/aufs/sysaufs.c
20198--- linux-2.6.31/fs/aufs/sysaufs.c 1970-01-01 00:00:00.000000000 +0000
20199+++ aufs2-2.6.git/fs/aufs/sysaufs.c 2009-09-21 21:49:23.411607814 +0000
dece6358 20200@@ -0,0 +1,104 @@
1facf9fc 20201+/*
20202+ * Copyright (C) 2005-2009 Junjiro R. Okajima
20203+ *
20204+ * This program, aufs is free software; you can redistribute it and/or modify
20205+ * it under the terms of the GNU General Public License as published by
20206+ * the Free Software Foundation; either version 2 of the License, or
20207+ * (at your option) any later version.
dece6358
AM
20208+ *
20209+ * This program is distributed in the hope that it will be useful,
20210+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20211+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20212+ * GNU General Public License for more details.
20213+ *
20214+ * You should have received a copy of the GNU General Public License
20215+ * along with this program; if not, write to the Free Software
20216+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20217+ */
20218+
20219+/*
20220+ * sysfs interface and lifetime management
20221+ * they are necessary regardless of whether sysfs is disabled.
20222+ */
20223+
20224+#include <linux/fs.h>
20225+#include <linux/random.h>
20226+#include <linux/sysfs.h>
20227+#include "aufs.h"
20228+
20229+unsigned long sysaufs_si_mask;
20230+struct kset *sysaufs_ket;
20231+
20232+#define AuSiAttr(_name) { \
20233+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
20234+ .show = sysaufs_si_##_name, \
20235+}
20236+
20237+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
20238+struct attribute *sysaufs_si_attrs[] = {
20239+ &sysaufs_si_attr_xi_path.attr,
20240+ NULL,
20241+};
20242+
20243+static struct sysfs_ops au_sbi_ops = {
20244+ .show = sysaufs_si_show
20245+};
20246+
20247+static struct kobj_type au_sbi_ktype = {
20248+ .release = au_si_free,
20249+ .sysfs_ops = &au_sbi_ops,
20250+ .default_attrs = sysaufs_si_attrs
20251+};
20252+
20253+/* ---------------------------------------------------------------------- */
20254+
20255+int sysaufs_si_init(struct au_sbinfo *sbinfo)
20256+{
20257+ int err;
20258+
20259+ sbinfo->si_kobj.kset = sysaufs_ket;
20260+ /* cf. sysaufs_name() */
20261+ err = kobject_init_and_add
20262+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_ket->kobj*/NULL,
20263+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
20264+
20265+ dbgaufs_si_null(sbinfo);
20266+ if (!err) {
20267+ err = dbgaufs_si_init(sbinfo);
20268+ if (unlikely(err))
20269+ kobject_put(&sbinfo->si_kobj);
20270+ }
20271+ return err;
20272+}
20273+
20274+void sysaufs_fin(void)
20275+{
20276+ dbgaufs_fin();
20277+ sysfs_remove_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20278+ kset_unregister(sysaufs_ket);
20279+}
20280+
20281+int __init sysaufs_init(void)
20282+{
20283+ int err;
20284+
20285+ do {
20286+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
20287+ } while (!sysaufs_si_mask);
20288+
20289+ sysaufs_ket = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
20290+ err = PTR_ERR(sysaufs_ket);
20291+ if (IS_ERR(sysaufs_ket))
20292+ goto out;
20293+ err = sysfs_create_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20294+ if (unlikely(err)) {
20295+ kset_unregister(sysaufs_ket);
20296+ goto out;
20297+ }
20298+
20299+ err = dbgaufs_init();
20300+ if (unlikely(err))
20301+ sysaufs_fin();
20302+ out:
20303+ return err;
20304+}
1308ab2a 20305diff -uprN -x .git linux-2.6.31/fs/aufs/sysaufs.h aufs2-2.6.git/fs/aufs/sysaufs.h
20306--- linux-2.6.31/fs/aufs/sysaufs.h 1970-01-01 00:00:00.000000000 +0000
20307+++ aufs2-2.6.git/fs/aufs/sysaufs.h 2009-09-21 21:49:23.411607814 +0000
dece6358 20308@@ -0,0 +1,120 @@
1facf9fc 20309+/*
20310+ * Copyright (C) 2005-2009 Junjiro R. Okajima
20311+ *
20312+ * This program, aufs is free software; you can redistribute it and/or modify
20313+ * it under the terms of the GNU General Public License as published by
20314+ * the Free Software Foundation; either version 2 of the License, or
20315+ * (at your option) any later version.
dece6358
AM
20316+ *
20317+ * This program is distributed in the hope that it will be useful,
20318+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20319+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20320+ * GNU General Public License for more details.
20321+ *
20322+ * You should have received a copy of the GNU General Public License
20323+ * along with this program; if not, write to the Free Software
20324+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20325+ */
20326+
20327+/*
20328+ * sysfs interface and mount lifetime management
20329+ */
20330+
20331+#ifndef __SYSAUFS_H__
20332+#define __SYSAUFS_H__
20333+
20334+#ifdef __KERNEL__
20335+
1facf9fc 20336+#include <linux/sysfs.h>
20337+#include <linux/aufs_type.h>
20338+#include "module.h"
20339+
dece6358
AM
20340+struct super_block;
20341+struct au_sbinfo;
20342+
1facf9fc 20343+struct sysaufs_si_attr {
20344+ struct attribute attr;
20345+ int (*show)(struct seq_file *seq, struct super_block *sb);
20346+};
20347+
20348+/* ---------------------------------------------------------------------- */
20349+
20350+/* sysaufs.c */
20351+extern unsigned long sysaufs_si_mask;
20352+extern struct kset *sysaufs_ket;
20353+extern struct attribute *sysaufs_si_attrs[];
20354+int sysaufs_si_init(struct au_sbinfo *sbinfo);
20355+int __init sysaufs_init(void);
20356+void sysaufs_fin(void);
20357+
20358+/* ---------------------------------------------------------------------- */
20359+
20360+/* some people don't like to show a pointer in kernel */
20361+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
20362+{
20363+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
20364+}
20365+
20366+#define SysaufsSiNamePrefix "si_"
20367+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
20368+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
20369+{
20370+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
20371+ sysaufs_si_id(sbinfo));
20372+}
20373+
20374+struct au_branch;
20375+#ifdef CONFIG_SYSFS
20376+/* sysfs.c */
20377+extern struct attribute_group *sysaufs_attr_group;
20378+
20379+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
20380+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20381+ char *buf);
20382+
20383+void sysaufs_br_init(struct au_branch *br);
20384+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
20385+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
20386+
20387+#define sysaufs_brs_init() do {} while (0)
20388+
20389+#else
20390+#define sysaufs_attr_group NULL
20391+
20392+static inline
20393+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
20394+{
20395+ return 0;
20396+}
20397+
20398+static inline
20399+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20400+ char *buf)
20401+{
20402+ return 0;
20403+}
20404+
20405+static inline void sysaufs_br_init(struct au_branch *br)
20406+{
20407+ /* empty */
20408+}
20409+
20410+static inline void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
20411+{
20412+ /* nothing */
20413+}
20414+
20415+static inline void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
20416+{
20417+ /* nothing */
20418+}
20419+
20420+static inline void sysaufs_brs_init(void)
20421+{
20422+ sysaufs_brs = 0;
20423+}
20424+
20425+#endif /* CONFIG_SYSFS */
20426+
20427+#endif /* __KERNEL__ */
20428+#endif /* __SYSAUFS_H__ */
1308ab2a 20429diff -uprN -x .git linux-2.6.31/fs/aufs/sysfs.c aufs2-2.6.git/fs/aufs/sysfs.c
20430--- linux-2.6.31/fs/aufs/sysfs.c 1970-01-01 00:00:00.000000000 +0000
20431+++ aufs2-2.6.git/fs/aufs/sysfs.c 2009-09-21 21:49:23.411607814 +0000
20432@@ -0,0 +1,224 @@
1facf9fc 20433+/*
20434+ * Copyright (C) 2005-2009 Junjiro R. Okajima
20435+ *
20436+ * This program, aufs is free software; you can redistribute it and/or modify
20437+ * it under the terms of the GNU General Public License as published by
20438+ * the Free Software Foundation; either version 2 of the License, or
20439+ * (at your option) any later version.
dece6358
AM
20440+ *
20441+ * This program is distributed in the hope that it will be useful,
20442+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20443+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20444+ * GNU General Public License for more details.
20445+ *
20446+ * You should have received a copy of the GNU General Public License
20447+ * along with this program; if not, write to the Free Software
20448+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20449+ */
20450+
20451+/*
20452+ * sysfs interface
20453+ */
20454+
20455+#include <linux/fs.h>
dece6358 20456+#include <linux/module.h>
1facf9fc 20457+#include <linux/seq_file.h>
20458+#include <linux/sysfs.h>
20459+#include "aufs.h"
20460+
20461+static struct attribute *au_attr[] = {
20462+ NULL, /* need to NULL terminate the list of attributes */
20463+};
20464+
20465+static struct attribute_group sysaufs_attr_group_body = {
20466+ .attrs = au_attr
20467+};
20468+
20469+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
20470+
20471+/* ---------------------------------------------------------------------- */
20472+
20473+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
20474+{
20475+ int err;
20476+
dece6358
AM
20477+ SiMustAnyLock(sb);
20478+
1facf9fc 20479+ err = 0;
20480+ if (au_opt_test(au_mntflags(sb), XINO)) {
20481+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
20482+ seq_putc(seq, '\n');
20483+ }
20484+ return err;
20485+}
20486+
20487+/*
20488+ * the lifetime of branch is independent from the entry under sysfs.
20489+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
20490+ * unlinked.
20491+ */
20492+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
20493+ aufs_bindex_t bindex)
20494+{
20495+ struct path path;
20496+ struct dentry *root;
20497+ struct au_branch *br;
20498+
20499+ AuDbg("b%d\n", bindex);
20500+
20501+ root = sb->s_root;
20502+ di_read_lock_parent(root, !AuLock_IR);
20503+ br = au_sbr(sb, bindex);
20504+ path.mnt = br->br_mnt;
20505+ path.dentry = au_h_dptr(root, bindex);
20506+ au_seq_path(seq, &path);
20507+ di_read_unlock(root, !AuLock_IR);
20508+ seq_printf(seq, "=%s\n", au_optstr_br_perm(br->br_perm));
20509+ return 0;
20510+}
20511+
20512+/* ---------------------------------------------------------------------- */
20513+
20514+static struct seq_file *au_seq(char *p, ssize_t len)
20515+{
20516+ struct seq_file *seq;
20517+
20518+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
20519+ if (seq) {
20520+ /* mutex_init(&seq.lock); */
20521+ seq->buf = p;
20522+ seq->size = len;
20523+ return seq; /* success */
20524+ }
20525+
20526+ seq = ERR_PTR(-ENOMEM);
20527+ return seq;
20528+}
20529+
20530+#define SysaufsBr_PREFIX "br"
20531+
20532+/* todo: file size may exceed PAGE_SIZE */
20533+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 20534+ char *buf)
1facf9fc 20535+{
20536+ ssize_t err;
20537+ long l;
20538+ aufs_bindex_t bend;
20539+ struct au_sbinfo *sbinfo;
20540+ struct super_block *sb;
20541+ struct seq_file *seq;
20542+ char *name;
20543+ struct attribute **cattr;
20544+
20545+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
20546+ sb = sbinfo->si_sb;
1308ab2a 20547+
20548+ /*
20549+ * prevent a race condition between sysfs and aufs.
20550+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
20551+ * prohibits maintaining the sysfs entries.
20552+ * here we acquire read lock after sysfs_get_active_two().
20553+ * on the other hand, the remount process may maintain the sysfs/aufs
20554+ * entries after acquiring write lock.
20555+ * it can cause a deadlock.
20556+ * simply we gave up processing read here.
20557+ */
20558+ err = -EBUSY;
20559+ if (unlikely(!si_noflush_read_trylock(sb)))
20560+ goto out;
1facf9fc 20561+
20562+ seq = au_seq(buf, PAGE_SIZE);
20563+ err = PTR_ERR(seq);
20564+ if (IS_ERR(seq))
1308ab2a 20565+ goto out_unlock;
1facf9fc 20566+
20567+ name = (void *)attr->name;
20568+ cattr = sysaufs_si_attrs;
20569+ while (*cattr) {
20570+ if (!strcmp(name, (*cattr)->name)) {
20571+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
20572+ ->show(seq, sb);
20573+ goto out_seq;
20574+ }
20575+ cattr++;
20576+ }
20577+
20578+ bend = au_sbend(sb);
20579+ if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
20580+ name += sizeof(SysaufsBr_PREFIX) - 1;
20581+ err = strict_strtol(name, 10, &l);
20582+ if (!err) {
20583+ if (l <= bend)
20584+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
20585+ else
20586+ err = -ENOENT;
20587+ }
20588+ goto out_seq;
20589+ }
20590+ BUG();
20591+
20592+ out_seq:
20593+ if (!err) {
20594+ err = seq->count;
20595+ /* sysfs limit */
20596+ if (unlikely(err == PAGE_SIZE))
20597+ err = -EFBIG;
20598+ }
20599+ kfree(seq);
1308ab2a 20600+ out_unlock:
1facf9fc 20601+ si_read_unlock(sb);
1308ab2a 20602+ out:
1facf9fc 20603+ return err;
20604+}
20605+
20606+/* ---------------------------------------------------------------------- */
20607+
20608+void sysaufs_br_init(struct au_branch *br)
20609+{
20610+ br->br_attr.name = br->br_name;
20611+ br->br_attr.mode = S_IRUGO;
20612+ br->br_attr.owner = THIS_MODULE;
20613+}
20614+
20615+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
20616+{
20617+ struct au_branch *br;
20618+ struct kobject *kobj;
20619+ aufs_bindex_t bend;
20620+
20621+ dbgaufs_brs_del(sb, bindex);
20622+
20623+ if (!sysaufs_brs)
20624+ return;
20625+
20626+ kobj = &au_sbi(sb)->si_kobj;
20627+ bend = au_sbend(sb);
20628+ for (; bindex <= bend; bindex++) {
20629+ br = au_sbr(sb, bindex);
20630+ sysfs_remove_file(kobj, &br->br_attr);
20631+ }
20632+}
20633+
20634+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
20635+{
20636+ int err;
20637+ aufs_bindex_t bend;
20638+ struct kobject *kobj;
20639+ struct au_branch *br;
20640+
20641+ dbgaufs_brs_add(sb, bindex);
20642+
20643+ if (!sysaufs_brs)
20644+ return;
20645+
20646+ kobj = &au_sbi(sb)->si_kobj;
20647+ bend = au_sbend(sb);
20648+ for (; bindex <= bend; bindex++) {
20649+ br = au_sbr(sb, bindex);
20650+ snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
20651+ "%d", bindex);
20652+ err = sysfs_create_file(kobj, &br->br_attr);
20653+ if (unlikely(err))
20654+ AuWarn("failed %s under sysfs(%d)\n", br->br_name, err);
20655+ }
20656+}
1308ab2a 20657diff -uprN -x .git linux-2.6.31/fs/aufs/sysrq.c aufs2-2.6.git/fs/aufs/sysrq.c
20658--- linux-2.6.31/fs/aufs/sysrq.c 1970-01-01 00:00:00.000000000 +0000
20659+++ aufs2-2.6.git/fs/aufs/sysrq.c 2009-09-21 21:49:23.411607814 +0000
dece6358 20660@@ -0,0 +1,115 @@
1facf9fc 20661+/*
20662+ * Copyright (C) 2005-2009 Junjiro R. Okajima
20663+ *
20664+ * This program, aufs is free software; you can redistribute it and/or modify
20665+ * it under the terms of the GNU General Public License as published by
20666+ * the Free Software Foundation; either version 2 of the License, or
20667+ * (at your option) any later version.
dece6358
AM
20668+ *
20669+ * This program is distributed in the hope that it will be useful,
20670+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20671+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20672+ * GNU General Public License for more details.
20673+ *
20674+ * You should have received a copy of the GNU General Public License
20675+ * along with this program; if not, write to the Free Software
20676+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20677+ */
20678+
20679+/*
20680+ * magic sysrq handler
20681+ */
20682+
20683+#include <linux/fs.h>
20684+#include <linux/module.h>
20685+#include <linux/moduleparam.h>
20686+/* #include <linux/sysrq.h> */
20687+#include "aufs.h"
20688+
20689+/* ---------------------------------------------------------------------- */
20690+
20691+static void sysrq_sb(struct super_block *sb)
20692+{
20693+ char *plevel;
20694+ struct au_sbinfo *sbinfo;
20695+ struct file *file;
20696+
20697+ plevel = au_plevel;
20698+ au_plevel = KERN_WARNING;
20699+ au_debug(1);
20700+
20701+ sbinfo = au_sbi(sb);
20702+ pr_warning("si=%lx\n", sysaufs_si_id(sbinfo));
20703+ pr_warning(AUFS_NAME ": superblock\n");
20704+ au_dpri_sb(sb);
20705+ pr_warning(AUFS_NAME ": root dentry\n");
20706+ au_dpri_dentry(sb->s_root);
20707+ pr_warning(AUFS_NAME ": root inode\n");
20708+ au_dpri_inode(sb->s_root->d_inode);
20709+#if 0
20710+ struct inode *i;
20711+ pr_warning(AUFS_NAME ": isolated inode\n");
20712+ list_for_each_entry(i, &sb->s_inodes, i_sb_list)
20713+ if (list_empty(&i->i_dentry))
20714+ au_dpri_inode(i);
20715+#endif
20716+ pr_warning(AUFS_NAME ": files\n");
20717+ list_for_each_entry(file, &sb->s_files, f_u.fu_list)
20718+ if (!special_file(file->f_dentry->d_inode->i_mode))
20719+ au_dpri_file(file);
20720+
20721+ au_plevel = plevel;
20722+ au_debug(0);
20723+}
20724+
20725+/* ---------------------------------------------------------------------- */
20726+
20727+/* module parameter */
20728+static char *aufs_sysrq_key = "a";
20729+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
20730+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
20731+
20732+static void au_sysrq(int key __maybe_unused,
20733+ struct tty_struct *tty __maybe_unused)
20734+{
20735+ struct kobject *kobj;
20736+ struct au_sbinfo *sbinfo;
20737+
20738+ /* spin_lock(&sysaufs_ket->list_lock); */
20739+ list_for_each_entry(kobj, &sysaufs_ket->list, entry) {
20740+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
20741+ sysrq_sb(sbinfo->si_sb);
20742+ }
20743+ /* spin_unlock(&sysaufs_ket->list_lock); */
20744+}
20745+
20746+static struct sysrq_key_op au_sysrq_op = {
20747+ .handler = au_sysrq,
20748+ .help_msg = "Aufs",
20749+ .action_msg = "Aufs",
20750+ .enable_mask = SYSRQ_ENABLE_DUMP
20751+};
20752+
20753+/* ---------------------------------------------------------------------- */
20754+
20755+int __init au_sysrq_init(void)
20756+{
20757+ int err;
20758+ char key;
20759+
20760+ err = -1;
20761+ key = *aufs_sysrq_key;
20762+ if ('a' <= key && key <= 'z')
20763+ err = register_sysrq_key(key, &au_sysrq_op);
20764+ if (unlikely(err))
20765+ AuErr("err %d, sysrq=%c\n", err, key);
20766+ return err;
20767+}
20768+
20769+void au_sysrq_fin(void)
20770+{
20771+ int err;
20772+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
20773+ if (unlikely(err))
20774+ AuErr("err %d (ignored)\n", err);
20775+}
1308ab2a 20776diff -uprN -x .git linux-2.6.31/fs/aufs/vdir.c aufs2-2.6.git/fs/aufs/vdir.c
20777--- linux-2.6.31/fs/aufs/vdir.c 1970-01-01 00:00:00.000000000 +0000
20778+++ aufs2-2.6.git/fs/aufs/vdir.c 2009-09-21 21:49:23.411607814 +0000
20779@@ -0,0 +1,879 @@
1facf9fc 20780+/*
20781+ * Copyright (C) 2005-2009 Junjiro R. Okajima
20782+ *
20783+ * This program, aufs is free software; you can redistribute it and/or modify
20784+ * it under the terms of the GNU General Public License as published by
20785+ * the Free Software Foundation; either version 2 of the License, or
20786+ * (at your option) any later version.
dece6358
AM
20787+ *
20788+ * This program is distributed in the hope that it will be useful,
20789+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20790+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20791+ * GNU General Public License for more details.
20792+ *
20793+ * You should have received a copy of the GNU General Public License
20794+ * along with this program; if not, write to the Free Software
20795+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 20796+ */
20797+
20798+/*
20799+ * virtual or vertical directory
20800+ */
20801+
dece6358 20802+#include <linux/hash.h>
1facf9fc 20803+#include "aufs.h"
20804+
dece6358 20805+static unsigned int calc_size(int nlen)
1facf9fc 20806+{
1facf9fc 20807+ BUILD_BUG_ON(sizeof(ino_t) != sizeof(long));
dece6358 20808+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 20809+}
20810+
20811+static int set_deblk_end(union au_vdir_deblk_p *p,
20812+ union au_vdir_deblk_p *deblk_end)
20813+{
20814+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
20815+ p->de->de_str.len = 0;
20816+ /* smp_mb(); */
20817+ return 0;
20818+ }
20819+ return -1; /* error */
20820+}
20821+
20822+/* returns true or false */
20823+static int is_deblk_end(union au_vdir_deblk_p *p,
20824+ union au_vdir_deblk_p *deblk_end)
20825+{
20826+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
20827+ return !p->de->de_str.len;
20828+ return 1;
20829+}
20830+
20831+static unsigned char *last_deblk(struct au_vdir *vdir)
20832+{
20833+ return vdir->vd_deblk[vdir->vd_nblk - 1];
20834+}
20835+
20836+/* ---------------------------------------------------------------------- */
20837+
1308ab2a 20838+/* estimate the appropriate size for name hash table */
20839+unsigned int au_rdhash_est(loff_t sz)
20840+{
20841+ unsigned int n;
20842+
20843+ n = UINT_MAX;
20844+ sz >>= 10;
20845+ if (sz < n)
20846+ n = sz;
20847+ if (sz < AUFS_RDHASH_DEF)
20848+ n = AUFS_RDHASH_DEF;
20849+ /* AuInfo("n %u\n", n); */
20850+ return n;
20851+}
20852+
1facf9fc 20853+/*
20854+ * the allocated memory has to be freed by
dece6358 20855+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 20856+ */
dece6358 20857+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 20858+{
1facf9fc 20859+ struct hlist_head *head;
dece6358 20860+ unsigned int u;
1facf9fc 20861+
dece6358
AM
20862+ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp);
20863+ if (head) {
20864+ nhash->nh_num = num_hash;
20865+ nhash->nh_head = head;
20866+ for (u = 0; u < num_hash; u++)
1facf9fc 20867+ INIT_HLIST_HEAD(head++);
dece6358 20868+ return 0; /* success */
1facf9fc 20869+ }
1facf9fc 20870+
dece6358 20871+ return -ENOMEM;
1facf9fc 20872+}
20873+
dece6358
AM
20874+static void nhash_count(struct hlist_head *head)
20875+{
20876+#if 0
20877+ unsigned long n;
20878+ struct hlist_node *pos;
20879+
20880+ n = 0;
20881+ hlist_for_each(pos, head)
20882+ n++;
20883+ AuInfo("%lu\n", n);
20884+#endif
20885+}
20886+
20887+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 20888+{
1facf9fc 20889+ struct au_vdir_wh *tpos;
20890+ struct hlist_node *pos, *node;
20891+
dece6358
AM
20892+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
20893+ /* hlist_del(pos); */
20894+ kfree(tpos);
1facf9fc 20895+ }
20896+}
20897+
dece6358 20898+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 20899+{
dece6358
AM
20900+ struct au_vdir_dehstr *tpos;
20901+ struct hlist_node *pos, *node;
1facf9fc 20902+
dece6358
AM
20903+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
20904+ /* hlist_del(pos); */
20905+ au_cache_free_dehstr(tpos);
1facf9fc 20906+ }
1facf9fc 20907+}
20908+
dece6358
AM
20909+static void au_nhash_do_free(struct au_nhash *nhash,
20910+ void (*free)(struct hlist_head *head))
1facf9fc 20911+{
1308ab2a 20912+ unsigned int n;
1facf9fc 20913+ struct hlist_head *head;
1facf9fc 20914+
dece6358 20915+ n = nhash->nh_num;
1308ab2a 20916+ if (!n)
20917+ return;
20918+
dece6358 20919+ head = nhash->nh_head;
1308ab2a 20920+ while (n-- > 0) {
dece6358
AM
20921+ nhash_count(head);
20922+ free(head++);
1facf9fc 20923+ }
dece6358 20924+ kfree(nhash->nh_head);
1facf9fc 20925+}
20926+
dece6358 20927+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 20928+{
dece6358
AM
20929+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
20930+}
1facf9fc 20931+
dece6358
AM
20932+static void au_nhash_de_free(struct au_nhash *delist)
20933+{
20934+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 20935+}
20936+
20937+/* ---------------------------------------------------------------------- */
20938+
20939+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
20940+ int limit)
20941+{
20942+ int num;
20943+ unsigned int u, n;
20944+ struct hlist_head *head;
20945+ struct au_vdir_wh *tpos;
20946+ struct hlist_node *pos;
20947+
20948+ num = 0;
20949+ n = whlist->nh_num;
20950+ head = whlist->nh_head;
1308ab2a 20951+ for (u = 0; u < n; u++, head++)
1facf9fc 20952+ hlist_for_each_entry(tpos, pos, head, wh_hash)
20953+ if (tpos->wh_bindex == btgt && ++num > limit)
20954+ return 1;
1facf9fc 20955+ return 0;
20956+}
20957+
20958+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 20959+ unsigned char *name,
1facf9fc 20960+ unsigned int len)
20961+{
dece6358
AM
20962+ unsigned int v;
20963+ /* const unsigned int magic_bit = 12; */
20964+
1308ab2a 20965+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
20966+
dece6358
AM
20967+ v = 0;
20968+ while (len--)
20969+ v += *name++;
20970+ /* v = hash_long(v, magic_bit); */
20971+ v %= nhash->nh_num;
20972+ return nhash->nh_head + v;
20973+}
20974+
20975+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
20976+ int nlen)
20977+{
20978+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 20979+}
20980+
20981+/* returns found or not */
dece6358 20982+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 20983+{
20984+ struct hlist_head *head;
20985+ struct au_vdir_wh *tpos;
20986+ struct hlist_node *pos;
20987+ struct au_vdir_destr *str;
20988+
dece6358 20989+ head = au_name_hash(whlist, name, nlen);
1facf9fc 20990+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
20991+ str = &tpos->wh_str;
20992+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
20993+ if (au_nhash_test_name(str, name, nlen))
20994+ return 1;
20995+ }
20996+ return 0;
20997+}
20998+
20999+/* returns found(true) or not */
21000+static int test_known(struct au_nhash *delist, char *name, int nlen)
21001+{
21002+ struct hlist_head *head;
21003+ struct au_vdir_dehstr *tpos;
21004+ struct hlist_node *pos;
21005+ struct au_vdir_destr *str;
21006+
21007+ head = au_name_hash(delist, name, nlen);
21008+ hlist_for_each_entry(tpos, pos, head, hash) {
21009+ str = tpos->str;
21010+ AuDbg("%.*s\n", str->len, str->name);
21011+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 21012+ return 1;
21013+ }
21014+ return 0;
21015+}
21016+
dece6358
AM
21017+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
21018+ unsigned char d_type)
21019+{
21020+#ifdef CONFIG_AUFS_SHWH
21021+ wh->wh_ino = ino;
21022+ wh->wh_type = d_type;
21023+#endif
21024+}
21025+
21026+/* ---------------------------------------------------------------------- */
21027+
21028+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
21029+ unsigned int d_type, aufs_bindex_t bindex,
21030+ unsigned char shwh)
1facf9fc 21031+{
21032+ int err;
21033+ struct au_vdir_destr *str;
21034+ struct au_vdir_wh *wh;
21035+
dece6358 21036+ AuDbg("%.*s\n", nlen, name);
1308ab2a 21037+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
21038+
1facf9fc 21039+ err = -ENOMEM;
dece6358 21040+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 21041+ if (unlikely(!wh))
21042+ goto out;
21043+
21044+ err = 0;
21045+ wh->wh_bindex = bindex;
dece6358
AM
21046+ if (shwh)
21047+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 21048+ str = &wh->wh_str;
dece6358
AM
21049+ str->len = nlen;
21050+ memcpy(str->name, name, nlen);
21051+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 21052+ /* smp_mb(); */
21053+
21054+ out:
21055+ return err;
21056+}
21057+
1facf9fc 21058+static int append_deblk(struct au_vdir *vdir)
21059+{
21060+ int err;
dece6358 21061+ unsigned long ul;
1facf9fc 21062+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
21063+ union au_vdir_deblk_p p, deblk_end;
21064+ unsigned char **o;
21065+
21066+ err = -ENOMEM;
dece6358
AM
21067+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
21068+ GFP_NOFS);
1facf9fc 21069+ if (unlikely(!o))
21070+ goto out;
21071+
21072+ vdir->vd_deblk = o;
21073+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
21074+ if (p.deblk) {
21075+ ul = vdir->vd_nblk++;
21076+ vdir->vd_deblk[ul] = p.deblk;
21077+ vdir->vd_last.ul = ul;
21078+ vdir->vd_last.p.deblk = p.deblk;
21079+ deblk_end.deblk = p.deblk + deblk_sz;
21080+ err = set_deblk_end(&p, &deblk_end);
21081+ }
21082+
21083+ out:
21084+ return err;
21085+}
21086+
dece6358
AM
21087+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
21088+ unsigned int d_type, struct au_nhash *delist)
21089+{
21090+ int err;
21091+ unsigned int sz;
21092+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
21093+ union au_vdir_deblk_p p, *room, deblk_end;
21094+ struct au_vdir_dehstr *dehstr;
21095+
21096+ p.deblk = last_deblk(vdir);
21097+ deblk_end.deblk = p.deblk + deblk_sz;
21098+ room = &vdir->vd_last.p;
21099+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
21100+ || !is_deblk_end(room, &deblk_end));
21101+
21102+ sz = calc_size(nlen);
21103+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
21104+ err = append_deblk(vdir);
21105+ if (unlikely(err))
21106+ goto out;
21107+
21108+ p.deblk = last_deblk(vdir);
21109+ deblk_end.deblk = p.deblk + deblk_sz;
21110+ /* smp_mb(); */
21111+ AuDebugOn(room->deblk != p.deblk);
21112+ }
21113+
21114+ err = -ENOMEM;
21115+ dehstr = au_cache_alloc_dehstr();
21116+ if (unlikely(!dehstr))
21117+ goto out;
21118+
21119+ dehstr->str = &room->de->de_str;
21120+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
21121+ room->de->de_ino = ino;
21122+ room->de->de_type = d_type;
21123+ room->de->de_str.len = nlen;
21124+ memcpy(room->de->de_str.name, name, nlen);
21125+
21126+ err = 0;
21127+ room->deblk += sz;
21128+ if (unlikely(set_deblk_end(room, &deblk_end)))
21129+ err = append_deblk(vdir);
21130+ /* smp_mb(); */
21131+
21132+ out:
21133+ return err;
21134+}
21135+
21136+/* ---------------------------------------------------------------------- */
21137+
21138+void au_vdir_free(struct au_vdir *vdir)
21139+{
21140+ unsigned char **deblk;
21141+
21142+ deblk = vdir->vd_deblk;
21143+ while (vdir->vd_nblk--)
21144+ kfree(*deblk++);
21145+ kfree(vdir->vd_deblk);
21146+ au_cache_free_vdir(vdir);
21147+}
21148+
1308ab2a 21149+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 21150+{
21151+ struct au_vdir *vdir;
1308ab2a 21152+ struct super_block *sb;
1facf9fc 21153+ int err;
21154+
1308ab2a 21155+ sb = file->f_dentry->d_sb;
dece6358
AM
21156+ SiMustAnyLock(sb);
21157+
1facf9fc 21158+ err = -ENOMEM;
21159+ vdir = au_cache_alloc_vdir();
21160+ if (unlikely(!vdir))
21161+ goto out;
21162+
21163+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
21164+ if (unlikely(!vdir->vd_deblk))
21165+ goto out_free;
21166+
21167+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 21168+ if (!vdir->vd_deblk_sz) {
21169+ /* estimate the apropriate size for deblk */
21170+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
21171+ /* AuInfo("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
21172+ }
1facf9fc 21173+ vdir->vd_nblk = 0;
21174+ vdir->vd_version = 0;
21175+ vdir->vd_jiffy = 0;
21176+ err = append_deblk(vdir);
21177+ if (!err)
21178+ return vdir; /* success */
21179+
21180+ kfree(vdir->vd_deblk);
21181+
21182+ out_free:
21183+ au_cache_free_vdir(vdir);
21184+ out:
21185+ vdir = ERR_PTR(err);
21186+ return vdir;
21187+}
21188+
21189+static int reinit_vdir(struct au_vdir *vdir)
21190+{
21191+ int err;
21192+ union au_vdir_deblk_p p, deblk_end;
21193+
21194+ while (vdir->vd_nblk > 1) {
21195+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
21196+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
21197+ vdir->vd_nblk--;
21198+ }
21199+ p.deblk = vdir->vd_deblk[0];
21200+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
21201+ err = set_deblk_end(&p, &deblk_end);
21202+ /* keep vd_dblk_sz */
21203+ vdir->vd_last.ul = 0;
21204+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21205+ vdir->vd_version = 0;
21206+ vdir->vd_jiffy = 0;
21207+ /* smp_mb(); */
21208+ return err;
21209+}
21210+
21211+/* ---------------------------------------------------------------------- */
21212+
/*
 * Flag bits kept in fillvdir_arg.flags, with test/set/clear helpers.
 * The set/clear helpers are wrapped in do { } while (0) so that they act as
 * a single statement, e.g. in an if/else without braces.
 */
#define AuFillVdir_CALLED	1
#define AuFillVdir_WHABLE	(1 << 1)
#define AuFillVdir_SHWH		(1 << 2)
#define au_ftest_fillvdir(flags, name)	((flags) & AuFillVdir_##name)
#define au_fset_fillvdir(flags, name) \
	do { (flags) |= AuFillVdir_##name; } while (0)
#define au_fclr_fillvdir(flags, name) \
	do { (flags) &= ~AuFillVdir_##name; } while (0)

/* without CONFIG_AUFS_SHWH the SHWH bit is 0, so testing it is always false */
#ifndef CONFIG_AUFS_SHWH
#undef AuFillVdir_SHWH
#define AuFillVdir_SHWH		0
#endif
21224+
1facf9fc 21225+struct fillvdir_arg {
21226+ struct file *file;
21227+ struct au_vdir *vdir;
dece6358
AM
21228+ struct au_nhash delist;
21229+ struct au_nhash whlist;
1facf9fc 21230+ aufs_bindex_t bindex;
21231+ unsigned int flags;
21232+ int err;
21233+};
21234+
dece6358 21235+static int fillvdir(void *__arg, const char *__name, int nlen,
1facf9fc 21236+ loff_t offset __maybe_unused, u64 h_ino,
21237+ unsigned int d_type)
21238+{
21239+ struct fillvdir_arg *arg = __arg;
21240+ char *name = (void *)__name;
21241+ struct super_block *sb;
1facf9fc 21242+ ino_t ino;
dece6358 21243+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 21244+
1facf9fc 21245+ arg->err = 0;
dece6358 21246+ sb = arg->file->f_dentry->d_sb;
1facf9fc 21247+ au_fset_fillvdir(arg->flags, CALLED);
21248+ /* smp_mb(); */
dece6358 21249+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 21250+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
21251+ if (test_known(&arg->delist, name, nlen)
21252+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
21253+ goto out; /* already exists or whiteouted */
1facf9fc 21254+
21255+ sb = arg->file->f_dentry->d_sb;
dece6358 21256+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
1facf9fc 21257+ if (!arg->err)
dece6358
AM
21258+ arg->err = append_de(arg->vdir, name, nlen, ino,
21259+ d_type, &arg->delist);
1facf9fc 21260+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
21261+ name += AUFS_WH_PFX_LEN;
dece6358
AM
21262+ nlen -= AUFS_WH_PFX_LEN;
21263+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
21264+ goto out; /* already whiteouted */
1facf9fc 21265+
dece6358
AM
21266+ if (shwh)
21267+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
21268+ &ino);
1facf9fc 21269+ if (!arg->err)
21270+ arg->err = au_nhash_append_wh
dece6358
AM
21271+ (&arg->whlist, name, nlen, ino, d_type,
21272+ arg->bindex, shwh);
1facf9fc 21273+ }
21274+
21275+ out:
21276+ if (!arg->err)
21277+ arg->vdir->vd_jiffy = jiffies;
21278+ /* smp_mb(); */
21279+ AuTraceErr(arg->err);
21280+ return arg->err;
21281+}
21282+
dece6358
AM
/*
 * With CONFIG_AUFS_SHWH: append every whiteout recorded in @whlist to @vdir
 * as a directory entry whose name carries the whiteout prefix again.
 * Stops at the first append_de() failure and returns that error.
 * Without CONFIG_AUFS_SHWH this does nothing and returns 0.
 */
static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
			  struct au_nhash *whlist, struct au_nhash *delist)
{
#ifdef CONFIG_AUFS_SHWH
	int err;
	unsigned int nh, u;
	struct hlist_head *head;
	struct au_vdir_wh *tpos;
	struct hlist_node *pos, *n;
	char *p, *o;
	struct au_vdir_destr *destr;

	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));

	err = -ENOMEM;
	o = p = __getname();
	if (unlikely(!p))
		goto out;

	err = 0;
	nh = whlist->nh_num;
	/* the prefix is written once; each name is copied behind it */
	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	p += AUFS_WH_PFX_LEN;
	for (u = 0; u < nh; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
			destr = &tpos->wh_str;
			memcpy(p, destr->name, destr->len);
			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
					tpos->wh_ino, tpos->wh_type, delist);
			/*
			 * leave both loops; a plain break would continue
			 * with the next bucket and could overwrite the error
			 */
			if (unlikely(err))
				goto out_putname;
		}
	}

out_putname:
	__putname(o);

out:
	AuTraceErr(err);
	return err;
#else
	return 0;
#endif
}
21327+
1facf9fc 21328+static int au_do_read_vdir(struct fillvdir_arg *arg)
21329+{
21330+ int err;
dece6358 21331+ unsigned int rdhash;
1facf9fc 21332+ loff_t offset;
dece6358
AM
21333+ aufs_bindex_t bend, bindex, bstart;
21334+ unsigned char shwh;
1facf9fc 21335+ struct file *hf, *file;
21336+ struct super_block *sb;
21337+
1facf9fc 21338+ file = arg->file;
21339+ sb = file->f_dentry->d_sb;
dece6358
AM
21340+ SiMustAnyLock(sb);
21341+
21342+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 21343+ if (!rdhash)
21344+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
21345+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
21346+ if (unlikely(err))
1facf9fc 21347+ goto out;
dece6358
AM
21348+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
21349+ if (unlikely(err))
1facf9fc 21350+ goto out_delist;
21351+
21352+ err = 0;
21353+ arg->flags = 0;
dece6358
AM
21354+ shwh = 0;
21355+ if (au_opt_test(au_mntflags(sb), SHWH)) {
21356+ shwh = 1;
21357+ au_fset_fillvdir(arg->flags, SHWH);
21358+ }
21359+ bstart = au_fbstart(file);
21360+ bend = au_fbend(file);
21361+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
1facf9fc 21362+ hf = au_h_fptr(file, bindex);
21363+ if (!hf)
21364+ continue;
21365+
21366+ offset = vfsub_llseek(hf, 0, SEEK_SET);
21367+ err = offset;
21368+ if (unlikely(offset))
21369+ break;
21370+
21371+ arg->bindex = bindex;
21372+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
21373+ if (shwh
21374+ || (bindex != bend
21375+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 21376+ au_fset_fillvdir(arg->flags, WHABLE);
21377+ do {
21378+ arg->err = 0;
21379+ au_fclr_fillvdir(arg->flags, CALLED);
21380+ /* smp_mb(); */
21381+ err = vfsub_readdir(hf, fillvdir, arg);
21382+ if (err >= 0)
21383+ err = arg->err;
21384+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
21385+ }
dece6358
AM
21386+
21387+ if (!err && shwh)
21388+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
21389+
21390+ au_nhash_wh_free(&arg->whlist);
1facf9fc 21391+
21392+ out_delist:
dece6358 21393+ au_nhash_de_free(&arg->delist);
1facf9fc 21394+ out:
21395+ return err;
21396+}
21397+
21398+static int read_vdir(struct file *file, int may_read)
21399+{
21400+ int err;
21401+ unsigned long expire;
21402+ unsigned char do_read;
21403+ struct fillvdir_arg arg;
21404+ struct inode *inode;
21405+ struct au_vdir *vdir, *allocated;
21406+
21407+ err = 0;
21408+ inode = file->f_dentry->d_inode;
21409+ IMustLock(inode);
dece6358
AM
21410+ SiMustAnyLock(inode->i_sb);
21411+
1facf9fc 21412+ allocated = NULL;
21413+ do_read = 0;
21414+ expire = au_sbi(inode->i_sb)->si_rdcache;
21415+ vdir = au_ivdir(inode);
21416+ if (!vdir) {
21417+ do_read = 1;
1308ab2a 21418+ vdir = alloc_vdir(file);
1facf9fc 21419+ err = PTR_ERR(vdir);
21420+ if (IS_ERR(vdir))
21421+ goto out;
21422+ err = 0;
21423+ allocated = vdir;
21424+ } else if (may_read
21425+ && (inode->i_version != vdir->vd_version
21426+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
21427+ do_read = 1;
21428+ err = reinit_vdir(vdir);
21429+ if (unlikely(err))
21430+ goto out;
21431+ }
21432+
21433+ if (!do_read)
21434+ return 0; /* success */
21435+
21436+ arg.file = file;
21437+ arg.vdir = vdir;
21438+ err = au_do_read_vdir(&arg);
21439+ if (!err) {
21440+ /* file->f_pos = 0; */
21441+ vdir->vd_version = inode->i_version;
21442+ vdir->vd_last.ul = 0;
21443+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21444+ if (allocated)
21445+ au_set_ivdir(inode, allocated);
21446+ } else if (allocated)
21447+ au_vdir_free(allocated);
21448+
21449+ out:
21450+ return err;
21451+}
21452+
21453+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
21454+{
21455+ int err, rerr;
21456+ unsigned long ul, n;
21457+ const unsigned int deblk_sz = src->vd_deblk_sz;
21458+
21459+ AuDebugOn(tgt->vd_nblk != 1);
21460+
21461+ err = -ENOMEM;
21462+ if (tgt->vd_nblk < src->vd_nblk) {
21463+ unsigned char **p;
21464+
dece6358
AM
21465+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
21466+ GFP_NOFS);
1facf9fc 21467+ if (unlikely(!p))
21468+ goto out;
21469+ tgt->vd_deblk = p;
21470+ }
21471+
1308ab2a 21472+ if (tgt->vd_deblk_sz != deblk_sz) {
21473+ unsigned char *p;
21474+
21475+ tgt->vd_deblk_sz = deblk_sz;
21476+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
21477+ if (unlikely(!p))
21478+ goto out;
21479+ tgt->vd_deblk[0] = p;
21480+ }
1facf9fc 21481+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 21482+ tgt->vd_version = src->vd_version;
21483+ tgt->vd_jiffy = src->vd_jiffy;
21484+
21485+ n = src->vd_nblk;
21486+ for (ul = 1; ul < n; ul++) {
dece6358
AM
21487+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
21488+ GFP_NOFS);
21489+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 21490+ goto out;
1308ab2a 21491+ tgt->vd_nblk++;
1facf9fc 21492+ }
1308ab2a 21493+ tgt->vd_nblk = n;
21494+ tgt->vd_last.ul = tgt->vd_last.ul;
21495+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
21496+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
21497+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 21498+ /* smp_mb(); */
21499+ return 0; /* success */
21500+
21501+ out:
21502+ rerr = reinit_vdir(tgt);
21503+ BUG_ON(rerr);
21504+ return err;
21505+}
21506+
21507+int au_vdir_init(struct file *file)
21508+{
21509+ int err;
21510+ struct inode *inode;
21511+ struct au_vdir *vdir_cache, *allocated;
21512+
21513+ err = read_vdir(file, !file->f_pos);
21514+ if (unlikely(err))
21515+ goto out;
21516+
21517+ allocated = NULL;
21518+ vdir_cache = au_fvdir_cache(file);
21519+ if (!vdir_cache) {
1308ab2a 21520+ vdir_cache = alloc_vdir(file);
1facf9fc 21521+ err = PTR_ERR(vdir_cache);
21522+ if (IS_ERR(vdir_cache))
21523+ goto out;
21524+ allocated = vdir_cache;
21525+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
21526+ err = reinit_vdir(vdir_cache);
21527+ if (unlikely(err))
21528+ goto out;
21529+ } else
21530+ return 0; /* success */
21531+
21532+ inode = file->f_dentry->d_inode;
21533+ err = copy_vdir(vdir_cache, au_ivdir(inode));
21534+ if (!err) {
21535+ file->f_version = inode->i_version;
21536+ if (allocated)
21537+ au_set_fvdir_cache(file, allocated);
21538+ } else if (allocated)
21539+ au_vdir_free(allocated);
21540+
21541+ out:
21542+ return err;
21543+}
21544+
21545+static loff_t calc_offset(struct au_vdir *vdir)
21546+{
21547+ loff_t offset;
21548+ union au_vdir_deblk_p p;
21549+
21550+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
21551+ offset = vdir->vd_last.p.deblk - p.deblk;
21552+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
21553+ return offset;
21554+}
21555+
21556+/* returns true or false */
21557+static int seek_vdir(struct file *file)
21558+{
21559+ int valid;
21560+ unsigned int deblk_sz;
21561+ unsigned long ul, n;
21562+ loff_t offset;
21563+ union au_vdir_deblk_p p, deblk_end;
21564+ struct au_vdir *vdir_cache;
21565+
21566+ valid = 1;
21567+ vdir_cache = au_fvdir_cache(file);
21568+ offset = calc_offset(vdir_cache);
21569+ AuDbg("offset %lld\n", offset);
21570+ if (file->f_pos == offset)
21571+ goto out;
21572+
21573+ vdir_cache->vd_last.ul = 0;
21574+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
21575+ if (!file->f_pos)
21576+ goto out;
21577+
21578+ valid = 0;
21579+ deblk_sz = vdir_cache->vd_deblk_sz;
21580+ ul = div64_u64(file->f_pos, deblk_sz);
21581+ AuDbg("ul %lu\n", ul);
21582+ if (ul >= vdir_cache->vd_nblk)
21583+ goto out;
21584+
21585+ n = vdir_cache->vd_nblk;
21586+ for (; ul < n; ul++) {
21587+ p.deblk = vdir_cache->vd_deblk[ul];
21588+ deblk_end.deblk = p.deblk + deblk_sz;
21589+ offset = ul;
21590+ offset *= deblk_sz;
21591+ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
21592+ unsigned int l;
21593+
21594+ l = calc_size(p.de->de_str.len);
21595+ offset += l;
21596+ p.deblk += l;
21597+ }
21598+ if (!is_deblk_end(&p, &deblk_end)) {
21599+ valid = 1;
21600+ vdir_cache->vd_last.ul = ul;
21601+ vdir_cache->vd_last.p = p;
21602+ break;
21603+ }
21604+ }
21605+
21606+ out:
21607+ /* smp_mb(); */
21608+ AuTraceErr(!valid);
21609+ return valid;
21610+}
21611+
21612+int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
21613+{
21614+ int err;
21615+ unsigned int l, deblk_sz;
21616+ union au_vdir_deblk_p deblk_end;
21617+ struct au_vdir *vdir_cache;
21618+ struct au_vdir_de *de;
21619+
21620+ vdir_cache = au_fvdir_cache(file);
21621+ if (!seek_vdir(file))
21622+ return 0;
21623+
21624+ deblk_sz = vdir_cache->vd_deblk_sz;
21625+ while (1) {
21626+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
21627+ deblk_end.deblk += deblk_sz;
21628+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
21629+ de = vdir_cache->vd_last.p.de;
21630+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
21631+ de->de_str.len, de->de_str.name, file->f_pos,
21632+ (unsigned long)de->de_ino, de->de_type);
21633+ err = filldir(dirent, de->de_str.name, de->de_str.len,
21634+ file->f_pos, de->de_ino, de->de_type);
21635+ if (unlikely(err)) {
21636+ AuTraceErr(err);
21637+ /* todo: ignore the error caused by udba? */
21638+ /* return err; */
21639+ return 0;
21640+ }
21641+
21642+ l = calc_size(de->de_str.len);
21643+ vdir_cache->vd_last.p.deblk += l;
21644+ file->f_pos += l;
21645+ }
21646+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
21647+ vdir_cache->vd_last.ul++;
21648+ vdir_cache->vd_last.p.deblk
21649+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
21650+ file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
21651+ continue;
21652+ }
21653+ break;
21654+ }
21655+
21656+ /* smp_mb(); */
21657+ return 0;
21658+}
1308ab2a 21659diff -uprN -x .git linux-2.6.31/fs/aufs/vfsub.c aufs2-2.6.git/fs/aufs/vfsub.c
21660--- linux-2.6.31/fs/aufs/vfsub.c 1970-01-01 00:00:00.000000000 +0000
21661+++ aufs2-2.6.git/fs/aufs/vfsub.c 2009-09-21 21:49:23.411607814 +0000
21662@@ -0,0 +1,751 @@
1facf9fc 21663+/*
21664+ * Copyright (C) 2005-2009 Junjiro R. Okajima
21665+ *
21666+ * This program, aufs is free software; you can redistribute it and/or modify
21667+ * it under the terms of the GNU General Public License as published by
21668+ * the Free Software Foundation; either version 2 of the License, or
21669+ * (at your option) any later version.
dece6358
AM
21670+ *
21671+ * This program is distributed in the hope that it will be useful,
21672+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21673+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21674+ * GNU General Public License for more details.
21675+ *
21676+ * You should have received a copy of the GNU General Public License
21677+ * along with this program; if not, write to the Free Software
21678+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 21679+ */
21680+
21681+/*
21682+ * sub-routines for VFS
21683+ */
21684+
1308ab2a 21685+#include <linux/ima.h>
dece6358
AM
21686+#include <linux/namei.h>
21687+#include <linux/security.h>
21688+#include <linux/splice.h>
1facf9fc 21689+#include <linux/uaccess.h>
21690+#include "aufs.h"
21691+
21692+int vfsub_update_h_iattr(struct path *h_path, int *did)
21693+{
21694+ int err;
21695+ struct kstat st;
21696+ struct super_block *h_sb;
21697+
21698+ /* for remote fs, leave work for its getattr or d_revalidate */
21699+ /* for bad i_attr fs, handle them in aufs_getattr() */
21700+ /* still some fs may acquire i_mutex. we need to skip them */
21701+ err = 0;
21702+ if (!did)
21703+ did = &err;
21704+ h_sb = h_path->dentry->d_sb;
21705+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
21706+ if (*did)
21707+ err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
21708+
21709+ return err;
21710+}
21711+
21712+/* ---------------------------------------------------------------------- */
21713+
1308ab2a 21714+struct file *vfsub_dentry_open(struct path *path, int flags,
21715+ const struct cred *cred)
21716+{
21717+ struct file *file;
21718+
21719+ file = dentry_open(path->dentry, path->mnt, flags, cred);
21720+ if (IS_ERR(file))
21721+ return file;
21722+ /* as NFSD does, just call ima_..._get() simply after dentry_open */
21723+ ima_counts_get(file);
21724+ return file;
21725+}
21726+
1facf9fc 21727+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
21728+{
21729+ struct file *file;
21730+
21731+ lockdep_off();
21732+ file = filp_open(path, oflags, mode);
21733+ lockdep_on();
21734+ if (IS_ERR(file))
21735+ goto out;
21736+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
21737+
21738+ out:
21739+ return file;
21740+}
21741+
21742+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
21743+{
21744+ int err;
21745+
21746+ /* lockdep_off(); */
21747+ err = kern_path(name, flags, path);
21748+ /* lockdep_on(); */
21749+ if (!err && path->dentry->d_inode)
21750+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
21751+ return err;
21752+}
21753+
21754+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
21755+ int len)
21756+{
21757+ struct path path = {
21758+ .mnt = NULL
21759+ };
21760+
1308ab2a 21761+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 21762+ IMustLock(parent->d_inode);
21763+
21764+ path.dentry = lookup_one_len(name, parent, len);
21765+ if (IS_ERR(path.dentry))
21766+ goto out;
21767+ if (path.dentry->d_inode)
21768+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
21769+
21770+ out:
21771+ return path.dentry;
21772+}
21773+
21774+struct dentry *vfsub_lookup_hash(struct nameidata *nd)
21775+{
21776+ struct path path = {
21777+ .mnt = nd->path.mnt
21778+ };
21779+
21780+ IMustLock(nd->path.dentry->d_inode);
21781+
21782+ path.dentry = lookup_hash(nd);
21783+ if (!IS_ERR(path.dentry) && path.dentry->d_inode)
21784+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
21785+
21786+ return path.dentry;
21787+}
21788+
21789+/* ---------------------------------------------------------------------- */
21790+
/*
 * lock_rename() wrapper, called with lockdep switched off. Afterwards
 * au_hin_suspend() is applied to @hdir1, and to @hdir2 when it differs.
 * Returns the value of lock_rename().
 */
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
				 struct dentry *d2, struct au_hinode *hdir2)
{
	struct dentry *ret;

	lockdep_off();
	ret = lock_rename(d1, d2);
	lockdep_on();

	au_hin_suspend(hdir1);
	if (hdir2 != hdir1)
		au_hin_suspend(hdir2);
	return ret;
}
21805+
/*
 * Counterpart of vfsub_lock_rename(): au_hin_resume() is applied to @hdir1,
 * and to @hdir2 when it differs, then unlock_rename() is called with lockdep
 * switched off.
 */
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
			 struct dentry *d2, struct au_hinode *hdir2)
{
	au_hin_resume(hdir1);
	if (hdir2 != hdir1)
		au_hin_resume(hdir2);

	lockdep_off();
	unlock_rename(d1, d2);
	lockdep_on();
}
21816+
21817+/* ---------------------------------------------------------------------- */
21818+
21819+int vfsub_create(struct inode *dir, struct path *path, int mode)
21820+{
21821+ int err;
21822+ struct dentry *d;
21823+
21824+ IMustLock(dir);
21825+
21826+ d = path->dentry;
21827+ path->dentry = d->d_parent;
21828+ err = security_path_mknod(path, path->dentry, mode, 0);
21829+ path->dentry = d;
21830+ if (unlikely(err))
21831+ goto out;
21832+
21833+ if (au_test_fs_null_nd(dir->i_sb))
21834+ err = vfs_create(dir, path->dentry, mode, NULL);
21835+ else {
21836+ struct nameidata h_nd;
21837+
21838+ memset(&h_nd, 0, sizeof(h_nd));
21839+ h_nd.flags = LOOKUP_CREATE;
21840+ h_nd.intent.open.flags = O_CREAT
21841+ | vfsub_fmode_to_uint(FMODE_READ);
21842+ h_nd.intent.open.create_mode = mode;
21843+ h_nd.path.dentry = path->dentry->d_parent;
21844+ h_nd.path.mnt = path->mnt;
21845+ path_get(&h_nd.path);
21846+ err = vfs_create(dir, path->dentry, mode, &h_nd);
21847+ path_put(&h_nd.path);
21848+ }
21849+
21850+ if (!err) {
21851+ struct path tmp = *path;
21852+ int did;
21853+
21854+ vfsub_update_h_iattr(&tmp, &did);
21855+ if (did) {
21856+ tmp.dentry = path->dentry->d_parent;
21857+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
21858+ }
21859+ /*ignore*/
21860+ }
21861+
21862+ out:
21863+ return err;
21864+}
21865+
21866+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
21867+{
21868+ int err;
21869+ struct dentry *d;
21870+
21871+ IMustLock(dir);
21872+
21873+ d = path->dentry;
21874+ path->dentry = d->d_parent;
21875+ err = security_path_symlink(path, path->dentry, symname);
21876+ path->dentry = d;
21877+ if (unlikely(err))
21878+ goto out;
21879+
21880+ err = vfs_symlink(dir, path->dentry, symname);
21881+ if (!err) {
21882+ struct path tmp = *path;
21883+ int did;
21884+
21885+ vfsub_update_h_iattr(&tmp, &did);
21886+ if (did) {
21887+ tmp.dentry = path->dentry->d_parent;
21888+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
21889+ }
21890+ /*ignore*/
21891+ }
21892+
21893+ out:
21894+ return err;
21895+}
21896+
21897+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
21898+{
21899+ int err;
21900+ struct dentry *d;
21901+
21902+ IMustLock(dir);
21903+
21904+ d = path->dentry;
21905+ path->dentry = d->d_parent;
21906+ err = security_path_mknod(path, path->dentry, mode, dev);
21907+ path->dentry = d;
21908+ if (unlikely(err))
21909+ goto out;
21910+
21911+ err = vfs_mknod(dir, path->dentry, mode, dev);
21912+ if (!err) {
21913+ struct path tmp = *path;
21914+ int did;
21915+
21916+ vfsub_update_h_iattr(&tmp, &did);
21917+ if (did) {
21918+ tmp.dentry = path->dentry->d_parent;
21919+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
21920+ }
21921+ /*ignore*/
21922+ }
21923+
21924+ out:
21925+ return err;
21926+}
21927+
21928+static int au_test_nlink(struct inode *inode)
21929+{
21930+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
21931+
21932+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
21933+ || inode->i_nlink < link_max)
21934+ return 0;
21935+ return -EMLINK;
21936+}
21937+
21938+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
21939+{
21940+ int err;
21941+ struct dentry *d;
21942+
21943+ IMustLock(dir);
21944+
21945+ err = au_test_nlink(src_dentry->d_inode);
21946+ if (unlikely(err))
21947+ return err;
21948+
21949+ d = path->dentry;
21950+ path->dentry = d->d_parent;
21951+ err = security_path_link(src_dentry, path, path->dentry);
21952+ path->dentry = d;
21953+ if (unlikely(err))
21954+ goto out;
21955+
21956+ lockdep_off();
21957+ err = vfs_link(src_dentry, dir, path->dentry);
21958+ lockdep_on();
21959+ if (!err) {
21960+ struct path tmp = *path;
21961+ int did;
21962+
21963+ /* fuse has different memory inode for the same inumber */
21964+ vfsub_update_h_iattr(&tmp, &did);
21965+ if (did) {
21966+ tmp.dentry = path->dentry->d_parent;
21967+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
21968+ tmp.dentry = src_dentry;
21969+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
21970+ }
21971+ /*ignore*/
21972+ }
21973+
21974+ out:
21975+ return err;
21976+}
21977+
21978+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
21979+ struct inode *dir, struct path *path)
21980+{
21981+ int err;
21982+ struct path tmp = {
21983+ .mnt = path->mnt
21984+ };
21985+ struct dentry *d;
21986+
21987+ IMustLock(dir);
21988+ IMustLock(src_dir);
21989+
21990+ d = path->dentry;
21991+ path->dentry = d->d_parent;
21992+ tmp.dentry = src_dentry->d_parent;
21993+ err = security_path_rename(&tmp, src_dentry, path, path->dentry);
21994+ path->dentry = d;
21995+ if (unlikely(err))
21996+ goto out;
21997+
21998+ lockdep_off();
21999+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
22000+ lockdep_on();
22001+ if (!err) {
22002+ int did;
22003+
22004+ tmp.dentry = d->d_parent;
22005+ vfsub_update_h_iattr(&tmp, &did);
22006+ if (did) {
22007+ tmp.dentry = src_dentry;
22008+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
22009+ tmp.dentry = src_dentry->d_parent;
22010+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
22011+ }
22012+ /*ignore*/
22013+ }
22014+
22015+ out:
22016+ return err;
22017+}
22018+
22019+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
22020+{
22021+ int err;
22022+ struct dentry *d;
22023+
22024+ IMustLock(dir);
22025+
22026+ d = path->dentry;
22027+ path->dentry = d->d_parent;
22028+ err = security_path_mkdir(path, path->dentry, mode);
22029+ path->dentry = d;
22030+ if (unlikely(err))
22031+ goto out;
22032+
22033+ err = vfs_mkdir(dir, path->dentry, mode);
22034+ if (!err) {
22035+ struct path tmp = *path;
22036+ int did;
22037+
22038+ vfsub_update_h_iattr(&tmp, &did);
22039+ if (did) {
22040+ tmp.dentry = path->dentry->d_parent;
22041+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
22042+ }
22043+ /*ignore*/
22044+ }
22045+
22046+ out:
22047+ return err;
22048+}
22049+
22050+int vfsub_rmdir(struct inode *dir, struct path *path)
22051+{
22052+ int err;
22053+ struct dentry *d;
22054+
22055+ IMustLock(dir);
22056+
22057+ d = path->dentry;
22058+ path->dentry = d->d_parent;
22059+ err = security_path_rmdir(path, path->dentry);
22060+ path->dentry = d;
22061+ if (unlikely(err))
22062+ goto out;
22063+
22064+ lockdep_off();
22065+ err = vfs_rmdir(dir, path->dentry);
22066+ lockdep_on();
22067+ if (!err) {
22068+ struct path tmp = {
22069+ .dentry = path->dentry->d_parent,
22070+ .mnt = path->mnt
22071+ };
22072+
22073+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22074+ }
22075+
22076+ out:
22077+ return err;
22078+}
22079+
22080+/* ---------------------------------------------------------------------- */
22081+
22082+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22083+ loff_t *ppos)
22084+{
22085+ ssize_t err;
22086+
22087+ err = vfs_read(file, ubuf, count, ppos);
22088+ if (err >= 0)
22089+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22090+ return err;
22091+}
22092+
22093+/* todo: kernel_read()? */
22094+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22095+ loff_t *ppos)
22096+{
22097+ ssize_t err;
22098+ mm_segment_t oldfs;
22099+
22100+ oldfs = get_fs();
22101+ set_fs(KERNEL_DS);
22102+ err = vfsub_read_u(file, (char __user *)kbuf, count, ppos);
22103+ set_fs(oldfs);
22104+ return err;
22105+}
22106+
22107+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22108+ loff_t *ppos)
22109+{ /* vfs_write() from the user buffer ubuf; returns vfs_write()'s result unchanged */
22110+ ssize_t err;
22111+
22112+ lockdep_off(); /* lockdep is switched off only around the vfs call */
22113+ err = vfs_write(file, ubuf, count, ppos);
22114+ lockdep_on();
22115+ if (err >= 0) /* only when the write did not fail */
22116+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22117+ return err;
22118+}
22119+
22120+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
22121+{ /* vfsub_write_u() for a kernel-space source buffer kbuf */
22122+ ssize_t err;
22123+ mm_segment_t oldfs;
22124+
22125+ oldfs = get_fs(); /* saved to be restored after the write */
22126+ set_fs(KERNEL_DS); /* lets kbuf be passed where a __user pointer is expected */
22127+ err = vfsub_write_u(file, (const char __user *)kbuf, count, ppos);
22128+ set_fs(oldfs);
22129+ return err;
22130+}
22131+
22132+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
22133+{ /* vfs_readdir() with lockdep switched off; returns vfs_readdir()'s result unchanged */
22134+ int err;
22135+
22136+ lockdep_off();
22137+ err = vfs_readdir(file, filldir, arg);
22138+ lockdep_on();
22139+ if (err >= 0) /* only when readdir did not fail */
22140+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22141+ return err;
22142+}
22143+
22144+long vfsub_splice_to(struct file *in, loff_t *ppos,
22145+ struct pipe_inode_info *pipe, size_t len,
22146+ unsigned int flags)
22147+{ /* do_splice_to() with lockdep switched off; returns its result unchanged */
22148+ long err;
22149+
22150+ lockdep_off();
0fc653ad 22151+ err = do_splice_to(in, ppos, pipe, len, flags);
1facf9fc 22152+ lockdep_on();
22153+ if (err >= 0) /* only when the splice did not fail */
22154+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
22155+ return err;
22156+}
22157+
22158+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22159+ loff_t *ppos, size_t len, unsigned int flags)
22160+{ /* do_splice_from() with lockdep switched off; returns its result unchanged */
22161+ long err;
22162+
22163+ lockdep_off();
0fc653ad 22164+ err = do_splice_from(pipe, out, ppos, len, flags);
1facf9fc 22165+ lockdep_on();
22166+ if (err >= 0) /* only when the splice did not fail */
22167+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
22168+ return err;
22169+}
22170+
22171+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
22172+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22173+ struct file *h_file)
22174+{ /* truncate h_path->dentry to length; h_file may be NULL, then write access is set up here */
22175+ int err;
22176+ struct inode *h_inode;
22177+
22178+ h_inode = h_path->dentry->d_inode;
22179+ if (!h_file) { /* no open file given: acquire mount/inode write access ourselves */
22180+ err = mnt_want_write(h_path->mnt);
22181+ if (err)
22182+ goto out;
22183+ err = inode_permission(h_inode, MAY_WRITE);
22184+ if (err)
22185+ goto out_mnt;
22186+ err = get_write_access(h_inode);
22187+ if (err)
22188+ goto out_mnt;
22189+ err = break_lease(h_inode, vfsub_fmode_to_uint(FMODE_WRITE));
22190+ if (err)
22191+ goto out_inode;
22192+ }
22193+
22194+ err = locks_verify_truncate(h_inode, h_file, length);
22195+ if (!err)
22196+ err = security_path_truncate(h_path, length, attr);
22197+ if (!err) {
22198+ lockdep_off();
22199+ err = do_truncate(h_path->dentry, length, attr, h_file);
22200+ lockdep_on();
22201+ }
22202+
22203+ out_inode:
22204+ if (!h_file) /* pairs with get_write_access() above */
22205+ put_write_access(h_inode);
22206+ out_mnt:
22207+ if (!h_file) /* pairs with mnt_want_write() above */
22208+ mnt_drop_write(h_path->mnt);
22209+ out:
22210+ return err;
22211+}
22212+
22213+/* ---------------------------------------------------------------------- */
22214+
22215+struct au_vfsub_mkdir_args { /* argument bundle for au_call_vfsub_mkdir() */
22216+ int *errp; /* where the result of vfsub_mkdir() is stored */
22217+ struct inode *dir; /* passed through to vfsub_mkdir() */
22218+ struct path *path; /* passed through to vfsub_mkdir() */
22219+ int mode; /* passed through to vfsub_mkdir() */
22220+};
22221+
22222+static void au_call_vfsub_mkdir(void *args)
22223+{ /* callback handed to au_wkq_wait(); args is a struct au_vfsub_mkdir_args */
22224+ struct au_vfsub_mkdir_args *a = args;
22225+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode); /* result goes back through errp */
22226+}
22227+
22228+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
22229+{ /* vfsub_mkdir(), called directly or through au_wkq_wait() depending on au_test_h_perm_sio() */
22230+ int err, do_sio, wkq_err;
22231+
22232+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); /* NOTE(review): helper defined elsewhere; non-zero selects the au_wkq_wait() path */
22233+ if (!do_sio)
22234+ err = vfsub_mkdir(dir, path, mode);
22235+ else {
22236+ struct au_vfsub_mkdir_args args = {
22237+ .errp = &err, /* the callback stores its result in err */
22238+ .dir = dir,
22239+ .path = path,
22240+ .mode = mode
22241+ };
22242+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
22243+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
22244+ err = wkq_err;
22245+ }
22246+
22247+ return err;
22248+}
22249+
22250+struct au_vfsub_rmdir_args { /* argument bundle for au_call_vfsub_rmdir() */
22251+ int *errp; /* where the result of vfsub_rmdir() is stored */
22252+ struct inode *dir; /* passed through to vfsub_rmdir() */
22253+ struct path *path; /* passed through to vfsub_rmdir() */
22254+};
22255+
22256+static void au_call_vfsub_rmdir(void *args)
22257+{ /* callback handed to au_wkq_wait(); args is a struct au_vfsub_rmdir_args */
22258+ struct au_vfsub_rmdir_args *a = args;
22259+ *a->errp = vfsub_rmdir(a->dir, a->path); /* result goes back through errp */
22260+}
22261+
22262+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
22263+{ /* vfsub_rmdir(), called directly or through au_wkq_wait() depending on au_test_h_perm_sio() */
22264+ int err, do_sio, wkq_err;
22265+
22266+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); /* NOTE(review): helper defined elsewhere; non-zero selects the au_wkq_wait() path */
22267+ if (!do_sio)
22268+ err = vfsub_rmdir(dir, path);
22269+ else {
22270+ struct au_vfsub_rmdir_args args = {
22271+ .errp = &err, /* the callback stores its result in err */
22272+ .dir = dir,
22273+ .path = path
22274+ };
22275+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
22276+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
22277+ err = wkq_err;
22278+ }
22279+
22280+ return err;
22281+}
22282+
22283+/* ---------------------------------------------------------------------- */
22284+
22285+struct notify_change_args { /* argument bundle for call_notify_change() */
22286+ int *errp; /* where the result is stored */
22287+ struct path *path; /* path->dentry is the target of notify_change() */
22288+ struct iattr *ia; /* attributes handed to notify_change() */
22289+};
22290+
22291+static void call_notify_change(void *args)
22292+{ /* notify_change() on a->path->dentry; used directly and as an au_wkq_wait() callback */
22293+ struct notify_change_args *a = args;
22294+ struct inode *h_inode;
22295+
22296+ h_inode = a->path->dentry->d_inode;
22297+ IMustLock(h_inode); /* debug check: the target inode's i_mutex must be locked */
22298+
22299+ *a->errp = -EPERM; /* result when the inode is immutable or append-only */
22300+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
22301+ lockdep_off();
22302+ *a->errp = notify_change(a->path->dentry, a->ia);
22303+ lockdep_on();
22304+ if (!*a->errp) /* only on success */
22305+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
22306+ }
22307+ AuTraceErr(*a->errp);
22308+}
22309+
22310+int vfsub_notify_change(struct path *path, struct iattr *ia)
22311+{ /* runs call_notify_change() directly in the calling context and returns its result */
22312+ int err;
22313+ struct notify_change_args args = {
22314+ .errp = &err, /* call_notify_change() always stores a value here */
22315+ .path = path,
22316+ .ia = ia
22317+ };
22318+
22319+ call_notify_change(&args);
22320+
22321+ return err;
22322+}
22323+
22324+int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
22325+{ /* runs call_notify_change() through au_wkq_wait() and returns its result */
22326+ int err, wkq_err;
22327+ struct notify_change_args args = {
22328+ .errp = &err, /* the callback stores its result in err */
22329+ .path = path,
22330+ .ia = ia
22331+ };
22332+
22333+ wkq_err = au_wkq_wait(call_notify_change, &args);
22334+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
22335+ err = wkq_err;
22336+
22337+ return err;
22338+}
22339+
22340+/* ---------------------------------------------------------------------- */
22341+
22342+struct unlink_args { /* argument bundle for call_unlink() */
22343+ int *errp; /* where the result is stored */
22344+ struct inode *dir; /* parent directory inode, passed to vfs_unlink() */
22345+ struct path *path; /* path->dentry is the entry to unlink */
22346+};
22347+
22348+static void call_unlink(void *args)
22349+{ /* security_path_unlink() check, then vfs_unlink(); result is stored in *a->errp */
22350+ struct unlink_args *a = args;
22351+ struct dentry *d = a->path->dentry;
22352+ struct inode *h_inode;
22353+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
22354+ && atomic_read(&d->d_count) == 1); /* true only on nfs when d has a single reference */
22355+
22356+ IMustLock(a->dir); /* debug check: a->dir->i_mutex must be locked */
22357+
22358+ a->path->dentry = d->d_parent; /* the hook takes the parent directory as a path */
22359+ *a->errp = security_path_unlink(a->path, d);
22360+ a->path->dentry = d; /* restore the caller's path */
22361+ if (unlikely(*a->errp))
22362+ return;
22363+
22364+ if (!stop_sillyrename) /* otherwise hold an extra dentry reference across vfs_unlink() */
22365+ dget(d);
22366+ h_inode = d->d_inode;
22367+ if (h_inode) /* hold an extra inode reference across vfs_unlink() */
22368+ atomic_inc(&h_inode->i_count);
22369+
22370+ lockdep_off();
22371+ *a->errp = vfs_unlink(a->dir, d);
22372+ lockdep_on();
22373+ if (!*a->errp) {
22374+ struct path tmp = {
22375+ .dentry = d->d_parent,
22376+ .mnt = a->path->mnt
22377+ }; /* on success vfsub_update_h_iattr() is called for the parent */
22378+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22379+ }
22380+
22381+ if (!stop_sillyrename) /* pairs with dget() above */
22382+ dput(d);
22383+ if (h_inode) /* pairs with the i_count increment above */
22384+ iput(h_inode);
22385+
22386+ AuTraceErr(*a->errp);
22387+}
22388+
22389+/*
22390+ * @dir: must be locked.
22391+ * @path: path->dentry is the target; @force: non-zero runs call_unlink() via au_wkq_wait().
22392+ */
22393+int vfsub_unlink(struct inode *dir, struct path *path, int force)
22394+{
22395+ int err;
22396+ struct unlink_args args = {
22397+ .errp = &err, /* call_unlink() stores its result in err */
22398+ .dir = dir,
22399+ .path = path
22400+ };
22401+
22402+ if (!force)
22403+ call_unlink(&args); /* run in the calling context */
22404+ else {
22405+ int wkq_err;
22406+
22407+ wkq_err = au_wkq_wait(call_unlink, &args);
22408+ if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself overrides err */
22409+ err = wkq_err;
22410+ }
22411+
22412+ return err;
22413+}
1308ab2a 22414diff -uprN -x .git linux-2.6.31/fs/aufs/vfsub.h aufs2-2.6.git/fs/aufs/vfsub.h
22415--- linux-2.6.31/fs/aufs/vfsub.h 1970-01-01 00:00:00.000000000 +0000
22416+++ aufs2-2.6.git/fs/aufs/vfsub.h 2009-09-21 21:49:23.411607814 +0000
dece6358 22417@@ -0,0 +1,172 @@
1facf9fc 22418+/*
22419+ * Copyright (C) 2005-2009 Junjiro R. Okajima
22420+ *
22421+ * This program, aufs is free software; you can redistribute it and/or modify
22422+ * it under the terms of the GNU General Public License as published by
22423+ * the Free Software Foundation; either version 2 of the License, or
22424+ * (at your option) any later version.
dece6358
AM
22425+ *
22426+ * This program is distributed in the hope that it will be useful,
22427+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22428+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22429+ * GNU General Public License for more details.
22430+ *
22431+ * You should have received a copy of the GNU General Public License
22432+ * along with this program; if not, write to the Free Software
22433+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22434+ */
22435+
22436+/*
22437+ * sub-routines for VFS
22438+ */
22439+
22440+#ifndef __AUFS_VFSUB_H__
22441+#define __AUFS_VFSUB_H__
22442+
22443+#ifdef __KERNEL__
22444+
22445+#include <linux/fs.h>
22446+#include <linux/fs_stack.h>
1facf9fc 22447+
22448+/* ---------------------------------------------------------------------- */
22449+
22450+/* lock subclass for lower inode */
22451+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
22452+/* reduce? gave up. */
22453+enum { /* values continue upward from I_MUTEX_QUOTA */
22454+ AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
22455+ AuLsc_I_PARENT, /* lower inode, parent first */
22456+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 22457+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 22458+ AuLsc_I_CHILD, /* used with mutex_lock_nested() on a child of an already locked dir */
22459+ AuLsc_I_CHILD2,
22460+ AuLsc_I_End /* one past the last subclass */
22461+};
22462+
22463+/* to debug easier, do not make them inlined functions */
22464+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx)) /* AuDebugOn() fires when mtx is not locked */
22465+#define IMustLock(i) MtxMustLock(&(i)->i_mutex) /* same check on inode i's i_mutex */
22466+
22467+/* ---------------------------------------------------------------------- */
22468+
22469+static inline void vfsub_copy_inode_size(struct inode *inode,
22470+ struct inode *h_inode)
22471+{ /* fsstack_copy_inode_size(inode, h_inode) done under inode->i_lock */
22472+ spin_lock(&inode->i_lock);
22473+ fsstack_copy_inode_size(inode, h_inode);
22474+ spin_unlock(&inode->i_lock);
22475+}
22476+
22477+int vfsub_update_h_iattr(struct path *h_path, int *did);
22478+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
1308ab2a 22479+struct file *vfsub_dentry_open(struct path *path, int flags,
22480+ const struct cred *cred);
1facf9fc 22481+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
22482+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
22483+ int len);
22484+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
22485+
22486+/* ---------------------------------------------------------------------- */
22487+
22488+struct au_hinode;
22489+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
22490+ struct dentry *d2, struct au_hinode *hdir2);
22491+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
22492+ struct dentry *d2, struct au_hinode *hdir2);
22493+
22494+int vfsub_create(struct inode *dir, struct path *path, int mode);
22495+int vfsub_symlink(struct inode *dir, struct path *path,
22496+ const char *symname);
22497+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
22498+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
22499+ struct path *path);
22500+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
22501+ struct inode *hdir, struct path *path);
22502+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
22503+int vfsub_rmdir(struct inode *dir, struct path *path);
22504+
1308ab2a 22505+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
22506+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
22507+int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
22508+int vfsub_notify_change(struct path *path, struct iattr *ia);
22509+int vfsub_unlink(struct inode *dir, struct path *path, int force);
22510+
1facf9fc 22511+/* ---------------------------------------------------------------------- */
22512+
22513+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22514+ loff_t *ppos);
22515+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22516+ loff_t *ppos);
22517+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22518+ loff_t *ppos);
22519+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
22520+ loff_t *ppos);
22521+int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
22522+
1308ab2a 22523+long vfsub_splice_to(struct file *in, loff_t *ppos,
22524+ struct pipe_inode_info *pipe, size_t len,
22525+ unsigned int flags);
22526+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22527+ loff_t *ppos, size_t len, unsigned int flags);
22528+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22529+ struct file *h_file);
22530+
1facf9fc 22531+static inline void vfsub_file_accessed(struct file *h_file)
22532+{ /* file_accessed() followed by vfsub_update_h_iattr() on the file's path */
22533+ file_accessed(h_file);
22534+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
22535+}
22536+
22537+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
22538+ struct dentry *h_dentry)
22539+{ /* touch_atime() followed by vfsub_update_h_iattr() on the same mnt/dentry pair */
22540+ struct path h_path = {
22541+ .dentry = h_dentry,
22542+ .mnt = h_mnt
22543+ }; /* built only to call vfsub_update_h_iattr() */
22544+ touch_atime(h_mnt, h_dentry);
22545+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
22546+}
22547+
1facf9fc 22548+/* ---------------------------------------------------------------------- */
22549+
22550+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
22551+{ /* vfs_llseek() with lockdep switched off; returns its result unchanged */
22552+ loff_t err;
22553+
22554+ lockdep_off();
22555+ err = vfs_llseek(file, offset, origin);
22556+ lockdep_on();
22557+ return err;
22558+}
22559+
22560+/* ---------------------------------------------------------------------- */
22561+
22562+/* dirty workaround for strict type of fmode_t */
22563+union vfsub_fmu { /* overlays fmode_t and unsigned int for conversion without a cast */
22564+ fmode_t fm; /* the fmode_t view */
22565+ unsigned int ui; /* the unsigned int view of the same storage */
22566+};
22567+
22568+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
22569+{ /* returns fm reinterpreted as unsigned int through union vfsub_fmu */
22570+ union vfsub_fmu u = {
22571+ .fm = fm
22572+ };
22573+
22574+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui)); /* build fails unless both members have the same size */
22575+
22576+ return u.ui;
22577+}
22578+
22579+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
22580+{ /* returns ui reinterpreted as fmode_t through union vfsub_fmu */
22581+ union vfsub_fmu u = {
22582+ .ui = ui
22583+ };
22584+
22585+ return u.fm;
22586+}
22587+
1facf9fc 22588+#endif /* __KERNEL__ */
22589+#endif /* __AUFS_VFSUB_H__ */
1308ab2a 22590diff -uprN -x .git linux-2.6.31/fs/aufs/wbr_policy.c aufs2-2.6.git/fs/aufs/wbr_policy.c
22591--- linux-2.6.31/fs/aufs/wbr_policy.c 1970-01-01 00:00:00.000000000 +0000
22592+++ aufs2-2.6.git/fs/aufs/wbr_policy.c 2009-09-21 21:49:23.411607814 +0000
dece6358 22593@@ -0,0 +1,641 @@
1facf9fc 22594+/*
22595+ * Copyright (C) 2005-2009 Junjiro R. Okajima
22596+ *
22597+ * This program, aufs is free software; you can redistribute it and/or modify
22598+ * it under the terms of the GNU General Public License as published by
22599+ * the Free Software Foundation; either version 2 of the License, or
22600+ * (at your option) any later version.
dece6358
AM
22601+ *
22602+ * This program is distributed in the hope that it will be useful,
22603+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22604+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22605+ * GNU General Public License for more details.
22606+ *
22607+ * You should have received a copy of the GNU General Public License
22608+ * along with this program; if not, write to the Free Software
22609+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 22610+ */
22611+
22612+/*
22613+ * policies for selecting one among multiple writable branches
22614+ */
22615+
22616+#include <linux/statfs.h>
22617+#include "aufs.h"
22618+
22619+/* subset of cpup_attr() */
22620+static noinline_for_stack
22621+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
22622+{ /* apply mode, uid and gid of h_src's inode to h_path via vfsub_sio_notify_change() */
22623+ int err, sbits;
22624+ struct iattr ia;
22625+ struct inode *h_isrc;
22626+
22627+ h_isrc = h_src->d_inode;
22628+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
22629+ ia.ia_mode = h_isrc->i_mode;
22630+ ia.ia_uid = h_isrc->i_uid;
22631+ ia.ia_gid = h_isrc->i_gid;
22632+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); /* 1 when the source has a setuid or setgid bit */
22633+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
22634+ err = vfsub_sio_notify_change(h_path, &ia);
22635+
22636+ /* is this nfs only? */
22637+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { /* on nfs the mode is applied a second time */
22638+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
22639+ ia.ia_mode = h_isrc->i_mode;
22640+ err = vfsub_sio_notify_change(h_path, &ia);
22641+ }
22642+
22643+ return err;
22644+}
22645+
22646+#define AuCpdown_PARENT_OPQ 1 /* bit flags kept in au_cpdown_dir_args.flags */
22647+#define AuCpdown_WHED (1 << 1) /* set in au_cpdown_dir() when au_dbwh(dentry) == bdst */
22648+#define AuCpdown_MADE_DIR (1 << 2) /* set after vfsub_sio_mkdir() succeeded */
22649+#define AuCpdown_DIROPQ (1 << 3) /* set after au_diropq_create() succeeded */
22650+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) /* non-zero when the flag is set */
22651+#define au_fset_cpdown(flags, name) { (flags) |= AuCpdown_##name; } /* set the flag */
22652+#define au_fclr_cpdown(flags, name) { (flags) &= ~AuCpdown_##name; } /* clear the flag */
22653+
22654+struct au_cpdown_dir_args { /* state passed to au_cpdown_dir() through au_cp_dirs() */
22655+ struct dentry *parent; /* set by au_cpdown_dirs() to dget_parent() of its dentry */
22656+ unsigned int flags; /* AuCpdown_* bits */
22657+};
22658+
22659+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
22660+ struct au_cpdown_dir_args *a)
22661+{ /* au_diropq_create() on branch bdst; on success sets AuCpdown_DIROPQ in a->flags */
22662+ int err;
22663+ struct dentry *opq_dentry;
22664+
22665+ opq_dentry = au_diropq_create(dentry, bdst);
22666+ err = PTR_ERR(opq_dentry); /* NOTE(review): on success this is the pointer value, not 0 - confirm callers only test for non-zero on the error path */
22667+ if (IS_ERR(opq_dentry))
22668+ goto out;
22669+ dput(opq_dentry); /* the returned dentry itself is not needed */
22670+ au_fset_cpdown(a->flags, DIROPQ);
22671+
22672+ out:
22673+ return err;
22674+}
22675+
22676+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
22677+ struct inode *dir, aufs_bindex_t bdst)
22678+{ /* look up the whiteout for dentry's name under h_parent and unlink it if it exists */
22679+ int err;
22680+ struct path h_path;
22681+ struct au_branch *br;
22682+
22683+ br = au_sbr(dentry->d_sb, bdst);
22684+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
22685+ err = PTR_ERR(h_path.dentry);
22686+ if (IS_ERR(h_path.dentry))
22687+ goto out;
22688+
22689+ err = 0; /* a negative lookup result is not an error */
22690+ if (h_path.dentry->d_inode) { /* the whiteout exists */
22691+ h_path.mnt = br->br_mnt;
22692+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
22693+ dentry);
22694+ }
22695+ dput(h_path.dentry);
22696+
22697+ out:
22698+ return err;
22699+}
22700+
22701+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
22702+ struct dentry *h_parent, void *arg)
22703+{ /* au_cp_dirs() callback: make dentry's directory on branch bdst under h_parent; arg is a struct au_cpdown_dir_args */
22704+ int err, rerr;
22705+ aufs_bindex_t bend, bopq, bstart;
22706+ unsigned char parent_opq;
22707+ struct path h_path;
22708+ struct dentry *parent;
22709+ struct inode *h_dir, *h_inode, *inode, *dir;
22710+ struct au_cpdown_dir_args *args = arg;
22711+
22712+ bstart = au_dbstart(dentry);
22713+ /* dentry is di-locked */
22714+ parent = dget_parent(dentry);
22715+ dir = parent->d_inode;
22716+ h_dir = h_parent->d_inode;
22717+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
22718+ IMustLock(h_dir); /* debug check: h_dir->i_mutex must be locked */
22719+
22720+ err = au_lkup_neg(dentry, bdst);
22721+ if (unlikely(err < 0))
22722+ goto out;
22723+ h_path.dentry = au_h_dptr(dentry, bdst);
22724+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
22725+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
22726+ S_IRWXU | S_IRUGO | S_IXUGO); /* mode 0755; au_cpdown_attr() below applies the source's mode */
22727+ if (unlikely(err))
22728+ goto out_put;
22729+ au_fset_cpdown(args->flags, MADE_DIR); /* tells the revert path to rmdir */
22730+
22731+ bend = au_dbend(dentry);
22732+ bopq = au_dbdiropq(dentry);
22733+ au_fclr_cpdown(args->flags, WHED); /* WHED and DIROPQ are recomputed on every call */
22734+ au_fclr_cpdown(args->flags, DIROPQ);
22735+ if (au_dbwh(dentry) == bdst)
22736+ au_fset_cpdown(args->flags, WHED);
22737+ if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
22738+ au_fset_cpdown(args->flags, PARENT_OPQ);
22739+ parent_opq = (au_ftest_cpdown(args->flags, PARENT_OPQ)
22740+ && args->parent == dentry); /* NOTE(review): parent_opq is assigned here but never read in this function */
22741+ h_inode = h_path.dentry->d_inode;
22742+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); /* lock the new dir for the diropq and attribute steps */
22743+ if (au_ftest_cpdown(args->flags, WHED)) {
22744+ err = au_cpdown_dir_opq(dentry, bdst, args);
22745+ if (unlikely(err)) {
22746+ mutex_unlock(&h_inode->i_mutex);
22747+ goto out_dir;
22748+ }
22749+ }
22750+
22751+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart)); /* attributes come from the dentry on branch bstart */
22752+ mutex_unlock(&h_inode->i_mutex);
22753+ if (unlikely(err))
22754+ goto out_opq;
22755+
22756+ if (au_ftest_cpdown(args->flags, WHED)) {
22757+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
22758+ if (unlikely(err))
22759+ goto out_opq;
22760+ }
22761+
22762+ inode = dentry->d_inode;
22763+ if (au_ibend(inode) < bdst)
22764+ au_set_ibend(inode, bdst);
22765+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
22766+ au_hi_flags(inode, /*isdir*/1));
22767+ goto out; /* success */
22768+
22769+ /* revert */
22770+ out_opq:
22771+ if (au_ftest_cpdown(args->flags, DIROPQ)) { /* undo au_cpdown_dir_opq() */
22772+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
22773+ rerr = au_diropq_remove(dentry, bdst);
22774+ mutex_unlock(&h_inode->i_mutex);
22775+ if (unlikely(rerr)) {
22776+ AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
22777+ AuDLNPair(dentry), bdst, rerr);
22778+ err = -EIO;
22779+ goto out; /* the rmdir and h_dptr reset below are skipped in this case */
22780+ }
22781+ }
22782+ out_dir:
22783+ if (au_ftest_cpdown(args->flags, MADE_DIR)) { /* undo vfsub_sio_mkdir() */
22784+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
22785+ if (unlikely(rerr)) {
22786+ AuIOErr("failed removing %.*s b%d (%d)\n",
22787+ AuDLNPair(dentry), bdst, rerr);
22788+ err = -EIO;
22789+ }
22790+ }
22791+ out_put:
22792+ au_set_h_dptr(dentry, bdst, NULL); /* drop the dentry set up for branch bdst */
22793+ if (au_dbend(dentry) == bdst)
22794+ au_update_dbend(dentry);
22795+ out:
22796+ dput(parent); /* pairs with dget_parent() above */
22797+ return err;
22798+}
22799+
22800+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
22801+{ /* runs au_cp_dirs() with au_cpdown_dir() as the per-directory callback; returns its result */
22802+ int err;
22803+ struct au_cpdown_dir_args args = {
22804+ .parent = dget_parent(dentry), /* reference dropped below */
22805+ .flags = 0
22806+ };
22807+
22808+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
22809+ dput(args.parent);
22810+
22811+ return err;
22812+}
22813+
22814+/* ---------------------------------------------------------------------- */
22815+
22816+/* policies for create */
22817+
22818+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
22819+{ /* scan from bindex down to 0 for a branch that is not read-only */
22820+ for (; bindex >= 0; bindex--)
22821+ if (!au_br_rdonly(au_sbr(sb, bindex)))
22822+ return bindex; /* first writable branch found */
22823+ return -EROFS; /* none found (also when bindex was already negative) */
22824+}
22825+
22826+/* top down parent */
22827+static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
22828+{ /* returns a writable branch index for dentry, or -EROFS */
22829+ int err;
22830+ aufs_bindex_t bstart, bindex;
22831+ struct super_block *sb;
22832+ struct dentry *parent, *h_parent;
22833+
22834+ sb = dentry->d_sb;
22835+ bstart = au_dbstart(dentry);
22836+ err = bstart;
22837+ if (!au_br_rdonly(au_sbr(sb, bstart))) /* dentry's own start branch is writable: use it */
22838+ goto out;
22839+
22840+ err = -EROFS;
22841+ parent = dget_parent(dentry);
22842+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) { /* branches above bstart where the parent exists */
22843+ h_parent = au_h_dptr(parent, bindex);
22844+ if (!h_parent || !h_parent->d_inode)
22845+ continue;
22846+
22847+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
22848+ err = bindex;
22849+ break;
22850+ }
22851+ }
22852+ dput(parent);
22853+
22854+ /* bottom up here */
22855+ if (unlikely(err < 0))
22856+ err = au_wbr_bu(sb, bstart - 1); /* fall back to any writable branch above bstart */
22857+
22858+ out:
22859+ AuDbg("b%d\n", err);
22860+ return err;
22861+}
22862+
22863+/* ---------------------------------------------------------------------- */
22864+
22865+/* an exception for the policy other than tdp */
22866+static int au_wbr_create_exp(struct dentry *dentry)
22867+{ /* returns a branch index taken from au_dbwh(dentry) / au_dbdiropq(parent), or -1 when none applies */
22868+ int err;
22869+ aufs_bindex_t bwh, bdiropq;
22870+ struct dentry *parent;
22871+
22872+ err = -1; /* -1 means "no exception, use the regular policy" to the callers in this file */
22873+ bwh = au_dbwh(dentry);
22874+ parent = dget_parent(dentry);
22875+ bdiropq = au_dbdiropq(parent);
22876+ if (bwh >= 0) {
22877+ if (bdiropq >= 0)
22878+ err = min(bdiropq, bwh); /* both set: take the smaller index */
22879+ else
22880+ err = bwh;
22881+ AuDbg("%d\n", err);
22882+ } else if (bdiropq >= 0) {
22883+ err = bdiropq;
22884+ AuDbg("%d\n", err);
22885+ }
22886+ dput(parent);
22887+
22888+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) /* a read-only branch cannot be used */
22889+ err = -1;
22890+
22891+ AuDbg("%d\n", err);
22892+ return err;
22893+}
22894+
22895+/* ---------------------------------------------------------------------- */
22896+
22897+/* round robin */
22898+static int au_wbr_create_init_rr(struct super_block *sb)
22899+{ /* seed si_wbr_rr_next from the bottom-most writable branch; returns au_wbr_bu()'s result */
22900+ int err;
22901+
22902+ err = au_wbr_bu(sb, au_sbend(sb));
22903+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 22904+ /* smp_mb(); */
1facf9fc 22905+
22906+ AuDbg("b%d\n", err);
22907+ return err;
22908+}
22909+
22910+static int au_wbr_create_rr(struct dentry *dentry, int isdir)
22911+{ /* returns au_wbr_create_exp()'s branch if any, else a counter-selected writable branch or -EROFS */
22912+ int err, nbr;
22913+ unsigned int u;
22914+ aufs_bindex_t bindex, bend;
22915+ struct super_block *sb;
22916+ atomic_t *next;
22917+
22918+ err = au_wbr_create_exp(dentry);
22919+ if (err >= 0)
22920+ goto out;
22921+
22922+ sb = dentry->d_sb;
22923+ next = &au_sbi(sb)->si_wbr_rr_next;
22924+ bend = au_sbend(sb);
22925+ nbr = bend + 1; /* number of branches */
22926+ for (bindex = 0; bindex <= bend; bindex++) { /* bindex only bounds the number of attempts */
22927+ if (!isdir) {
22928+ err = atomic_dec_return(next) + 1; /* value before the decrement */
22929+ /* modulo for 0 is meaningless */
22930+ if (unlikely(!err))
22931+ err = atomic_dec_return(next) + 1;
22932+ } else
22933+ err = atomic_read(next); /* directories do not advance the counter */
22934+ AuDbg("%d\n", err);
22935+ u = err; /* go through unsigned so the modulo result is in [0, nbr) */
22936+ err = u % nbr;
22937+ AuDbg("%d\n", err);
22938+ if (!au_br_rdonly(au_sbr(sb, err)))
22939+ break;
22940+ err = -EROFS; /* stays when every attempt hit a read-only branch */
22941+ }
22942+
22943+ out:
22944+ AuDbg("%d\n", err);
22945+ return err;
22946+}
22947+
22948+/* ---------------------------------------------------------------------- */
22949+
22950+/* most free space */
22951+static void au_mfs(struct dentry *dentry)
22952+{ /* rescan the writable branches with vfs_statfs() and record the one with most free bytes in si_wbr_mfs */
22953+ struct super_block *sb;
22954+ struct au_branch *br;
22955+ struct au_wbr_mfs *mfs;
22956+ aufs_bindex_t bindex, bend;
22957+ int err;
22958+ unsigned long long b, bavail;
22959+ /* reduce the stack usage */
22960+ struct kstatfs *st;
22961+
22962+ st = kmalloc(sizeof(*st), GFP_NOFS);
22963+ if (unlikely(!st)) {
22964+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
22965+ return; /* the previous mfs values are left untouched */
22966+ }
22967+
22968+ bavail = 0;
22969+ sb = dentry->d_sb;
22970+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 22971+ MtxMustLock(&mfs->mfs_lock); /* debug check: caller must have locked mfs_lock */
1facf9fc 22972+ mfs->mfs_bindex = -EROFS; /* result when no writable branch can be statfs'ed */
22973+ mfs->mfsrr_bytes = 0;
22974+ bend = au_sbend(sb);
22975+ for (bindex = 0; bindex <= bend; bindex++) {
22976+ br = au_sbr(sb, bindex);
22977+ if (au_br_rdonly(br))
22978+ continue;
22979+
22980+ /* sb->s_root for NFS is unreliable */
22981+ err = vfs_statfs(br->br_mnt->mnt_root, st);
22982+ if (unlikely(err)) {
22983+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
22984+ continue; /* this branch is skipped, not treated as fatal */
22985+ }
22986+
22987+ /* when the available size is equal, select the lower one */
22988+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
22989+ || sizeof(b) < sizeof(st->f_bsize));
22990+ b = st->f_bavail * st->f_bsize; /* available bytes on this branch */
22991+ br->br_wbr->wbr_bytes = b;
22992+ if (b >= bavail) { /* ">=": on a tie the later bindex wins */
22993+ bavail = b;
22994+ mfs->mfs_bindex = bindex;
22995+ mfs->mfs_jiffy = jiffies; /* time of this update, compared against mfs_expire by the caller */
22996+ }
22997+ }
22998+
22999+ mfs->mfsrr_bytes = bavail;
23000+ AuDbg("b%d\n", mfs->mfs_bindex);
23001+ kfree(st);
23002+}
23003+
23004+static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
23005+{ /* returns au_wbr_create_exp()'s branch if any, else the cached most-free-space branch (or -EROFS) */
23006+ int err;
23007+ struct super_block *sb;
23008+ struct au_wbr_mfs *mfs;
23009+
23010+ err = au_wbr_create_exp(dentry);
23011+ if (err >= 0)
23012+ goto out;
23013+
23014+ sb = dentry->d_sb;
23015+ mfs = &au_sbi(sb)->si_wbr_mfs;
23016+ mutex_lock(&mfs->mfs_lock);
23017+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
23018+ || mfs->mfs_bindex < 0
23019+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) /* cache expired, empty, or now read-only: rescan */
23020+ au_mfs(dentry);
23021+ err = mfs->mfs_bindex; /* read under mfs_lock: au_mfs() resets it to -EROFS while rescanning */
23022+ mutex_unlock(&mfs->mfs_lock);
23023+
23024+ out:
23025+ AuDbg("b%d\n", err);
23026+ return err;
23027+}
23028+
23029+static int au_wbr_create_init_mfs(struct super_block *sb)
23030+{ /* initialise si_wbr_mfs: lock, timestamp and an invalid branch index; always returns 0 */
23031+ struct au_wbr_mfs *mfs;
23032+
23033+ mfs = &au_sbi(sb)->si_wbr_mfs;
23034+ mutex_init(&mfs->mfs_lock);
23035+ mfs->mfs_jiffy = 0;
23036+ mfs->mfs_bindex = -EROFS; /* negative index makes au_wbr_create_mfs() rescan on first use */
23037+
23038+ return 0;
23039+}
23040+
23041+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
23042+{ /* counterpart of au_wbr_create_init_mfs(): destroys mfs_lock; always returns 0 */
23043+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
23044+ return 0;
23045+}
23046+
23047+/* ---------------------------------------------------------------------- */
23048+
23049+/* most free space and then round robin */
23050+static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
23051+{ /* au_wbr_create_mfs() result, replaced by au_wbr_create_rr() when free bytes are below the watermark */
23052+ int err;
23053+ struct au_wbr_mfs *mfs;
23054+
23055+ err = au_wbr_create_mfs(dentry, isdir);
23056+ if (err >= 0) {
23057+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 23058+ mutex_lock(&mfs->mfs_lock); /* au_wbr_create_mfs() has already released it */
1facf9fc 23059+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
23060+ err = au_wbr_create_rr(dentry, isdir);
dece6358 23061+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 23062+ }
23063+
23064+ AuDbg("b%d\n", err);
23065+ return err;
23066+}
23067+
23068+static int au_wbr_create_init_mfsrr(struct super_block *sb)
23069+{ /* initialise both the mfs and the rr state; returns au_wbr_create_init_rr()'s result */
23070+ int err;
23071+
23072+ au_wbr_create_init_mfs(sb); /* ignore */
23073+ err = au_wbr_create_init_rr(sb);
23074+
23075+ return err;
23076+}
23077+
23078+/* ---------------------------------------------------------------------- */
23079+
23080+/* top down parent and most free space */
23081+static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
23082+{ /* start from au_wbr_create_tdp(), then prefer a parent-holding writable branch with more free bytes */
23083+ int err, e2;
23084+ unsigned long long b;
23085+ aufs_bindex_t bindex, bstart, bend;
23086+ struct super_block *sb;
23087+ struct dentry *parent, *h_parent;
23088+ struct au_branch *br;
23089+
23090+ err = au_wbr_create_tdp(dentry, isdir);
23091+ if (unlikely(err < 0))
23092+ goto out;
23093+ parent = dget_parent(dentry);
23094+ bstart = au_dbstart(parent);
23095+ bend = au_dbtaildir(parent);
23096+ if (bstart == bend) /* the parent exists on a single branch only */
23097+ goto out_parent; /* success */
23098+
23099+ e2 = au_wbr_create_mfs(dentry, isdir); /* called for its side effect of refreshing wbr_bytes; e2 is only tested for failure */
23100+ if (e2 < 0)
23101+ goto out_parent; /* success */
23102+
23103+ /* when the available size is equal, select upper one */
23104+ sb = dentry->d_sb;
23105+ br = au_sbr(sb, err);
23106+ b = br->br_wbr->wbr_bytes; /* free bytes of the tdp choice, the value to beat */
23107+ AuDbg("b%d, %llu\n", err, b);
23108+
23109+ for (bindex = bstart; bindex <= bend; bindex++) {
23110+ h_parent = au_h_dptr(parent, bindex);
23111+ if (!h_parent || !h_parent->d_inode) /* the parent does not exist on this branch */
23112+ continue;
23113+
23114+ br = au_sbr(sb, bindex);
23115+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { /* strict ">" keeps the earlier branch on a tie */
23116+ b = br->br_wbr->wbr_bytes;
23117+ err = bindex;
23118+ AuDbg("b%d, %llu\n", err, b);
23119+ }
23120+ }
23121+
23122+ out_parent:
23123+ dput(parent);
23124+ out:
23125+ AuDbg("b%d\n", err);
23126+ return err;
23127+}
23128+
23129+/* ---------------------------------------------------------------------- */
23130+
23131+/* policies for copyup */
23132+
23133+/* top down parent */
23134+static int au_wbr_copyup_tdp(struct dentry *dentry)
23135+{ /* copyup variant: same selection as au_wbr_create_tdp() */
23136+ return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
23137+}
23138+
23139+/* bottom up parent */
23140+static int au_wbr_copyup_bup(struct dentry *dentry)
23141+{ /* returns a writable branch index where the parent exists, searching upward from dentry's start branch, or -EROFS */
23142+ int err;
23143+ aufs_bindex_t bindex, bstart;
23144+ struct dentry *parent, *h_parent;
23145+ struct super_block *sb;
23146+
23147+ err = -EROFS;
23148+ sb = dentry->d_sb;
23149+ parent = dget_parent(dentry);
23150+ bstart = au_dbstart(parent);
23151+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) { /* from dentry's start branch toward the parent's */
23152+ h_parent = au_h_dptr(parent, bindex);
23153+ if (!h_parent || !h_parent->d_inode) /* the parent does not exist on this branch */
23154+ continue;
23155+
23156+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
23157+ err = bindex;
23158+ break;
23159+ }
23160+ }
23161+ dput(parent);
23162+
23163+ /* bottom up here */
23164+ if (unlikely(err < 0))
23165+ err = au_wbr_bu(sb, bstart - 1); /* fall back to any writable branch above the parent's start */
23166+
23167+ AuDbg("b%d\n", err);
23168+ return err;
23169+}
23170+
23171+/* bottom up */
23172+static int au_wbr_copyup_bu(struct dentry *dentry)
23173+{ /* au_wbr_bu() starting at dentry's start branch; returns its result */
23174+ int err;
23175+
23176+ err = au_wbr_bu(dentry->d_sb, au_dbstart(dentry));
23177+
23178+ AuDbg("b%d\n", err);
23179+ return err;
23180+}
23181+
23182+/* ---------------------------------------------------------------------- */
23183+
23184+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
23185+ [AuWbrCopyup_TDP] = {
23186+ .copyup = au_wbr_copyup_tdp
23187+ },
23188+ [AuWbrCopyup_BUP] = {
23189+ .copyup = au_wbr_copyup_bup
23190+ },
23191+ [AuWbrCopyup_BU] = {
23192+ .copyup = au_wbr_copyup_bu
23193+ }
23194+};
23195+
23196+struct au_wbr_create_operations au_wbr_create_ops[] = {
23197+ [AuWbrCreate_TDP] = {
23198+ .create = au_wbr_create_tdp
23199+ },
23200+ [AuWbrCreate_RR] = {
23201+ .create = au_wbr_create_rr,
23202+ .init = au_wbr_create_init_rr
23203+ },
23204+ [AuWbrCreate_MFS] = {
23205+ .create = au_wbr_create_mfs,
23206+ .init = au_wbr_create_init_mfs,
23207+ .fin = au_wbr_create_fin_mfs
23208+ },
23209+ [AuWbrCreate_MFSV] = {
23210+ .create = au_wbr_create_mfs,
23211+ .init = au_wbr_create_init_mfs,
23212+ .fin = au_wbr_create_fin_mfs
23213+ },
23214+ [AuWbrCreate_MFSRR] = {
23215+ .create = au_wbr_create_mfsrr,
23216+ .init = au_wbr_create_init_mfsrr,
23217+ .fin = au_wbr_create_fin_mfs
23218+ },
23219+ [AuWbrCreate_MFSRRV] = {
23220+ .create = au_wbr_create_mfsrr,
23221+ .init = au_wbr_create_init_mfsrr,
23222+ .fin = au_wbr_create_fin_mfs
23223+ },
23224+ [AuWbrCreate_PMFS] = {
23225+ .create = au_wbr_create_pmfs,
23226+ .init = au_wbr_create_init_mfs,
23227+ .fin = au_wbr_create_fin_mfs
23228+ },
23229+ [AuWbrCreate_PMFSV] = {
23230+ .create = au_wbr_create_pmfs,
23231+ .init = au_wbr_create_init_mfs,
23232+ .fin = au_wbr_create_fin_mfs
23233+ }
23234+};
1308ab2a 23235diff -uprN -x .git linux-2.6.31/fs/aufs/whout.c aufs2-2.6.git/fs/aufs/whout.c
23236--- linux-2.6.31/fs/aufs/whout.c 1970-01-01 00:00:00.000000000 +0000
23237+++ aufs2-2.6.git/fs/aufs/whout.c 2009-09-21 21:49:23.411607814 +0000
23238@@ -0,0 +1,1052 @@
1facf9fc 23239+/*
23240+ * Copyright (C) 2005-2009 Junjiro R. Okajima
23241+ *
23242+ * This program, aufs is free software; you can redistribute it and/or modify
23243+ * it under the terms of the GNU General Public License as published by
23244+ * the Free Software Foundation; either version 2 of the License, or
23245+ * (at your option) any later version.
dece6358
AM
23246+ *
23247+ * This program is distributed in the hope that it will be useful,
23248+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23249+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23250+ * GNU General Public License for more details.
23251+ *
23252+ * You should have received a copy of the GNU General Public License
23253+ * along with this program; if not, write to the Free Software
23254+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 23255+ */
23256+
23257+/*
23258+ * whiteout for logical deletion and opaque directory
23259+ */
23260+
23261+#include <linux/fs.h>
23262+#include "aufs.h"
23263+
23264+#define WH_MASK S_IRUGO
23265+
23266+/*
23267+ * If a directory contains this file, then it is opaque. We start with the
23268+ * .wh. flag so that it is blocked by lookup.
23269+ */
23270+static struct qstr diropq_name = {
23271+ .name = AUFS_WH_DIROPQ,
23272+ .len = sizeof(AUFS_WH_DIROPQ) - 1
23273+};
23274+
23275+/*
23276+ * generate whiteout name, which is NOT terminated by NUL.
23277+ * @name: original d_name; both its ->name
23278+ *        and its ->len are used
23279+ * @wh: whiteout qstr
23280+ * returns zero when succeeds, otherwise error.
23281+ * on success, wh->name should be freed by kfree().
23282+ */
23283+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
23284+{
23285+ char *p;
23286+
23287+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
23288+ return -ENAMETOOLONG;
23289+
23290+ wh->len = name->len + AUFS_WH_PFX_LEN;
23291+ p = kmalloc(wh->len, GFP_NOFS);
23292+ wh->name = p;
23293+ if (p) {
23294+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
23295+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
23296+ /* smp_mb(); */
23297+ return 0;
23298+ }
23299+ return -ENOMEM;
23300+}
23301+
23302+/* ---------------------------------------------------------------------- */
23303+
23304+/*
23305+ * test if the @wh_name exists under @h_parent.
23306+ * @try_sio specifies the necessity of super-io.
23307+ */
23308+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
23309+ struct au_branch *br, int try_sio)
23310+{
23311+ int err;
23312+ struct dentry *wh_dentry;
23313+ struct inode *h_dir;
23314+
23315+ h_dir = h_parent->d_inode;
23316+ if (!try_sio)
23317+ wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
23318+ else
23319+ wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
23320+ err = PTR_ERR(wh_dentry);
23321+ if (IS_ERR(wh_dentry))
23322+ goto out;
23323+
23324+ err = 0;
23325+ if (!wh_dentry->d_inode)
23326+ goto out_wh; /* success */
23327+
23328+ err = 1;
23329+ if (S_ISREG(wh_dentry->d_inode->i_mode))
23330+ goto out_wh; /* success */
23331+
23332+ err = -EIO;
23333+ AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
23334+ AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
23335+
23336+ out_wh:
23337+ dput(wh_dentry);
23338+ out:
23339+ return err;
23340+}
23341+
23342+/*
23343+ * test if the @h_dentry sets opaque or not.
23344+ */
23345+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
23346+{
23347+ int err;
23348+ struct inode *h_dir;
23349+
23350+ h_dir = h_dentry->d_inode;
23351+ err = au_wh_test(h_dentry, &diropq_name, br,
23352+ au_test_h_perm_sio(h_dir, MAY_EXEC));
23353+ return err;
23354+}
23355+
23356+/*
23357+ * returns a negative dentry named <wh-pfx><wh-pfx>@prefix.<HEX_LEN hex digits> (unique and temporary), or ERR_PTR().
23358+ */
23359+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
23360+ struct qstr *prefix)
23361+{
23362+#define HEX_LEN 4
23363+ struct dentry *dentry;
23364+ int i;
23365+ char defname[AUFS_WH_PFX_LEN * 2 + DNAME_INLINE_LEN_MIN + 1
23366+ + HEX_LEN + 1], *name, *p;
23367+ static unsigned short cnt; /* assumed 16 bits: never more than HEX_LEN hex digits */
23368+ struct qstr qs;
23369+
23370+ name = defname;
23371+ qs.len = sizeof(defname) - DNAME_INLINE_LEN_MIN + prefix->len - 1;
23372+ if (unlikely(prefix->len > DNAME_INLINE_LEN_MIN)) {
23373+ dentry = ERR_PTR(-ENAMETOOLONG);
23374+ if (unlikely(qs.len >= PATH_MAX))
23375+ goto out;
23376+ dentry = ERR_PTR(-ENOMEM);
23377+ name = kmalloc(qs.len + 1, GFP_NOFS);
23378+ if (unlikely(!name))
23379+ goto out;
23380+ }
23381+
23382+ /* doubly whiteout-ed */
23383+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
23384+ p = name + AUFS_WH_PFX_LEN * 2;
23385+ memcpy(p, prefix->name, prefix->len);
23386+ p += prefix->len;
23387+ *p++ = '.';
23388+ AuDebugOn(name + qs.len + 1 - p <= HEX_LEN);
23389+
23390+ qs.name = name;
23391+ for (i = 0; i < 3; i++) {
23392+ sprintf(p, "%.*x", HEX_LEN, cnt++); /* hex, not decimal: 10000..65535 need 5 decimal digits and would overrun name[] */
23393+ dentry = au_sio_lkup_one(&qs, h_parent, br);
23394+ if (IS_ERR(dentry) || !dentry->d_inode)
23395+ goto out_name;
23396+ dput(dentry);
23397+ }
23398+ /* AuWarn("could not get random name\n"); */
23399+ dentry = ERR_PTR(-EEXIST);
23400+ AuDbg("%.*s\n", AuLNPair(&qs));
23401+ BUG();
23402+
23403+ out_name:
23404+ if (name != defname)
23405+ kfree(name);
23406+ out:
23407+ return dentry;
23408+#undef HEX_LEN
23409+}
23410+
23411+/*
23412+ * rename the @h_dentry on @br to the whiteouted temporary name.
23413+ */
23414+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
23415+{
23416+ int err;
23417+ struct path h_path = {
23418+ .mnt = br->br_mnt
23419+ };
23420+ struct inode *h_dir;
23421+ struct dentry *h_parent;
23422+
23423+ h_parent = h_dentry->d_parent; /* dir inode is locked */
23424+ h_dir = h_parent->d_inode;
23425+ IMustLock(h_dir);
23426+
23427+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
23428+ err = PTR_ERR(h_path.dentry);
23429+ if (IS_ERR(h_path.dentry))
23430+ goto out;
23431+
23432+ /* under the same dir, no need to lock_rename() */
23433+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
23434+ AuTraceErr(err);
23435+ dput(h_path.dentry);
23436+
23437+ out:
23438+ return err;
23439+}
23440+
23441+/* ---------------------------------------------------------------------- */
23442+/*
23443+ * functions for removing a whiteout
23444+ */
23445+
23446+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
23447+{
23448+ int force;
23449+
23450+ /*
23451+ * forces superio when the dir has a sticky bit.
23452+ * this may be a violation of unix fs semantics.
23453+ */
23454+ force = (h_dir->i_mode & S_ISVTX)
23455+ && h_path->dentry->d_inode->i_uid != current_fsuid();
23456+ return vfsub_unlink(h_dir, h_path, force);
23457+}
23458+
23459+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
23460+ struct dentry *dentry)
23461+{
23462+ int err;
23463+
23464+ err = do_unlink_wh(h_dir, h_path);
23465+ if (!err && dentry)
23466+ au_set_dbwh(dentry, -1);
23467+
23468+ return err;
23469+}
23470+
23471+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
23472+ struct au_branch *br)
23473+{
23474+ int err;
23475+ struct path h_path = {
23476+ .mnt = br->br_mnt
23477+ };
23478+
23479+ err = 0;
23480+ h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
23481+ if (IS_ERR(h_path.dentry))
23482+ err = PTR_ERR(h_path.dentry);
23483+ else {
23484+ if (h_path.dentry->d_inode
23485+ && S_ISREG(h_path.dentry->d_inode->i_mode))
23486+ err = do_unlink_wh(h_parent->d_inode, &h_path);
23487+ dput(h_path.dentry);
23488+ }
23489+
23490+ return err;
23491+}
23492+
23493+/* ---------------------------------------------------------------------- */
23494+/*
23495+ * initialize/clean whiteout for a branch
23496+ */
23497+
23498+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
23499+ const int isdir)
23500+{
23501+ int err;
23502+
23503+ if (!whpath->dentry->d_inode)
23504+ return;
23505+
23506+ err = mnt_want_write(whpath->mnt);
23507+ if (!err) {
23508+ if (isdir)
23509+ err = vfsub_rmdir(h_dir, whpath);
23510+ else
23511+ err = vfsub_unlink(h_dir, whpath, /*force*/0);
23512+ mnt_drop_write(whpath->mnt);
23513+ }
23514+ if (unlikely(err))
23515+ AuWarn("failed removing %.*s (%d), ignored.\n",
23516+ AuDLNPair(whpath->dentry), err);
23517+}
23518+
23519+static int test_linkable(struct dentry *h_root)
23520+{
23521+ struct inode *h_dir = h_root->d_inode;
23522+
23523+ if (h_dir->i_op->link)
23524+ return 0;
23525+
23526+ AuErr("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
23527+ AuDLNPair(h_root), au_sbtype(h_root->d_sb));
23528+ return -ENOSYS;
23529+}
23530+
23531+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
23532+static int au_whdir(struct inode *h_dir, struct path *path)
23533+{
23534+ int err;
23535+
23536+ err = -EEXIST;
23537+ if (!path->dentry->d_inode) {
23538+ int mode = S_IRWXU;
23539+
23540+ if (au_test_nfs(path->dentry->d_sb))
23541+ mode |= S_IXUGO;
23542+ err = mnt_want_write(path->mnt);
23543+ if (!err) {
23544+ err = vfsub_mkdir(h_dir, path, mode);
23545+ mnt_drop_write(path->mnt);
23546+ }
23547+ } else if (S_ISDIR(path->dentry->d_inode->i_mode))
23548+ err = 0;
23549+ else
23550+ AuErr("unknown %.*s exists\n", AuDLNPair(path->dentry));
23551+
23552+ return err;
23553+}
23554+
23555+struct au_wh_base {
23556+ const struct qstr *name;
23557+ struct dentry *dentry;
23558+};
23559+
23560+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
23561+ struct path *h_path)
23562+{
23563+ h_path->dentry = base[AuBrWh_BASE].dentry;
23564+ au_wh_clean(h_dir, h_path, /*isdir*/0);
23565+ h_path->dentry = base[AuBrWh_PLINK].dentry;
23566+ au_wh_clean(h_dir, h_path, /*isdir*/1);
23567+ h_path->dentry = base[AuBrWh_ORPH].dentry;
23568+ au_wh_clean(h_dir, h_path, /*isdir*/1);
23569+}
23570+
23571+/*
23572+ * returns tri-state,
23573+ * minus: error, caller should print the message
23574+ * zero: success
23575+ * plus: error, caller should NOT print the message
23576+ */
23577+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
23578+ int do_plink, struct au_wh_base base[],
23579+ struct path *h_path)
23580+{
23581+ int err;
23582+ struct inode *h_dir;
23583+
23584+ h_dir = h_root->d_inode;
23585+ h_path->dentry = base[AuBrWh_BASE].dentry;
23586+ au_wh_clean(h_dir, h_path, /*isdir*/0);
23587+ h_path->dentry = base[AuBrWh_PLINK].dentry;
23588+ if (do_plink) {
23589+ err = test_linkable(h_root);
23590+ if (unlikely(err)) {
23591+ err = 1;
23592+ goto out;
23593+ }
23594+
23595+ err = au_whdir(h_dir, h_path);
23596+ if (unlikely(err))
23597+ goto out;
23598+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
23599+ } else
23600+ au_wh_clean(h_dir, h_path, /*isdir*/1);
23601+ h_path->dentry = base[AuBrWh_ORPH].dentry;
23602+ err = au_whdir(h_dir, h_path);
23603+ if (unlikely(err))
23604+ goto out;
23605+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
23606+
23607+ out:
23608+ return err;
23609+}
23610+
23611+/*
23612+ * for the moment, aufs supports the branch filesystem which does not support
23613+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
23614+ * copyup failed. finally, such filesystem will not be used as the writable
23615+ * branch.
23616+ *
23617+ * returns tri-state, see above.
23618+ */
23619+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
23620+ int do_plink, struct au_wh_base base[],
23621+ struct path *h_path)
23622+{
23623+ int err;
23624+ struct inode *h_dir;
23625+
1308ab2a 23626+ WbrWhMustWriteLock(wbr);
23627+
1facf9fc 23628+ err = test_linkable(h_root);
23629+ if (unlikely(err)) {
23630+ err = 1;
23631+ goto out;
23632+ }
23633+
23634+ /*
23635+ * todo: should this create be done in /sbin/mount.aufs helper?
23636+ */
23637+ err = -EEXIST;
23638+ h_dir = h_root->d_inode;
23639+ if (!base[AuBrWh_BASE].dentry->d_inode) {
23640+ err = mnt_want_write(h_path->mnt);
23641+ if (!err) {
23642+ h_path->dentry = base[AuBrWh_BASE].dentry;
23643+ err = vfsub_create(h_dir, h_path, WH_MASK);
23644+ mnt_drop_write(h_path->mnt);
23645+ }
23646+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
23647+ err = 0;
23648+ else
23649+ AuErr("unknown %.*s/%.*s exists\n",
23650+ AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
23651+ if (unlikely(err))
23652+ goto out;
23653+
23654+ h_path->dentry = base[AuBrWh_PLINK].dentry;
23655+ if (do_plink) {
23656+ err = au_whdir(h_dir, h_path);
23657+ if (unlikely(err))
23658+ goto out;
23659+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
23660+ } else
23661+ au_wh_clean(h_dir, h_path, /*isdir*/1);
23662+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
23663+
23664+ h_path->dentry = base[AuBrWh_ORPH].dentry;
23665+ err = au_whdir(h_dir, h_path);
23666+ if (unlikely(err))
23667+ goto out;
23668+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
23669+
23670+ out:
23671+ return err;
23672+}
23673+
23674+/*
23675+ * initialize the whiteout base file/dir for @br.
23676+ */
23677+int au_wh_init(struct dentry *h_root, struct au_branch *br,
23678+ struct super_block *sb)
23679+{
23680+ int err, i;
23681+ const unsigned char do_plink
23682+ = !!au_opt_test(au_mntflags(sb), PLINK);
23683+ struct path path = {
23684+ .mnt = br->br_mnt
23685+ };
23686+ struct inode *h_dir;
23687+ struct au_wbr *wbr = br->br_wbr;
23688+ static const struct qstr base_name[] = {
23689+ [AuBrWh_BASE] = {
23690+ .name = AUFS_BASE_NAME,
23691+ .len = sizeof(AUFS_BASE_NAME) - 1
23692+ },
23693+ [AuBrWh_PLINK] = {
23694+ .name = AUFS_PLINKDIR_NAME,
23695+ .len = sizeof(AUFS_PLINKDIR_NAME) - 1
23696+ },
23697+ [AuBrWh_ORPH] = {
23698+ .name = AUFS_ORPHDIR_NAME,
23699+ .len = sizeof(AUFS_ORPHDIR_NAME) - 1
23700+ }
23701+ };
23702+ struct au_wh_base base[] = {
23703+ [AuBrWh_BASE] = {
23704+ .name = base_name + AuBrWh_BASE,
23705+ .dentry = NULL
23706+ },
23707+ [AuBrWh_PLINK] = {
23708+ .name = base_name + AuBrWh_PLINK,
23709+ .dentry = NULL
23710+ },
23711+ [AuBrWh_ORPH] = {
23712+ .name = base_name + AuBrWh_ORPH,
23713+ .dentry = NULL
23714+ }
23715+ };
23716+
1308ab2a 23717+ if (wbr)
23718+ WbrWhMustWriteLock(wbr);
1facf9fc 23719+
23720+ h_dir = h_root->d_inode;
23721+ for (i = 0; i < AuBrWh_Last; i++) {
23722+ /* doubly whiteouted */
23723+ struct dentry *d;
23724+
23725+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
23726+ err = PTR_ERR(d);
23727+ if (IS_ERR(d))
23728+ goto out;
23729+
23730+ base[i].dentry = d;
23731+ AuDebugOn(wbr
23732+ && wbr->wbr_wh[i]
23733+ && wbr->wbr_wh[i] != base[i].dentry);
23734+ }
23735+
23736+ if (wbr)
23737+ for (i = 0; i < AuBrWh_Last; i++) {
23738+ dput(wbr->wbr_wh[i]);
23739+ wbr->wbr_wh[i] = NULL;
23740+ }
23741+
23742+ err = 0;
23743+
23744+ switch (br->br_perm) {
23745+ case AuBrPerm_RO:
23746+ case AuBrPerm_ROWH:
23747+ case AuBrPerm_RR:
23748+ case AuBrPerm_RRWH:
23749+ au_wh_init_ro(h_dir, base, &path);
23750+ break;
23751+
23752+ case AuBrPerm_RWNoLinkWH:
23753+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
23754+ if (err > 0)
23755+ goto out;
23756+ else if (err)
23757+ goto out_err;
23758+ break;
23759+
23760+ case AuBrPerm_RW:
23761+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
23762+ if (err > 0)
23763+ goto out;
23764+ else if (err)
23765+ goto out_err;
23766+ break;
23767+
23768+ default:
23769+ BUG();
23770+ }
23771+ goto out; /* success */
23772+
23773+ out_err:
23774+ AuErr("an error(%d) on the writable branch %.*s(%s)\n",
23775+ err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
23776+ out:
23777+ for (i = 0; i < AuBrWh_Last; i++)
23778+ dput(base[i].dentry);
23779+ return err;
23780+}
23781+
23782+/* ---------------------------------------------------------------------- */
23783+/*
23784+ * whiteouts are all hard-linked usually.
23785+ * when its link count reaches a ceiling, we create a new whiteout base
23786+ * asynchronously.
23787+ */
23788+
23789+struct reinit_br_wh {
23790+ struct super_block *sb;
23791+ struct au_branch *br;
23792+};
23793+
23794+static void reinit_br_wh(void *arg)
23795+{
23796+ int err;
23797+ aufs_bindex_t bindex;
23798+ struct path h_path;
23799+ struct reinit_br_wh *a = arg;
23800+ struct au_wbr *wbr;
23801+ struct inode *dir;
23802+ struct dentry *h_root;
23803+ struct au_hinode *hdir;
23804+
23805+ err = 0;
23806+ wbr = a->br->br_wbr;
23807+ /* big aufs lock */
23808+ si_noflush_write_lock(a->sb);
23809+ if (!au_br_writable(a->br->br_perm))
23810+ goto out;
23811+ bindex = au_br_index(a->sb, a->br->br_id);
23812+ if (unlikely(bindex < 0))
23813+ goto out;
23814+
1308ab2a 23815+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 23816+ dir = a->sb->s_root->d_inode;
1facf9fc 23817+ hdir = au_hi(dir, bindex);
23818+ h_root = au_h_dptr(a->sb->s_root, bindex);
23819+
23820+ au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
23821+ wbr_wh_write_lock(wbr);
23822+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
23823+ h_root, a->br);
23824+ if (!err) {
23825+ err = mnt_want_write(a->br->br_mnt);
23826+ if (!err) {
23827+ h_path.dentry = wbr->wbr_whbase;
23828+ h_path.mnt = a->br->br_mnt;
23829+ err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
23830+ mnt_drop_write(a->br->br_mnt);
23831+ }
23832+ } else {
23833+ AuWarn("%.*s is moved, ignored\n", AuDLNPair(wbr->wbr_whbase));
23834+ err = 0;
23835+ }
23836+ dput(wbr->wbr_whbase);
23837+ wbr->wbr_whbase = NULL;
23838+ if (!err)
23839+ err = au_wh_init(h_root, a->br, a->sb);
23840+ wbr_wh_write_unlock(wbr);
23841+ au_hin_imtx_unlock(hdir);
1308ab2a 23842+ di_read_unlock(a->sb->s_root, AuLock_IR);
1facf9fc 23843+
23844+ out:
23845+ if (wbr)
23846+ atomic_dec(&wbr->wbr_wh_running);
23847+ atomic_dec(&a->br->br_count);
23848+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
23849+ si_write_unlock(a->sb);
23850+ kfree(arg);
23851+ if (unlikely(err))
23852+ AuIOErr("err %d\n", err);
23853+}
23854+
23855+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
23856+{
23857+ int do_dec, wkq_err;
23858+ struct reinit_br_wh *arg;
23859+
23860+ do_dec = 1;
23861+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
23862+ goto out;
23863+
23864+ /* ignore ENOMEM */
23865+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
23866+ if (arg) {
23867+ /*
23868+ * dec(wh_running), kfree(arg) and dec(br_count)
23869+ * in reinit function
23870+ */
23871+ arg->sb = sb;
23872+ arg->br = br;
23873+ atomic_inc(&br->br_count);
23874+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb);
23875+ if (unlikely(wkq_err)) {
23876+ atomic_dec(&br->br_wbr->wbr_wh_running);
23877+ atomic_dec(&br->br_count);
23878+ kfree(arg);
23879+ }
23880+ do_dec = 0;
23881+ }
23882+
23883+ out:
23884+ if (do_dec)
23885+ atomic_dec(&br->br_wbr->wbr_wh_running);
23886+}
23887+
23888+/* ---------------------------------------------------------------------- */
23889+
23890+/*
23891+ * create the whiteout @wh.
23892+ */
23893+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
23894+ struct dentry *wh)
23895+{
23896+ int err;
23897+ struct path h_path = {
23898+ .dentry = wh
23899+ };
23900+ struct au_branch *br;
23901+ struct au_wbr *wbr;
23902+ struct dentry *h_parent;
23903+ struct inode *h_dir;
23904+
23905+ h_parent = wh->d_parent; /* dir inode is locked */
23906+ h_dir = h_parent->d_inode;
23907+ IMustLock(h_dir);
23908+
23909+ br = au_sbr(sb, bindex);
23910+ h_path.mnt = br->br_mnt;
23911+ wbr = br->br_wbr;
23912+ wbr_wh_read_lock(wbr);
23913+ if (wbr->wbr_whbase) {
23914+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
23915+ if (!err || err != -EMLINK)
23916+ goto out;
23917+
23918+ /* link count full. re-initialize wbr_whbase. */
23919+ kick_reinit_br_wh(sb, br);
23920+ }
23921+
23922+ /* return this error in this context */
23923+ err = vfsub_create(h_dir, &h_path, WH_MASK);
23924+
23925+ out:
23926+ wbr_wh_read_unlock(wbr);
23927+ return err;
23928+}
23929+
23930+/* ---------------------------------------------------------------------- */
23931+
23932+/*
23933+ * create or remove the diropq.
23934+ */
23935+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
23936+ unsigned int flags)
23937+{
23938+ struct dentry *opq_dentry, *h_dentry;
23939+ struct super_block *sb;
23940+ struct au_branch *br;
23941+ int err;
23942+
23943+ sb = dentry->d_sb;
23944+ br = au_sbr(sb, bindex);
23945+ h_dentry = au_h_dptr(dentry, bindex);
23946+ opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
23947+ if (IS_ERR(opq_dentry))
23948+ goto out;
23949+
23950+ if (au_ftest_diropq(flags, CREATE)) {
23951+ err = link_or_create_wh(sb, bindex, opq_dentry);
23952+ if (!err) {
23953+ au_set_dbdiropq(dentry, bindex);
23954+ goto out; /* success */
23955+ }
23956+ } else {
23957+ struct path tmp = {
23958+ .dentry = opq_dentry,
23959+ .mnt = br->br_mnt
23960+ };
23961+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
23962+ if (!err)
23963+ au_set_dbdiropq(dentry, -1);
23964+ }
23965+ dput(opq_dentry);
23966+ opq_dentry = ERR_PTR(err);
23967+
23968+ out:
23969+ return opq_dentry;
23970+}
23971+
23972+struct do_diropq_args {
23973+ struct dentry **errp;
23974+ struct dentry *dentry;
23975+ aufs_bindex_t bindex;
23976+ unsigned int flags;
23977+};
23978+
23979+static void call_do_diropq(void *args)
23980+{
23981+ struct do_diropq_args *a = args;
23982+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
23983+}
23984+
23985+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
23986+ unsigned int flags)
23987+{
23988+ struct dentry *diropq, *h_dentry;
23989+
23990+ h_dentry = au_h_dptr(dentry, bindex);
23991+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
23992+ diropq = do_diropq(dentry, bindex, flags);
23993+ else {
23994+ int wkq_err;
23995+ struct do_diropq_args args = {
23996+ .errp = &diropq,
23997+ .dentry = dentry,
23998+ .bindex = bindex,
23999+ .flags = flags
24000+ };
24001+
24002+ wkq_err = au_wkq_wait(call_do_diropq, &args);
24003+ if (unlikely(wkq_err))
24004+ diropq = ERR_PTR(wkq_err);
24005+ }
24006+
24007+ return diropq;
24008+}
24009+
24010+/* ---------------------------------------------------------------------- */
24011+
24012+/*
24013+ * lookup whiteout dentry.
24014+ * @h_parent: lower parent dentry which must exist and be locked
24015+ * @base_name: name of dentry which will be whiteouted
24016+ * returns dentry for whiteout.
24017+ */
24018+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24019+ struct au_branch *br)
24020+{
24021+ int err;
24022+ struct qstr wh_name;
24023+ struct dentry *wh_dentry;
24024+
24025+ err = au_wh_name_alloc(&wh_name, base_name);
24026+ wh_dentry = ERR_PTR(err);
24027+ if (!err) {
24028+ wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
24029+ kfree(wh_name.name);
24030+ }
24031+ return wh_dentry;
24032+}
24033+
24034+/*
24035+ * link/create a whiteout for @dentry on @bindex.
24036+ */
24037+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24038+ struct dentry *h_parent)
24039+{
24040+ struct dentry *wh_dentry;
24041+ struct super_block *sb;
24042+ int err;
24043+
24044+ sb = dentry->d_sb;
24045+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
24046+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
24047+ err = link_or_create_wh(sb, bindex, wh_dentry);
24048+ if (!err)
24049+ au_set_dbwh(dentry, bindex);
24050+ else {
24051+ dput(wh_dentry);
24052+ wh_dentry = ERR_PTR(err);
24053+ }
24054+ }
24055+
24056+ return wh_dentry;
24057+}
24058+
24059+/* ---------------------------------------------------------------------- */
24060+
24061+/* Delete all whiteouts in this directory on branch bindex. */
24062+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
24063+ aufs_bindex_t bindex, struct au_branch *br)
24064+{
24065+ int err;
24066+ unsigned long ul, n;
24067+ struct qstr wh_name;
24068+ char *p;
24069+ struct hlist_head *head;
24070+ struct au_vdir_wh *tpos;
24071+ struct hlist_node *pos;
24072+ struct au_vdir_destr *str;
24073+
24074+ err = -ENOMEM;
24075+ p = __getname();
24076+ wh_name.name = p;
24077+ if (unlikely(!wh_name.name))
24078+ goto out;
24079+
24080+ err = 0;
24081+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
24082+ p += AUFS_WH_PFX_LEN;
24083+ n = whlist->nh_num;
24084+ head = whlist->nh_head;
24085+ for (ul = 0; !err && ul < n; ul++, head++) {
24086+ hlist_for_each_entry(tpos, pos, head, wh_hash) {
24087+ if (tpos->wh_bindex != bindex)
24088+ continue;
24089+
24090+ str = &tpos->wh_str;
24091+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
24092+ memcpy(p, str->name, str->len);
24093+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
24094+ err = unlink_wh_name(h_dentry, &wh_name, br);
24095+ if (!err)
24096+ continue;
24097+ break;
24098+ }
24099+ AuIOErr("whiteout name too long %.*s\n",
24100+ str->len, str->name);
24101+ err = -EIO;
24102+ break;
24103+ }
24104+ }
24105+ __putname(wh_name.name);
24106+
24107+ out:
24108+ return err;
24109+}
24110+
24111+struct del_wh_children_args {
24112+ int *errp;
24113+ struct dentry *h_dentry;
1308ab2a 24114+ struct au_nhash *whlist;
1facf9fc 24115+ aufs_bindex_t bindex;
24116+ struct au_branch *br;
24117+};
24118+
24119+static void call_del_wh_children(void *args)
24120+{
24121+ struct del_wh_children_args *a = args;
1308ab2a 24122+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 24123+}
24124+
24125+/* ---------------------------------------------------------------------- */
24126+
24127+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
24128+{
24129+ struct au_whtmp_rmdir *whtmp;
dece6358 24130+ int err;
1308ab2a 24131+ unsigned int rdhash;
dece6358
AM
24132+
24133+ SiMustAnyLock(sb);
1facf9fc 24134+
24135+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
24136+ if (unlikely(!whtmp)) {
24137+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 24138+ goto out;
dece6358 24139+ }
1facf9fc 24140+
24141+ whtmp->dir = NULL;
24142+ whtmp->wh_dentry = NULL;
1308ab2a 24143+ /* no estimation for dir size */
24144+ rdhash = au_sbi(sb)->si_rdhash;
24145+ if (!rdhash)
24146+ rdhash = AUFS_RDHASH_DEF;
24147+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
24148+ if (unlikely(err)) {
24149+ kfree(whtmp);
24150+ whtmp = ERR_PTR(err);
24151+ }
dece6358 24152+
1facf9fc 24153+ out:
dece6358 24154+ return whtmp;
1facf9fc 24155+}
24156+
24157+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
24158+{
24159+ dput(whtmp->wh_dentry);
24160+ iput(whtmp->dir);
dece6358 24161+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 24162+ kfree(whtmp);
24163+}
24164+
24165+/*
24166+ * rmdir the whiteouted temporary named dir @wh_dentry.
24167+ * @whlist: whiteouted children.
24168+ */
24169+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24170+ struct dentry *wh_dentry, struct au_nhash *whlist)
24171+{
24172+ int err;
24173+ struct path h_tmp;
24174+ struct inode *wh_inode, *h_dir;
24175+ struct au_branch *br;
24176+
24177+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
24178+ IMustLock(h_dir);
24179+
24180+ br = au_sbr(dir->i_sb, bindex);
24181+ wh_inode = wh_dentry->d_inode;
24182+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
24183+
24184+ /*
24185+ * someone else might change some whiteouts while we were sleeping.
24186+ * it means this whlist may have an obsoleted entry.
24187+ */
24188+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
24189+ err = del_wh_children(wh_dentry, whlist, bindex, br);
24190+ else {
24191+ int wkq_err;
24192+ struct del_wh_children_args args = {
24193+ .errp = &err,
24194+ .h_dentry = wh_dentry,
1308ab2a 24195+ .whlist = whlist,
1facf9fc 24196+ .bindex = bindex,
24197+ .br = br
24198+ };
24199+
24200+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
24201+ if (unlikely(wkq_err))
24202+ err = wkq_err;
24203+ }
24204+ mutex_unlock(&wh_inode->i_mutex);
24205+
24206+ if (!err) {
24207+ h_tmp.dentry = wh_dentry;
24208+ h_tmp.mnt = br->br_mnt;
24209+ err = vfsub_rmdir(h_dir, &h_tmp);
24210+ /* d_drop(h_dentry); */
24211+ }
24212+
24213+ if (!err) {
24214+ if (au_ibstart(dir) == bindex) {
24215+ au_cpup_attr_timesizes(dir);
24216+ drop_nlink(dir);
24217+ }
24218+ return 0; /* success */
24219+ }
24220+
24221+ AuWarn("failed removing %.*s(%d), ignored\n",
24222+ AuDLNPair(wh_dentry), err);
24223+ return err;
24224+}
24225+
24226+static void call_rmdir_whtmp(void *args)
24227+{
24228+ int err;
24229+ struct au_whtmp_rmdir *a = args;
24230+ struct super_block *sb;
24231+ struct dentry *h_parent;
24232+ struct inode *h_dir;
24233+ struct au_branch *br;
24234+ struct au_hinode *hdir;
24235+
24236+ /* rmdir by nfsd may cause deadlock with this i_mutex */
24237+ /* mutex_lock(&a->dir->i_mutex); */
24238+ sb = a->dir->i_sb;
24239+ si_noflush_read_lock(sb);
24240+ err = au_test_ro(sb, a->bindex, NULL);
24241+ if (unlikely(err))
24242+ goto out;
24243+
24244+ err = -EIO;
24245+ br = au_sbr(sb, a->bindex);
24246+ ii_write_lock_parent(a->dir);
24247+ h_parent = dget_parent(a->wh_dentry);
24248+ h_dir = h_parent->d_inode;
24249+ hdir = au_hi(a->dir, a->bindex);
24250+ au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
24251+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, br);
24252+ if (!err) {
24253+ err = mnt_want_write(br->br_mnt);
24254+ if (!err) {
24255+ err = au_whtmp_rmdir(a->dir, a->bindex, a->wh_dentry,
dece6358 24256+ &a->whlist);
1facf9fc 24257+ mnt_drop_write(br->br_mnt);
24258+ }
24259+ }
24260+ au_hin_imtx_unlock(hdir);
24261+ dput(h_parent);
24262+ ii_write_unlock(a->dir);
24263+
24264+ out:
24265+ /* mutex_unlock(&a->dir->i_mutex); */
24266+ au_nwt_done(&au_sbi(sb)->si_nowait);
24267+ si_read_unlock(sb);
24268+ au_whtmp_rmdir_free(a);
24269+ if (unlikely(err))
24270+ AuIOErr("err %d\n", err);
24271+}
24272+
24273+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24274+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
24275+{
24276+ int wkq_err;
24277+
24278+ IMustLock(dir);
24279+
24280+ /* all post-process will be done in call_rmdir_whtmp(). */
24281+ args->dir = au_igrab(dir);
24282+ args->bindex = bindex;
24283+ args->wh_dentry = dget(wh_dentry);
24284+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, dir->i_sb);
24285+ if (unlikely(wkq_err)) {
24286+ AuWarn("rmdir error %.*s (%d), ignored\n",
24287+ AuDLNPair(wh_dentry), wkq_err);
24288+ au_whtmp_rmdir_free(args);
24289+ }
24290+}
1308ab2a 24291diff -uprN -x .git linux-2.6.31/fs/aufs/whout.h aufs2-2.6.git/fs/aufs/whout.h
24292--- linux-2.6.31/fs/aufs/whout.h 1970-01-01 00:00:00.000000000 +0000
24293+++ aufs2-2.6.git/fs/aufs/whout.h 2009-09-21 21:49:23.414941217 +0000
dece6358 24294@@ -0,0 +1,87 @@
1facf9fc 24295+/*
24296+ * Copyright (C) 2005-2009 Junjiro R. Okajima
24297+ *
24298+ * This program, aufs is free software; you can redistribute it and/or modify
24299+ * it under the terms of the GNU General Public License as published by
24300+ * the Free Software Foundation; either version 2 of the License, or
24301+ * (at your option) any later version.
dece6358
AM
24302+ *
24303+ * This program is distributed in the hope that it will be useful,
24304+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24305+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24306+ * GNU General Public License for more details.
24307+ *
24308+ * You should have received a copy of the GNU General Public License
24309+ * along with this program; if not, write to the Free Software
24310+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24311+ */
24312+
24313+/*
24314+ * whiteout for logical deletion and opaque directory
24315+ */
24316+
24317+#ifndef __AUFS_WHOUT_H__
24318+#define __AUFS_WHOUT_H__
24319+
24320+#ifdef __KERNEL__
24321+
1facf9fc 24322+#include <linux/aufs_type.h>
24323+#include "dir.h"
24324+
24325+/* whout.c */
24326+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
24327+struct au_branch;
24328+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
24329+ struct au_branch *br, int try_sio);
24330+int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
24331+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
24332+ struct qstr *prefix);
24333+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
24334+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
24335+ struct dentry *dentry);
24336+int au_wh_init(struct dentry *h_parent, struct au_branch *br,
24337+ struct super_block *sb);
24338+
24339+/* diropq flags */
24340+#define AuDiropq_CREATE 1
24341+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
24342+#define au_fset_diropq(flags, name) { (flags) |= AuDiropq_##name; }
24343+#define au_fclr_diropq(flags, name) { (flags) &= ~AuDiropq_##name; }
24344+
24345+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
24346+ unsigned int flags);
24347+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24348+ struct au_branch *br);
24349+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24350+ struct dentry *h_parent);
24351+
24352+/* real rmdir for the whiteout-ed dir */
24353+struct au_whtmp_rmdir {
24354+ struct inode *dir;
24355+ aufs_bindex_t bindex;
24356+ struct dentry *wh_dentry;
dece6358 24357+ struct au_nhash whlist;
1facf9fc 24358+};
24359+
24360+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
24361+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
24362+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24363+ struct dentry *wh_dentry, struct au_nhash *whlist);
24364+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24365+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
24366+
24367+/* ---------------------------------------------------------------------- */
24368+
24369+static inline struct dentry *au_diropq_create(struct dentry *dentry,
24370+ aufs_bindex_t bindex)
24371+{
24372+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
24373+}
24374+
24375+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
24376+{
24377+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
24378+}
24379+
24380+#endif /* __KERNEL__ */
24381+#endif /* __AUFS_WHOUT_H__ */
1308ab2a 24382diff -uprN -x .git linux-2.6.31/fs/aufs/wkq.c aufs2-2.6.git/fs/aufs/wkq.c
24383--- linux-2.6.31/fs/aufs/wkq.c 1970-01-01 00:00:00.000000000 +0000
24384+++ aufs2-2.6.git/fs/aufs/wkq.c 2009-09-21 21:49:23.414941217 +0000
dece6358 24385@@ -0,0 +1,259 @@
1facf9fc 24386+/*
24387+ * Copyright (C) 2005-2009 Junjiro R. Okajima
24388+ *
24389+ * This program, aufs is free software; you can redistribute it and/or modify
24390+ * it under the terms of the GNU General Public License as published by
24391+ * the Free Software Foundation; either version 2 of the License, or
24392+ * (at your option) any later version.
dece6358
AM
24393+ *
24394+ * This program is distributed in the hope that it will be useful,
24395+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24396+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24397+ * GNU General Public License for more details.
24398+ *
24399+ * You should have received a copy of the GNU General Public License
24400+ * along with this program; if not, write to the Free Software
24401+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24402+ */
24403+
24404+/*
24405+ * workqueue for asynchronous/super-io operations
24406+ * todo: try new credential scheme
24407+ */
24408+
dece6358 24409+#include <linux/module.h>
1facf9fc 24410+#include "aufs.h"
24411+
24412+/* internal workqueue named AUFS_WKQ_NAME */
24413+static struct au_wkq {
24414+ struct workqueue_struct *q;
24415+
24416+ /* balancing */
24417+ atomic_t busy;
24418+} *au_wkq;
24419+
24420+struct au_wkinfo {
24421+ struct work_struct wk;
24422+ struct super_block *sb;
24423+
24424+ unsigned int flags; /* see wkq.h */
24425+
24426+ au_wkq_func_t func;
24427+ void *args;
24428+
24429+ atomic_t *busyp;
24430+ struct completion *comp;
24431+};
24432+
24433+/* ---------------------------------------------------------------------- */
24434+
24435+static int enqueue(struct au_wkq *wkq, struct au_wkinfo *wkinfo)
24436+{
24437+ wkinfo->busyp = &wkq->busy;
24438+ if (au_ftest_wkq(wkinfo->flags, WAIT))
24439+ return !queue_work(wkq->q, &wkinfo->wk);
24440+ else
24441+ return !schedule_work(&wkinfo->wk);
24442+}
24443+
24444+static void do_wkq(struct au_wkinfo *wkinfo)
24445+{
24446+ unsigned int idle, n;
24447+ int i, idle_idx;
24448+
24449+ while (1) {
24450+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
24451+ idle_idx = 0;
24452+ idle = UINT_MAX;
24453+ for (i = 0; i < aufs_nwkq; i++) {
24454+ n = atomic_inc_return(&au_wkq[i].busy);
24455+ if (n == 1 && !enqueue(au_wkq + i, wkinfo))
24456+ return; /* success */
24457+
24458+ if (n < idle) {
24459+ idle_idx = i;
24460+ idle = n;
24461+ }
24462+ atomic_dec(&au_wkq[i].busy);
24463+ }
24464+ } else
24465+ idle_idx = aufs_nwkq;
24466+
24467+ atomic_inc(&au_wkq[idle_idx].busy);
24468+ if (!enqueue(au_wkq + idle_idx, wkinfo))
24469+ return; /* success */
24470+
24471+ /* impossible? */
24472+ AuWarn1("failed to queue_work()\n");
24473+ yield();
24474+ }
24475+}
24476+
24477+static void wkq_func(struct work_struct *wk)
24478+{
24479+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
24480+
24481+ wkinfo->func(wkinfo->args);
dece6358 24482+ atomic_dec_return(wkinfo->busyp);
1facf9fc 24483+ if (au_ftest_wkq(wkinfo->flags, WAIT))
24484+ complete(wkinfo->comp);
24485+ else {
24486+ kobject_put(&au_sbi(wkinfo->sb)->si_kobj);
24487+ module_put(THIS_MODULE);
24488+ kfree(wkinfo);
24489+ }
24490+}
24491+
24492+/*
24493+ * Since struct completion is large, try allocating it dynamically.
24494+ */
24495+#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
24496+#define AuWkqCompDeclare(name) struct completion *comp = NULL
24497+
24498+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24499+{
24500+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
24501+ if (*comp) {
24502+ init_completion(*comp);
24503+ wkinfo->comp = *comp;
24504+ return 0;
24505+ }
24506+ return -ENOMEM;
24507+}
24508+
24509+static void au_wkq_comp_free(struct completion *comp)
24510+{
24511+ kfree(comp);
24512+}
24513+
24514+#else
24515+
24516+/* no braces */
24517+#define AuWkqCompDeclare(name) \
24518+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
24519+ struct completion *comp = &_ ## name
24520+
24521+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24522+{
24523+ wkinfo->comp = *comp;
24524+ return 0;
24525+}
24526+
24527+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
24528+{
24529+ /* empty */
24530+}
24531+#endif /* 4KSTACKS */
24532+
24533+static void au_wkq_run(struct au_wkinfo *wkinfo)
24534+{
24535+ au_dbg_verify_kthread();
24536+ INIT_WORK(&wkinfo->wk, wkq_func);
24537+ do_wkq(wkinfo);
24538+}
24539+
24540+int au_wkq_wait(au_wkq_func_t func, void *args)
24541+{
24542+ int err;
24543+ AuWkqCompDeclare(comp);
24544+ struct au_wkinfo wkinfo = {
24545+ .flags = AuWkq_WAIT,
24546+ .func = func,
24547+ .args = args
24548+ };
24549+
24550+ err = au_wkq_comp_alloc(&wkinfo, &comp);
24551+ if (!err) {
24552+ au_wkq_run(&wkinfo);
24553+ /* no timeout, no interrupt */
24554+ wait_for_completion(wkinfo.comp);
24555+ au_wkq_comp_free(comp);
24556+ }
24557+
24558+ return err;
24559+
24560+}
24561+
24562+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb)
24563+{
24564+ int err;
24565+ struct au_wkinfo *wkinfo;
24566+
24567+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
24568+
24569+ /*
24570+ * wkq_func() must free this wkinfo.
24571+ * it highly depends upon the implementation of workqueue.
24572+ */
24573+ err = 0;
24574+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
24575+ if (wkinfo) {
24576+ wkinfo->sb = sb;
24577+ wkinfo->flags = !AuWkq_WAIT;
24578+ wkinfo->func = func;
24579+ wkinfo->args = args;
24580+ wkinfo->comp = NULL;
24581+ kobject_get(&au_sbi(sb)->si_kobj);
24582+ __module_get(THIS_MODULE);
24583+
24584+ au_wkq_run(wkinfo);
24585+ } else {
24586+ err = -ENOMEM;
24587+ atomic_dec(&au_sbi(sb)->si_nowait.nw_len);
24588+ }
24589+
24590+ return err;
24591+}
24592+
24593+/* ---------------------------------------------------------------------- */
24594+
24595+void au_nwt_init(struct au_nowait_tasks *nwt)
24596+{
24597+ atomic_set(&nwt->nw_len, 0);
24598+ /* smp_mb();*/ /* atomic_set */
24599+ init_waitqueue_head(&nwt->nw_wq);
24600+}
24601+
24602+void au_wkq_fin(void)
24603+{
24604+ int i;
24605+
24606+ for (i = 0; i < aufs_nwkq; i++)
24607+ if (au_wkq[i].q && !IS_ERR(au_wkq[i].q))
24608+ destroy_workqueue(au_wkq[i].q);
24609+ kfree(au_wkq);
24610+}
24611+
24612+int __init au_wkq_init(void)
24613+{
24614+ int err, i;
24615+ struct au_wkq *nowaitq;
24616+
24617+ /* '+1' is for accounting of nowait queue */
24618+ err = -ENOMEM;
24619+ au_wkq = kcalloc(aufs_nwkq + 1, sizeof(*au_wkq), GFP_NOFS);
24620+ if (unlikely(!au_wkq))
24621+ goto out;
24622+
24623+ err = 0;
24624+ for (i = 0; i < aufs_nwkq; i++) {
24625+ au_wkq[i].q = create_singlethread_workqueue(AUFS_WKQ_NAME);
24626+ if (au_wkq[i].q && !IS_ERR(au_wkq[i].q)) {
24627+ atomic_set(&au_wkq[i].busy, 0);
24628+ continue;
24629+ }
24630+
24631+ err = PTR_ERR(au_wkq[i].q);
24632+ au_wkq_fin();
24633+ goto out;
24634+ }
24635+
24636+ /* nowait accounting */
24637+ nowaitq = au_wkq + aufs_nwkq;
24638+ atomic_set(&nowaitq->busy, 0);
24639+ nowaitq->q = NULL;
24640+ /* smp_mb(); */ /* atomic_set */
24641+
24642+ out:
24643+ return err;
24644+}
1308ab2a 24645diff -uprN -x .git linux-2.6.31/fs/aufs/wkq.h aufs2-2.6.git/fs/aufs/wkq.h
24646--- linux-2.6.31/fs/aufs/wkq.h 1970-01-01 00:00:00.000000000 +0000
24647+++ aufs2-2.6.git/fs/aufs/wkq.h 2009-09-21 21:49:23.414941217 +0000
dece6358 24648@@ -0,0 +1,82 @@
1facf9fc 24649+/*
24650+ * Copyright (C) 2005-2009 Junjiro R. Okajima
24651+ *
24652+ * This program, aufs is free software; you can redistribute it and/or modify
24653+ * it under the terms of the GNU General Public License as published by
24654+ * the Free Software Foundation; either version 2 of the License, or
24655+ * (at your option) any later version.
dece6358
AM
24656+ *
24657+ * This program is distributed in the hope that it will be useful,
24658+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24659+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24660+ * GNU General Public License for more details.
24661+ *
24662+ * You should have received a copy of the GNU General Public License
24663+ * along with this program; if not, write to the Free Software
24664+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24665+ */
24666+
24667+/*
24668+ * workqueue for asynchronous/super-io operations
24669+ * todo: try new credentials management scheme
24670+ */
24671+
24672+#ifndef __AUFS_WKQ_H__
24673+#define __AUFS_WKQ_H__
24674+
24675+#ifdef __KERNEL__
24676+
1facf9fc 24677+#include <linux/sched.h>
dece6358 24678+#include <linux/wait.h>
1facf9fc 24679+#include <linux/aufs_type.h>
24680+
dece6358
AM
24681+struct super_block;
24682+
1facf9fc 24683+/* ---------------------------------------------------------------------- */
24684+
24685+/*
24686+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
24687+ */
24688+struct au_nowait_tasks {
24689+ atomic_t nw_len;
24690+ wait_queue_head_t nw_wq;
24691+};
24692+
24693+/* ---------------------------------------------------------------------- */
24694+
24695+typedef void (*au_wkq_func_t)(void *args);
24696+
24697+/* wkq flags */
24698+#define AuWkq_WAIT 1
24699+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
24700+#define au_fset_wkq(flags, name) { (flags) |= AuWkq_##name; }
24701+#define au_fclr_wkq(flags, name) { (flags) &= ~AuWkq_##name; }
24702+
24703+/* wkq.c */
24704+int au_wkq_wait(au_wkq_func_t func, void *args);
24705+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb);
24706+void au_nwt_init(struct au_nowait_tasks *nwt);
24707+int __init au_wkq_init(void);
24708+void au_wkq_fin(void);
24709+
24710+/* ---------------------------------------------------------------------- */
24711+
24712+static inline int au_test_wkq(struct task_struct *tsk)
24713+{
24714+ return !tsk->mm && !strcmp(tsk->comm, AUFS_WKQ_NAME);
24715+}
24716+
24717+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
24718+{
24719+ if (!atomic_dec_return(&nwt->nw_len))
24720+ wake_up_all(&nwt->nw_wq);
24721+}
24722+
24723+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
24724+{
24725+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
24726+ return 0;
24727+}
24728+
24729+#endif /* __KERNEL__ */
24730+#endif /* __AUFS_WKQ_H__ */
1308ab2a 24731diff -uprN -x .git linux-2.6.31/fs/aufs/xino.c aufs2-2.6.git/fs/aufs/xino.c
24732--- linux-2.6.31/fs/aufs/xino.c 1970-01-01 00:00:00.000000000 +0000
24733+++ aufs2-2.6.git/fs/aufs/xino.c 2009-09-21 21:49:23.414941217 +0000
24734@@ -0,0 +1,1203 @@
1facf9fc 24735+/*
24736+ * Copyright (C) 2005-2009 Junjiro R. Okajima
24737+ *
24738+ * This program, aufs is free software; you can redistribute it and/or modify
24739+ * it under the terms of the GNU General Public License as published by
24740+ * the Free Software Foundation; either version 2 of the License, or
24741+ * (at your option) any later version.
dece6358
AM
24742+ *
24743+ * This program is distributed in the hope that it will be useful,
24744+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24745+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24746+ * GNU General Public License for more details.
24747+ *
24748+ * You should have received a copy of the GNU General Public License
24749+ * along with this program; if not, write to the Free Software
24750+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 24751+ */
24752+
24753+/*
24754+ * external inode number translation table and bitmap
24755+ */
24756+
dece6358 24757+#include <linux/file.h>
1facf9fc 24758+#include <linux/seq_file.h>
24759+#include <linux/uaccess.h>
24760+#include "aufs.h"
24761+
24762+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
24763+ loff_t *pos)
24764+{
24765+ ssize_t err;
24766+ mm_segment_t oldfs;
24767+
24768+ oldfs = get_fs();
24769+ set_fs(KERNEL_DS);
24770+ do {
24771+ /* todo: signal_pending? */
24772+ err = func(file, (char __user *)buf, size, pos);
24773+ } while (err == -EAGAIN || err == -EINTR);
24774+ set_fs(oldfs);
24775+
24776+#if 0 /* reserved for future use */
24777+ if (err > 0)
24778+ fsnotify_access(file->f_dentry);
24779+#endif
24780+
24781+ return err;
24782+}
24783+
24784+/* ---------------------------------------------------------------------- */
24785+
24786+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *buf,
24787+ size_t size, loff_t *pos)
24788+{
24789+ ssize_t err;
24790+ mm_segment_t oldfs;
24791+
24792+ oldfs = get_fs();
24793+ set_fs(KERNEL_DS);
24794+ lockdep_off();
24795+ do {
24796+ /* todo: signal_pending? */
24797+ err = func(file, (const char __user *)buf, size, pos);
24798+ } while (err == -EAGAIN || err == -EINTR);
24799+ lockdep_on();
24800+ set_fs(oldfs);
24801+
24802+#if 0 /* reserved for future use */
24803+ if (err > 0)
24804+ fsnotify_modify(file->f_dentry);
24805+#endif
24806+
24807+ return err;
24808+}
24809+
24810+struct do_xino_fwrite_args {
24811+ ssize_t *errp;
24812+ au_writef_t func;
24813+ struct file *file;
24814+ void *buf;
24815+ size_t size;
24816+ loff_t *pos;
24817+};
24818+
24819+static void call_do_xino_fwrite(void *args)
24820+{
24821+ struct do_xino_fwrite_args *a = args;
24822+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
24823+}
24824+
24825+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
24826+ loff_t *pos)
24827+{
24828+ ssize_t err;
24829+
24830+ /* todo: signal block and no wkq? */
24831+ /* todo: new credential scheme */
24832+ /*
24833+ * it breaks RLIMIT_FSIZE and normal user's limit,
24834+ * users should care about quota and real 'filesystem full.'
24835+ */
24836+ if (!au_test_wkq(current)) {
24837+ int wkq_err;
24838+ struct do_xino_fwrite_args args = {
24839+ .errp = &err,
24840+ .func = func,
24841+ .file = file,
24842+ .buf = buf,
24843+ .size = size,
24844+ .pos = pos
24845+ };
24846+
24847+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
24848+ if (unlikely(wkq_err))
24849+ err = wkq_err;
24850+ } else
24851+ err = do_xino_fwrite(func, file, buf, size, pos);
24852+
24853+ return err;
24854+}
24855+
24856+/* ---------------------------------------------------------------------- */
24857+
24858+/*
24859+ * create a new xinofile at the same place/path as @base_file.
24860+ */
24861+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
24862+{
24863+ struct file *file;
24864+ struct dentry *base, *dentry, *parent;
24865+ struct inode *dir;
24866+ struct qstr *name;
24867+ int err;
1308ab2a 24868+ struct path path;
1facf9fc 24869+
24870+ base = base_file->f_dentry;
24871+ parent = base->d_parent; /* dir inode is locked */
24872+ dir = parent->d_inode;
24873+ IMustLock(dir);
24874+
24875+ file = ERR_PTR(-EINVAL);
24876+ name = &base->d_name;
24877+ dentry = vfsub_lookup_one_len(name->name, parent, name->len);
24878+ if (IS_ERR(dentry)) {
24879+ file = (void *)dentry;
24880+ AuErr("%.*s lookup err %ld\n", AuLNPair(name), PTR_ERR(dentry));
24881+ goto out;
24882+ }
24883+
24884+ /* no need to mnt_want_write() since we call dentry_open() later */
24885+ err = vfs_create(dir, dentry, S_IRUGO | S_IWUGO, NULL);
24886+ if (unlikely(err)) {
24887+ file = ERR_PTR(err);
24888+ AuErr("%.*s create err %d\n", AuLNPair(name), err);
24889+ goto out_dput;
24890+ }
24891+
1308ab2a 24892+ path.dentry = dentry;
24893+ path.mnt = base_file->f_vfsmnt;
24894+ path_get(&path);
24895+ file = vfsub_dentry_open(&path, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,
24896+ current_cred());
1facf9fc 24897+ if (IS_ERR(file)) {
24898+ AuErr("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
24899+ goto out_dput;
24900+ }
24901+
24902+ err = vfsub_unlink(dir, &file->f_path, /*force*/0);
24903+ if (unlikely(err)) {
24904+ AuErr("%.*s unlink err %d\n", AuLNPair(name), err);
24905+ goto out_fput;
24906+ }
24907+
24908+ if (copy_src) {
24909+ /* no one can touch copy_src xino */
24910+ err = au_copy_file(file, copy_src,
24911+ i_size_read(copy_src->f_dentry->d_inode));
24912+ if (unlikely(err)) {
24913+ AuErr("%.*s copy err %d\n", AuLNPair(name), err);
24914+ goto out_fput;
24915+ }
24916+ }
24917+ goto out_dput; /* success */
24918+
24919+ out_fput:
24920+ fput(file);
24921+ file = ERR_PTR(err);
24922+ out_dput:
24923+ dput(dentry);
24924+ out:
24925+ return file;
24926+}
24927+
24928+struct au_xino_lock_dir {
24929+ struct au_hinode *hdir;
24930+ struct dentry *parent;
24931+ struct mutex *mtx;
24932+};
24933+
24934+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
24935+ struct au_xino_lock_dir *ldir)
24936+{
24937+ aufs_bindex_t brid, bindex;
24938+
24939+ ldir->hdir = NULL;
24940+ bindex = -1;
24941+ brid = au_xino_brid(sb);
24942+ if (brid >= 0)
24943+ bindex = au_br_index(sb, brid);
24944+ if (bindex >= 0) {
24945+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
24946+ au_hin_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
24947+ } else {
24948+ ldir->parent = dget_parent(xino->f_dentry);
24949+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
24950+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
24951+ }
24952+}
24953+
24954+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
24955+{
24956+ if (ldir->hdir)
24957+ au_hin_imtx_unlock(ldir->hdir);
24958+ else {
24959+ mutex_unlock(ldir->mtx);
24960+ dput(ldir->parent);
24961+ }
24962+}
24963+
24964+/* ---------------------------------------------------------------------- */
24965+
24966+/* truncate xino files asynchronously */
24967+
24968+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
24969+{
24970+ int err;
24971+ aufs_bindex_t bi, bend;
24972+ struct au_branch *br;
24973+ struct file *new_xino, *file;
24974+ struct super_block *h_sb;
24975+ struct au_xino_lock_dir ldir;
24976+
24977+ err = -EINVAL;
24978+ bend = au_sbend(sb);
24979+ if (unlikely(bindex < 0 || bend < bindex))
24980+ goto out;
24981+ br = au_sbr(sb, bindex);
24982+ file = br->br_xino.xi_file;
24983+ if (!file)
24984+ goto out;
24985+
24986+ au_xino_lock_dir(sb, file, &ldir);
24987+ /* mnt_want_write() is unnecessary here */
24988+ new_xino = au_xino_create2(file, file);
24989+ au_xino_unlock_dir(&ldir);
24990+ err = PTR_ERR(new_xino);
24991+ if (IS_ERR(new_xino))
24992+ goto out;
24993+ err = 0;
24994+ fput(file);
24995+ br->br_xino.xi_file = new_xino;
24996+
24997+ h_sb = br->br_mnt->mnt_sb;
24998+ for (bi = 0; bi <= bend; bi++) {
24999+ if (unlikely(bi == bindex))
25000+ continue;
25001+ br = au_sbr(sb, bi);
25002+ if (br->br_mnt->mnt_sb != h_sb)
25003+ continue;
25004+
25005+ fput(br->br_xino.xi_file);
25006+ br->br_xino.xi_file = new_xino;
25007+ get_file(new_xino);
25008+ }
25009+
25010+ out:
25011+ return err;
25012+}
25013+
25014+struct xino_do_trunc_args {
25015+ struct super_block *sb;
25016+ struct au_branch *br;
25017+};
25018+
25019+static void xino_do_trunc(void *_args)
25020+{
25021+ struct xino_do_trunc_args *args = _args;
25022+ struct super_block *sb;
25023+ struct au_branch *br;
25024+ struct inode *dir;
25025+ int err;
25026+ aufs_bindex_t bindex;
25027+
25028+ err = 0;
25029+ sb = args->sb;
25030+ dir = sb->s_root->d_inode;
25031+ br = args->br;
25032+
25033+ si_noflush_write_lock(sb);
25034+ ii_read_lock_parent(dir);
25035+ bindex = au_br_index(sb, br->br_id);
25036+ err = au_xino_trunc(sb, bindex);
dece6358
AM
25037+ if (!err
25038+ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
1facf9fc 25039+ >= br->br_xino_upper)
25040+ br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
25041+
1facf9fc 25042+ ii_read_unlock(dir);
25043+ if (unlikely(err))
25044+ AuWarn("err b%d, (%d)\n", bindex, err);
25045+ atomic_dec(&br->br_xino_running);
25046+ atomic_dec(&br->br_count);
25047+ au_nwt_done(&au_sbi(sb)->si_nowait);
25048+ si_write_unlock(sb);
25049+ kfree(args);
25050+}
25051+
25052+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
25053+{
25054+ struct xino_do_trunc_args *args;
25055+ int wkq_err;
25056+
25057+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
25058+ < br->br_xino_upper)
25059+ return;
25060+
25061+ if (atomic_inc_return(&br->br_xino_running) > 1)
25062+ goto out;
25063+
25064+ /* lock and kfree() will be called in xino_do_trunc() */
25065+ args = kmalloc(sizeof(*args), GFP_NOFS);
25066+ if (unlikely(!args)) {
25067+ AuErr1("no memory\n");
25068+ goto out_args;
25069+ }
25070+
dece6358 25071+ atomic_inc_return(&br->br_count);
1facf9fc 25072+ args->sb = sb;
25073+ args->br = br;
25074+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb);
25075+ if (!wkq_err)
25076+ return; /* success */
25077+
25078+ AuErr("wkq %d\n", wkq_err);
dece6358 25079+ atomic_dec_return(&br->br_count);
1facf9fc 25080+
25081+ out_args:
25082+ kfree(args);
25083+ out:
dece6358 25084+ atomic_dec_return(&br->br_xino_running);
1facf9fc 25085+}
25086+
25087+/* ---------------------------------------------------------------------- */
25088+
25089+static int au_xino_do_write(au_writef_t write, struct file *file,
25090+ ino_t h_ino, ino_t ino)
25091+{
25092+ loff_t pos;
25093+ ssize_t sz;
25094+
25095+ pos = h_ino;
25096+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
25097+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25098+ return -EFBIG;
25099+ }
25100+ pos *= sizeof(ino);
25101+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
25102+ if (sz == sizeof(ino))
25103+ return 0; /* success */
25104+
25105+ AuIOErr("write failed (%zd)\n", sz);
25106+ return -EIO;
25107+}
25108+
25109+/*
25110+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
25111+ * at the position of @h_ino.
25112+ * even if @ino is zero, it is written to the xinofile and means no entry.
25113+ * if the size of the xino file on a specific filesystem exceeds the watermark,
25114+ * try truncating it.
25115+ */
25116+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25117+ ino_t ino)
25118+{
25119+ int err;
25120+ unsigned int mnt_flags;
25121+ struct au_branch *br;
25122+
25123+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
25124+ || ((loff_t)-1) > 0);
dece6358 25125+ SiMustAnyLock(sb);
1facf9fc 25126+
25127+ mnt_flags = au_mntflags(sb);
25128+ if (!au_opt_test(mnt_flags, XINO))
25129+ return 0;
25130+
25131+ br = au_sbr(sb, bindex);
25132+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25133+ h_ino, ino);
25134+ if (!err) {
25135+ if (au_opt_test(mnt_flags, TRUNC_XINO)
25136+ && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
25137+ xino_try_trunc(sb, br);
25138+ return 0; /* success */
25139+ }
25140+
25141+ AuIOErr("write failed (%d)\n", err);
25142+ return -EIO;
25143+}
25144+
25145+/* ---------------------------------------------------------------------- */
25146+
25147+/* aufs inode number bitmap */
25148+
25149+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
25150+static ino_t xib_calc_ino(unsigned long pindex, int bit)
25151+{
25152+ ino_t ino;
25153+
25154+ AuDebugOn(bit < 0 || page_bits <= bit);
25155+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
25156+ return ino;
25157+}
25158+
25159+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
25160+{
25161+ AuDebugOn(ino < AUFS_FIRST_INO);
25162+ ino -= AUFS_FIRST_INO;
25163+ *pindex = ino / page_bits;
25164+ *bit = ino % page_bits;
25165+}
25166+
25167+static int xib_pindex(struct super_block *sb, unsigned long pindex)
25168+{
25169+ int err;
25170+ loff_t pos;
25171+ ssize_t sz;
25172+ struct au_sbinfo *sbinfo;
25173+ struct file *xib;
25174+ unsigned long *p;
25175+
25176+ sbinfo = au_sbi(sb);
25177+ MtxMustLock(&sbinfo->si_xib_mtx);
25178+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
25179+ || !au_opt_test(sbinfo->si_mntflags, XINO));
25180+
25181+ if (pindex == sbinfo->si_xib_last_pindex)
25182+ return 0;
25183+
25184+ xib = sbinfo->si_xib;
25185+ p = sbinfo->si_xib_buf;
25186+ pos = sbinfo->si_xib_last_pindex;
25187+ pos *= PAGE_SIZE;
25188+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25189+ if (unlikely(sz != PAGE_SIZE))
25190+ goto out;
25191+
25192+ pos = pindex;
25193+ pos *= PAGE_SIZE;
25194+ if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
25195+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
25196+ else {
25197+ memset(p, 0, PAGE_SIZE);
25198+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25199+ }
25200+ if (sz == PAGE_SIZE) {
25201+ sbinfo->si_xib_last_pindex = pindex;
25202+ return 0; /* success */
25203+ }
25204+
25205+ out:
25206+ AuIOErr1("write failed (%zd)\n", sz);
25207+ err = sz;
25208+ if (sz >= 0)
25209+ err = -EIO;
25210+ return err;
25211+}
25212+
25213+/* ---------------------------------------------------------------------- */
25214+
25215+int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25216+ ino_t ino)
25217+{
25218+ int err, bit;
25219+ unsigned long pindex;
25220+ struct au_sbinfo *sbinfo;
25221+
25222+ if (!au_opt_test(au_mntflags(sb), XINO))
25223+ return 0;
25224+
25225+ err = 0;
25226+ if (ino) {
25227+ sbinfo = au_sbi(sb);
25228+ xib_calc_bit(ino, &pindex, &bit);
25229+ AuDebugOn(page_bits <= bit);
25230+ mutex_lock(&sbinfo->si_xib_mtx);
25231+ err = xib_pindex(sb, pindex);
25232+ if (!err) {
25233+ clear_bit(bit, sbinfo->si_xib_buf);
25234+ sbinfo->si_xib_next_bit = bit;
25235+ }
25236+ mutex_unlock(&sbinfo->si_xib_mtx);
25237+ }
25238+
25239+ if (!err)
25240+ err = au_xino_write(sb, bindex, h_ino, 0);
25241+ return err;
25242+}
25243+
25244+/* get an unused inode number from bitmap */
25245+ino_t au_xino_new_ino(struct super_block *sb)
25246+{
25247+ ino_t ino;
25248+ unsigned long *p, pindex, ul, pend;
25249+ struct au_sbinfo *sbinfo;
25250+ struct file *file;
25251+ int free_bit, err;
25252+
25253+ if (!au_opt_test(au_mntflags(sb), XINO))
25254+ return iunique(sb, AUFS_FIRST_INO);
25255+
25256+ sbinfo = au_sbi(sb);
25257+ mutex_lock(&sbinfo->si_xib_mtx);
25258+ p = sbinfo->si_xib_buf;
25259+ free_bit = sbinfo->si_xib_next_bit;
25260+ if (free_bit < page_bits && !test_bit(free_bit, p))
25261+ goto out; /* success */
25262+ free_bit = find_first_zero_bit(p, page_bits);
25263+ if (free_bit < page_bits)
25264+ goto out; /* success */
25265+
25266+ pindex = sbinfo->si_xib_last_pindex;
25267+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
25268+ err = xib_pindex(sb, ul);
25269+ if (unlikely(err))
25270+ goto out_err;
25271+ free_bit = find_first_zero_bit(p, page_bits);
25272+ if (free_bit < page_bits)
25273+ goto out; /* success */
25274+ }
25275+
25276+ file = sbinfo->si_xib;
25277+ pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
25278+ for (ul = pindex + 1; ul <= pend; ul++) {
25279+ err = xib_pindex(sb, ul);
25280+ if (unlikely(err))
25281+ goto out_err;
25282+ free_bit = find_first_zero_bit(p, page_bits);
25283+ if (free_bit < page_bits)
25284+ goto out; /* success */
25285+ }
25286+ BUG();
25287+
25288+ out:
25289+ set_bit(free_bit, p);
25290+ sbinfo->si_xib_next_bit++;
25291+ pindex = sbinfo->si_xib_last_pindex;
25292+ mutex_unlock(&sbinfo->si_xib_mtx);
25293+ ino = xib_calc_ino(pindex, free_bit);
25294+ AuDbg("i%lu\n", (unsigned long)ino);
25295+ return ino;
25296+ out_err:
25297+ mutex_unlock(&sbinfo->si_xib_mtx);
25298+ AuDbg("i0\n");
25299+ return 0;
25300+}
25301+
25302+/*
25303+ * read @ino from xinofile for the specified branch{@sb, @bindex}
25304+ * at the position of @h_ino.
25305+ * if @ino does not exist, *@ino is set to zero and zero (success) is returned.
25306+ */
25307+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25308+ ino_t *ino)
25309+{
25310+ int err;
25311+ ssize_t sz;
25312+ loff_t pos;
25313+ struct file *file;
25314+ struct au_sbinfo *sbinfo;
25315+
25316+ *ino = 0;
25317+ if (!au_opt_test(au_mntflags(sb), XINO))
25318+ return 0; /* no xino */
25319+
25320+ err = 0;
25321+ sbinfo = au_sbi(sb);
25322+ pos = h_ino;
25323+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
25324+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25325+ return -EFBIG;
25326+ }
25327+ pos *= sizeof(*ino);
25328+
25329+ file = au_sbr(sb, bindex)->br_xino.xi_file;
25330+ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
25331+ return 0; /* no ino */
25332+
25333+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
25334+ if (sz == sizeof(*ino))
25335+ return 0; /* success */
25336+
25337+ err = sz;
25338+ if (unlikely(sz >= 0)) {
25339+ err = -EIO;
25340+ AuIOErr("xino read error (%zd)\n", sz);
25341+ }
25342+
25343+ return err;
25344+}
25345+
25346+/* ---------------------------------------------------------------------- */
25347+
25348+/* create a new xino file at @fname, unlink it at once, and return it still open (ERR_PTR on failure) */
25349+
25350+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
25351+{
25352+ struct file *file;
25353+ struct dentry *h_parent, *d;
25354+ struct inode *h_dir;
25355+ int err;
25356+
25357+ /*
25358+ * at mount-time, and the xino file is the default path,
25359+ * hinotify is disabled so we have no inotify events to ignore.
25360+ * when a user specified the xino, we cannot get au_hdir to be ignored.
25361+ */
25362+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,
25363+ S_IRUGO | S_IWUGO);
25364+ if (IS_ERR(file)) {
25365+ if (!silent)
25366+ AuErr("open %s(%ld)\n", fname, PTR_ERR(file));
25367+ return file;
25368+ }
25369+
25370+ /* keep file count */
25371+ h_parent = dget_parent(file->f_dentry);
25372+ h_dir = h_parent->d_inode;
25373+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
25374+ /* mnt_want_write() is unnecessary here */
25375+ err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
25376+ mutex_unlock(&h_dir->i_mutex);
25377+ dput(h_parent);
25378+ if (unlikely(err)) {
25379+ if (!silent)
25380+ AuErr("unlink %s(%d)\n", fname, err);
25381+ goto out;
25382+ }
25383+
25384+ err = -EINVAL;
25385+ d = file->f_dentry;
25386+ if (unlikely(sb == d->d_sb)) { /* the xino file must not live on this aufs mount itself */
25387+ if (!silent)
25388+ AuErr("%s must be outside\n", fname);
25389+ goto out;
25390+ }
25391+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
25392+ if (!silent)
25393+ AuErr("xino doesn't support %s(%s)\n",
25394+ fname, au_sbtype(d->d_sb));
25395+ goto out;
25396+ }
25397+ return file; /* success */
25398+
25399+ out:
25400+ fput(file); /* drop the file opened above before returning the error */
25401+ file = ERR_PTR(err);
25402+ return file;
25403+}
25404+
25405+/*
25406+ * find another branch which is on the same filesystem as the specified
25407+ * branch{@btgt}. search until @bend. returns its index, or -1 if none.
25408+ */
25409+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
25410+ aufs_bindex_t bend)
25411+{
25412+ aufs_bindex_t bindex;
25413+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
25414+
25415+ for (bindex = 0; bindex < btgt; bindex++)
25416+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25417+ return bindex;
25418+ for (bindex++; bindex <= bend; bindex++) /* bindex == btgt here; the increment skips @btgt itself */
25419+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25420+ return bindex;
25421+ return -1;
25422+}
25423+
25424+/* ---------------------------------------------------------------------- */
25425+
25426+/*
25427+ * initialize the xinofile for the specified branch @br
25428+ * at the place/path where @base_file indicates.
25429+ * test whether another branch is on the same filesystem or not,
25430+ * if @do_test is true. returns zero or an error.
25431+ */
25432+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
25433+ struct file *base_file, int do_test)
25434+{
25435+ int err;
25436+ ino_t ino;
25437+ aufs_bindex_t bend, bindex;
25438+ struct au_branch *shared_br, *b;
25439+ struct file *file;
25440+ struct super_block *tgt_sb;
25441+
25442+ shared_br = NULL;
25443+ bend = au_sbend(sb);
25444+ if (do_test) {
25445+ tgt_sb = br->br_mnt->mnt_sb;
25446+ for (bindex = 0; bindex <= bend; bindex++) { /* first branch mounted from the same superblock */
25447+ b = au_sbr(sb, bindex);
25448+ if (tgt_sb == b->br_mnt->mnt_sb) {
25449+ shared_br = b;
25450+ break;
25451+ }
25452+ }
25453+ }
25454+
25455+ if (!shared_br || !shared_br->br_xino.xi_file) { /* nothing to share: create a new xino file */
25456+ struct au_xino_lock_dir ldir;
25457+
25458+ au_xino_lock_dir(sb, base_file, &ldir);
25459+ /* mnt_want_write() is unnecessary here */
25460+ file = au_xino_create2(base_file, NULL);
25461+ au_xino_unlock_dir(&ldir);
25462+ err = PTR_ERR(file);
25463+ if (IS_ERR(file))
25464+ goto out;
25465+ br->br_xino.xi_file = file;
25466+ } else {
25467+ br->br_xino.xi_file = shared_br->br_xino.xi_file; /* reuse the other branch's file ... */
25468+ get_file(br->br_xino.xi_file); /* ... and take our own reference on it */
25469+ }
25470+
25471+ ino = AUFS_ROOT_INO;
25472+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25473+ h_ino, ino); /* store AUFS_ROOT_INO in the slot for @h_ino */
25474+ if (!err)
25475+ return 0; /* success */
25476+
25477+
25478+ out:
25479+ return err;
25480+}
25481+
25482+/* ---------------------------------------------------------------------- */
25483+
25484+/* truncate a xino bitmap file */
25485+
25486+/* todo: slow */
25487+static int do_xib_restore(struct super_block *sb, struct file *file, void *page) /* set the bitmap bit of every inode number stored in xino @file; @page is a PAGE_SIZE scratch buffer */
25488+{
25489+ int err, bit;
25490+ ssize_t sz;
25491+ unsigned long pindex;
25492+ loff_t pos, pend;
25493+ struct au_sbinfo *sbinfo;
25494+ au_readf_t func;
25495+ ino_t *ino;
25496+ unsigned long *p;
25497+
25498+ err = 0;
25499+ sbinfo = au_sbi(sb);
dece6358 25500+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 25501+ p = sbinfo->si_xib_buf;
25502+ func = sbinfo->si_xread;
25503+ pend = i_size_read(file->f_dentry->d_inode);
25504+ pos = 0;
25505+ while (pos < pend) {
25506+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
25507+ err = sz;
25508+ if (unlikely(sz <= 0))
25509+ goto out;
25510+
25511+ err = 0;
25512+ for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) { /* step by the element size, not the pointer size */
25513+ if (unlikely(*ino < AUFS_FIRST_INO))
25514+ continue; /* values below AUFS_FIRST_INO are not tracked in the bitmap */
25515+
25516+ xib_calc_bit(*ino, &pindex, &bit);
25517+ AuDebugOn(page_bits <= bit);
25518+ err = xib_pindex(sb, pindex);
25519+ if (!err)
25520+ set_bit(bit, p);
25521+ else
25522+ goto out;
25523+ }
25524+ }
25525+
25526+ out:
25527+ return err; /* 0, or the error from xino_fread()/xib_pindex() */
25528+}
25529+
25530+static int xib_restore(struct super_block *sb) /* rebuild the bitmap from the branches' xino files; do_xib_restore() asserts si_xib_mtx is held */
25531+{
25532+ int err;
25533+ aufs_bindex_t bindex, bend;
25534+ void *page;
25535+
25536+ err = -ENOMEM;
25537+ page = (void *)__get_free_page(GFP_NOFS); /* scratch buffer handed to do_xib_restore() */
25538+ if (unlikely(!page))
25539+ goto out;
25540+
25541+ err = 0;
25542+ bend = au_sbend(sb);
25543+ for (bindex = 0; !err && bindex <= bend; bindex++)
25544+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) /* skip a branch when a lower-indexed one is on the same filesystem */
25545+ err = do_xib_restore
25546+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
25547+ else
25548+ AuDbg("b%d\n", bindex);
25549+ free_page((unsigned long)page);
25550+
25551+ out:
25552+ return err;
25553+}
25554+
25555+int au_xib_trunc(struct super_block *sb) /* replace the xino bitmap file with a fresh one and rebuild it from the branches' xino files */
25556+{
25557+ int err;
25558+ ssize_t sz;
25559+ loff_t pos;
25560+ struct au_xino_lock_dir ldir;
25561+ struct au_sbinfo *sbinfo;
25562+ unsigned long *p;
25563+ struct file *file;
25564+
dece6358
AM
25565+ SiMustWriteLock(sb);
25566+
1facf9fc 25567+ err = 0;
25568+ sbinfo = au_sbi(sb);
25569+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
25570+ goto out;
25571+
25572+ file = sbinfo->si_xib;
25573+ if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
25574+ goto out; /* at most one page: returns 0 without doing anything */
25575+
25576+ au_xino_lock_dir(sb, file, &ldir);
25577+ /* mnt_want_write() is unnecessary here */
25578+ file = au_xino_create2(sbinfo->si_xib, NULL);
25579+ au_xino_unlock_dir(&ldir);
25580+ err = PTR_ERR(file);
25581+ if (IS_ERR(file))
25582+ goto out;
25583+ fput(sbinfo->si_xib); /* drop the old bitmap file and switch to the new one */
25584+ sbinfo->si_xib = file;
25585+
25586+ p = sbinfo->si_xib_buf;
25587+ memset(p, 0, PAGE_SIZE);
25588+ pos = 0;
25589+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos); /* write one zeroed page to the new file */
25590+ if (unlikely(sz != PAGE_SIZE)) {
25591+ err = sz;
25592+ AuIOErr("err %d\n", err);
25593+ if (sz >= 0) /* short write: reported as -EIO */
25594+ err = -EIO;
25595+ goto out;
25596+ }
25597+
25598+ mutex_lock(&sbinfo->si_xib_mtx);
25599+ /* mnt_want_write() is unnecessary here */
25600+ err = xib_restore(sb);
25601+ mutex_unlock(&sbinfo->si_xib_mtx);
25602+
25603+out:
25604+ return err;
25605+}
25606+
25607+/* ---------------------------------------------------------------------- */
25608+
25609+/*
25610+ * xino mount option handlers
25611+ */
25612+static au_readf_t find_readf(struct file *h_file) /* pick the read routine for @h_file: its ->read, else do_sync_read when it has ->aio_read */
25613+{
25614+ const struct file_operations *fop = h_file->f_op;
25615+
25616+ if (fop) {
25617+ if (fop->read)
25618+ return fop->read;
25619+ if (fop->aio_read)
25620+ return do_sync_read;
25621+ }
25622+ return ERR_PTR(-ENOSYS); /* neither method is available */
25623+}
25624+
25625+static au_writef_t find_writef(struct file *h_file) /* pick the write routine for @h_file: its ->write, else do_sync_write when it has ->aio_write */
25626+{
25627+ const struct file_operations *fop = h_file->f_op;
25628+
25629+ if (fop) {
25630+ if (fop->write)
25631+ return fop->write;
25632+ if (fop->aio_write)
25633+ return do_sync_write;
25634+ }
25635+ return ERR_PTR(-ENOSYS); /* neither method is available */
25636+}
25637+
25638+/* xino bitmap: release the bitmap file and its page buffer, and forget the read/write routines */
25639+static void xino_clear_xib(struct super_block *sb)
25640+{
25641+ struct au_sbinfo *sbinfo;
25642+
dece6358
AM
25643+ SiMustWriteLock(sb);
25644+
1facf9fc 25645+ sbinfo = au_sbi(sb);
25646+ sbinfo->si_xread = NULL;
25647+ sbinfo->si_xwrite = NULL;
25648+ if (sbinfo->si_xib)
25649+ fput(sbinfo->si_xib);
25650+ sbinfo->si_xib = NULL;
25651+ free_page((unsigned long)sbinfo->si_xib_buf);
25652+ sbinfo->si_xib_buf = NULL;
25653+}
25654+
25655+static int au_xino_set_xib(struct super_block *sb, struct file *base) /* install a new bitmap file created from @base and reset the bitmap state; returns 0 or an error */
25656+{
25657+ int err;
25658+ loff_t pos;
25659+ struct au_sbinfo *sbinfo;
25660+ struct file *file;
25661+
dece6358
AM
25662+ SiMustWriteLock(sb);
25663+
1facf9fc 25664+ sbinfo = au_sbi(sb);
25665+ file = au_xino_create2(base, sbinfo->si_xib);
25666+ err = PTR_ERR(file);
25667+ if (IS_ERR(file))
25668+ goto out;
25669+ if (sbinfo->si_xib)
25670+ fput(sbinfo->si_xib);
25671+ sbinfo->si_xib = file;
25672+ sbinfo->si_xread = find_readf(file); /* NOTE(review): may be ERR_PTR(-ENOSYS); not checked here -- confirm */
25673+ sbinfo->si_xwrite = find_writef(file); /* NOTE(review): same as above -- confirm */
25674+
25675+ err = -ENOMEM;
25676+ if (!sbinfo->si_xib_buf)
25677+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
25678+ if (unlikely(!sbinfo->si_xib_buf))
25679+ goto out_unset;
25680+
25681+ sbinfo->si_xib_last_pindex = 0;
25682+ sbinfo->si_xib_next_bit = 0;
25683+ if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) { /* file holds less than one page: write the buffer out as its first page */
25684+ pos = 0;
25685+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
25686+ PAGE_SIZE, &pos);
25687+ if (unlikely(err != PAGE_SIZE))
25688+ goto out_free;
25689+ }
25690+ err = 0;
25691+ goto out; /* success */
25692+
25693+ out_free:
25694+ free_page((unsigned long)sbinfo->si_xib_buf);
25695+ sbinfo->si_xib_buf = NULL;
25696+ if (err >= 0) /* short write: reported as -EIO */
25697+ err = -EIO;
25698+ out_unset:
25699+ fput(sbinfo->si_xib);
25700+ sbinfo->si_xib = NULL;
25701+ sbinfo->si_xread = NULL;
25702+ sbinfo->si_xwrite = NULL;
25703+ out:
25704+ return err;
25705+}
25706+
25707+/* xino for each branch: drop every branch's reference to its xino file */
25708+static void xino_clear_br(struct super_block *sb)
25709+{
25710+ aufs_bindex_t bindex, bend;
25711+ struct au_branch *br;
25712+
25713+ bend = au_sbend(sb);
25714+ for (bindex = 0; bindex <= bend; bindex++) {
25715+ br = au_sbr(sb, bindex);
25716+ if (!br || !br->br_xino.xi_file)
25717+ continue;
25718+
25719+ fput(br->br_xino.xi_file);
25720+ br->br_xino.xi_file = NULL;
25721+ }
25722+}
25723+
25724+static int au_xino_set_br(struct super_block *sb, struct file *base) /* prepare a new (or shared) xino file per branch from @base, then switch all branches to them */
25725+{
25726+ int err;
25727+ ino_t ino;
25728+ aufs_bindex_t bindex, bend, bshared;
25729+ struct {
25730+ struct file *old, *new;
25731+ } *fpair, *p;
25732+ struct au_branch *br;
25733+ struct inode *inode;
25734+ au_writef_t writef;
25735+
dece6358
AM
25736+ SiMustWriteLock(sb);
25737+
1facf9fc 25738+ err = -ENOMEM;
25739+ bend = au_sbend(sb);
25740+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS); /* zeroed, so ->new is NULL until filled in */
25741+ if (unlikely(!fpair))
25742+ goto out;
25743+
25744+ inode = sb->s_root->d_inode;
25745+ ino = AUFS_ROOT_INO;
25746+ writef = au_sbi(sb)->si_xwrite;
25747+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
25748+ br = au_sbr(sb, bindex);
25749+ bshared = is_sb_shared(sb, bindex, bindex - 1); /* look only among lower-indexed branches */
25750+ if (bshared >= 0) {
25751+ /* shared xino */
25752+ *p = fpair[bshared];
25753+ get_file(p->new);
25754+ }
25755+
25756+ if (!p->new) {
25757+ /* new xino */
25758+ p->old = br->br_xino.xi_file;
25759+ p->new = au_xino_create2(base, br->br_xino.xi_file);
25760+ err = PTR_ERR(p->new);
25761+ if (IS_ERR(p->new)) {
25762+ p->new = NULL;
25763+ goto out_pair;
25764+ }
25765+ }
25766+
25767+ err = au_xino_do_write(writef, p->new,
25768+ au_h_iptr(inode, bindex)->i_ino, ino); /* map the branch root's inode number to AUFS_ROOT_INO */
25769+ if (unlikely(err))
25770+ goto out_pair;
25771+ }
25772+
25773+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) { /* all files are ready: switch each branch over */
25774+ br = au_sbr(sb, bindex);
25775+ if (br->br_xino.xi_file)
25776+ fput(br->br_xino.xi_file);
25777+ get_file(p->new);
25778+ br->br_xino.xi_file = p->new;
25779+ }
25780+
25781+ out_pair:
25782+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) /* also reached on success: drop the references held by fpair[] */
25783+ if (p->new)
25784+ fput(p->new);
25785+ else
25786+ break;
25787+ kfree(fpair);
25788+ out:
25789+ return err;
25790+}
25791+
25792+void au_xino_clr(struct super_block *sb) /* release the xigen, bitmap and per-branch xino state, then clear the XINO mount flag */
25793+{
25794+ struct au_sbinfo *sbinfo;
25795+
25796+ au_xigen_clr(sb);
25797+ xino_clear_xib(sb);
25798+ xino_clear_br(sb);
25799+ sbinfo = au_sbi(sb);
25800+ /* lvalue, do not call au_mntflags() */
25801+ au_opt_clr(sbinfo->si_mntflags, XINO);
25802+}
25803+
25804+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount) /* handle the xino mount option: set up the bitmap, xigen and branch xino files from @xino->file */
25805+{
25806+ int err, skip;
25807+ struct dentry *parent, *cur_parent;
25808+ struct qstr *dname, *cur_name;
25809+ struct file *cur_xino;
25810+ struct inode *dir;
25811+ struct au_sbinfo *sbinfo;
25812+
dece6358
AM
25813+ SiMustWriteLock(sb);
25814+
1facf9fc 25815+ err = 0;
25816+ sbinfo = au_sbi(sb);
25817+ parent = dget_parent(xino->file->f_dentry);
25818+ if (remount) {
25819+ skip = 0;
25820+ dname = &xino->file->f_dentry->d_name;
25821+ cur_xino = sbinfo->si_xib;
25822+ if (cur_xino) {
25823+ cur_parent = dget_parent(cur_xino->f_dentry);
25824+ cur_name = &cur_xino->f_dentry->d_name;
25825+ skip = (cur_parent == parent
25826+ && dname->len == cur_name->len
25827+ && !memcmp(dname->name, cur_name->name,
25828+ dname->len)); /* same parent dentry and same name as the current bitmap file */
25829+ dput(cur_parent);
25830+ }
25831+ if (skip)
25832+ goto out; /* nothing changes on this remount */
25833+ }
25834+
25835+ au_opt_set(sbinfo->si_mntflags, XINO);
25836+ dir = parent->d_inode;
25837+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
25838+ /* mnt_want_write() is unnecessary here */
25839+ err = au_xino_set_xib(sb, xino->file);
25840+ if (!err)
25841+ err = au_xigen_set(sb, xino->file);
25842+ if (!err)
25843+ err = au_xino_set_br(sb, xino->file);
25844+ mutex_unlock(&dir->i_mutex);
25845+ if (!err)
25846+ goto out; /* success */
25847+
25848+ /* NOTE(review): nothing is reset here, the failure is only logged -- confirm the caller cleans up */
25849+ AuIOErr("failed creating xino(%d).\n", err);
25850+
25851+ out:
25852+ dput(parent);
25853+ return err;
25854+}
25855+
25856+/* ---------------------------------------------------------------------- */
25857+
25858+/*
25859+ * create a xinofile at the default place/path: AUFS_XINO_FNAME under the first suitable writable branch, else AUFS_XINO_DEFPATH.
25860+ */
25861+struct file *au_xino_def(struct super_block *sb)
25862+{
25863+ struct file *file;
25864+ char *page, *p;
25865+ struct au_branch *br;
25866+ struct super_block *h_sb;
25867+ struct path path;
25868+ aufs_bindex_t bend, bindex, bwr;
25869+
25870+ br = NULL;
25871+ bend = au_sbend(sb);
25872+ bwr = -1;
25873+ for (bindex = 0; bindex <= bend; bindex++) { /* first writable branch that passes au_test_fs_bad_xino() */
25874+ br = au_sbr(sb, bindex);
25875+ if (au_br_writable(br->br_perm)
25876+ && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
25877+ bwr = bindex;
25878+ break;
25879+ }
25880+ }
25881+
25882+ if (bwr >= 0) {
25883+ file = ERR_PTR(-ENOMEM);
25884+ page = __getname();
25885+ if (unlikely(!page))
25886+ goto out;
25887+ path.mnt = br->br_mnt;
25888+ path.dentry = au_h_dptr(sb->s_root, bwr);
25889+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); /* shortened length leaves room for the suffix appended below */
25890+ file = (void *)p; /* carries d_path()'s ERR_PTR to the caller when it failed */
25891+ if (!IS_ERR(p)) {
25892+ strcat(p, "/" AUFS_XINO_FNAME);
25893+ AuDbg("%s\n", p);
25894+ file = au_xino_create(sb, p, /*silent*/0);
25895+ if (!IS_ERR(file))
25896+ au_xino_brid_set(sb, br->br_id);
25897+ }
25898+ __putname(page);
25899+ } else {
25900+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
25901+ if (IS_ERR(file))
25902+ goto out;
25903+ h_sb = file->f_dentry->d_sb;
25904+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
25905+ AuErr("xino doesn't support %s(%s)\n",
25906+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
25907+ fput(file);
25908+ file = ERR_PTR(-EINVAL);
25909+ }
25910+ if (!IS_ERR(file))
25911+ au_xino_brid_set(sb, -1); /* -1: the xino file is not under any branch */
25912+ }
25913+
25914+ out:
25915+ return file;
25916+}
25917+
25918+/* ---------------------------------------------------------------------- */
25919+
25920+int au_xino_path(struct seq_file *seq, struct file *file) /* print @file's path into @seq, minus the trailing escaped " (deleted)" */
25921+{
25922+ int err;
25923+
25924+ err = au_seq_path(seq, &file->f_path);
25925+ if (unlikely(err < 0))
25926+ goto out;
25927+
25928+ err = 0;
25929+#define Deleted "\\040(deleted)"
25930+ seq->count -= sizeof(Deleted) - 1; /* drop the suffix by shortening the output; assumes it is always present */
25931+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
25932+ sizeof(Deleted) - 1)); /* debug check that the dropped text really was the suffix */
25933+#undef Deleted
25934+
25935+ out:
25936+ return err;
25937+}
1308ab2a 25938diff -uprN -x .git linux-2.6.31/fs/namei.c aufs2-2.6.git/fs/namei.c
25939--- linux-2.6.31/fs/namei.c 2009-09-09 22:13:59.000000000 +0000
25940+++ aufs2-2.6.git/fs/namei.c 2009-09-21 21:49:25.001190884 +0000
25941@@ -1219,7 +1219,7 @@ out:
1facf9fc 25942 * needs parent already locked. Doesn't follow mounts.
25943 * SMP-safe.
25944 */
25945-static struct dentry *lookup_hash(struct nameidata *nd)
25946+struct dentry *lookup_hash(struct nameidata *nd)
25947 {
25948 int err;
25949
1308ab2a 25950@@ -1229,7 +1229,7 @@ static struct dentry *lookup_hash(struct
1facf9fc 25951 return __lookup_hash(&nd->last, nd->path.dentry, nd);
25952 }
25953
25954-static int __lookup_one_len(const char *name, struct qstr *this,
25955+int __lookup_one_len(const char *name, struct qstr *this,
25956 struct dentry *base, int len)
25957 {
25958 unsigned long hash;
1308ab2a 25959diff -uprN -x .git linux-2.6.31/fs/splice.c aufs2-2.6.git/fs/splice.c
25960--- linux-2.6.31/fs/splice.c 2009-09-09 22:13:59.000000000 +0000
25961+++ aufs2-2.6.git/fs/splice.c 2009-09-21 21:49:25.471607719 +0000
25962@@ -1057,8 +1057,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
0fc653ad 25963 /*
25964 * Attempt to initiate a splice from pipe to file.
25965 */
25966-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
25967- loff_t *ppos, size_t len, unsigned int flags)
25968+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
25969+ loff_t *ppos, size_t len, unsigned int flags)
25970 {
1308ab2a 25971 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
25972 loff_t *, size_t, unsigned int);
25973@@ -1084,9 +1084,9 @@ static long do_splice_from(struct pipe_i
0fc653ad 25974 /*
25975 * Attempt to initiate a splice from a file to a pipe.
25976 */
25977-static long do_splice_to(struct file *in, loff_t *ppos,
25978- struct pipe_inode_info *pipe, size_t len,
25979- unsigned int flags)
25980+long do_splice_to(struct file *in, loff_t *ppos,
25981+ struct pipe_inode_info *pipe, size_t len,
25982+ unsigned int flags)
25983 {
1308ab2a 25984 ssize_t (*splice_read)(struct file *, loff_t *,
25985 struct pipe_inode_info *, size_t, unsigned int);
25986diff -uprN -x .git linux-2.6.31/include/linux/Kbuild aufs2-2.6.git/include/linux/Kbuild
25987--- linux-2.6.31/include/linux/Kbuild 2009-09-09 22:13:59.000000000 +0000
25988+++ aufs2-2.6.git/include/linux/Kbuild 2009-09-21 21:49:26.084940677 +0000
25989@@ -34,6 +34,7 @@ header-y += atmppp.h
25990 header-y += atmsap.h
25991 header-y += atmsvc.h
25992 header-y += atm_zatm.h
25993+header-y += aufs_type.h
25994 header-y += auto_fs4.h
25995 header-y += ax25.h
25996 header-y += b1lli.h
25997diff -uprN -x .git linux-2.6.31/include/linux/aufs_type.h aufs2-2.6.git/include/linux/aufs_type.h
25998--- linux-2.6.31/include/linux/aufs_type.h 1970-01-01 00:00:00.000000000 +0000
25999+++ aufs2-2.6.git/include/linux/aufs_type.h 2009-09-21 21:49:26.101190816 +0000
26000@@ -0,0 +1,184 @@
1facf9fc 26001+/*
26002+ * Copyright (C) 2005-2009 Junjiro R. Okajima
26003+ *
26004+ * This program, aufs is free software; you can redistribute it and/or modify
26005+ * it under the terms of the GNU General Public License as published by
26006+ * the Free Software Foundation; either version 2 of the License, or
26007+ * (at your option) any later version.
dece6358
AM
26008+ *
26009+ * This program is distributed in the hope that it will be useful,
26010+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26011+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26012+ * GNU General Public License for more details.
26013+ *
26014+ * You should have received a copy of the GNU General Public License
26015+ * along with this program; if not, write to the Free Software
26016+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1facf9fc 26017+ */
26018+
26019+#ifndef __AUFS_TYPE_H__
26020+#define __AUFS_TYPE_H__
26021+
26022+#include <linux/ioctl.h>
1308ab2a 26023+#include <linux/types.h>
1facf9fc 26024+
1308ab2a 26025+#define AUFS_VERSION "2-31"
1facf9fc 26026+
26027+/* todo? move this to linux-2.6.19/include/magic.h */
26028+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
26029+
26030+/* ---------------------------------------------------------------------- */
26031+
26032+#ifdef CONFIG_AUFS_BRANCH_MAX_127
1308ab2a 26033+typedef __s8 aufs_bindex_t;
1facf9fc 26034+#define AUFS_BRANCH_MAX 127
26035+#else
1308ab2a 26036+typedef __s16 aufs_bindex_t;
1facf9fc 26037+#ifdef CONFIG_AUFS_BRANCH_MAX_511
26038+#define AUFS_BRANCH_MAX 511
26039+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
26040+#define AUFS_BRANCH_MAX 1023
26041+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
26042+#define AUFS_BRANCH_MAX 32767
26043+#endif
26044+#endif
26045+
26046+#ifdef __KERNEL__
26047+#ifndef AUFS_BRANCH_MAX
26048+#error unknown CONFIG_AUFS_BRANCH_MAX value
26049+#endif
26050+#endif /* __KERNEL__ */
26051+
26052+/* ---------------------------------------------------------------------- */
26053+
26054+#define AUFS_NAME "aufs"
26055+#define AUFS_FSTYPE AUFS_NAME
26056+
26057+#define AUFS_ROOT_INO 2
26058+#define AUFS_FIRST_INO 11
26059+
26060+#define AUFS_WH_PFX ".wh."
26061+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
26062+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
26063+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
26064+#define AUFS_XINO_TRUNC_INIT 64 /* blocks */
26065+#define AUFS_XINO_TRUNC_STEP 4 /* blocks */
26066+#define AUFS_DIRWH_DEF 3
26067+#define AUFS_RDCACHE_DEF 10 /* seconds */
26068+#define AUFS_RDBLK_DEF 512 /* bytes */
26069+#define AUFS_RDHASH_DEF 32
26070+#define AUFS_WKQ_NAME AUFS_NAME "d"
26071+#define AUFS_NWKQ_DEF 4
26072+#define AUFS_MFS_SECOND_DEF 30 /* seconds */
26073+#define AUFS_PLINK_WARN 100 /* number of plinks */
26074+
26075+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
26076+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
26077+
26078+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
26079+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
26080+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
26081+
26082+/* doubly whiteouted */
26083+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
26084+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
26085+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
26086+
26087+/* branch permission */
26088+#define AUFS_BRPERM_RW "rw"
26089+#define AUFS_BRPERM_RO "ro"
26090+#define AUFS_BRPERM_RR "rr"
26091+#define AUFS_BRPERM_WH "wh"
26092+#define AUFS_BRPERM_NLWH "nolwh"
26093+#define AUFS_BRPERM_ROWH AUFS_BRPERM_RO "+" AUFS_BRPERM_WH
26094+#define AUFS_BRPERM_RRWH AUFS_BRPERM_RR "+" AUFS_BRPERM_WH
26095+#define AUFS_BRPERM_RWNLWH AUFS_BRPERM_RW "+" AUFS_BRPERM_NLWH
26096+
26097+/* ---------------------------------------------------------------------- */
26098+
26099+/* ioctl */
26100+enum {
26101+ AuCtl_PLINK_MAINT,
1308ab2a 26102+ AuCtl_PLINK_CLEAN,
26103+
26104+ /* readdir in userspace */
26105+ AuCtl_RDU,
26106+ AuCtl_RDU_INO
26107+};
26108+
26109+/* borrowed from linux/include/linux/kernel.h */
26110+#ifndef ALIGN
26111+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
26112+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
26113+#endif
26114+
26115+/* borrowed from linux/include/linux/compiler-gcc3.h */
26116+#ifndef __aligned
26117+#define __aligned(x) __attribute__((aligned(x)))
26118+#define __packed __attribute__((packed))
26119+#endif
26120+
26121+struct au_rdu_cookie {
26122+ __u64 h_pos;
26123+ __s16 bindex;
26124+ __u8 flags;
26125+ __u8 pad;
26126+ __u32 generation;
26127+} __aligned(8);
26128+
26129+struct au_rdu_ent {
26130+ __u64 ino;
26131+ __s16 bindex;
26132+ __u8 type;
26133+ __u8 nlen;
26134+ __u8 wh;
26135+ char name[0];
26136+} __aligned(8);
26137+
26138+static inline int au_rdu_len(int nlen) /* size of an au_rdu_ent whose name is @nlen bytes long, rounded up to a multiple of sizeof(__u64) */
26139+{
26140+ /* include the terminating NUL */
26141+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
26142+ sizeof(__u64));
26143+}
26144+
26145+union au_rdu_ent_ul {
26146+ struct au_rdu_ent __user *e;
26147+ unsigned long ul;
26148+};
26149+
26150+enum {
26151+ AufsCtlRduV_SZ,
26152+ AufsCtlRduV_SZ_PTR,
26153+ AufsCtlRduV_End
1facf9fc 26154+};
26155+
1308ab2a 26156+struct aufs_rdu {
26157+ /* input */
26158+ union {
26159+ __u64 sz; /* AuCtl_RDU */
26160+ __u64 nent; /* AuCtl_RDU_INO */
26161+ };
26162+ union au_rdu_ent_ul ent;
26163+ __u16 verify[AufsCtlRduV_End];
26164+
26165+ /* input/output */
26166+ __u32 blk;
26167+
26168+ /* output */
26169+ union au_rdu_ent_ul tail;
26170+ /* number of entries which were added in a single call */
26171+ __u64 rent;
26172+ __u8 full;
26173+ __u8 shwh;
26174+
26175+ struct au_rdu_cookie cookie;
26176+} __aligned(8);
26177+
1facf9fc 26178+#define AuCtlType 'A'
26179+#define AUFS_CTL_PLINK_MAINT _IO(AuCtlType, AuCtl_PLINK_MAINT)
26180+#define AUFS_CTL_PLINK_CLEAN _IO(AuCtlType, AuCtl_PLINK_CLEAN)
1308ab2a 26181+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
26182+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1facf9fc 26183+
26184+#endif /* __AUFS_TYPE_H__ */
1308ab2a 26185diff -uprN -x .git linux-2.6.31/include/linux/namei.h aufs2-2.6.git/include/linux/namei.h
26186--- linux-2.6.31/include/linux/namei.h 2009-09-09 22:13:59.000000000 +0000
26187+++ aufs2-2.6.git/include/linux/namei.h 2009-09-21 21:49:26.484529184 +0000
26188@@ -75,6 +75,9 @@ extern struct file *lookup_instantiate_f
1facf9fc 26189 extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
26190 extern void release_open_intent(struct nameidata *);
26191
26192+extern struct dentry *lookup_hash(struct nameidata *nd);
26193+extern int __lookup_one_len(const char *name, struct qstr *this,
26194+ struct dentry *base, int len);
26195 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
26196 extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
26197
1308ab2a 26198diff -uprN -x .git linux-2.6.31/include/linux/splice.h aufs2-2.6.git/include/linux/splice.h
26199--- linux-2.6.31/include/linux/splice.h 2009-09-09 22:13:59.000000000 +0000
26200+++ aufs2-2.6.git/include/linux/splice.h 2009-09-21 21:49:26.544523817 +0000
26201@@ -82,4 +82,10 @@ extern ssize_t splice_to_pipe(struct pip
1facf9fc 26202 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
26203 splice_direct_actor *);
26204
0fc653ad 26205+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1facf9fc 26206+ loff_t *ppos, size_t len, unsigned int flags);
0fc653ad 26207+extern long do_splice_to(struct file *in, loff_t *ppos,
1facf9fc 26208+ struct pipe_inode_info *pipe, size_t len,
26209+ unsigned int flags);
26210+
26211 #endif
This page took 3.804292 seconds and 4 git commands to generate.