]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-aufs2.patch
- updated for 2.6.32.56
[packages/kernel.git] / kernel-aufs2.patch
1 diff -uprN -x .git linux-2.6.31/Documentation/ABI/testing/debugfs-aufs aufs2-2.6.git/Documentation/ABI/testing/debugfs-aufs
2 --- linux-2.6.31/Documentation/ABI/testing/debugfs-aufs 1970-01-01 00:00:00.000000000 +0000
3 +++ aufs2-2.6.git/Documentation/ABI/testing/debugfs-aufs        2009-09-14 14:52:35.032396516 +0000
4 @@ -0,0 +1,40 @@
5 +What:          /debug/aufs/si_<id>/
6 +Date:          March 2009
7 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
8 +Description:
9 +               Under /debug/aufs, a directory named si_<id> is created
10 +               per aufs mount, where <id> is a unique id generated
11 +               internally.
12 +
13 +What:          /debug/aufs/si_<id>/xib
14 +Date:          March 2009
15 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
16 +Description:
17 +               It shows the consumed blocks by xib (External Inode Number
18 +               Bitmap), its block size and file size.
19 +               When the aufs mount option 'noxino' is specified, it
20 +               will be empty. About XINO files, see
21 +               Documentation/filesystems/aufs/aufs.5 in detail.
22 +
23 +What:          /debug/aufs/si_<id>/xino0, xino1 ... xinoN
24 +Date:          March 2009
25 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
26 +Description:
27 +               It shows the consumed blocks by xino (External Inode Number
28 +               Translation Table), its link count, block size and file
29 +               size.
30 +               When the aufs mount option 'noxino' is specified, it
31 +               will be empty. About XINO files, see
32 +               Documentation/filesystems/aufs/aufs.5 in detail.
33 +
34 +What:          /debug/aufs/si_<id>/xigen
35 +Date:          March 2009
36 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
37 +Description:
38 +               It shows the consumed blocks by xigen (External Inode
39 +               Generation Table), its block size and file size.
40 +               If CONFIG_AUFS_EXPORT is disabled, this entry will not
41 +               be created.
42 +               When the aufs mount option 'noxino' is specified, it
43 +               will be empty. About XINO files, see
44 +               Documentation/filesystems/aufs/aufs.5 in detail.
45 diff -uprN -x .git linux-2.6.31/Documentation/ABI/testing/sysfs-aufs aufs2-2.6.git/Documentation/ABI/testing/sysfs-aufs
46 --- linux-2.6.31/Documentation/ABI/testing/sysfs-aufs   1970-01-01 00:00:00.000000000 +0000
47 +++ aufs2-2.6.git/Documentation/ABI/testing/sysfs-aufs  2009-09-14 14:52:35.032396516 +0000
48 @@ -0,0 +1,25 @@
49 +What:          /sys/fs/aufs/si_<id>/
50 +Date:          March 2009
51 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
52 +Description:
53 +               Under /sys/fs/aufs, a directory named si_<id> is created
54 +               per aufs mount, where <id> is a unique id generated
55 +               internally.
56 +
57 +What:          /sys/fs/aufs/si_<id>/br0, br1 ... brN
58 +Date:          March 2009
59 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
60 +Description:
61 +               It shows the absolute path of a member directory (which
62 +               is called branch) in aufs, and its permission.
63 +
64 +What:          /sys/fs/aufs/si_<id>/xi_path
65 +Date:          March 2009
66 +Contact:       J. R. Okajima <hooanon05@yahoo.co.jp>
67 +Description:
68 +               It shows the absolute path of XINO (External Inode Number
69 +               Bitmap, Translation Table and Generation Table) file
70 +               even if it is the default path.
71 +               When the aufs mount option 'noxino' is specified, it
72 +               will be empty. About XINO files, see
73 +               Documentation/filesystems/aufs/aufs.5 in detail.
74 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/README aufs2-2.6.git/Documentation/filesystems/aufs/README
75 --- linux-2.6.31/Documentation/filesystems/aufs/README  1970-01-01 00:00:00.000000000 +0000
76 +++ aufs2-2.6.git/Documentation/filesystems/aufs/README 2009-09-21 21:48:58.761610020 +0000
77 @@ -0,0 +1,342 @@
78 +
79 +Aufs2 -- advanced multi layered unification filesystem version 2
80 +http://aufs.sf.net
81 +Junjiro R. Okajima
82 +
83 +
84 +0. Introduction
85 +----------------------------------------
86 +In the early days, aufs was entirely re-designed and re-implemented
87 +Unionfs Version 1.x series. After many original ideas, approaches,
88 +improvements and implementations, it becomes totally different from
89 +Unionfs while keeping the basic features.
90 +Recently, the Unionfs Version 2.x series has begun taking some of the same
91 +approaches as aufs1's.
92 +Unionfs is being developed by Professor Erez Zadok at Stony Brook
93 +University and his team.
94 +
95 +This version of AUFS, aufs2 has several purposes.
96 +- to be reviewed easily and widely.
97 +- to make the source files simpler and smaller by dropping several
98 +  original features.
99 +
100 +Through this work, I found some bad things in aufs1 source code and
101 +fixed them. Some of the dropped features will be reverted in the future,
102 +but not all I'm afraid.
103 +Aufs2 supports linux-2.6.27 and later. If you want older kernel version
104 +support, try aufs1 from CVS on SourceForge.
105 +
106 +Note: it becomes clear that "Aufs was rejected. Let's give it up."
107 +According to Christoph Hellwig, linux rejects all union-type filesystems
108 +but UnionMount.
109 +<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
110 +
111 +
112 +1. Features
113 +----------------------------------------
114 +- unite several directories into a single virtual filesystem. The member
115 +  directory is called as a branch.
116 +- you can specify the permission flags to the branch, which are 'readonly',
117 +  'readwrite' and 'whiteout-able.'
118 +- by upper writable branch, internal copyup and whiteout, files/dirs on
119 +  readonly branch are modifiable logically.
120 +- dynamic branch manipulation, add, del.
121 +- etc...
122 +
123 +Also there are many enhancements in aufs1, such as:
124 +- readdir(3) in userspace.
125 +- keep inode number by external inode number table
126 +- keep the timestamps of file/dir in internal copyup operation
127 +- seekable directory, supporting NFS readdir.
128 +- support mmap(2) including /proc/PID/exe symlink, without page-copy
129 +- whiteout is hardlinked in order to reduce the consumption of inodes
130 +  on branch
131 +- do not copyup, nor create a whiteout when it is unnecessary
132 +- revert a single systemcall when an error occurs in aufs
133 +- remount interface instead of ioctl
134 +- maintain /etc/mtab by an external command, /sbin/mount.aufs.
135 +- loopback mounted filesystem as a branch
136 +- kernel thread for removing a dir which has plenty of whiteouts
137 +- support copyup sparse file (a file which has a 'hole' in it)
138 +- default permission flags for branches
139 +- selectable permission flags for ro branch, whether whiteout can
140 +  exist or not
141 +- export via NFS.
142 +- support <sysfs>/fs/aufs and <debugfs>/aufs.
143 +- support multiple writable branches, some policies to select one
144 +  among multiple writable branches.
145 +- a new semantics for link(2) and rename(2) to support multiple
146 +  writable branches.
147 +- no glibc changes are required.
148 +- pseudo hardlink (hardlink over branches)
149 +- allow a direct access manually to a file on branch, e.g. bypassing aufs.
150 +  including NFS or remote filesystem branch.
151 +- and more...
152 +
153 +Currently these features are dropped temporarily from this version, aufs2.
154 +See design/08plan.txt in detail.
155 +- test only the highest one for the directory permission (dirperm1)
156 +- show whiteout mode (shwh)
157 +- copyup on open (coo=)
158 +- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
159 +  (robr)
160 +- statistics of aufs thread (/sys/fs/aufs/stat)
161 +- delegation mode (dlgt)
162 +  a delegation of the internal branch access to support task I/O
163 +  accounting, which also supports Linux Security Modules (LSM) mainly
164 +  for Suse AppArmor.
165 +- intent.open/create (file open in a single lookup)
166 +
167 +Features or just an idea in the future (see also design/*.txt),
168 +- reorder the branch index without del/re-add.
169 +- permanent xino files for NFSD
170 +- an option for refreshing the opened files after add/del branches
171 +- 'move' policy for copy-up between two writable branches, after
172 +  checking free space.
173 +- O_DIRECT
174 +- light version, without branch manipulation. (unnecessary?)
175 +- copyup in userspace
176 +- inotify in userspace
177 +- readv/writev
178 +- xattr, acl
179 +
180 +
181 +2. Download
182 +----------------------------------------
183 +Kindly, one of the aufs users, the Center for Scientific Computing and Free
184 +Software (C3SL), Federal University of Parana offered me a public GIT
185 +tree space.
186 +
187 +There are three GIT trees, aufs2-2.6, aufs2-standalone and aufs2-util.
188 +While the aufs2-util is always necessary, you need either of aufs2-2.6
189 +or aufs2-standalone.
190 +
191 +The aufs2-2.6 tree includes the whole linux-2.6 GIT tree,
192 +git://git.kernel.org/.../torvalds/linux-2.6.git.
193 +And you cannot select CONFIG_AUFS_FS=m for this version, i.e. you cannot
194 +build aufs2 as an external kernel module.
195 +If you already have linux-2.6 GIT tree, you may want to pull and merge
196 +the "aufs2" branch from this tree.
197 +
198 +On the other hand, the aufs2-standalone tree has only aufs2 source files
199 +and a necessary patch, and you can select CONFIG_AUFS_FS=m. In other
200 +words, the aufs2-standalone tree is generated from aufs2-2.6 tree by,
201 +- extract new files and modifications.
202 +- generate some patch files from modifications.
203 +- generate a ChangeLog file from git-log.
204 +- commit the files newly and no log messages. this is not git-pull.
205 +
206 +Both of aufs2-2.6 and aufs2-standalone trees have a branch whose name is
207 +in form of "aufs2-xx" where "xx" represents the linux kernel version,
208 +"linux-2.6.xx".
209 +
210 +o aufs2-2.6 tree
211 +$ git clone --reference /your/linux-2.6/git/tree \
212 +       http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git \
213 +       aufs2-2.6.git
214 +- if you don't have linux-2.6 GIT tree, then remove "--reference ..."
215 +$ cd aufs2-2.6.git
216 +$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
217 +                               # aufs2 (no -xx) for the latest -rc version.
218 +
219 +o aufs2-standalone tree
220 +$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-standalone.git \
221 +       aufs2-standalone.git
222 +$ cd aufs2-standalone.git
223 +$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27
224 +                               # aufs2 (no -xx) for the latest -rc version.
225 +
226 +o aufs2-util tree
227 +$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-util.git \
228 +       aufs2-util.git
229 +$ cd aufs2-util.git
230 +- no particular tag/branch currently.
231 +
232 +o for advanced users
233 +$ git clone git://git.kernel.org/.../torvalds/linux-2.6.git linux-2.6.git
234 +  It will take very long time.
235 +
236 +$ cd linux-2.6.git
237 +$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
238 +$ git checkout -b aufs2-27 v2.6.27
239 +$ git pull aufs2 aufs2-27
240 +  It may take long time again.
241 +  Once pulling completes, you've got linux-2.6.27 and aufs2 for it in a
242 +  branch named aufs2-27, and you can configure and build it.
243 +
244 +Or
245 +
246 +$ git checkout -t -b aufs2 master
247 +$ git pull aufs2 aufs2
248 +  then you've got the latest linux kernel and the latest aufs2 in a
249 +  branch named aufs2, and you can configure and build it.
250 +  But aufs is released once a week, so you may meet a compilation error
251 +  due to mismatching between the mainline and aufs2.
252 +
253 +Or you may want to build linux-2.6.xx.yy instead of linux-2.6.xx, then here
254 +is an approach using linux-2.6-stable GIT tree.
255 +
256 +$ cd linux-2.6.git/..
257 +$ git clone -q --reference ./linux-2.6.git git://git.kernel.org/.../linux-2.6-stable.git \
258 +       linux-2.6-stable.git
259 +  It will take very long time.
260 +
261 +$ cd linux-2.6-stable.git
262 +$ git remote add aufs2 http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git
263 +$ git checkout -b aufs2-27.1 v2.6.27.1
264 +$ git pull aufs2 aufs2-27
265 +  then you've got linux-2.6.27.1 and aufs2 for 2.6.27 in a branch named
266 +  aufs2-27.1, and you can configure and build it.
267 +  But the changes made by v2.6.xx.yy may conflict with aufs2-xx, since
268 +  aufs2-xx is for v2.6.xx only. In this case, you may find some patches
269 +  for v2.6.xx.yy in aufs2-standalone.git#aufs2-xx branch if someone else
270 +  has ever requested me to support v2.6.xx.yy and I did it.
271 +
272 +You can also check what was changed by pulling aufs2.
273 +$ git diff v2.6.27.1..aufs2-27.1
274 +
275 +If you want to check the changed files other than fs/aufs, then try this.
276 +$ git diff v2.6.27.1..aufs2-27.1 |
277 +> awk '
278 +> /^diff / {new=1}
279 +> /^diff.*aufs/ {new=0}
280 +> new {print}
281 +> '
282 +
283 +
284 +3. Configuration and Compilation
285 +----------------------------------------
286 +For aufs2-2.6 tree,
287 +- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS.
288 +- set other aufs configurations if necessary.
289 +
290 +For aufs2-standalone tree,
291 +There are several ways to build.
292 +
293 +You may wonder why aufs2-standalone.patch needs to export so many kernel
294 +symbols. Because you selected aufs2-standalone tree instead of aufs2-2.6
295 +tree. The number of necessary symbols to export essentially is zero.
296 +All other symbols are for the external module.
297 +If you don't like aufs2-standalone.patch, then try aufs2-2.6 tree.
298 +
299 +1.
300 +- apply ./aufs2-kbuild.patch to your kernel source files.
301 +- apply ./aufs2-base.patch too.
302 +- apply ./aufs2-standalone.patch too, if you have a plan to set
303 +  CONFIG_AUFS_FS=m. otherwise you don't need ./aufs2-standalone.patch.
304 +- copy ./{Documentation,fs,include} files to your kernel source tree.
305 +- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select either
306 +  =m or =y.
307 +- and build your kernel as usual.
308 +- install it and reboot your system.
309 +
310 +2.
311 +- module only (CONFIG_AUFS_FS=m).
312 +- apply ./aufs2-base.patch to your kernel source files.
313 +- apply ./aufs2-standalone.patch too.
314 +- build your kernel and reboot.
315 +- edit ./config.mk and set other aufs configurations if necessary.
316 +  Note: You should read ./fs/aufs/Kconfig carefully which describes
317 +  every aufs configuration.
318 +- build the module by simple "make".
319 +- you can specify ${KDIR} make variable which points to your kernel
320 +  source tree.
321 +- copy the built ./aufs.ko to /lib/modules/..., and run depmod -a (or
322 +  reboot simply).
323 +- no need to apply aufs2-kbuild.patch, nor copying source files to your
324 +  kernel source tree.
325 +
326 +And then,
327 +- read README in aufs2-util, build and install it
328 +- if you want to use readdir(3) in userspace, then run
329 +  "make install_ulib" too. And refer to the aufs manual in detail.
330 +
331 +
332 +4. Usage
333 +----------------------------------------
334 +At first, make sure aufs2-util is installed, and please read the aufs
335 +manual, aufs.5 in aufs2-util.git tree.
336 +$ man -l aufs.5
337 +
338 +And then,
339 +$ mkdir /tmp/rw /tmp/aufs
340 +# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
341 +
342 +Here is another example. The result is equivalent.
343 +# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
344 +  Or
345 +# mount -t aufs -o br:/tmp/rw none /tmp/aufs
346 +# mount -o remount,append:${HOME} /tmp/aufs
347 +
348 +Then, you can see whole tree of your home dir through /tmp/aufs. If
349 +you modify a file under /tmp/aufs, the one on your home directory is
350 +not affected, instead the same named file will be newly created under
351 +/tmp/rw. And all of your modification to a file will be applied to
352 +the one under /tmp/rw. This is called the file based Copy on Write
353 +(COW) method.
354 +Aufs mount options are described in aufs.5.
355 +
356 +Additionally, there are some sample usages of aufs which are a
357 +diskless system with network booting, and LiveCD over NFS.
358 +See sample dir in CVS tree on SourceForge.
359 +
360 +
361 +5. Contact
362 +----------------------------------------
363 +When you have any problems or strange behaviour in aufs, please let me
364 +know with:
365 +- /proc/mounts (instead of the output of mount(8))
366 +- /sys/module/aufs/*
367 +- /sys/fs/aufs/* (if you have them)
368 +- /debug/aufs/* (if you have them)
369 +- linux kernel version
370 +  if your kernel is not plain, for example modified by distributor,
371 +  the url where i can download its source is necessary too.
372 +- aufs version which was printed at loading the module or booting the
373 +  system, instead of the date you downloaded.
374 +- configuration (define/undefine CONFIG_AUFS_xxx)
375 +- kernel configuration or /proc/config.gz (if you have it)
376 +- behaviour which you think to be incorrect
377 +- actual operation, reproducible one is better
378 +- mailto: aufs-users at lists.sourceforge.net
379 +
380 +Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
381 +and Feature Requests) on SourceForge. Please join and write to
382 +aufs-users ML.
383 +
384 +
385 +6. Acknowledgements
386 +----------------------------------------
387 +Thanks to everyone who has tried and is using aufs, and whoever
388 +has reported a bug or given any feedback.
389 +
390 +Especially donors:
391 +Tomas Matejicek(slax.org) made a donation (much more than once).
392 +Dai Itasaka made a donation (2007/8).
393 +Chuck Smith made a donation (2008/4, 10 and 12).
394 +Henk Schoneveld made a donation (2008/9).
395 +Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
396 +Francois Dupoux made a donation (2008/11).
397 +Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
398 +aufs2 GIT tree (2009/2).
399 +William Grant made a donation (2009/3).
400 +Patrick Lane made a donation (2009/4).
401 +The Mail Archive (mail-archive.com) made donations (2009/5).
402 +Nippy Networks (Ed Wildgoose) made a donation (2009/7).
403 +
404 +Thank you very much.
405 +Donations are always, including future donations, very important and
406 +helpful for me to keep on developing aufs.
407 +
408 +
409 +7.
410 +----------------------------------------
411 +If you are an experienced user, no explanation is needed. Aufs is
412 +just a linux filesystem.
413 +
414 +
415 +Enjoy!
416 +
417 +# Local variables: ;
418 +# mode: text;
419 +# End: ;
420 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/01intro.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/01intro.txt
421 --- linux-2.6.31/Documentation/filesystems/aufs/design/01intro.txt      1970-01-01 00:00:00.000000000 +0000
422 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/01intro.txt     2009-09-21 21:48:58.761610020 +0000
423 @@ -0,0 +1,137 @@
424 +
425 +# Copyright (C) 2005-2009 Junjiro R. Okajima
426 +# 
427 +# This program is free software; you can redistribute it and/or modify
428 +# it under the terms of the GNU General Public License as published by
429 +# the Free Software Foundation; either version 2 of the License, or
430 +# (at your option) any later version.
431 +# 
432 +# This program is distributed in the hope that it will be useful,
433 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
434 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
435 +# GNU General Public License for more details.
436 +# 
437 +# You should have received a copy of the GNU General Public License
438 +# along with this program; if not, write to the Free Software
439 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
440 +
441 +Introduction
442 +----------------------------------------
443 +
444 +aufs [ei ju: ef es] | [a u f s]
445 +1. abbrev. for "advanced multi-layered unification filesystem".
446 +2. abbrev. for "another unionfs".
447 +3. abbrev. for "auf das" in German which means "on the" in English.
448 +   Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
449 +   But "Filesystem aufs Filesystem" is hard to understand.
450 +
451 +AUFS is a filesystem with features:
452 +- multi layered stackable unification filesystem, the member directory
453 +  is called as a branch.
454 +- branch permission and attribute, 'readonly', 'real-readonly',
455 +  'readwrite', 'whiteout-able', 'link-able whiteout' and their
456 +  combination.
457 +- internal "file copy-on-write".
458 +- logical deletion, whiteout.
459 +- dynamic branch manipulation, adding, deleting and changing permission.
460 +- allow bypassing aufs, user's direct branch access.
461 +- external inode number translation table and bitmap which maintains the
462 +  persistent aufs inode number.
463 +- seekable directory, including NFS readdir.
464 +- file mapping, mmap and sharing pages.
465 +- pseudo-link, hardlink over branches.
466 +- loopback mounted filesystem as a branch.
467 +- several policies to select one among multiple writable branches.
468 +- revert a single systemcall when an error occurs in aufs.
469 +- and more...
470 +
471 +
472 +Multi Layered Stackable Unification Filesystem
473 +----------------------------------------------------------------------
474 +Most people already know what it is.
475 +It is a filesystem which unifies several directories and provides a
476 +merged single directory. When users access a file, the access will be
477 +passed/re-directed/converted (sorry, I am not sure which English word is
478 +correct) to the real file on the member filesystem. The member
479 +filesystem is called 'lower filesystem' or 'branch' and has a mode
480 +'readonly' and 'readwrite.' And the deletion for a file on the lower
481 +readonly branch is handled by creating 'whiteout' on the upper writable
482 +branch.
483 +
484 +On LKML, there have been discussions about UnionMount (Jan Blunck and
485 +Bharata B Rao) and Unionfs (Erez Zadok). They took different approaches
486 +to implement the merged-view.
487 +The former tries putting it into VFS, and the latter implements as a
488 +separate filesystem.
489 +(If I misunderstand about these implementations, please let me know and
490 +I shall correct it. Because it is a long time ago when I read their
491 +source files last time).
492 +UnionMount's approach will be able to be small, but may be hard to share
493 +branches between several UnionMount since the whiteout in it is
494 +implemented in the inode on branch filesystem and always
495 +shared. According to Bharata's post, readdir does not seem to be
496 +finished yet.
497 +Unionfs has a longer history. When I started implementing a stacking filesystem
498 +(Aug 2005), it already existed. It has virtual super_block, inode,
499 +dentry and file objects and they have an array pointing lower same kind
500 +objects. After contributing many patches for Unionfs, I re-started my
501 +project AUFS (Jun 2006).
502 +
503 +In AUFS, the structure of filesystem resembles to Unionfs, but I
504 +implemented my own ideas, approaches and enhancements and it became
505 +totally different one.
506 +
507 +
508 +Several characters/aspects of aufs
509 +----------------------------------------------------------------------
510 +
511 +Aufs has several characters or aspects.
512 +1. a filesystem, callee of VFS helper
513 +2. sub-VFS, caller of VFS helper for branches
514 +3. a virtual filesystem which maintains persistent inode number
515 +4. reader/writer of files on branches such like an application
516 +
517 +1. Callee of VFS Helper
518 +As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
519 +unlink(2) from an application reaches sys_unlink() kernel function and
520 +then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
521 +calls filesystem specific unlink operation. Actually aufs implements the
522 +unlink operation but it behaves like a redirector.
523 +
524 +2. Caller of VFS Helper for Branches
525 +aufs_unlink() passes the unlink request to the branch filesystem as if
526 +it were called from VFS. So the called unlink operation of the branch
527 +filesystem acts as usual. As a caller of VFS helper, aufs should handle
528 +every necessary pre/post operation for the branch filesystem.
529 +- acquire the lock for the parent dir on a branch
530 +- lookup in a branch
531 +- revalidate dentry on a branch
532 +- mnt_want_write() for a branch
533 +- vfs_unlink() for a branch
534 +- mnt_drop_write() for a branch
535 +- release the lock on a branch
536 +
537 +3. Persistent Inode Number
538 +One of the most important issues for a filesystem is to maintain inode
539 +numbers. This is particularly important to support exporting a
540 +filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
541 +backend block device for its own. But some storage is necessary to
542 +maintain inode number. It may be a large space and may not suit to keep
543 +in memory. Aufs rents some space from its first writable branch
544 +filesystem (by default) and creates file(s) on it. These files are
545 +created by aufs internally and removed soon (currently) keeping opened.
546 +Note: Because these files are removed, they are totally gone after
547 +      unmounting aufs. It means the inode numbers are not persistent
548 +      across unmount or reboot. I have a plan to make them really
549 +      persistent which will be important for aufs on NFS server.
550 +
551 +4. Read/Write Files Internally (copy-on-write)
552 +Because a branch can be readonly, when you write a file on it, aufs will
553 +"copy-up" it to the upper writable branch internally. And then write the
554 +originally requested thing to the file. Generally kernel doesn't
555 +open/read/write file actively. In aufs, even a single write may cause an
556 +internal "file copy". This behaviour is very similar to cp(1) command.
557 +
558 +Some people may think it is better to pass such work to user space
559 +helper, instead of doing in kernel space. Actually I am still thinking
560 +about it. But currently I have implemented it in kernel space.
561 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/02struct.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/02struct.txt
562 --- linux-2.6.31/Documentation/filesystems/aufs/design/02struct.txt     1970-01-01 00:00:00.000000000 +0000
563 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/02struct.txt    2009-09-21 21:48:58.761610020 +0000
564 @@ -0,0 +1,218 @@
565 +
566 +# Copyright (C) 2005-2009 Junjiro R. Okajima
567 +# 
568 +# This program is free software; you can redistribute it and/or modify
569 +# it under the terms of the GNU General Public License as published by
570 +# the Free Software Foundation; either version 2 of the License, or
571 +# (at your option) any later version.
572 +# 
573 +# This program is distributed in the hope that it will be useful,
574 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
575 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
576 +# GNU General Public License for more details.
577 +# 
578 +# You should have received a copy of the GNU General Public License
579 +# along with this program; if not, write to the Free Software
580 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
581 +
582 +Basic Aufs Internal Structure
583 +
584 +Superblock/Inode/Dentry/File Objects
585 +----------------------------------------------------------------------
586 +Like an ordinary filesystem, aufs has its own
587 +superblock/inode/dentry/file objects. All these objects have a
588 +dynamically allocated array and store the same kind of pointers to the
589 +lower filesystem, branch.
590 +For example, when you build a union with one readwrite branch and one
591 +readonly, mounted /au, /rw and /ro respectively.
592 +- /au = /rw + /ro
593 +- /ro/fileA exists but /rw/fileA does not
594 +
595 +Aufs lookup operation finds /ro/fileA and gets dentry for that. These
596 +pointers are stored in an aufs dentry. The array in aufs dentry will be,
597 +- [0] = NULL
598 +- [1] = /ro/fileA
599 +
600 +This style of an array is essentially the same for the aufs
601 +superblock/inode/dentry/file objects.
602 +
603 +Because aufs supports manipulating branches, ie. add/delete/change
604 +dynamically, each of these objects has its own generation. When branches
605 +are changed, the generation in aufs superblock is incremented. And the
606 +generation in another object is compared when it is accessed.
607 +When the generation in another object is obsolete, aufs refreshes the
608 +internal array.
609 +
610 +
611 +Superblock
612 +----------------------------------------------------------------------
613 +Additionally aufs superblock has some data for policies to select one
614 +among multiple writable branches, XIB files, pseudo-links and kobject.
615 +See below in detail.
616 +About the policies which supports copy-down a directory, see policy.txt
617 +too.
618 +
619 +
620 +Branch and XINO(External Inode Number Translation Table)
621 +----------------------------------------------------------------------
622 +Every branch has its own xino (external inode number translation table)
623 +file. The xino file is created and unlinked by aufs internally. When two
624 +members of a union exist on the same filesystem, they share the single
625 +xino file.
626 +The struct of a xino file is simple, just a sequence of aufs inode
627 +numbers which is indexed by the lower inode number.
628 +In the above sample, assume the inode number of /ro/fileA is i111 and
629 +aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
630 +4(8) bytes at 111 * 4(8) bytes offset in the xino file.
631 +
632 +When the inode numbers are not contiguous, the xino file will be sparse
633 +which has a hole in it and doesn't consume as much disk space as it
634 +might appear. If your branch filesystem consumes disk space for such
635 +holes, then you should specify 'xino=' option at mounting aufs.
636 +
637 +Also a writable branch has three kinds of "whiteout bases". All these
638 +exist when the branch is joined to aufs and the names are
639 +whiteout-ed doubly, so that users will never see their names in aufs
640 +hierarchy.
641 +1. a regular file which will be linked to all whiteouts.
642 +2. a directory to store a pseudo-link.
643 +3. a directory to store an "orphan-ed" file temporarily.
644 +
645 +1. Whiteout Base
646 +   When you remove a file on a readonly branch, aufs handles it as a
647 +   logical deletion and creates a whiteout on the upper writable branch
648 +   as a hardlink of this file in order not to consume inode on the
649 +   writable branch.
650 +2. Pseudo-link Dir
651 +   See below, Pseudo-link.
652 +3. Step-Parent Dir
653 +   When "fileC" exists on the lower readonly branch only and it is
654 +   opened and removed with its parent dir, and then user writes
655 +   something into it, then aufs copies-up fileC to this
656 +   directory. Because there is no other dir to store fileC. After
657 +   creating a file under this dir, the file is unlinked.
658 +
659 +Because aufs supports manipulating branches, ie. add/delete/change
660 +dynamically, a branch has its own id. When the branch order changes, aufs
661 +finds the new index by searching the branch id.
662 +
663 +
664 +Pseudo-link
665 +----------------------------------------------------------------------
666 +Assume "fileA" exists on the lower readonly branch only and it is
667 +hardlinked to "fileB" on the branch. When you write something to fileA,
668 +aufs copies-up it to the upper writable branch. Additionally aufs
669 +creates a hardlink under the Pseudo-link Directory of the writable
670 +branch. The inode of a pseudo-link is kept in aufs super_block as a
671 +simple list. If fileB is read after unlinking fileA, aufs returns
672 +filedata from the pseudo-link instead of the lower readonly
673 +branch. Because the pseudo-link is based upon the inode, to keep the
674 +inode number by xino (see above) is important.
675 +
676 +All the hardlinks under the Pseudo-link Directory of the writable branch
677 +should be restored in a proper location later. Aufs provides a utility
678 +to do this. The userspace helpers are executed at remounting and unmounting
679 +aufs by default.
680 +
681 +
682 +XIB(external inode number bitmap)
683 +----------------------------------------------------------------------
684 +In addition to the xino file per branch, aufs has an external inode number
685 +bitmap in a superblock object. It is also a file such like a xino file.
686 +It is a simple bitmap to mark whether the aufs inode number is in-use or
687 +not.
688 +To reduce the file I/O, aufs prepares a single memory page to cache xib.
689 +
690 +Aufs implements a feature to truncate/refresh both of xino and xib to
691 +reduce the number of consumed disk blocks for these files.
692 +
693 +
694 +Virtual or Vertical Dir
695 +----------------------------------------------------------------------
696 +In order to support multiple layers (branches), aufs readdir operation
697 +constructs a virtual dir block on memory. For readdir, aufs calls
698 +vfs_readdir() internally for each dir on branches, merges their entries
699 +with eliminating the whiteout-ed ones, and sets it to file (dir)
700 +object. So the file object has its entry list until it is closed. The
701 +entry list will be updated when the file position is zero and becomes
702 +old. This decision is made in aufs automatically.
703 +
704 +The dynamically allocated memory block for the name of entries has a
705 +unit of 512 bytes (by default) and stores the names contiguously (no
706 +padding). Another block for each entry is handled by kmem_cache too.
707 +While building dir blocks, aufs creates a hash list and judges whether
708 +the entry is whiteouted by its upper branch or already listed.
709 +
710 +Some people may call it can be a security hole or invite DoS attack
711 +since the opened and once readdir-ed dir (file object) holds its entry
712 +list and becomes a pressure for system memory. But I'd say it is similar
713 +to files under /proc or /sys. The virtual files in them also holds a
714 +memory page (generally) while they are opened. When an idea to reduce
715 +memory for them is introduced, it will be applied to aufs too.
716 +For those who really hate this situation, I've developed readdir(3)
717 +library which operates this merging in userspace. You just need to set
718 +LD_PRELOAD environment variable, and aufs will not consume any memory in
719 +kernel space for readdir(3).
720 +
721 +
722 +Workqueue
723 +----------------------------------------------------------------------
724 +Aufs sometimes requires privilege access to a branch. For instance,
725 +in copy-up/down operation. When a user process is going to make changes
726 +to a file which exists in the lower readonly branch only, and the mode
727 +of one of ancestor directories may not be writable by a user
728 +process. Here aufs copies-up the file with its ancestors and they may
729 +require privilege to set its owner/group/mode/etc.
730 +This is a typical case of a application character of aufs (see
731 +Introduction).
732 +
733 +Aufs uses workqueue synchronously for this case. It creates its own
734 +workqueue. The workqueue is a kernel thread and has privilege. Aufs
735 +passes the request to call mkdir or write (for example), and wait for
736 +its completion. This approach solves a problem of a signal handler
737 +simply.
738 +If aufs didn't adopt the workqueue and changed the privilege of the
739 +process, and if the mkdir/write call raises SIGXFSZ or other signal,
740 +then the user process might gain a privilege or the generated core file
741 +was owned by a superuser. But I have a plan to switch to a new
742 +credential approach which will be introduced in linux-2.6.29.
743 +
744 +Also aufs uses the system global workqueue ("events" kernel thread) too
745 +for asynchronous tasks, such like handling inotify, re-creating a
746 +whiteout base and etc. This is unrelated to a privilege.
747 +Most of aufs operation tries acquiring a rw_semaphore for aufs
748 +superblock at the beginning, at the same time waits for the completion
749 +of all queued asynchronous tasks.
750 +
751 +
752 +Whiteout
753 +----------------------------------------------------------------------
754 +The whiteout in aufs is very similar to Unionfs's. That is represented
755 +by its filename. UnionMount takes an approach of a file mode, but I am
756 +afraid several utilities (find(1) or something) will have to support it.
757 +
758 +Basically the whiteout represents "logical deletion" which stops aufs to
759 +lookup further, but also it represents "dir is opaque" which also stops
760 +lookup.
761 +
762 +In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
763 +In order to make several functions in a single systemcall to be
764 +revertible, aufs adopts an approach to rename a directory to a temporary
765 +unique whiteouted name.
766 +For example, in rename(2) dir where the target dir already existed, aufs
767 +renames the target dir to a temporary unique whiteouted name before the
768 +actual rename on a branch and then handles other actions (make it opaque,
769 +update the attributes, etc). If an error happens in these actions, aufs
770 +simply renames the whiteouted name back and returns an error. If all are
771 +succeeded, aufs registers a function to remove the whiteouted unique
772 +temporary name completely and asynchronously to the system global
773 +workqueue.
774 +
775 +
776 +Copy-up
777 +----------------------------------------------------------------------
778 +It is a well-known feature or concept.
779 +When user modifies a file on a readonly branch, aufs operates "copy-up"
780 +internally and makes change to the new file on the upper writable branch.
781 +When the trigger systemcall does not update the timestamps of the parent
782 +dir, aufs reverts it after copy-up.
783 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/03lookup.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/03lookup.txt
784 --- linux-2.6.31/Documentation/filesystems/aufs/design/03lookup.txt     1970-01-01 00:00:00.000000000 +0000
785 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/03lookup.txt    2009-09-21 21:48:58.761610020 +0000
786 @@ -0,0 +1,104 @@
787 +
788 +# Copyright (C) 2005-2009 Junjiro R. Okajima
789 +# 
790 +# This program is free software; you can redistribute it and/or modify
791 +# it under the terms of the GNU General Public License as published by
792 +# the Free Software Foundation; either version 2 of the License, or
793 +# (at your option) any later version.
794 +# 
795 +# This program is distributed in the hope that it will be useful,
796 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
797 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
798 +# GNU General Public License for more details.
799 +# 
800 +# You should have received a copy of the GNU General Public License
801 +# along with this program; if not, write to the Free Software
802 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
803 +
804 +Lookup in a Branch
805 +----------------------------------------------------------------------
806 +Since aufs has a character of sub-VFS (see Introduction), it operates
807 +lookup for branches as VFS does. It may be a heavy work. Generally
808 +speaking struct nameidata is a bigger structure and includes many
809 +information. But almost all lookup operation in aufs is the simplest
810 +case, ie. lookup only an entry directly connected to its parent. Digging
811 +down the directory hierarchy is unnecessary.
812 +
813 +VFS has a function lookup_one_len() for that use, but it is not usable
814 +for a branch filesystem which requires struct nameidata. So aufs
815 +implements a simple lookup wrapper function. When a branch filesystem
816 +allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds
817 +a simplest nameidata and calls lookup_hash().
818 +Here aufs applies "a principle in NFSD", ie. if the filesystem supports
819 +NFS-export, then it has to support NULL as a nameidata parameter for
820 +->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in
821 +aufs tests if ->s_export_op in the branch is NULL or not.
822 +
823 +When a branch is a remote filesystem, aufs trusts its ->d_revalidate().
824 +For d_revalidate, aufs implements three levels of revalidate tests. See
825 +"Revalidate Dentry and UDBA" in detail.
826 +
827 +
828 +Loopback Mount
829 +----------------------------------------------------------------------
830 +Basically aufs supports any type of filesystem and block device for a
831 +branch (actually there are some exceptions). But it is prohibited to add
832 +a loopback mounted one whose backend file exists in a filesystem which is
833 +already added to aufs. The reason is to protect aufs from a recursive
834 +lookup. If it was allowed, the aufs lookup operation might re-enter a
835 +lookup for the loopback mounted branch in the same context, and will
836 +cause a deadlock.
837 +
838 +
839 +Revalidate Dentry and UDBA (User's Direct Branch Access)
840 +----------------------------------------------------------------------
841 +Generally VFS helpers re-validate a dentry as a part of lookup.
842 +0. digging down the directory hierarchy.
843 +1. lock the parent dir by its i_mutex.
844 +2. lookup the final (child) entry.
845 +3. revalidate it.
846 +4. call the actual operation (create, unlink, etc.)
847 +5. unlock the parent dir
848 +
849 +If the filesystem implements its ->d_revalidate() (step 3), then it is
850 +called. Actually aufs implements it and checks the dentry on a branch is
851 +still valid.
852 +But it is not enough. Because aufs has to release the lock for the
853 +parent dir on a branch at the end of ->lookup() (step 2) and
854 +->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
855 +held by VFS.
856 +If the file on a branch is changed directly, eg. bypassing aufs, after
857 +aufs released the lock, then the subsequent operation may cause
858 +something unpleasant result.
859 +
860 +This situation is a result of VFS architecture, ->lookup() and
861 +->d_revalidate() is separated. But I never say it is wrong. It is a good
862 +design from VFS's point of view. It is just not suitable for sub-VFS
863 +character in aufs.
864 +
865 +Aufs supports such a case by three levels of revalidation which are
866 +selectable by user.
867 +1. Simple Revalidate
868 +   Addition to the native flow in VFS's, confirm the child-parent
869 +   relationship on the branch just after locking the parent dir on the
870 +   branch in the "actual operation" (step 4). When this validation
871 +   fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
872 +   checks the validation of the dentry on branches.
873 +2. Monitor Changes Internally by Inotify
874 +   Addition to above, in the "actual operation" (step 4) aufs re-lookup
875 +   the dentry on the branch, and returns EBUSY if it finds different
876 +   dentry.
877 +   Additionally, aufs sets the inotify watch for every dir on branches
878 +   during it is in cache. When the event is notified, aufs registers a
879 +   function to kernel 'events' thread by schedule_work(). And the
880 +   function sets some special status to the cached aufs dentry and inode
881 +   private data. If they are not cached, then aufs has nothing to
882 +   do. When the same file is accessed through aufs (step 0-3) later,
883 +   aufs will detect the status and refresh all necessary data.
884 +   In this mode, aufs has to ignore the event which is fired by aufs
885 +   itself.
886 +3. No Extra Validation
887 +   This is the simplest test and doesn't add any additional revalidation
888 +   test, and skips the revalidation in step 4. It is useful and improves
889 +   aufs performance when system surely hide the aufs branches from user,
890 +   by over-mounting something (or another method).
891 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/04branch.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/04branch.txt
892 --- linux-2.6.31/Documentation/filesystems/aufs/design/04branch.txt     1970-01-01 00:00:00.000000000 +0000
893 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/04branch.txt    2009-09-21 21:48:58.761610020 +0000
894 @@ -0,0 +1,76 @@
895 +
896 +# Copyright (C) 2005-2009 Junjiro R. Okajima
897 +# 
898 +# This program is free software; you can redistribute it and/or modify
899 +# it under the terms of the GNU General Public License as published by
900 +# the Free Software Foundation; either version 2 of the License, or
901 +# (at your option) any later version.
902 +# 
903 +# This program is distributed in the hope that it will be useful,
904 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
905 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
906 +# GNU General Public License for more details.
907 +# 
908 +# You should have received a copy of the GNU General Public License
909 +# along with this program; if not, write to the Free Software
910 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
911 +
912 +Branch Manipulation
913 +
914 +Since aufs supports dynamic branch manipulation, ie. add/remove a branch
915 +and changing its permission/attribute, there are a lot of works to do.
916 +
917 +
918 +Add a Branch
919 +----------------------------------------------------------------------
920 +o Confirm the adding dir exists outside of aufs, including loopback
921 +  mount.
922 +- and other various attributes...
923 +o Initialize the xino file and whiteout bases if necessary.
924 +  See struct.txt.
925 +
926 +o Check the owner/group/mode of the directory
927 +  When the owner/group/mode of the adding directory differs from the
928 +  existing branch, aufs issues a warning because it may impose a
929 +  security risk.
930 +  For example, when a upper writable branch has a world writable empty
931 +  top directory, a malicious user can create any files on the writable
932 +  branch directly, like copy-up and modify manually. If something like
933 +  /etc/{passwd,shadow} exists on the lower readonly branch but not the upper
934 +  writable branch, and the writable branch is world-writable, then a
935 +  malicious guy may create /etc/passwd on the writable branch directly
936 +  and the infected file will be valid in aufs.
937 +  I am afraid it can be a security issue, but nothing to do except
938 +  producing a warning.
939 +
940 +
941 +Delete a Branch
942 +----------------------------------------------------------------------
943 +o Confirm the deleting branch is not busy
944 +  To be general, there is one merit to adopt "remount" interface to
945 +  manipulate branches. It is to discard caches. At deleting a branch,
946 +  aufs checks the still cached (and connected) dentries and inodes. If
947 +  there are any, then they are all in-use. An inode without its
948 +  corresponding dentry can be alive alone (for example, inotify case).
949 +
950 +  For the cached one, aufs checks whether the same named entry exists on
951 +  other branches.
952 +  If the cached one is a directory, because aufs provides a merged view
953 +  to users, as long as one dir is left on any branch aufs can show the
954 +  dir to users. In this case, the branch can be removed from aufs.
955 +  Otherwise aufs rejects deleting the branch.
956 +
957 +  If any file on the deleting branch is opened by aufs, then aufs
958 +  rejects deleting.
959 +
960 +
961 +Modify the Permission of a Branch
962 +----------------------------------------------------------------------
963 +o Re-initialize or remove the xino file and whiteout bases if necessary.
964 +  See struct.txt.
965 +
966 +o rw --> ro: Confirm the modifying branch is not busy
967 +  Aufs rejects the request if any of these conditions are true.
968 +  - a file on the branch is mmap-ed.
969 +  - a regular file on the branch is opened for write and there is no
970 +    same named entry on the upper branch.
971 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/05wbr_policy.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/05wbr_policy.txt
972 --- linux-2.6.31/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 00:00:00.000000000 +0000
973 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/05wbr_policy.txt        2009-09-21 21:48:58.761610020 +0000
974 @@ -0,0 +1,65 @@
975 +
976 +# Copyright (C) 2005-2009 Junjiro R. Okajima
977 +# 
978 +# This program is free software; you can redistribute it and/or modify
979 +# it under the terms of the GNU General Public License as published by
980 +# the Free Software Foundation; either version 2 of the License, or
981 +# (at your option) any later version.
982 +# 
983 +# This program is distributed in the hope that it will be useful,
984 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
985 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
986 +# GNU General Public License for more details.
987 +# 
988 +# You should have received a copy of the GNU General Public License
989 +# along with this program; if not, write to the Free Software
990 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
991 +
992 +Policies to Select One among Multiple Writable Branches
993 +----------------------------------------------------------------------
994 +When the number of writable branch is more than one, aufs has to decide
995 +the target branch for file creation or copy-up. By default, the highest
996 +writable branch which has the parent (or ancestor) dir of the target
997 +file is chosen (top-down-parent policy).
998 +By user's request, aufs implements some other policies to select the
999 +writable branch, for file creation two policies, round-robin and
1000 +most-free-space policies. For copy-up three policies, top-down-parent,
1001 +bottom-up-parent and bottom-up policies.
1002 +
1003 +As expected, the round-robin policy selects the branch in circular. When
1004 +you have two writable branches and creates 10 new files, 5 files will be
1005 +created for each branch. mkdir(2) systemcall is an exception. When you
1006 +create 10 new directories, all will be created on the same branch.
1007 +And the most-free-space policy selects the one which has most free
1008 +space among the writable branches. The amount of free space will be
1009 +checked by aufs internally, and users can specify its time interval.
1010 +
1011 +The policies for copy-up are simpler,
1012 +top-down-parent is equivalent to the same named one in create policy,
1013 +bottom-up-parent selects the writable branch where the parent dir
1014 +exists and the nearest upper one from the copyup-source,
1015 +bottom-up selects the nearest upper writable branch from the
1016 +copyup-source, regardless the existence of the parent dir.
1017 +
1018 +There are some rules or exceptions to apply these policies.
1019 +- If there is a readonly branch above the policy-selected branch and
1020 +  the parent dir is marked as opaque (a variation of whiteout), or the
1021 +  target (creating) file is whiteout-ed on the upper readonly branch,
1022 +  then the result of the policy is ignored and the target file will be
1023 +  created on the nearest upper writable branch than the readonly branch.
1024 +- If there is a writable branch above the policy-selected branch and
1025 +  the parent dir is marked as opaque or the target file is whiteouted
1026 +  on the branch, then the result of the policy is ignored and the target
1027 +  file will be created on the highest one among the upper writable
1028 +  branches who has diropq or whiteout. In case of whiteout, aufs removes
1029 +  it as usual.
1030 +- link(2) and rename(2) systemcalls are exceptions in every policy.
1031 +  They try selecting the branch where the source exists as possible
1032 +  since copyup a large file will take long time. If it can't be,
1033 +  ie. the branch where the source exists is readonly, then they will
1034 +  follow the copyup policy.
1035 +- There is an exception for rename(2) when the target exists.
1036 +  If the rename target exists, aufs compares the index of the branches
1037 +  where the source and the target exists and selects the higher
1038 +  one. If the selected branch is readonly, then aufs follows the
1039 +  copyup policy.
1040 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/06fmode_exec.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/06fmode_exec.txt
1041 --- linux-2.6.31/Documentation/filesystems/aufs/design/06fmode_exec.txt 1970-01-01 00:00:00.000000000 +0000
1042 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/06fmode_exec.txt        2009-09-21 21:48:58.761610020 +0000
1043 @@ -0,0 +1,33 @@
1044 +
1045 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1046 +# 
1047 +# This program is free software; you can redistribute it and/or modify
1048 +# it under the terms of the GNU General Public License as published by
1049 +# the Free Software Foundation; either version 2 of the License, or
1050 +# (at your option) any later version.
1051 +# 
1052 +# This program is distributed in the hope that it will be useful,
1053 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1054 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1055 +# GNU General Public License for more details.
1056 +# 
1057 +# You should have received a copy of the GNU General Public License
1058 +# along with this program; if not, write to the Free Software
1059 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1060 +
1061 +FMODE_EXEC and deny_write()
1062 +----------------------------------------------------------------------
1063 +Generally Unix prevents an executing file from writing its filedata.
1064 +In linux it is implemented by deny_write() and allow_write().
1065 +When a file is executed by exec() family, open_exec() (and sys_uselib())
1066 +they open the file and call deny_write(). If the file is aufs's virtual
1067 +one, it has no meaning. The file which deny_write() is really necessary
1068 +is the file on a branch. But the FMODE_EXEC flag is not passed to
1069 +->open() operation. So aufs adopts a dirty trick.
1070 +
1071 +- in order to get FMODE_EXEC, aufs ->lookup() and ->d_revalidate() set
1072 +  nd->intent.open.file->private_data to nd->intent.open.flags temporarily.
1073 +- in aufs ->open(), when FMODE_EXEC is set in file->private_data, it
1074 +  calls deny_write() for the file on a branch.
1075 +- when the aufs file is released, allow_write() for the file on a branch
1076 +  is called.
1077 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/07mmap.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/07mmap.txt
1078 --- linux-2.6.31/Documentation/filesystems/aufs/design/07mmap.txt       1970-01-01 00:00:00.000000000 +0000
1079 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/07mmap.txt      2009-09-21 21:48:58.761610020 +0000
1080 @@ -0,0 +1,53 @@
1081 +
1082 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1083 +# 
1084 +# This program is free software; you can redistribute it and/or modify
1085 +# it under the terms of the GNU General Public License as published by
1086 +# the Free Software Foundation; either version 2 of the License, or
1087 +# (at your option) any later version.
1088 +# 
1089 +# This program is distributed in the hope that it will be useful,
1090 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1091 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1092 +# GNU General Public License for more details.
1093 +# 
1094 +# You should have received a copy of the GNU General Public License
1095 +# along with this program; if not, write to the Free Software
1096 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1097 +
1098 +mmap(2) -- File Memory Mapping
1099 +----------------------------------------------------------------------
1100 +In aufs, the file-mapped pages are shared between the file on a branch
1101 +and the virtual one in aufs by overriding vm_operation, particularly
1102 +->fault().
1103 +
1104 +In aufs_mmap(),
1105 +- get and store vm_ops of the real file on a branch.
1106 +- map the file of aufs by generic_file_mmap() and set aufs's vm
1107 +  operations.
1108 +
1109 +In aufs_fault(),
1110 +- get the file of aufs from the passed vma, sleep if needed.
1111 +- get the real file on a branch from the aufs file.
1112 +- a race may happen. for instance a multithreaded library. so some lock
1113 +  is implemented.
1114 +- call ->fault() in the previously stored vm_ops with setting the
1115 +  real file on a branch to vm_file.
1116 +- restore vm_file and wake_up if someone else got sleep.
1117 +
1118 +When a branch is added to or deleted from aufs, the same-named file may
1119 +unveil and its contents will be replaced by the new one when a process
1120 +read(2) through previously opened file.
1121 +(Some users may not want to refresh the filedata. For such users, I
1122 +have a plan to implement a mount option 'refrof' which decides to
1123 +refresh the opened files or not. See plan.txt too.)
1124 +In this case, an already mapped file will not be updated since the
1125 +contents are a part of a process already and it should not be changed by
1126 +aufs branch manipulation. (Even if MAP_SHARED is specified, currently).
1127 +Of course, in case of the deleting branch has a busy file, it cannot be
1128 +deleted from the union.
1129 +
1130 +In Unionfs, it took an approach which the memory pages mapped to
1131 +filedata are copied from the lower (real) file into the Unionfs's
1132 +virtual one and handles it by address_space operations. Recently Unionfs
1133 +changed it to this approach which aufs adopted since Jul 2006.
1134 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/08export.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/08export.txt
1135 --- linux-2.6.31/Documentation/filesystems/aufs/design/08export.txt     1970-01-01 00:00:00.000000000 +0000
1136 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/08export.txt    2009-09-21 21:48:58.761610020 +0000
1137 @@ -0,0 +1,59 @@
1138 +
1139 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1140 +# 
1141 +# This program is free software; you can redistribute it and/or modify
1142 +# it under the terms of the GNU General Public License as published by
1143 +# the Free Software Foundation; either version 2 of the License, or
1144 +# (at your option) any later version.
1145 +# 
1146 +# This program is distributed in the hope that it will be useful,
1147 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1148 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1149 +# GNU General Public License for more details.
1150 +# 
1151 +# You should have received a copy of the GNU General Public License
1152 +# along with this program; if not, write to the Free Software
1153 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1154 +
1155 +Export Aufs via NFS
1156 +----------------------------------------------------------------------
1157 +Here is an approach.
1158 +- like xino/xib, add a new file 'xigen' which stores aufs inode
1159 +  generation.
1160 +- iget_locked(): initialize aufs inode generation for a new inode, and
1161 +  store it in xigen file.
1162 +- destroy_inode(): increment aufs inode generation and store it in xigen
1163 +  file. it is necessary even if it is not unlinked, because any data of
1164 +  inode may be changed by UDBA.
1165 +- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
1166 +  build file handle by
1167 +  + branch id (4 bytes)
1168 +  + superblock generation (4 bytes)
1169 +  + inode number (4 or 8 bytes)
1170 +  + parent dir inode number (4 or 8 bytes)
1171 +  + inode generation (4 bytes)
1172 +  + return value of exportfs_encode_fh() for the parent on a branch (4
1173 +    bytes)
1174 +  + file handle for a branch (by exportfs_encode_fh())
1175 +- fh_to_dentry():
1176 +  + find the index of a branch from its id in handle, and check that it
1177 +    still exists in aufs.
1178 +  + 1st level: get the inode number from handle and search it in cache.
1179 +  + 2nd level: if not found, get the parent inode number from handle and
1180 +    search it in cache. and then open the parent dir, find the matching
1181 +    inode number by vfs_readdir() and get its name, and call
1182 +    lookup_one_len() for the target dentry.
1183 +  + 3rd level: if the parent dir is not cached, call
1184 +    exportfs_decode_fh() for a branch and get the parent on a branch,
1185 +    build a pathname of it, convert it to a pathname in aufs, call
1186 +    path_lookup(). now aufs gets a parent dir dentry, then handle it as
1187 +    the 2nd level.
1188 +  + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
1189 +    for every branch, but not itself. to get this, (currently) aufs
1190 +    searches in current->nsproxy->mnt_ns list. it may not be a good
1191 +    idea, but I didn't get other approach.
1192 +  + test the generation of the gotten inode.
1193 +- every inode operation: they may get EBUSY due to UDBA. in this case,
1194 +  convert it into ESTALE for NFSD.
1195 +- readdir(): call lockdep_on/off() because filldir in NFSD calls
1196 +  lookup_one_len(), vfs_getattr(), encode_fh() and others.
1197 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/09shwh.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/09shwh.txt
1198 --- linux-2.6.31/Documentation/filesystems/aufs/design/09shwh.txt       1970-01-01 00:00:00.000000000 +0000
1199 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/09shwh.txt      2009-09-21 21:48:58.761610020 +0000
1200 @@ -0,0 +1,53 @@
1201 +
1202 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1203 +# 
1204 +# This program is free software; you can redistribute it and/or modify
1205 +# it under the terms of the GNU General Public License as published by
1206 +# the Free Software Foundation; either version 2 of the License, or
1207 +# (at your option) any later version.
1208 +# 
1209 +# This program is distributed in the hope that it will be useful,
1210 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1211 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1212 +# GNU General Public License for more details.
1213 +# 
1214 +# You should have received a copy of the GNU General Public License
1215 +# along with this program; if not, write to the Free Software
1216 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1217 +
1218 +Show Whiteout Mode (shwh)
1219 +----------------------------------------------------------------------
1220 +Generally aufs hides the name of whiteouts. But in some cases, to show
1221 +them is very useful for users. For instance, creating a new middle layer
1222 +(branch) by merging existing layers.
1223 +
1224 +(borrowing aufs1 HOW-TO from a user, Michael Towers)
1225 +When you have three branches,
1226 +- Bottom: 'system', squashfs (underlying base system), read-only
1227 +- Middle: 'mods', squashfs, read-only
1228 +- Top: 'overlay', ram (tmpfs), read-write
1229 +
1230 +The top layer is loaded at boot time and saved at shutdown, to preserve
1231 +the changes made to the system during the session.
1232 +When larger changes have been made, or smaller changes have accumulated,
1233 +the size of the saved top layer data grows. At this point, it would be
1234 +nice to be able to merge the two overlay branches ('mods' and 'overlay')
1235 +and rewrite the 'mods' squashfs, clearing the top layer and thus
1236 +restoring save and load speed.
1237 +
1238 +This merging is simplified by the use of another aufs mount, of just the
1239 +two overlay branches using the 'shwh' option.
1240 +# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
1241 +       aufs /livesys/merge_union
1242 +
1243 +A merged view of these two branches is then available at
1244 +/livesys/merge_union, and the new feature is that the whiteouts are
1245 +visible!
1246 +Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
1247 +writing to all branches. Also the default mode for all branches is 'ro'.
1248 +It is now possible to save the combined contents of the two overlay
1249 +branches to a new squashfs, e.g.:
1250 +# mksquashfs /livesys/merge_union /path/to/newmods.squash
1251 +
1252 +This new squashfs archive can be stored on the boot device and the
1253 +initramfs will use it to replace the old one at the next boot.
1254 diff -uprN -x .git linux-2.6.31/Documentation/filesystems/aufs/design/99plan.txt aufs2-2.6.git/Documentation/filesystems/aufs/design/99plan.txt
1255 --- linux-2.6.31/Documentation/filesystems/aufs/design/99plan.txt       1970-01-01 00:00:00.000000000 +0000
1256 +++ aufs2-2.6.git/Documentation/filesystems/aufs/design/99plan.txt      2009-09-21 21:48:58.765776628 +0000
1257 @@ -0,0 +1,96 @@
1258 +
1259 +# Copyright (C) 2005-2009 Junjiro R. Okajima
1260 +# 
1261 +# This program is free software; you can redistribute it and/or modify
1262 +# it under the terms of the GNU General Public License as published by
1263 +# the Free Software Foundation; either version 2 of the License, or
1264 +# (at your option) any later version.
1265 +# 
1266 +# This program is distributed in the hope that it will be useful,
1267 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
1268 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1269 +# GNU General Public License for more details.
1270 +# 
1271 +# You should have received a copy of the GNU General Public License
1272 +# along with this program; if not, write to the Free Software
1273 +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1274 +
1275 +Plan
1276 +
1277 +Restoring some features which were implemented in aufs1.
1278 +They were dropped in aufs2 in order to make the source files simpler
1279 +and easier to review.
1280 +
1281 +
1282 +Test Only the Highest One for the Directory Permission (dirperm1 option)
1283 +----------------------------------------------------------------------
1284 +Let's try a case study.
1285 +- aufs has two branches, upper readwrite and lower readonly.
1286 +  /au = /rw + /ro
1287 +- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1288 +- user invoked "chmod a+rx /au/dirA"
1289 +- then "dirA" becomes world readable?
1290 +
1291 +In this case, /ro/dirA is still 0700 since it exists in a readonly branch,
1292 +or it may be a natively readonly filesystem. If aufs respects the lower
1293 +branch, it should not respond to readdir requests from other users. But
1294 +the user allowed it by chmod. Should aufs really reject showing the
1295 +entries under /ro/dirA?
1296 +
1297 +To be honest, I don't have a best solution for this case. So I
1298 +implemented the 'dirperm1' and 'nodirperm1' options in aufs1, and left
1299 +the choice to users.
1300 +When dirperm1 is specified, aufs checks only the highest one for the
1301 +directory permission, and shows the entries. Otherwise, as usual, it
1302 +checks every dir existing on all branches and rejects the request.
1303 +
1304 +As a side effect, the dirperm1 option improves the performance of aufs
1305 +because the number of permission checks is reduced.
1306 +
1307 +
1308 +Being Another Aufs's Readonly Branch (robr)
1309 +----------------------------------------------------------------------
1310 +Aufs1 allows aufs to be another aufs's readonly branch.
1311 +This feature was developed at a user's request. But it may not be used
1312 +currently.
1313 +
1314 +
1315 +Copy-up on Open (coo=)
1316 +----------------------------------------------------------------------
1317 +By default the internal copy-up is executed when it is really necessary.
1318 +It is not done when a file is opened for writing, but when write(2) is
1319 +done. Users who have many (over 100) branches want to know and analyse
1320 +when and what file is copied-up. To insert a new upper branch which
1321 +contains such files only may improve the performance of aufs.
1322 +
1323 +Aufs1 implemented "coo=none | leaf | all" option.
1324 +
1325 +
1326 +Refresh the Opened File (refrof)
1327 +----------------------------------------------------------------------
1328 +This option is implemented in aufs1 but incomplete.
1329 +
1330 +When a user reads from a file, he generally expects to get its latest
1331 +filedata. If the file is removed and a new file with the same name is
1332 +created, the content he gets is unchanged, i.e. the unlinked filedata.
1333 +
1334 +Let's try case study again.
1335 +- aufs has two branches.
1336 +  /au = /rw + /ro
1337 +- "fileA" exists under /ro, but not under /rw.
1338 +- user opened "/au/fileA".
1339 +- he or someone else inserts a branch (/new) between /rw and /ro.
1340 +  /au = /rw + /new + /ro
1341 +- the new branch has "fileA".
1342 +- user reads from the opened "fileA"
1343 +- which filedata should aufs return, from /ro or /new?
1344 +
1345 +Some people say it has to be "from /ro" since that is the Unix semantics.
1346 +The others say it should be "from /new" because the file is not removed
1347 +and it is equivalent to the case where someone else modifies the file.
1348 +
1349 +Here again I don't have a best and final answer. I got an idea to
1350 +implement 'refrof' and 'norefrof' options. When 'refrof' (REFResh the
1351 +Opened File) is specified (by default), aufs returns the filedata from
1352 +/new.
1353 +Otherwise from /ro.
1354 diff -uprN -x .git linux-2.6.31/fs/Kconfig aufs2-2.6.git/fs/Kconfig
1355 --- linux-2.6.31/fs/Kconfig     2009-09-09 22:13:59.000000000 +0000
1356 +++ aufs2-2.6.git/fs/Kconfig    2009-09-21 21:49:23.315008102 +0000
1357 @@ -187,6 +187,7 @@ source "fs/sysv/Kconfig"
1358  source "fs/ufs/Kconfig"
1359  source "fs/exofs/Kconfig"
1360  source "fs/nilfs2/Kconfig"
1361 +source "fs/aufs/Kconfig"
1362  
1363  endif # MISC_FILESYSTEMS
1364  
1365 diff -uprN -x .git linux-2.6.31/fs/Makefile aufs2-2.6.git/fs/Makefile
1366 --- linux-2.6.31/fs/Makefile    2009-09-09 22:13:59.000000000 +0000
1367 +++ aufs2-2.6.git/fs/Makefile   2009-09-14 14:52:35.065948532 +0000
1368 @@ -124,3 +124,4 @@ obj-$(CONFIG_OCFS2_FS)              += ocfs2/
1369  obj-$(CONFIG_BTRFS_FS)         += btrfs/
1370  obj-$(CONFIG_GFS2_FS)           += gfs2/
1371  obj-$(CONFIG_EXOFS_FS)          += exofs/
1372 +obj-$(CONFIG_AUFS_FS)           += aufs/
1373 diff -uprN -x .git linux-2.6.31/fs/aufs/Kconfig aufs2-2.6.git/fs/aufs/Kconfig
1374 --- linux-2.6.31/fs/aufs/Kconfig        1970-01-01 00:00:00.000000000 +0000
1375 +++ aufs2-2.6.git/fs/aufs/Kconfig       2009-09-21 21:49:23.374524295 +0000
1376 @@ -0,0 +1,140 @@
1377 +config AUFS_FS
1378 +       bool "Aufs (Advanced multi layered unification filesystem) support"
1379 +       depends on EXPERIMENTAL
1380 +       help
1381 +       Aufs is a stackable unification filesystem such as Unionfs,
1382 +       which unifies several directories and provides a merged single
1383 +       directory.
1384 +       In the early days, aufs was an entirely re-designed and
1385 +       re-implemented Unionfs Version 1.x series. Introducing many
1386 +       original ideas, approaches and improvements, it became totally
1387 +       different from Unionfs while keeping the basic features.
1388 +
1389 +if AUFS_FS
1390 +choice
1391 +       prompt "Maximum number of branches"
1392 +       default AUFS_BRANCH_MAX_127
1393 +       help
1394 +       Specifies the maximum number of branches (or member directories)
1395 +       in a single aufs. The larger value consumes more system
1396 +       resources and has a minor impact to performance.
1397 +config AUFS_BRANCH_MAX_127
1398 +       bool "127"
1399 +       help
1400 +       Specifies the maximum number of branches (or member directories)
1401 +       in a single aufs. The larger value consumes more system
1402 +       resources and has a minor impact to performance.
1403 +config AUFS_BRANCH_MAX_511
1404 +       bool "511"
1405 +       help
1406 +       Specifies the maximum number of branches (or member directories)
1407 +       in a single aufs. The larger value consumes more system
1408 +       resources and has a minor impact to performance.
1409 +config AUFS_BRANCH_MAX_1023
1410 +       bool "1023"
1411 +       help
1412 +       Specifies the maximum number of branches (or member directories)
1413 +       in a single aufs. The larger value consumes more system
1414 +       resources and has a minor impact to performance.
1415 +config AUFS_BRANCH_MAX_32767
1416 +       bool "32767"
1417 +       help
1418 +       Specifies the maximum number of branches (or member directories)
1419 +       in a single aufs. The larger value consumes more system
1420 +       resources and has a minor impact to performance.
1421 +endchoice
1422 +
1423 +config AUFS_HINOTIFY
1424 +       bool "Use inotify to detect actions on a branch"
1425 +       depends on INOTIFY
1426 +       help
1427 +       If you want to modify files on branches directly, eg. bypassing aufs,
1428 +       and want aufs to detect the changes of them fully, then enable this
1429 +       option and use 'udba=inotify' mount option.
1430 +       It will have a negative impact to the performance.
1431 +       See detail in aufs.5.
1432 +
1433 +config AUFS_EXPORT
1434 +       bool "NFS-exportable aufs"
1435 +       depends on EXPORTFS = y
1436 +       help
1437 +       If you want to export your mounted aufs via NFS, then enable this
1438 +       option. There are several requirements for this configuration.
1439 +       See detail in aufs.5.
1440 +
1441 +config AUFS_RDU
1442 +       bool "Readdir in userspace"
1443 +       help
1444 +       If you have millions of files under a single aufs directory, and
1445 +       run out of memory, then enable this option and set
1446 +       environment variables for your readdir(3).
1447 +       See detail in aufs.5.
1448 +
1449 +config AUFS_SHWH
1450 +       bool "Show whiteouts"
1451 +       help
1452 +       If you want to make the whiteouts in aufs visible, then enable
1453 +       this option and specify the 'shwh' mount option. Although it may
1454 +       sound like philosophy or something, technically it simply
1455 +       shows the names of whiteouts while keeping their behaviour.
1456 +
1457 +config AUFS_BR_RAMFS
1458 +       bool "Ramfs (initramfs/rootfs) as an aufs branch"
1459 +       help
1460 +       If you want to use ramfs as an aufs branch fs, then enable this
1461 +       option. Generally tmpfs is recommended.
1462 +       Aufs prohibits them from being a branch fs by default, because
1463 +       initramfs generally becomes unusable after switch_root or
1464 +       something similar. If you set initramfs as an aufs branch and boot
1465 +       your system by switch_root, you will easily meet a problem since
1466 +       the files in initramfs may be inaccessible.
1467 +       Unless you are going to use ramfs as an aufs branch fs without
1468 +       switch_root or something, leave it N.
1469 +
1470 +config AUFS_BR_FUSE
1471 +       bool "Fuse fs as an aufs branch"
1472 +       depends on FUSE_FS
1473 +       select AUFS_POLL
1474 +       help
1475 +       If you want to use fuse-based userspace filesystem as an aufs
1476 +       branch fs, then enable this option.
1477 +       It implements the internal poll(2) operation which is
1478 +       implemented by fuse only (currently).
1479 +
1480 +config AUFS_DEBUG
1481 +       bool "Debug aufs"
1482 +       help
1483 +       Enable this to compile aufs internal debug code.
1484 +       It will have a negative impact to the performance.
1485 +
1486 +config AUFS_MAGIC_SYSRQ
1487 +       bool
1488 +       depends on AUFS_DEBUG && MAGIC_SYSRQ
1489 +       default y
1490 +       help
1491 +       Automatic configuration for internal use.
1492 +       When aufs supports Magic SysRq, enabled automatically.
1493 +
1494 +config AUFS_BDEV_LOOP
1495 +       bool
1496 +       depends on BLK_DEV_LOOP
1497 +       default y
1498 +       help
1499 +       Automatic configuration for internal use.
1500 +       Convert =[ym] into =y.
1501 +
1502 +config AUFS_INO_T_64
1503 +       bool
1504 +       depends on AUFS_EXPORT
1505 +       depends on 64BIT && !(ALPHA || S390)
1506 +       default y
1507 +       help
1508 +       Automatic configuration for internal use.
1509 +       /* typedef unsigned long/int __kernel_ino_t */
1510 +       /* alpha and s390x are int */
1511 +
1512 +config AUFS_POLL
1513 +       bool
1514 +       help
1515 +       Automatic configuration for internal use.
1516 +endif
1517 diff -uprN -x .git linux-2.6.31/fs/aufs/Makefile aufs2-2.6.git/fs/aufs/Makefile
1518 --- linux-2.6.31/fs/aufs/Makefile       1970-01-01 00:00:00.000000000 +0000
1519 +++ aufs2-2.6.git/fs/aufs/Makefile      2009-09-21 21:49:23.374524295 +0000
1520 @@ -0,0 +1,23 @@
1521 +
1522 +include ${srctree}/${src}/magic.mk
1523 +
1524 +obj-$(CONFIG_AUFS_FS) += aufs.o
1525 +aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
1526 +       wkq.o vfsub.o dcsub.o \
1527 +       cpup.o whout.o plink.o wbr_policy.o \
1528 +       dinfo.o dentry.o \
1529 +       finfo.o file.o f_op.o \
1530 +       dir.o vdir.o \
1531 +       iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
1532 +       ioctl.o
1533 +
1534 +# all are boolean
1535 +aufs-$(CONFIG_SYSFS) += sysfs.o
1536 +aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
1537 +aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
1538 +aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o
1539 +aufs-$(CONFIG_AUFS_EXPORT) += export.o
1540 +aufs-$(CONFIG_AUFS_POLL) += poll.o
1541 +aufs-$(CONFIG_AUFS_RDU) += rdu.o
1542 +aufs-$(CONFIG_AUFS_DEBUG) += debug.o
1543 +aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
1544 diff -uprN -x .git linux-2.6.31/fs/aufs/aufs.h aufs2-2.6.git/fs/aufs/aufs.h
1545 --- linux-2.6.31/fs/aufs/aufs.h 1970-01-01 00:00:00.000000000 +0000
1546 +++ aufs2-2.6.git/fs/aufs/aufs.h        2009-09-21 21:49:23.374524295 +0000
1547 @@ -0,0 +1,51 @@
1548 +/*
1549 + * Copyright (C) 2005-2009 Junjiro R. Okajima
1550 + *
1551 + * This program, aufs is free software; you can redistribute it and/or modify
1552 + * it under the terms of the GNU General Public License as published by
1553 + * the Free Software Foundation; either version 2 of the License, or
1554 + * (at your option) any later version.
1555 + *
1556 + * This program is distributed in the hope that it will be useful,
1557 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1558 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1559 + * GNU General Public License for more details.
1560 + *
1561 + * You should have received a copy of the GNU General Public License
1562 + * along with this program; if not, write to the Free Software
1563 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1564 + */
1565 +
1566 +/*
1567 + * all header files
1568 + */
1569 +
1570 +#ifndef __AUFS_H__
1571 +#define __AUFS_H__
1572 +
1573 +#ifdef __KERNEL__
1574 +
1575 +#include "debug.h"
1576 +
1577 +#include "branch.h"
1578 +#include "cpup.h"
1579 +#include "dcsub.h"
1580 +#include "dbgaufs.h"
1581 +#include "dentry.h"
1582 +#include "dir.h"
1583 +#include "file.h"
1584 +#include "fstype.h"
1585 +#include "inode.h"
1586 +#include "loop.h"
1587 +#include "module.h"
1588 +#include "opts.h"
1589 +#include "rwsem.h"
1590 +#include "spl.h"
1591 +#include "super.h"
1592 +#include "sysaufs.h"
1593 +#include "vfsub.h"
1594 +#include "whout.h"
1595 +#include "wkq.h"
1596 +
1597 +#endif /* __KERNEL__ */
1598 +#endif /* __AUFS_H__ */
1599 diff -uprN -x .git linux-2.6.31/fs/aufs/branch.c aufs2-2.6.git/fs/aufs/branch.c
1600 --- linux-2.6.31/fs/aufs/branch.c       1970-01-01 00:00:00.000000000 +0000
1601 +++ aufs2-2.6.git/fs/aufs/branch.c      2009-09-21 21:49:23.374524295 +0000
1602 @@ -0,0 +1,970 @@
1603 +/*
1604 + * Copyright (C) 2005-2009 Junjiro R. Okajima
1605 + *
1606 + * This program, aufs is free software; you can redistribute it and/or modify
1607 + * it under the terms of the GNU General Public License as published by
1608 + * the Free Software Foundation; either version 2 of the License, or
1609 + * (at your option) any later version.
1610 + *
1611 + * This program is distributed in the hope that it will be useful,
1612 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1613 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1614 + * GNU General Public License for more details.
1615 + *
1616 + * You should have received a copy of the GNU General Public License
1617 + * along with this program; if not, write to the Free Software
1618 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
1619 + */
1620 +
1621 +/*
1622 + * branch management
1623 + */
1624 +
1625 +#include <linux/file.h>
1626 +#include "aufs.h"
1627 +
1628 +/*
1629 + * free a single branch
1630 + */
1631 +static void au_br_do_free(struct au_branch *br)
1632 +{
1633 +       int i;
1634 +       struct au_wbr *wbr;
1635 +
1636 +       if (br->br_xino.xi_file)
1637 +               fput(br->br_xino.xi_file);
1638 +       mutex_destroy(&br->br_xino.xi_nondir_mtx);
1639 +
1640 +       AuDebugOn(atomic_read(&br->br_count));
1641 +
1642 +       wbr = br->br_wbr;
1643 +       if (wbr) {
1644 +               for (i = 0; i < AuBrWh_Last; i++)
1645 +                       dput(wbr->wbr_wh[i]);
1646 +               AuDebugOn(atomic_read(&wbr->wbr_wh_running));
1647 +               AuRwDestroy(&wbr->wbr_wh_rwsem);
1648 +       }
1649 +
1650 +       /* some filesystems acquire extra lock */
1651 +       lockdep_off();
1652 +       mntput(br->br_mnt);
1653 +       lockdep_on();
1654 +
1655 +       kfree(wbr);
1656 +       kfree(br);
1657 +}
1658 +
1659 +/*
1660 + * frees all branches
1661 + */
1662 +void au_br_free(struct au_sbinfo *sbinfo)
1663 +{
1664 +       aufs_bindex_t bmax;
1665 +       struct au_branch **br;
1666 +
1667 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
1668 +
1669 +       bmax = sbinfo->si_bend + 1;
1670 +       br = sbinfo->si_branch;
1671 +       while (bmax--)
1672 +               au_br_do_free(*br++);
1673 +}
1674 +
1675 +/*
1676 + * find the index of a branch which is specified by @br_id.
1677 + */
1678 +int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
1679 +{
1680 +       aufs_bindex_t bindex, bend;
1681 +
1682 +       bend = au_sbend(sb);
1683 +       for (bindex = 0; bindex <= bend; bindex++)
1684 +               if (au_sbr_id(sb, bindex) == br_id)
1685 +                       return bindex;
1686 +       return -1;
1687 +}
1688 +
1689 +/* ---------------------------------------------------------------------- */
1690 +
1691 +/*
1692 + * add a branch
1693 + */
1694 +
1695 +static int test_overlap(struct super_block *sb, struct dentry *h_d1,
1696 +                       struct dentry *h_d2)
1697 +{
1698 +       if (unlikely(h_d1 == h_d2))
1699 +               return 1;
1700 +       return !!au_test_subdir(h_d1, h_d2)
1701 +               || !!au_test_subdir(h_d2, h_d1)
1702 +               || au_test_loopback_overlap(sb, h_d1, h_d2)
1703 +               || au_test_loopback_overlap(sb, h_d2, h_d1);
1704 +}
1705 +
1706 +/*
1707 + * returns a newly allocated branch. @new_nbranch is a number of branches
1708 + * after adding a branch.
1709 + */
1710 +static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
1711 +                                    int perm)
1712 +{
1713 +       struct au_branch *add_branch;
1714 +       struct dentry *root;
1715 +
1716 +       root = sb->s_root;
1717 +       add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
1718 +       if (unlikely(!add_branch))
1719 +               goto out;
1720 +
1721 +       add_branch->br_wbr = NULL;
1722 +       if (au_br_writable(perm)) {
1723 +               /* may be freed separately at changing the branch permission */
1724 +               add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
1725 +                                            GFP_NOFS);
1726 +               if (unlikely(!add_branch->br_wbr))
1727 +                       goto out_br;
1728 +       }
1729 +
1730 +       if (unlikely(au_sbr_realloc(au_sbi(sb), new_nbranch)
1731 +                    || au_di_realloc(au_di(root), new_nbranch)
1732 +                    || au_ii_realloc(au_ii(root->d_inode), new_nbranch)))
1733 +               goto out_wbr;
1734 +       return add_branch; /* success */
1735 +
1736 + out_wbr:
1737 +       kfree(add_branch->br_wbr);
1738 + out_br:
1739 +       kfree(add_branch);
1740 + out:
1741 +       return ERR_PTR(-ENOMEM);
1742 +}
1743 +
1744 +/*
1745 + * test if the branch permission is legal or not.
1746 + */
1747 +static int test_br(struct inode *inode, int brperm, char *path)
1748 +{
1749 +       int err;
1750 +
1751 +       err = 0;
1752 +       if (unlikely(au_br_writable(brperm) && IS_RDONLY(inode))) {
1753 +               AuErr("write permission for readonly mount or inode, %s\n",
1754 +                     path);
1755 +               err = -EINVAL;
1756 +       }
1757 +
1758 +       return err;
1759 +}
1760 +
1761 +/*
1762 + * returns:
1763 + * 0: success, the caller will add it
1764 + * plus: success, it is already unified, the caller should ignore it
1765 + * minus: error
1766 + */
1767 +static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
1768 +{
1769 +       int err;
1770 +       aufs_bindex_t bend, bindex;
1771 +       struct dentry *root;
1772 +       struct inode *inode, *h_inode;
1773 +
1774 +       root = sb->s_root;
1775 +       bend = au_sbend(sb);
1776 +       if (unlikely(bend >= 0
1777 +                    && au_find_dbindex(root, add->path.dentry) >= 0)) {
1778 +               err = 1;
1779 +               if (!remount) {
1780 +                       err = -EINVAL;
1781 +                       AuErr("%s duplicated\n", add->pathname);
1782 +               }
1783 +               goto out;
1784 +       }
1785 +
1786 +       err = -ENOSPC; /* -E2BIG; */
1787 +       if (unlikely(AUFS_BRANCH_MAX <= add->bindex
1788 +                    || AUFS_BRANCH_MAX - 1 <= bend)) {
1789 +               AuErr("number of branches exceeded %s\n", add->pathname);
1790 +               goto out;
1791 +       }
1792 +
1793 +       err = -EDOM;
1794 +       if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
1795 +               AuErr("bad index %d\n", add->bindex);
1796 +               goto out;
1797 +       }
1798 +
1799 +       inode = add->path.dentry->d_inode;
1800 +       err = -ENOENT;
1801 +       if (unlikely(!inode->i_nlink)) {
1802 +               AuErr("no existence %s\n", add->pathname);
1803 +               goto out;
1804 +       }
1805 +
1806 +       err = -EINVAL;
1807 +       if (unlikely(inode->i_sb == sb)) {
1808 +               AuErr("%s must be outside\n", add->pathname);
1809 +               goto out;
1810 +       }
1811 +
1812 +       if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
1813 +               AuErr("unsupported filesystem, %s (%s)\n",
1814 +                     add->pathname, au_sbtype(inode->i_sb));
1815 +               goto out;
1816 +       }
1817 +
1818 +       err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
1819 +       if (unlikely(err))
1820 +               goto out;
1821 +
1822 +       if (bend < 0)
1823 +               return 0; /* success */
1824 +
1825 +       err = -EINVAL;
1826 +       for (bindex = 0; bindex <= bend; bindex++)
1827 +               if (unlikely(test_overlap(sb, add->path.dentry,
1828 +                                         au_h_dptr(root, bindex)))) {
1829 +                       AuErr("%s is overlapped\n", add->pathname);
1830 +                       goto out;
1831 +               }
1832 +
1833 +       err = 0;
1834 +       if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
1835 +               h_inode = au_h_dptr(root, 0)->d_inode;
1836 +               if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
1837 +                   || h_inode->i_uid != inode->i_uid
1838 +                   || h_inode->i_gid != inode->i_gid)
1839 +                       AuWarn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
1840 +                              add->pathname,
1841 +                              inode->i_uid, inode->i_gid,
1842 +                              (inode->i_mode & S_IALLUGO),
1843 +                              h_inode->i_uid, h_inode->i_gid,
1844 +                              (h_inode->i_mode & S_IALLUGO));
1845 +       }
1846 +
1847 + out:
1848 +       return err;
1849 +}
1850 +
1851 +/*
1852 + * initialize or clean the whiteouts for an adding branch
1853 + */
1854 +static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
1855 +                        int new_perm, struct dentry *h_root)
1856 +{
1857 +       int err, old_perm;
1858 +       aufs_bindex_t bindex;
1859 +       struct mutex *h_mtx;
1860 +       struct au_wbr *wbr;
1861 +       struct au_hinode *hdir;
1862 +
1863 +       wbr = br->br_wbr;
1864 +       old_perm = br->br_perm;
1865 +       br->br_perm = new_perm;
1866 +       hdir = NULL;
1867 +       h_mtx = NULL;
1868 +       bindex = au_br_index(sb, br->br_id);
1869 +       if (0 <= bindex) {
1870 +               hdir = au_hi(sb->s_root->d_inode, bindex);
1871 +               au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1872 +       } else {
1873 +               h_mtx = &h_root->d_inode->i_mutex;
1874 +               mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
1875 +       }
1876 +       if (!wbr)
1877 +               err = au_wh_init(h_root, br, sb);
1878 +       else {
1879 +               wbr_wh_write_lock(wbr);
1880 +               err = au_wh_init(h_root, br, sb);
1881 +               wbr_wh_write_unlock(wbr);
1882 +       }
1883 +       if (hdir)
1884 +               au_hin_imtx_unlock(hdir);
1885 +       else
1886 +               mutex_unlock(h_mtx);
1887 +       br->br_perm = old_perm;
1888 +
1889 +       if (!err && wbr && !au_br_writable(new_perm)) {
1890 +               kfree(wbr);
1891 +               br->br_wbr = NULL;
1892 +       }
1893 +
1894 +       return err;
1895 +}
1896 +
1897 +static int au_wbr_init(struct au_branch *br, struct super_block *sb,
1898 +                      int perm, struct path *path)
1899 +{
1900 +       int err;
1901 +       struct au_wbr *wbr;
1902 +
1903 +       wbr = br->br_wbr;
1904 +       au_rw_init(&wbr->wbr_wh_rwsem);
1905 +       memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
1906 +       atomic_set(&wbr->wbr_wh_running, 0);
1907 +       wbr->wbr_bytes = 0;
1908 +
1909 +       err = au_br_init_wh(sb, br, perm, path->dentry);
1910 +
1911 +       return err;
1912 +}
1913 +
1914 +/* initialize a new branch. on failure, au_br_do_free() cleans it up */
1915 +static int au_br_init(struct au_branch *br, struct super_block *sb,
1916 +                     struct au_opt_add *add)
1917 +{
1918 +       int err;
1919 +
1920 +       err = 0;
1921 +       memset(&br->br_xino, 0, sizeof(br->br_xino));
1922 +       mutex_init(&br->br_xino.xi_nondir_mtx);
1923 +       br->br_perm = add->perm;
1924 +       /* hold the mnt now, au_br_do_free() always mntput()s br_mnt */
1925 +       br->br_mnt = mntget(add->path.mnt);
1926 +       atomic_set(&br->br_count, 0);
1927 +       br->br_xino_upper = AUFS_XINO_TRUNC_INIT;
1928 +       atomic_set(&br->br_xino_running, 0);
1929 +       br->br_id = au_new_br_id(sb);
1930 +
1931 +       if (au_br_writable(add->perm)) {
1932 +               err = au_wbr_init(br, sb, add->perm, &add->path);
1933 +               if (unlikely(err))
1934 +                       goto out;
1935 +       }
1936 +
1937 +       if (au_opt_test(au_mntflags(sb), XINO)) {
1938 +               err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
1939 +                                au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
1940 +               if (unlikely(err)) {
1941 +                       AuDebugOn(br->br_xino.xi_file);
1942 +                       goto out;
1943 +               }
1944 +       }
1945 +
1946 +       sysaufs_br_init(br);
1947 +
1948 + out:
1949 +       return err;
1950 +}
1951 +
1952 +static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
1953 +                            struct au_branch *br, aufs_bindex_t bend,
1954 +                            aufs_bindex_t amount)
1955 +{
1956 +       struct au_branch **brp;
1957 +
1958 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
1959 +
1960 +       brp = sbinfo->si_branch + bindex;
1961 +       memmove(brp + 1, brp, sizeof(*brp) * amount);
1962 +       *brp = br;
1963 +       sbinfo->si_bend++;
1964 +       if (unlikely(bend < 0))
1965 +               sbinfo->si_bend = 0;
1966 +}
1967 +
1968 +static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
1969 +                            aufs_bindex_t bend, aufs_bindex_t amount)
1970 +{
1971 +       struct au_hdentry *hdp;
1972 +
1973 +       AuRwMustWriteLock(&dinfo->di_rwsem);
1974 +
1975 +       hdp = dinfo->di_hdentry + bindex;
1976 +       memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
1977 +       au_h_dentry_init(hdp);
1978 +       dinfo->di_bend++;
1979 +       if (unlikely(bend < 0))
1980 +               dinfo->di_bstart = 0;
1981 +}
1982 +
1983 +static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
1984 +                            aufs_bindex_t bend, aufs_bindex_t amount)
1985 +{
1986 +       struct au_hinode *hip;
1987 +
1988 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
1989 +
1990 +       hip = iinfo->ii_hinode + bindex;
1991 +       memmove(hip + 1, hip, sizeof(*hip) * amount);
1992 +       hip->hi_inode = NULL;
1993 +       au_hin_init(hip, NULL);
1994 +       iinfo->ii_bend++;
1995 +       if (unlikely(bend < 0))
1996 +               iinfo->ii_bstart = 0;
1997 +}
1998 +
1999 +static void au_br_do_add(struct super_block *sb, struct dentry *h_dentry,
2000 +                        struct au_branch *br, aufs_bindex_t bindex)
2001 +{
2002 +       struct dentry *root;
2003 +       struct inode *root_inode;
2004 +       aufs_bindex_t bend, amount;
2005 +
2006 +       root = sb->s_root;
2007 +       root_inode = root->d_inode;
2008 +       au_plink_block_maintain(sb);
2009 +       bend = au_sbend(sb);
2010 +       amount = bend + 1 - bindex;
2011 +       au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
2012 +       au_br_do_add_hdp(au_di(root), bindex, bend, amount);
2013 +       au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
2014 +       au_set_h_dptr(root, bindex, dget(h_dentry));
2015 +       au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
2016 +                     /*flags*/0);
2017 +}
2018 +/* add the branch described by add; returns 0 on success or a negative errno */
2019 +int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
2020 +{
2021 +       int err;
2022 +       aufs_bindex_t bend, add_bindex;
2023 +       struct dentry *root, *h_dentry;
2024 +       struct inode *root_inode;
2025 +       struct au_branch *add_branch;
2026 +
2027 +       root = sb->s_root;
2028 +       root_inode = root->d_inode;
2029 +       IMustLock(root_inode);
2030 +       err = test_add(sb, add, remount);
2031 +       if (unlikely(err < 0))
2032 +               goto out;
2033 +       if (err) { /* positive return: report success without adding anything */
2034 +               err = 0;
2035 +               goto out; /* success */
2036 +       }
2037 +
2038 +       bend = au_sbend(sb);
2039 +       add_branch = au_br_alloc(sb, bend + 2, add->perm); /* bend + 2: presumably the new branch count -- confirm */
2040 +       err = PTR_ERR(add_branch);
2041 +       if (IS_ERR(add_branch))
2042 +               goto out;
2043 +
2044 +       err = au_br_init(add_branch, sb, add);
2045 +       if (unlikely(err)) {
2046 +               au_br_do_free(add_branch);
2047 +               goto out;
2048 +       }
2049 +
2050 +       add_bindex = add->bindex;
2051 +       h_dentry = add->path.dentry;
2052 +       if (!remount)
2053 +               au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2054 +       else {
2055 +               sysaufs_brs_del(sb, add_bindex);
2056 +               au_br_do_add(sb, h_dentry, add_branch, add_bindex);
2057 +               sysaufs_brs_add(sb, add_bindex);
2058 +       }
2059 +
2060 +       if (!add_bindex) { /* new branch 0: refresh the root attributes and size limit from it */
2061 +               au_cpup_attr_all(root_inode, /*force*/1);
2062 +               sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
2063 +       } else
2064 +               au_add_nlink(root_inode, h_dentry->d_inode);
2065 +
2066 +       /*
2067 +        * this test/set prevents aufs from handling unnecessary inotify events
2068 +        * of xino files, in a case of re-adding a writable branch which was
2069 +        * once detached from aufs.
2070 +        */
2071 +       if (au_xino_brid(sb) < 0
2072 +           && au_br_writable(add_branch->br_perm)
2073 +           && !au_test_fs_bad_xino(h_dentry->d_sb)
2074 +           && add_branch->br_xino.xi_file
2075 +           && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
2076 +               au_xino_brid_set(sb, add_branch->br_id);
2077 +
2078 + out:
2079 +       return err;
2080 +}
2081 +
2082 +/* ---------------------------------------------------------------------- */
2083 +
2084 +/*
2085 + * delete a branch
2086 + */
2087 +
2088 +/* log via AuInfo() only if do_info is set; kept a macro (not an inline function) to show the caller's line number */
2089 +#define AuVerbose(do_info, fmt, args...) do { \
2090 +       if (do_info) \
2091 +               AuInfo(fmt, ##args); \
2092 +} while (0)
2093 +
2094 +/*
2095 + * test if the branch is deletable or not: -EBUSY when a dentry under root still needs branch bindex.
2096 + */
2097 +static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
2098 +                           unsigned int sigen)
2099 +{
2100 +       int err, i, j, ndentry;
2101 +       aufs_bindex_t bstart, bend;
2102 +       unsigned char verbose;
2103 +       struct au_dcsub_pages dpages;
2104 +       struct au_dpage *dpage;
2105 +       struct dentry *d;
2106 +       struct inode *inode;
2107 +
2108 +       err = au_dpages_init(&dpages, GFP_NOFS);
2109 +       if (unlikely(err))
2110 +               goto out;
2111 +       err = au_dcsub_pages(&dpages, root, NULL, NULL);
2112 +       if (unlikely(err))
2113 +               goto out_dpages;
2114 +
2115 +       verbose = !!au_opt_test(au_mntflags(root->d_sb), VERBOSE);
2116 +       for (i = 0; !err && i < dpages.ndpage; i++) { /* both loops stop at the first error */
2117 +               dpage = dpages.dpages + i;
2118 +               ndentry = dpage->ndentry;
2119 +               for (j = 0; !err && j < ndentry; j++) {
2120 +                       d = dpage->dentries[j];
2121 +                       AuDebugOn(!atomic_read(&d->d_count));
2122 +                       inode = d->d_inode; /* NOTE(review): dereferenced below without a NULL check -- confirm */
2123 +                       if (au_digen(d) == sigen && au_iigen(inode) == sigen)
2124 +                               di_read_lock_child(d, AuLock_IR);
2125 +                       else { /* generation mismatch: revalidate under the write lock first */
2126 +                               di_write_lock_child(d);
2127 +                               err = au_reval_dpath(d, sigen);
2128 +                               if (!err)
2129 +                                       di_downgrade_lock(d, AuLock_IR);
2130 +                               else {
2131 +                                       di_write_unlock(d);
2132 +                                       break;
2133 +                               }
2134 +                       }
2135 +                       /* busy: d has a lower dentry on bindex and is a non-dir, or a dir with bstart == bend */
2136 +                       bstart = au_dbstart(d);
2137 +                       bend = au_dbend(d);
2138 +                       if (bstart <= bindex
2139 +                           && bindex <= bend
2140 +                           && au_h_dptr(d, bindex)
2141 +                           && (!S_ISDIR(inode->i_mode) || bstart == bend)) {
2142 +                               err = -EBUSY;
2143 +                               AuVerbose(verbose, "busy %.*s\n", AuDLNPair(d));
2144 +                       }
2145 +                       di_read_unlock(d, AuLock_IR);
2146 +               }
2147 +       }
2148 +
2149 + out_dpages:
2150 +       au_dpages_free(&dpages);
2151 + out:
2152 +       return err;
2153 +}
2154 +/* the same busy test for the inodes of sb which have no dentry attached */
2155 +static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
2156 +                          unsigned int sigen)
2157 +{
2158 +       int err;
2159 +       struct inode *i;
2160 +       aufs_bindex_t bstart, bend;
2161 +       unsigned char verbose;
2162 +
2163 +       err = 0;
2164 +       verbose = !!au_opt_test(au_mntflags(sb), VERBOSE);
2165 +       list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
2166 +               AuDebugOn(!atomic_read(&i->i_count));
2167 +               if (!list_empty(&i->i_dentry))
2168 +                       continue; /* has a dentry; presumably covered by test_dentry_busy() */
2169 +
2170 +               if (au_iigen(i) == sigen)
2171 +                       ii_read_lock_child(i);
2172 +               else { /* generation mismatch: refresh under the write lock first */
2173 +                       ii_write_lock_child(i);
2174 +                       err = au_refresh_hinode_self(i, /*do_attr*/1);
2175 +                       if (!err)
2176 +                               ii_downgrade_lock(i);
2177 +                       else {
2178 +                               ii_write_unlock(i);
2179 +                               break;
2180 +                       }
2181 +               }
2182 +               /* busy: i has a lower inode on bindex and is a non-dir, or a dir with bstart == bend */
2183 +               bstart = au_ibstart(i);
2184 +               bend = au_ibend(i);
2185 +               if (bstart <= bindex
2186 +                   && bindex <= bend
2187 +                   && au_h_iptr(i, bindex)
2188 +                   && (!S_ISDIR(i->i_mode) || bstart == bend)) {
2189 +                       err = -EBUSY;
2190 +                       AuVerbose(verbose, "busy i%lu\n", i->i_ino);
2191 +                       ii_read_unlock(i);
2192 +                       break;
2193 +               }
2194 +               ii_read_unlock(i);
2195 +       }
2196 +
2197 +       return err;
2198 +}
2199 +/* run the dentry and inode busy tests with the root dentry write lock dropped meanwhile */
2200 +static int test_children_busy(struct dentry *root, aufs_bindex_t bindex)
2201 +{
2202 +       int err;
2203 +       unsigned int sigen;
2204 +
2205 +       sigen = au_sigen(root->d_sb);
2206 +       DiMustNoWaiters(root);
2207 +       IiMustNoWaiters(root->d_inode);
2208 +       di_write_unlock(root);
2209 +       err = test_dentry_busy(root, bindex, sigen);
2210 +       if (!err) /* the inode test runs only when no dentry was busy */
2211 +               err = test_inode_busy(root->d_sb, bindex, sigen);
2212 +       di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
2213 +
2214 +       return err;
2215 +}
2216 +/* remove slot bindex from si_branch[] by shifting the tail down, then try to shrink the array */
2217 +static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
2218 +                            const aufs_bindex_t bindex,
2219 +                            const aufs_bindex_t bend)
2220 +{
2221 +       struct au_branch **brp, **p;
2222 +
2223 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
2224 +
2225 +       brp = sbinfo->si_branch + bindex;
2226 +       if (bindex < bend)
2227 +               memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
2228 +       sbinfo->si_branch[0 + bend] = NULL; /* clear the now-unused last slot */
2229 +       sbinfo->si_bend--;
2230 +       /* bend is the old last index, i.e. the new element count; a failed shrink is harmless */
2231 +       p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, GFP_NOFS);
2232 +       if (p)
2233 +               sbinfo->si_branch = p;
2234 +}
2235 +/* remove slot bindex from di_hdentry[] by shifting the tail down, then try to shrink the array */
2236 +static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
2237 +                            const aufs_bindex_t bend)
2238 +{
2239 +       struct au_hdentry *hdp, *p;
2240 +
2241 +       AuRwMustWriteLock(&dinfo->di_rwsem);
2242 +
2243 +       hdp = dinfo->di_hdentry + bindex;
2244 +       if (bindex < bend)
2245 +               memmove(hdp, hdp + 1, sizeof(*hdp) * (bend - bindex));
2246 +       dinfo->di_hdentry[0 + bend].hd_dentry = NULL; /* clear the now-unused last slot */
2247 +       dinfo->di_bend--;
2248 +       /* bend is the old last index, i.e. the new element count; a failed shrink is harmless */
2249 +       p = krealloc(dinfo->di_hdentry, sizeof(*p) * bend, GFP_NOFS);
2250 +       if (p)
2251 +               dinfo->di_hdentry = p;
2252 +}
2253 +/* remove slot bindex from ii_hinode[] by shifting the tail down, then try to shrink the array */
2254 +static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
2255 +                            const aufs_bindex_t bend)
2256 +{
2257 +       struct au_hinode *hip, *p;
2258 +
2259 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
2260 +
2261 +       hip = iinfo->ii_hinode + bindex;
2262 +       if (bindex < bend)
2263 +               memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
2264 +       iinfo->ii_hinode[0 + bend].hi_inode = NULL; /* clear the now-unused last slot */
2265 +       au_hin_init(iinfo->ii_hinode + bend, NULL);
2266 +       iinfo->ii_bend--;
2267 +       /* bend is the old last index, i.e. the new element count; a failed shrink is harmless */
2268 +       p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, GFP_NOFS);
2269 +       if (p)
2270 +               iinfo->ii_hinode = p;
2271 +}
2272 +/* put root's lower dentry/inode on bindex, free br and close the gap in all three arrays */
2273 +static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
2274 +                        struct au_branch *br)
2275 +{
2276 +       aufs_bindex_t bend;
2277 +       struct au_sbinfo *sbinfo;
2278 +       struct dentry *root;
2279 +       struct inode *inode;
2280 +
2281 +       SiMustWriteLock(sb);
2282 +
2283 +       root = sb->s_root;
2284 +       inode = root->d_inode;
2285 +       au_plink_block_maintain(sb);
2286 +       sbinfo = au_sbi(sb);
2287 +       bend = sbinfo->si_bend; /* sampled before the helpers below decrement it */
2288 +
2289 +       dput(au_h_dptr(root, bindex));
2290 +       au_hiput(au_hi(inode, bindex));
2291 +       au_br_do_free(br); /* br must not be used by the caller after this point */
2292 +
2293 +       au_br_do_del_brp(sbinfo, bindex, bend);
2294 +       au_br_do_del_hdp(au_di(root), bindex, bend);
2295 +       au_br_do_del_hip(au_ii(inode), bindex, bend);
2296 +}
2297 +/* delete the branch given by del; returns 0 on success or a negative errno (-ENOENT, -EBUSY, ...) */
2298 +int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
2299 +{
2300 +       int err, rerr, i;
2301 +       unsigned int mnt_flags;
2302 +       aufs_bindex_t bindex, bend, br_id;
2303 +       unsigned char do_wh, verbose;
2304 +       struct au_branch *br;
2305 +       struct au_wbr *wbr;
2306 +
2307 +       err = 0;
2308 +       bindex = au_find_dbindex(sb->s_root, del->h_path.dentry);
2309 +       if (bindex < 0) {
2310 +               if (remount)
2311 +                       goto out; /* success */
2312 +               err = -ENOENT;
2313 +               AuErr("%s no such branch\n", del->pathname);
2314 +               goto out;
2315 +       }
2316 +       AuDbg("bindex b%d\n", bindex);
2317 +
2318 +       err = -EBUSY;
2319 +       mnt_flags = au_mntflags(sb);
2320 +       verbose = !!au_opt_test(mnt_flags, VERBOSE);
2321 +       bend = au_sbend(sb);
2322 +       if (unlikely(!bend)) { /* refuse to delete the only branch */
2323 +               AuVerbose(verbose, "no more branches left\n");
2324 +               goto out;
2325 +       }
2326 +       br = au_sbr(sb, bindex);
2327 +       i = atomic_read(&br->br_count);
2328 +       if (unlikely(i)) {
2329 +               AuVerbose(verbose, "%d file(s) opened\n", i);
2330 +               goto out;
2331 +       }
2332 +
2333 +       wbr = br->br_wbr;
2334 +       do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
2335 +       if (do_wh) { /* put the whiteout dentries before the busy test; re-created at out_wh on failure */
2336 +               /* instead of WbrWhMustWriteLock(wbr) */
2337 +               SiMustWriteLock(sb);
2338 +               for (i = 0; i < AuBrWh_Last; i++) {
2339 +                       dput(wbr->wbr_wh[i]);
2340 +                       wbr->wbr_wh[i] = NULL;
2341 +               }
2342 +       }
2343 +
2344 +       err = test_children_busy(sb->s_root, bindex);
2345 +       if (unlikely(err)) {
2346 +               if (do_wh)
2347 +                       goto out_wh;
2348 +               goto out;
2349 +       }
2350 +       /* br is released via au_br_do_free() inside au_br_do_del(); keep its id for later */
2351 +       err = 0;
2352 +       br_id = br->br_id;
2353 +       if (!remount)
2354 +               au_br_do_del(sb, bindex, br);
2355 +       else {
2356 +               sysaufs_brs_del(sb, bindex);
2357 +               au_br_do_del(sb, bindex, br);
2358 +               sysaufs_brs_add(sb, bindex);
2359 +       }
2360 +
2361 +       if (!bindex) { /* branch 0 changed: refresh the root attributes and size limit */
2362 +               au_cpup_attr_all(sb->s_root->d_inode, /*force*/1);
2363 +               sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
2364 +       } else
2365 +               au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode);
2366 +       if (au_opt_test(mnt_flags, PLINK))
2367 +               au_plink_half_refresh(sb, br_id);
2368 +
2369 +       if (au_xino_brid(sb) == br_id) /* use the saved id, br is already freed */
2370 +               au_xino_brid_set(sb, -1);
2371 +       goto out; /* success */
2372 +
2373 + out_wh:
2374 +       /* revert */
2375 +       rerr = au_br_init_wh(sb, br, br->br_perm, del->h_path.dentry);
2376 +       if (rerr)
2377 +               AuWarn("failed re-creating base whiteout, %s. (%d)\n",
2378 +                      del->pathname, rerr);
2379 + out:
2380 +       return err;
2381 +}
2382 +
2383 +/* ---------------------------------------------------------------------- */
2384 +
2385 +/*
2386 + * change a branch permission
2387 + */
2388 +/* with CONFIG_IMA, warn that IMA may report wrongly after a branch turns RW -> RO; otherwise a no-op */
2389 +static void au_warn_ima(void)
2390 +{
2391 +#ifdef CONFIG_IMA
2392 +       /* since it doesn't support mark_files_ro() */
2393 +       AuWarn("RW -> RO makes IMA to produce wrong message\n");
2394 +#endif
2395 +}
2396 +/* true when permission a is whiteout-able (au_br_whable) but permission b is not */
2397 +static int do_need_sigen_inc(int a, int b)
2398 +{
2399 +       return au_br_whable(a) && !au_br_whable(b);
2400 +}
2401 +/* true when the whiteout-ability differs between the old and the new permission */
2402 +static int need_sigen_inc(int old, int new)
2403 +{
2404 +       return do_need_sigen_inc(old, new)
2405 +               || do_need_sigen_inc(new, old);
2406 +}
2407 +/* collect the lower files opened for write on branch bindex, then clear FMODE_WRITE on each */
2408 +static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
2409 +{
2410 +       int err;
2411 +       unsigned long n, ul, bytes, files;
2412 +       aufs_bindex_t bstart;
2413 +       struct file *file, *hf, **a;
2414 +       const int step_bytes = 1024, /* memory allocation unit */
2415 +               step_files = step_bytes / sizeof(*a);
2416 +
2417 +       err = -ENOMEM;
2418 +       n = 0; /* number of files collected in a[] */
2419 +       bytes = step_bytes;
2420 +       files = step_files; /* current capacity of a[], in entries */
2421 +       a = kmalloc(bytes, GFP_NOFS);
2422 +       if (unlikely(!a))
2423 +               goto out;
2424 +
2425 +       /* no need file_list_lock() since sbinfo is locked? deferred? */
2426 +       list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
2427 +               if (special_file(file->f_dentry->d_inode->i_mode))
2428 +                       continue;
2429 +
2430 +               AuDbg("%.*s\n", AuDLNPair(file->f_dentry));
2431 +               fi_read_lock(file);
2432 +               if (unlikely(au_test_mmapped(file))) { /* any mmapped file aborts the whole change */
2433 +                       err = -EBUSY;
2434 +                       FiMustNoWaiters(file);
2435 +                       fi_read_unlock(file);
2436 +                       goto out_free;
2437 +               }
2438 +
2439 +               bstart = au_fbstart(file);
2440 +               if (!S_ISREG(file->f_dentry->d_inode->i_mode)
2441 +                   || !(file->f_mode & FMODE_WRITE)
2442 +                   || bstart != bindex) { /* only regular files opened for write on this branch matter */
2443 +                       FiMustNoWaiters(file);
2444 +                       fi_read_unlock(file);
2445 +                       continue;
2446 +               }
2447 +
2448 +               hf = au_h_fptr(file, bstart);
2449 +               FiMustNoWaiters(file);
2450 +               fi_read_unlock(file);
2451 +
2452 +               if (n < files)
2453 +                       a[n++] = hf;
2454 +               else { /* a[] is full: grow it by one allocation unit */
2455 +                       void *p;
2456 +
2457 +                       err = -ENOMEM;
2458 +                       bytes += step_bytes;
2459 +                       files += step_files;
2460 +                       p = krealloc(a, bytes, GFP_NOFS);
2461 +                       if (p) {
2462 +                               a = p;
2463 +                               a[n++] = hf;
2464 +                       } else
2465 +                               goto out_free;
2466 +               }
2467 +       }
2468 +       /* second pass: nothing failed, so drop write access from every collected lower file */
2469 +       err = 0;
2470 +       if (n)
2471 +               au_warn_ima();
2472 +       for (ul = 0; ul < n; ul++) {
2473 +               /* todo: already flushed? */
2474 +               /* cf. fs/super.c:mark_files_ro() */
2475 +               hf = a[ul];
2476 +               hf->f_mode &= ~FMODE_WRITE;
2477 +               if (!file_check_writeable(hf)) {
2478 +                       file_release_write(hf);
2479 +                       mnt_drop_write(hf->f_vfsmnt);
2480 +               }
2481 +       }
2482 +
2483 + out_free:
2484 +       kfree(a);
2485 + out:
2486 +       return err;
2487 +}
2488 +/* change the permission of the branch given by mod; ORs the need_sigen_inc() result into *do_update */
2489 +int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2490 +             int *do_update)
2491 +{
2492 +       int err, rerr;
2493 +       aufs_bindex_t bindex;
2494 +       struct path path;
2495 +       struct dentry *root;
2496 +       struct au_branch *br;
2497 +
2498 +       root = sb->s_root;
2499 +       au_plink_block_maintain(sb);
2500 +       bindex = au_find_dbindex(root, mod->h_root);
2501 +       if (bindex < 0) {
2502 +               if (remount)
2503 +                       return 0; /* success */
2504 +               err = -ENOENT;
2505 +               AuErr("%s no such branch\n", mod->path);
2506 +               goto out;
2507 +       }
2508 +       AuDbg("bindex b%d\n", bindex);
2509 +
2510 +       err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
2511 +       if (unlikely(err))
2512 +               goto out;
2513 +
2514 +       br = au_sbr(sb, bindex);
2515 +       if (br->br_perm == mod->perm) /* nothing to change */
2516 +               return 0; /* success */
2517 +
2518 +       if (au_br_writable(br->br_perm)) {
2519 +               /* remove whiteout base */
2520 +               err = au_br_init_wh(sb, br, mod->perm, mod->h_root);
2521 +               if (unlikely(err))
2522 +                       goto out;
2523 +
2524 +               if (!au_br_writable(mod->perm)) {
2525 +                       /* rw --> ro, file might be mmapped */
2526 +                       DiMustNoWaiters(root);
2527 +                       IiMustNoWaiters(root->d_inode);
2528 +                       di_write_unlock(root);
2529 +                       err = au_br_mod_files_ro(sb, bindex);
2530 +                       /* aufs_write_lock() calls ..._child() */
2531 +                       di_write_lock_child(root);
2532 +
2533 +                       if (unlikely(err)) { /* revert: rebuild br_wbr with the old permission */
2534 +                               rerr = -ENOMEM;
2535 +                               br->br_wbr = kmalloc(sizeof(*br->br_wbr),
2536 +                                                    GFP_NOFS);
2537 +                               if (br->br_wbr) {
2538 +                                       path.mnt = br->br_mnt;
2539 +                                       path.dentry = mod->h_root;
2540 +                                       rerr = au_wbr_init(br, sb, br->br_perm,
2541 +                                                          &path);
2542 +                               }
2543 +                               if (unlikely(rerr)) { /* NOTE(review): br_wbr is not freed here, unlike ro --> rw -- confirm */
2544 +                                       AuIOErr("nested error %d (%d)\n",
2545 +                                               rerr, err);
2546 +                                       br->br_perm = mod->perm;
2547 +                               }
2548 +                       }
2549 +               }
2550 +       } else if (au_br_writable(mod->perm)) {
2551 +               /* ro --> rw */
2552 +               err = -ENOMEM;
2553 +               br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
2554 +               if (br->br_wbr) {
2555 +                       path.mnt = br->br_mnt;
2556 +                       path.dentry = mod->h_root;
2557 +                       err = au_wbr_init(br, sb, mod->perm, &path);
2558 +                       if (unlikely(err)) {
2559 +                               kfree(br->br_wbr);
2560 +                               br->br_wbr = NULL;
2561 +                       }
2562 +               }
2563 +       }
2564 +
2565 +       if (!err) { /* commit the new permission only on success */
2566 +               *do_update |= need_sigen_inc(br->br_perm, mod->perm);
2567 +               br->br_perm = mod->perm;
2568 +       }
2569 +
2570 + out:
2571 +       return err;
2572 +}
2573 diff -uprN -x .git linux-2.6.31/fs/aufs/branch.h aufs2-2.6.git/fs/aufs/branch.h
2574 --- linux-2.6.31/fs/aufs/branch.h       1970-01-01 00:00:00.000000000 +0000
2575 +++ aufs2-2.6.git/fs/aufs/branch.h      2009-09-21 21:49:23.377863284 +0000
2576 @@ -0,0 +1,219 @@
2577 +/*
2578 + * Copyright (C) 2005-2009 Junjiro R. Okajima
2579 + *
2580 + * This program, aufs is free software; you can redistribute it and/or modify
2581 + * it under the terms of the GNU General Public License as published by
2582 + * the Free Software Foundation; either version 2 of the License, or
2583 + * (at your option) any later version.
2584 + *
2585 + * This program is distributed in the hope that it will be useful,
2586 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
2587 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2588 + * GNU General Public License for more details.
2589 + *
2590 + * You should have received a copy of the GNU General Public License
2591 + * along with this program; if not, write to the Free Software
2592 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
2593 + */
2594 +
2595 +/*
2596 + * branch filesystems and xino for them
2597 + */
2598 +
2599 +#ifndef __AUFS_BRANCH_H__
2600 +#define __AUFS_BRANCH_H__
2601 +
2602 +#ifdef __KERNEL__
2603 +
2604 +#include <linux/fs.h>
2605 +#include <linux/mount.h>
2606 +#include <linux/aufs_type.h>
2607 +#include "rwsem.h"
2608 +#include "super.h"
2609 +
2610 +/* ---------------------------------------------------------------------- */
2611 +
2612 +/* a xino (external inode number translation table) file, embedded in struct au_branch */
2613 +struct au_xino_file {
2614 +       struct file             *xi_file; /* may be NULL, see the test in au_br_add() */
2615 +       struct mutex            xi_nondir_mtx;
2616 +
2617 +       /* todo: make xino files an array to support huge inode number */
2618 +
2619 +#ifdef CONFIG_DEBUG_FS
2620 +       struct dentry            *xi_dbgaufs; /* presumably the entry under debugfs -- confirm */
2621 +#endif
2622 +};
2623 +
2624 +/* members for writable branch only */
2625 +enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; /* indices into wbr_wh[] */
2626 +struct au_wbr {
2627 +       struct au_rwsem         wbr_wh_rwsem; /* wrapped by the wbr_wh_*_lock() helpers */
2628 +       struct dentry           *wbr_wh[AuBrWh_Last]; /* put and set to NULL by au_br_del() */
2629 +       atomic_t                wbr_wh_running;
2630 +#define wbr_whbase             wbr_wh[AuBrWh_BASE]     /* whiteout base */
2631 +#define wbr_plink              wbr_wh[AuBrWh_PLINK]    /* pseudo-link dir */
2632 +#define wbr_orph               wbr_wh[AuBrWh_ORPH]     /* dir for orphans */
2633 +
2634 +       /* mfs mode */
2635 +       unsigned long long      wbr_bytes;
2636 +};
2637 +
2638 +/* one branch of an aufs mount; protected by superblock rwsem */
2639 +struct au_branch {
2640 +       struct au_xino_file     br_xino;
2641 +
2642 +       aufs_bindex_t           br_id; /* compared against au_xino_brid() in branch.c */
2643 +
2644 +       int                     br_perm; /* one of AuBrPerm_* */
2645 +       struct vfsmount         *br_mnt;
2646 +       atomic_t                br_count; /* au_br_del() returns -EBUSY while non-zero */
2647 +
2648 +       struct au_wbr           *br_wbr; /* may be NULL (tested in au_br_del()); allocated on ro --> rw */
2649 +
2650 +       /* xino truncation */
2651 +       blkcnt_t                br_xino_upper;  /* watermark in blocks */
2652 +       atomic_t                br_xino_running;
2653 +
2654 +#ifdef CONFIG_SYSFS
2655 +       /* an entry under sysfs per mount-point */
2656 +       char                    br_name[8];
2657 +       struct attribute        br_attr;
2658 +#endif
2659 +};
2660 +
2661 +/* ---------------------------------------------------------------------- */
2662 +
2663 +/* branch permission and attribute, stored in au_branch.br_perm */
2664 +enum {
2665 +       AuBrPerm_RW,            /* writable, linkable wh */
2666 +       AuBrPerm_RO,            /* readonly, no wh */
2667 +       AuBrPerm_RR,            /* natively readonly, no wh */
2668 +
2669 +       AuBrPerm_RWNoLinkWH,    /* un-linkable whiteouts */
2670 +
2671 +       AuBrPerm_ROWH,          /* whiteout-able */
2672 +       AuBrPerm_RRWH,          /* whiteout-able */
2673 +
2674 +       AuBrPerm_Last /* sentinel; presumably the number of permissions -- confirm */
2675 +};
2676 +/* true for the two writable permissions, RW and RWNoLinkWH */
2677 +static inline int au_br_writable(int brperm)
2678 +{
2679 +       return brperm == AuBrPerm_RW || brperm == AuBrPerm_RWNoLinkWH;
2680 +}
2681 +/* true for RW, ROWH and RRWH. NOTE(review): RWNoLinkWH is not included -- confirm it is intended */
2682 +static inline int au_br_whable(int brperm)
2683 +{
2684 +       return brperm == AuBrPerm_RW
2685 +               || brperm == AuBrPerm_ROWH
2686 +               || brperm == AuBrPerm_RRWH;
2687 +}
2688 +/* -EROFS when the branch's superblock has MS_RDONLY or its permission is not writable, else 0 */
2689 +static inline int au_br_rdonly(struct au_branch *br)
2690 +{
2691 +       return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY)
2692 +               || !au_br_writable(br->br_perm))
2693 +               ? -EROFS : 0;
2694 +}
2695 +/* always 0 without CONFIG_AUFS_HINOTIFY; otherwise true unless the permission is RR or RRWH */
2696 +static inline int au_br_hinotifyable(int brperm __maybe_unused)
2697 +{
2698 +#ifdef CONFIG_AUFS_HINOTIFY
2699 +       return brperm != AuBrPerm_RR && brperm != AuBrPerm_RRWH;
2700 +#else
2701 +       return 0;
2702 +#endif
2703 +}
2704 +
2705 +/* ---------------------------------------------------------------------- */
2706 +
2707 +/* branch.c */
2708 +struct au_sbinfo;
2709 +void au_br_free(struct au_sbinfo *sinfo);
2710 +int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
2711 +struct au_opt_add;
2712 +int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
2713 +struct au_opt_del;
2714 +int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
2715 +struct au_opt_mod;
2716 +int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
2717 +             int *do_update);
2718 +
2719 +/* xino.c */
2720 +static const loff_t au_loff_max = LLONG_MAX;
2721 +
2722 +int au_xib_trunc(struct super_block *sb);
2723 +ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
2724 +                  loff_t *pos);
2725 +ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
2726 +                   loff_t *pos);
2727 +struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
2728 +struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
2729 +ino_t au_xino_new_ino(struct super_block *sb);
2730 +int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2731 +                  ino_t ino);
2732 +int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2733 +                 ino_t ino);
2734 +int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
2735 +                ino_t *ino);
2736 +int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
2737 +              struct file *base_file, int do_test);
2738 +int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
2739 +
2740 +struct au_opt_xino;
2741 +int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
2742 +void au_xino_clr(struct super_block *sb);
2743 +struct file *au_xino_def(struct super_block *sb);
2744 +int au_xino_path(struct seq_file *seq, struct file *file);
2745 +
2746 +/* ---------------------------------------------------------------------- */
2747 +
2748 +/* Superblock to branch: accessors taking (sb, bindex); this one returns the branch id */
2749 +static inline
2750 +aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
2751 +{
2752 +       return au_sbr(sb, bindex)->br_id;
2753 +}
2754 +/* the vfsmount (br_mnt) of the branch at bindex */
2755 +static inline
2756 +struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
2757 +{
2758 +       return au_sbr(sb, bindex)->br_mnt;
2759 +}
2760 +/* the superblock of the filesystem which the branch at bindex is mounted from */
2761 +static inline
2762 +struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
2763 +{
2764 +       return au_sbr_mnt(sb, bindex)->mnt_sb;
2765 +}
2766 +/* decrement br_count of the branch at bindex; the new value is not used */
2767 +static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
2768 +{
2769 +       atomic_dec_return(&au_sbr(sb, bindex)->br_count);
2770 +}
2771 +/* the permission (br_perm) of the branch at bindex */
2772 +static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
2773 +{
2774 +       return au_sbr(sb, bindex)->br_perm;
2775 +}
2776 +/* au_br_whable() applied to the permission of the branch at bindex */
2777 +static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
2778 +{
2779 +       return au_br_whable(au_sbr_perm(sb, bindex));
2780 +}
2781 +
2782 +/* ---------------------------------------------------------------------- */
2783 +
2784 +/*
2785 + * wbr_wh_read_lock, wbr_wh_write_lock
2786 + * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
2787 + */
2788 +AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
2789 +
2790 +#define WbrWhMustNoWaiters(wbr)        AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
2791 +#define WbrWhMustAnyLock(wbr)  AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
2792 +#define WbrWhMustWriteLock(wbr)        AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
2793 +
2794 +#endif /* __KERNEL__ */
2795 +#endif /* __AUFS_BRANCH_H__ */
2796 diff -uprN -x .git linux-2.6.31/fs/aufs/cpup.c aufs2-2.6.git/fs/aufs/cpup.c
2797 --- linux-2.6.31/fs/aufs/cpup.c 1970-01-01 00:00:00.000000000 +0000
2798 +++ aufs2-2.6.git/fs/aufs/cpup.c        2009-09-21 21:49:23.377863284 +0000
2799 @@ -0,0 +1,1048 @@
2800 +/*
2801 + * Copyright (C) 2005-2009 Junjiro R. Okajima
2802 + *
2803 + * This program, aufs is free software; you can redistribute it and/or modify
2804 + * it under the terms of the GNU General Public License as published by
2805 + * the Free Software Foundation; either version 2 of the License, or
2806 + * (at your option) any later version.
2807 + *
2808 + * This program is distributed in the hope that it will be useful,
2809 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
2810 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2811 + * GNU General Public License for more details.
2812 + *
2813 + * You should have received a copy of the GNU General Public License
2814 + * along with this program; if not, write to the Free Software
2815 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
2816 + */
2817 +
2818 +/*
2819 + * copy-up functions, see wbr_policy.c for copy-down
2820 + */
2821 +
2822 +#include <linux/file.h>
2823 +#include <linux/fs_stack.h>
2824 +#include <linux/mm.h>
2825 +#include <linux/uaccess.h>
2826 +#include "aufs.h"
2827 +/* OR the i_flags of src into dst, except for the bits in mask; bits are only ever set here */
2828 +void au_cpup_attr_flags(struct inode *dst, struct inode *src)
2829 +{
2830 +       const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
2831 +               | S_NOATIME | S_NOCMTIME;
2832 +
2833 +       dst->i_flags |= src->i_flags & ~mask;
2834 +       if (au_test_fs_notime(dst->i_sb)) /* the time flags come from this test, not from src */
2835 +               dst->i_flags |= S_NOATIME | S_NOCMTIME;
2836 +}
2837 +/* copy the times and the size from the lower inode on the inode's bstart branch */
2838 +void au_cpup_attr_timesizes(struct inode *inode)
2839 +{
2840 +       struct inode *h_inode;
2841 +
2842 +       h_inode = au_h_iptr(inode, au_ibstart(inode));
2843 +       fsstack_copy_attr_times(inode, h_inode);
2844 +       vfsub_copy_inode_size(inode, h_inode);
2845 +}
2846 +/* set i_nlink from the lower inode on bstart; for a dir, add the lower dirs on the other branches */
2847 +void au_cpup_attr_nlink(struct inode *inode, int force)
2848 +{
2849 +       struct inode *h_inode;
2850 +       struct super_block *sb;
2851 +       aufs_bindex_t bindex, bend;
2852 +
2853 +       sb = inode->i_sb;
2854 +       bindex = au_ibstart(inode);
2855 +       h_inode = au_h_iptr(inode, bindex);
2856 +       if (!force
2857 +           && !S_ISDIR(h_inode->i_mode)
2858 +           && au_opt_test(au_mntflags(sb), PLINK)
2859 +           && au_plink_test(inode)) /* unforced: keep i_nlink of a non-dir that passes au_plink_test() */
2860 +               return;
2861 +
2862 +       inode->i_nlink = h_inode->i_nlink;
2863 +
2864 +       /*
2865 +        * fewer nlink makes find(1) noisy, but larger nlink doesn't.
2866 +        * it may include whplink directory.
2867 +        */
2868 +       if (S_ISDIR(h_inode->i_mode)) {
2869 +               bend = au_ibend(inode);
2870 +               for (bindex++; bindex <= bend; bindex++) { /* the branches after bstart */
2871 +                       h_inode = au_h_iptr(inode, bindex);
2872 +                       if (h_inode)
2873 +                               au_add_nlink(inode, h_inode);
2874 +               }
2875 +       }
2876 +}
2877 +/* copy mode, uid, gid, times, size and flags from the lower inode on bstart */
2878 +void au_cpup_attr_changeable(struct inode *inode)
2879 +{
2880 +       struct inode *h_inode;
2881 +
2882 +       h_inode = au_h_iptr(inode, au_ibstart(inode));
2883 +       inode->i_mode = h_inode->i_mode;
2884 +       inode->i_uid = h_inode->i_uid;
2885 +       inode->i_gid = h_inode->i_gid;
2886 +       au_cpup_attr_timesizes(inode);
2887 +       au_cpup_attr_flags(inode, h_inode);
2888 +}
2889 +/* record @h_inode's i_generation and super_block in the aufs inode info of @inode */
2890 +void au_cpup_igen(struct inode *inode, struct inode *h_inode)
2891 +{
2892 +       struct au_iinfo *iinfo = au_ii(inode);
2893 +
2894 +       IiMustWriteLock(inode);
2895 +
2896 +       iinfo->ii_higen = h_inode->i_generation;
2897 +       iinfo->ii_hsb1 = h_inode->i_sb;
2898 +}
2899 +/* copy every attribute handled in this file from the lower inode at au_ibstart() */
2900 +void au_cpup_attr_all(struct inode *inode, int force)
2901 +{
2902 +       struct inode *h_inode;
2903 +
2904 +       h_inode = au_h_iptr(inode, au_ibstart(inode));
2905 +       au_cpup_attr_changeable(inode);
2906 +       if (inode->i_nlink > 0) /* a zero nlink is left untouched */
2907 +               au_cpup_attr_nlink(inode, force);
2908 +       inode->i_rdev = h_inode->i_rdev;
2909 +       inode->i_blkbits = h_inode->i_blkbits;
2910 +       au_cpup_igen(inode, h_inode);
2911 +}
2912 +
2913 +/* ---------------------------------------------------------------------- */
2914 +
2915 +/* Note: dt_dentry and dt_h_path are stored without taking a reference */
2916 +
2917 +/* save atime/mtime of the lower dir @h_path in @dt, to keep them over a cpup */
2918 +void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
2919 +                   struct path *h_path)
2920 +{
2921 +       struct inode *h_inode;
2922 +
2923 +       dt->dt_dentry = dentry;
2924 +       dt->dt_h_path = *h_path;
2925 +       h_inode = h_path->dentry->d_inode;
2926 +       dt->dt_atime = h_inode->i_atime;
2927 +       dt->dt_mtime = h_inode->i_mtime;
2928 +       /* smp_mb(); */
2929 +}
2930 +/* write the atime/mtime saved in @dt back to dt_h_path; a failure is only warned about */
2931 +void au_dtime_revert(struct au_dtime *dt)
2932 +{
2933 +       struct iattr attr;
2934 +       int err;
2935 +
2936 +       attr.ia_atime = dt->dt_atime;
2937 +       attr.ia_mtime = dt->dt_mtime;
2938 +       attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
2939 +               | ATTR_ATIME | ATTR_ATIME_SET;
2940 +
2941 +       err = vfsub_notify_change(&dt->dt_h_path, &attr);
2942 +       if (unlikely(err))
2943 +               AuWarn("restoring timestamps failed(%d). ignored\n", err);
2944 +}
2945 +
2946 +/* ---------------------------------------------------------------------- */
2947 +/* apply uid, gid, atime, mtime (and mode, unless dst is a symlink) of @h_src to dst on @bindex */
2948 +static noinline_for_stack
2949 +int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src)
2950 +{
2951 +       int err, sbits;
2952 +       struct iattr ia;
2953 +       struct path h_path;
2954 +       struct inode *h_isrc, *h_idst;
2955 +
2956 +       h_path.dentry = au_h_dptr(dst, bindex);
2957 +       h_idst = h_path.dentry->d_inode;
2958 +       h_path.mnt = au_sbr_mnt(dst->d_sb, bindex);
2959 +       h_isrc = h_src->d_inode;
2960 +       ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
2961 +               | ATTR_ATIME | ATTR_MTIME
2962 +               | ATTR_ATIME_SET | ATTR_MTIME_SET;
2963 +       ia.ia_uid = h_isrc->i_uid;
2964 +       ia.ia_gid = h_isrc->i_gid;
2965 +       ia.ia_atime = h_isrc->i_atime;
2966 +       ia.ia_mtime = h_isrc->i_mtime;
2967 +       if (h_idst->i_mode != h_isrc->i_mode
2968 +           && !S_ISLNK(h_idst->i_mode)) {
2969 +               ia.ia_valid |= ATTR_MODE;
2970 +               ia.ia_mode = h_isrc->i_mode;
2971 +       }
2972 +       sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); /* src is setuid/setgid */
2973 +       au_cpup_attr_flags(h_idst, h_isrc);
2974 +       err = vfsub_notify_change(&h_path, &ia);
2975 +
2976 +       /* is this nfs only? */
2977 +       if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
2978 +               ia.ia_valid = ATTR_FORCE | ATTR_MODE; /* set the mode once more */
2979 +               ia.ia_mode = h_isrc->i_mode;
2980 +               err = vfsub_notify_change(&h_path, &ia);
2981 +       }
2982 +
2983 +       return err;
2984 +}
2985 +
2986 +/* ---------------------------------------------------------------------- */
2987 +/* copy @len bytes from @src to @dst through @buf; all-zero blocks become holes. 0 or -errno */
2988 +static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
2989 +                          char *buf, unsigned long blksize)
2990 +{
2991 +       int err;
2992 +       size_t sz, rbytes, wbytes;
2993 +       unsigned char all_zero;
2994 +       char *p, *zp;
2995 +       struct mutex *h_mtx;
2996 +       /* reduce stack usage: @buf is reused as the iattr for the last hole */
2997 +       struct iattr *ia;
2998 +
2999 +       zp = page_address(ZERO_PAGE(0));
3000 +       if (unlikely(!zp))
3001 +               return -ENOMEM; /* possible? */
3002 +
3003 +       err = 0;
3004 +       all_zero = 0;
3005 +       while (len) {
3006 +               AuDbg("len %lld\n", len);
3007 +               sz = blksize;
3008 +               if (len < blksize)
3009 +                       sz = len;
3010 +
3011 +               rbytes = 0;
3012 +               /* todo: signal_pending? */
3013 +               while (!rbytes || err == -EAGAIN || err == -EINTR) { /* retried while nothing is read */
3014 +                       rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
3015 +                       err = rbytes;
3016 +               }
3017 +               if (unlikely(err < 0))
3018 +                       break;
3019 +
3020 +               all_zero = 0;
3021 +               if (len >= rbytes && rbytes == blksize) /* only a full block can be a hole */
3022 +                       all_zero = !memcmp(buf, zp, rbytes);
3023 +               if (!all_zero) {
3024 +                       wbytes = rbytes;
3025 +                       p = buf;
3026 +                       while (wbytes) {
3027 +                               size_t b;
3028 +
3029 +                               b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
3030 +                               err = b;
3031 +                               /* todo: signal_pending? */
3032 +                               if (unlikely(err == -EAGAIN || err == -EINTR))
3033 +                                       continue;
3034 +                               if (unlikely(err < 0))
3035 +                                       return err; /* the outer loop would reset err to 0 */
3036 +                               wbytes -= b;
3037 +                               p += b;
3038 +                       }
3039 +               } else {
3040 +                       loff_t res;
3041 +
3042 +                       AuLabel(hole);
3043 +                       res = vfsub_llseek(dst, rbytes, SEEK_CUR);
3044 +                       err = res;
3045 +                       if (unlikely(res < 0))
3046 +                               break;
3047 +               }
3048 +               len -= rbytes;
3049 +               err = 0;
3050 +       }
3051 +
3052 +       /* the last block may be a hole */
3053 +       if (!err && all_zero) {
3054 +               AuLabel(last hole);
3055 +
3056 +               err = 1;
3057 +               if (au_test_nfs(dst->f_dentry->d_sb)) {
3058 +                       /* nfs requires this step to make last hole */
3059 +                       /* is this only nfs? */
3060 +                       do {
3061 +                               /* todo: signal_pending? */
3062 +                               err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
3063 +                       } while (err == -EAGAIN || err == -EINTR);
3064 +                       if (err == 1)
3065 +                               dst->f_pos--;
3066 +               }
3067 +
3068 +               if (err == 1) { /* set the file size to the current position */
3069 +                       ia = (void *)buf;
3070 +                       ia->ia_size = dst->f_pos;
3071 +                       ia->ia_valid = ATTR_SIZE | ATTR_FILE;
3072 +                       ia->ia_file = dst;
3073 +                       h_mtx = &dst->f_dentry->d_inode->i_mutex;
3074 +                       mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
3075 +                       err = vfsub_notify_change(&dst->f_path, ia);
3076 +                       mutex_unlock(h_mtx);
3077 +               }
3078 +       }
3079 +
3080 +       return err;
3081 +}
3082 +/* allocate a copy buffer, rewind both files and copy @len bytes from @src to @dst; 0 or -errno */
3083 +int au_copy_file(struct file *dst, struct file *src, loff_t len)
3084 +{
3085 +       int err;
3086 +       unsigned long blksize;
3087 +       unsigned char do_kfree;
3088 +       char *buf;
3089 +
3090 +       err = -ENOMEM;
3091 +       blksize = dst->f_dentry->d_sb->s_blocksize;
3092 +       if (!blksize || PAGE_SIZE < blksize)
3093 +               blksize = PAGE_SIZE;
3094 +       AuDbg("blksize %lu\n", blksize);
3095 +       do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr));
3096 +       if (do_kfree) /* au_do_copy_file() reuses buf as a struct iattr, so it must fit */
3097 +               buf = kmalloc(blksize, GFP_NOFS);
3098 +       else
3099 +               buf = (void *)__get_free_page(GFP_NOFS);
3100 +       if (unlikely(!buf))
3101 +               goto out;
3102 +
3103 +       if (len > (1 << 22))
3104 +               AuDbg("copying a large file %lld\n", (long long)len);
3105 +
3106 +       src->f_pos = 0;
3107 +       dst->f_pos = 0;
3108 +       err = au_do_copy_file(dst, src, len, buf, blksize);
3109 +       if (do_kfree)
3110 +               kfree(buf);
3111 +       else
3112 +               free_page((unsigned long)buf);
3113 +
3114 + out:
3115 +       return err;
3116 +}
3117 +
3118 +/*
3119 + * open the lower files on @bsrc and @bdst, copy @len bytes and fput them.
3120 + * to support a sparse file opened with O_APPEND, we need to close the file.
3121 + */
3122 +static int au_cp_regular(struct dentry *dentry, aufs_bindex_t bdst,
3123 +                       aufs_bindex_t bsrc, loff_t len)
3124 +{
3125 +       int err, i;
3126 +       enum { SRC, DST };
3127 +       struct {
3128 +               aufs_bindex_t bindex;
3129 +               unsigned int flags;
3130 +               struct dentry *dentry;
3131 +               struct file *file;
3132 +               void *label, *label_file; /* jump targets: open failed / no f_op */
3133 +       } *f, file[] = {
3134 +               {
3135 +                       .bindex = bsrc,
3136 +                       .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
3137 +                       .file = NULL,
3138 +                       .label = &&out,
3139 +                       .label_file = &&out_src
3140 +               },
3141 +               {
3142 +                       .bindex = bdst,
3143 +                       .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
3144 +                       .file = NULL,
3145 +                       .label = &&out_src,
3146 +                       .label_file = &&out_dst
3147 +               }
3148 +       };
3149 +       struct super_block *sb;
3150 +
3151 +       /* bsrc branch can be ro/rw. */
3152 +       sb = dentry->d_sb;
3153 +       f = file;
3154 +       for (i = 0; i < 2; i++, f++) { /* SRC first, then DST */
3155 +               f->dentry = au_h_dptr(dentry, f->bindex);
3156 +               f->file = au_h_open(dentry, f->bindex, f->flags, /*file*/NULL);
3157 +               err = PTR_ERR(f->file);
3158 +               if (IS_ERR(f->file))
3159 +                       goto *f->label;
3160 +               err = -EINVAL;
3161 +               if (unlikely(!f->file->f_op))
3162 +                       goto *f->label_file;
3163 +       }
3164 +
3165 +       /* try stopping to update while we copyup */
3166 +       IMustLock(file[SRC].dentry->d_inode);
3167 +       err = au_copy_file(file[DST].file, file[SRC].file, len);
3168 +
3169 + out_dst:
3170 +       fput(file[DST].file);
3171 +       au_sbr_put(sb, file[DST].bindex);
3172 + out_src:
3173 +       fput(file[SRC].file);
3174 +       au_sbr_put(sb, file[SRC].bindex);
3175 + out:
3176 +       return err;
3177 +}
3178 +/* copy at most the src size (all of it when @len is -1); on failure unlink @h_path */
3179 +static int au_do_cpup_regular(struct dentry *dentry, aufs_bindex_t bdst,
3180 +                             aufs_bindex_t bsrc, loff_t len,
3181 +                             struct inode *h_dir, struct path *h_path)
3182 +{
3183 +       int err, rerr;
3184 +       loff_t l;
3185 +
3186 +       err = 0;
3187 +       l = i_size_read(au_h_iptr(dentry->d_inode, bsrc));
3188 +       if (len == -1 || l < len)
3189 +               len = l;
3190 +       if (len) /* nothing to copy for an empty file */
3191 +               err = au_cp_regular(dentry, bdst, bsrc, len);
3192 +       if (!err)
3193 +               goto out; /* success */
3194 +
3195 +       rerr = vfsub_unlink(h_dir, h_path, /*force*/0);
3196 +       if (rerr) {
3197 +               AuIOErr("failed unlinking cpup-ed %.*s(%d, %d)\n",
3198 +                       AuDLNPair(h_path->dentry), err, rerr);
3199 +               err = -EIO;
3200 +       }
3201 +
3202 + out:
3203 +       return err;
3204 +}
3205 +/* read the target of the symlink @h_src and create the same symlink at @h_path; 0 or -errno */
3206 +static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
3207 +                             struct inode *h_dir)
3208 +{
3209 +       int err, symlen;
3210 +       mm_segment_t old_fs;
3211 +       char *sym;
3212 +
3213 +       err = -ENOSYS;
3214 +       if (unlikely(!h_src->d_inode->i_op->readlink))
3215 +               goto out;
3216 +
3217 +       err = -ENOMEM;
3218 +       sym = __getname();
3219 +       if (unlikely(!sym))
3220 +               goto out;
3221 +
3222 +       old_fs = get_fs();
3223 +       set_fs(KERNEL_DS); /* ->readlink() takes a __user buffer */
3224 +       symlen = h_src->d_inode->i_op->readlink(h_src, (char __user *)sym,
3225 +                                               PATH_MAX - 1);
3226 +       err = symlen;
3227 +       set_fs(old_fs);
3228 +
3229 +       if (symlen > 0) {
3230 +               sym[symlen] = 0; /* in bounds: at most PATH_MAX - 1 bytes were read */
3231 +               err = vfsub_symlink(h_dir, h_path, sym);
3232 +       }
3233 +       __putname(sym);
3234 +
3235 + out:
3236 +       return err;
3237 +}
3238 +
3239 +/* create the lower entry on @bdst by file type; au_cpup_single() locks the new inode afterwards */
3240 +static noinline_for_stack
3241 +int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst,
3242 +              aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3243 +              struct dentry *dst_parent)
3244 +{
3245 +       int err;
3246 +       umode_t mode;
3247 +       unsigned int mnt_flags;
3248 +       unsigned char isdir;
3249 +       const unsigned char do_dt = !!au_ftest_cpup(flags, DTIME);
3250 +       struct au_dtime dt;
3251 +       struct path h_path;
3252 +       struct dentry *h_src, *h_dst, *h_parent;
3253 +       struct inode *h_inode, *h_dir;
3254 +       struct super_block *sb;
3255 +
3256 +       /* bsrc branch can be ro/rw. */
3257 +       h_src = au_h_dptr(dentry, bsrc);
3258 +       h_inode = h_src->d_inode;
3259 +       AuDebugOn(h_inode != au_h_iptr(dentry->d_inode, bsrc));
3260 +
3261 +       /* try stopping to be referenced while we are creating */
3262 +       h_dst = au_h_dptr(dentry, bdst);
3263 +       h_parent = h_dst->d_parent; /* dir inode is locked */
3264 +       h_dir = h_parent->d_inode;
3265 +       IMustLock(h_dir);
3266 +       AuDebugOn(h_parent != h_dst->d_parent);
3267 +
3268 +       sb = dentry->d_sb;
3269 +       h_path.mnt = au_sbr_mnt(sb, bdst);
3270 +       if (do_dt) { /* save the parent dir times, reverted before returning */
3271 +               h_path.dentry = h_parent;
3272 +               au_dtime_store(&dt, dst_parent, &h_path);
3273 +       }
3274 +       h_path.dentry = h_dst;
3275 +
3276 +       isdir = 0;
3277 +       mode = h_inode->i_mode;
3278 +       switch (mode & S_IFMT) {
3279 +       case S_IFREG:
3280 +               /* try stopping to update while we are referencing */
3281 +               IMustLock(h_inode);
3282 +               err = vfsub_create(h_dir, &h_path, mode | S_IWUSR);
3283 +               if (!err)
3284 +                       err = au_do_cpup_regular
3285 +                               (dentry, bdst, bsrc, len,
3286 +                                au_h_iptr(dst_parent->d_inode, bdst), &h_path);
3287 +               break;
3288 +       case S_IFDIR:
3289 +               isdir = 1;
3290 +               err = vfsub_mkdir(h_dir, &h_path, mode);
3291 +               if (!err) {
3292 +                       /*
3293 +                        * strange behaviour from the users view,
3294 +                        * particularly setattr case
3295 +                        */
3296 +                       if (au_ibstart(dst_parent->d_inode) == bdst)
3297 +                               au_cpup_attr_nlink(dst_parent->d_inode,
3298 +                                                  /*force*/1);
3299 +                       au_cpup_attr_nlink(dentry->d_inode, /*force*/1);
3300 +               }
3301 +               break;
3302 +       case S_IFLNK:
3303 +               err = au_do_cpup_symlink(&h_path, h_src, h_dir);
3304 +               break;
3305 +       case S_IFCHR:
3306 +       case S_IFBLK:
3307 +               AuDebugOn(!capable(CAP_MKNOD));
3308 +               /*FALLTHROUGH*/
3309 +       case S_IFIFO:
3310 +       case S_IFSOCK:
3311 +               err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
3312 +               break;
3313 +       default:
3314 +               AuIOErr("Unknown inode type 0%o\n", mode);
3315 +               err = -EIO;
3316 +       }
3317 +
3318 +       mnt_flags = au_mntflags(sb);
3319 +       if (!au_opt_test(mnt_flags, UDBA_NONE)
3320 +           && !isdir
3321 +           && au_opt_test(mnt_flags, XINO)
3322 +           && h_inode->i_nlink == 1
3323 +           /* todo: unnecessary? */
3324 +           /* && dentry->d_inode->i_nlink == 1 */
3325 +           && bdst < bsrc
3326 +           && !au_ftest_cpup(flags, KEEPLINO))
3327 +               au_xino_write(sb, bsrc, h_inode->i_ino, /*ino*/0);
3328 +               /* ignore this error */
3329 +
3330 +       if (do_dt)
3331 +               au_dtime_revert(&dt);
3332 +       return err;
3333 +}
3334 +
3335 +/*
3336 + * copyup the @dentry from @bsrc to @bdst.
3337 + * the caller must set both of the lower dentries.
3338 + * @len is for truncating; when it is -1, copyup the entire file.
3339 + * in link/rename cases, @dst_parent may be different from the real one.
3340 + */
3341 +static int au_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3342 +                         aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3343 +                         struct dentry *dst_parent)
3344 +{
3345 +       int err, rerr;
3346 +       aufs_bindex_t old_ibstart;
3347 +       unsigned char isdir, plink;
3348 +       struct au_dtime dt;
3349 +       struct path h_path;
3350 +       struct dentry *h_src, *h_dst, *h_parent;
3351 +       struct inode *dst_inode, *h_dir, *inode;
3352 +       struct super_block *sb;
3353 +
3354 +       AuDebugOn(bsrc <= bdst);
3355 +
3356 +       sb = dentry->d_sb;
3357 +       h_path.mnt = au_sbr_mnt(sb, bdst);
3358 +       h_dst = au_h_dptr(dentry, bdst);
3359 +       h_parent = h_dst->d_parent; /* dir inode is locked */
3360 +       h_dir = h_parent->d_inode;
3361 +       IMustLock(h_dir);
3362 +
3363 +       h_src = au_h_dptr(dentry, bsrc);
3364 +       inode = dentry->d_inode;
3365 +
3366 +       if (!dst_parent) /* either way a reference is held until out: */
3367 +               dst_parent = dget_parent(dentry);
3368 +       else
3369 +               dget(dst_parent);
3370 +
3371 +       plink = !!au_opt_test(au_mntflags(sb), PLINK);
3372 +       dst_inode = au_h_iptr(inode, bdst);
3373 +       if (dst_inode) { /* the aufs inode already has a lower inode on @bdst */
3374 +               if (unlikely(!plink)) {
3375 +                       err = -EIO;
3376 +                       AuIOErr("i%lu exists on a upper branch "
3377 +                               "but plink is disabled\n", inode->i_ino);
3378 +                       goto out;
3379 +               }
3380 +
3381 +               if (dst_inode->i_nlink) { /* hard-link to the plink entry instead of copying */
3382 +                       const int do_dt = au_ftest_cpup(flags, DTIME);
3383 +
3384 +                       h_src = au_plink_lkup(inode, bdst);
3385 +                       err = PTR_ERR(h_src);
3386 +                       if (IS_ERR(h_src))
3387 +                               goto out;
3388 +                       if (unlikely(!h_src->d_inode)) {
3389 +                               err = -EIO;
3390 +                               AuIOErr("i%lu exists on a upper branch "
3391 +                                       "but plink is broken\n", inode->i_ino);
3392 +                               dput(h_src);
3393 +                               goto out;
3394 +                       }
3395 +
3396 +                       if (do_dt) {
3397 +                               h_path.dentry = h_parent;
3398 +                               au_dtime_store(&dt, dst_parent, &h_path);
3399 +                       }
3400 +                       h_path.dentry = h_dst;
3401 +                       err = vfsub_link(h_src, h_dir, &h_path);
3402 +                       if (do_dt)
3403 +                               au_dtime_revert(&dt);
3404 +                       dput(h_src);
3405 +                       goto out;
3406 +               } else
3407 +                       /* todo: cpup_wh_file? */
3408 +                       /* udba work */
3409 +                       au_update_brange(inode, 1);
3410 +       }
3411 +
3412 +       old_ibstart = au_ibstart(inode);
3413 +       err = cpup_entry(dentry, bdst, bsrc, len, flags, dst_parent);
3414 +       if (unlikely(err))
3415 +               goto out;
3416 +       dst_inode = h_dst->d_inode;
3417 +       mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
3418 +
3419 +       err = cpup_iattr(dentry, bdst, h_src);
3420 +       isdir = S_ISDIR(dst_inode->i_mode);
3421 +       if (!err) {
3422 +               if (bdst < old_ibstart)
3423 +                       au_set_ibstart(inode, bdst);
3424 +               au_set_h_iptr(inode, bdst, au_igrab(dst_inode),
3425 +                             au_hi_flags(inode, isdir));
3426 +               mutex_unlock(&dst_inode->i_mutex);
3427 +               if (!isdir
3428 +                   && h_src->d_inode->i_nlink > 1
3429 +                   && plink)
3430 +                       au_plink_append(inode, bdst, h_dst);
3431 +               goto out; /* success */
3432 +       }
3433 +
3434 +       /* revert */
3435 +       h_path.dentry = h_parent;
3436 +       mutex_unlock(&dst_inode->i_mutex);
3437 +       au_dtime_store(&dt, dst_parent, &h_path);
3438 +       h_path.dentry = h_dst;
3439 +       if (!isdir)
3440 +               rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
3441 +       else
3442 +               rerr = vfsub_rmdir(h_dir, &h_path);
3443 +       au_dtime_revert(&dt);
3444 +       if (rerr) {
3445 +               AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
3446 +               err = -EIO;
3447 +       }
3448 +
3449 + out:
3450 +       dput(dst_parent);
3451 +       return err;
3452 +}
3453 +/* the arguments of au_cpup_single() and where to store its result (*errp) */
3454 +struct au_cpup_single_args {
3455 +       int *errp;
3456 +       struct dentry *dentry;
3457 +       aufs_bindex_t bdst, bsrc;
3458 +       loff_t len;
3459 +       unsigned int flags;
3460 +       struct dentry *dst_parent;
3461 +};
3462 +/* callback given to au_wkq_wait(): call au_cpup_single() and store the result in *errp */
3463 +static void au_call_cpup_single(void *args)
3464 +{
3465 +       struct au_cpup_single_args *a = args;
3466 +       *a->errp = au_cpup_single(a->dentry, a->bdst, a->bsrc, a->len,
3467 +                                 a->flags, a->dst_parent);
3468 +}
3469 +/* au_cpup_single(), run via au_wkq_wait() for a chr/blk device when lacking CAP_MKNOD */
3470 +int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3471 +                      aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3472 +                      struct dentry *dst_parent)
3473 +{
3474 +       int err, wkq_err;
3475 +       umode_t mode;
3476 +       struct dentry *h_dentry;
3477 +
3478 +       h_dentry = au_h_dptr(dentry, bsrc);
3479 +       mode = h_dentry->d_inode->i_mode & S_IFMT;
3480 +       if ((mode != S_IFCHR && mode != S_IFBLK)
3481 +           || capable(CAP_MKNOD))
3482 +               err = au_cpup_single(dentry, bdst, bsrc, len, flags,
3483 +                                    dst_parent);
3484 +       else {
3485 +               struct au_cpup_single_args args = {
3486 +                       .errp           = &err,
3487 +                       .dentry         = dentry,
3488 +                       .bdst           = bdst,
3489 +                       .bsrc           = bsrc,
3490 +                       .len            = len,
3491 +                       .flags          = flags,
3492 +                       .dst_parent     = dst_parent
3493 +               };
3494 +               wkq_err = au_wkq_wait(au_call_cpup_single, &args);
3495 +               if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself wins */
3496 +                       err = wkq_err;
3497 +       }
3498 +
3499 +       return err;
3500 +}
3501 +
3502 +/*
3503 + * copyup the @dentry from the first active lower branch to @bdst,
3504 + * using au_cpup_single(). on failure, the lower dentry on @bdst is unset.
3505 + */
3506 +static int au_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3507 +                         unsigned int flags)
3508 +{
3509 +       int err;
3510 +       aufs_bindex_t bsrc, bend;
3511 +
3512 +       bend = au_dbend(dentry);
3513 +       for (bsrc = bdst + 1; bsrc <= bend; bsrc++) /* first branch after @bdst */
3514 +               if (au_h_dptr(dentry, bsrc))
3515 +                       break;
3516 +
3517 +       err = au_lkup_neg(dentry, bdst);
3518 +       if (!err) {
3519 +               err = au_cpup_single(dentry, bdst, bsrc, len, flags, NULL);
3520 +               if (!err)
3521 +                       return 0; /* success */
3522 +
3523 +               /* revert */
3524 +               au_set_h_dptr(dentry, bdst, NULL);
3525 +               au_set_dbstart(dentry, bsrc);
3526 +       }
3527 +
3528 +       return err;
3529 +}
3530 +/* the arguments of au_cpup_simple() and where to store its result (*errp) */
3531 +struct au_cpup_simple_args {
3532 +       int *errp;
3533 +       struct dentry *dentry;
3534 +       aufs_bindex_t bdst;
3535 +       loff_t len;
3536 +       unsigned int flags;
3537 +};
3538 +/* callback given to au_wkq_wait(): call au_cpup_simple() and store the result in *errp */
3539 +static void au_call_cpup_simple(void *args)
3540 +{
3541 +       struct au_cpup_simple_args *a = args;
3542 +       *a->errp = au_cpup_simple(a->dentry, a->bdst, a->len, a->flags);
3543 +}
3544 +/* au_cpup_simple(), run via au_wkq_wait() when the permission/capability tests below say so */
3545 +int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3546 +                      unsigned int flags)
3547 +{
3548 +       int err, wkq_err;
3549 +       unsigned char do_sio;
3550 +       struct dentry *parent;
3551 +       struct inode *h_dir;
3552 +
3553 +       parent = dget_parent(dentry);
3554 +       h_dir = au_h_iptr(parent->d_inode, bdst);
3555 +       do_sio = !!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE);
3556 +       if (!do_sio) {
3557 +               /*
3558 +                * testing CAP_MKNOD is for generic fs, but CAP_FSETID is for xfs only, currently.
3559 +                * NOTE(review): S_IFCHR | S_IFBLK is a bitmask, it also matches e.g. S_IFDIR -- confirm.
3560 +                */
3561 +               umode_t mode = dentry->d_inode->i_mode;
3562 +               do_sio = (((mode & (S_IFCHR | S_IFBLK))
3563 +                          && !capable(CAP_MKNOD))
3564 +                         || ((mode & (S_ISUID | S_ISGID))
3565 +                             && !capable(CAP_FSETID)));
3566 +       }
3567 +       if (!do_sio)
3568 +               err = au_cpup_simple(dentry, bdst, len, flags);
3569 +       else {
3570 +               struct au_cpup_simple_args args = {
3571 +                       .errp           = &err,
3572 +                       .dentry         = dentry,
3573 +                       .bdst           = bdst,
3574 +                       .len            = len,
3575 +                       .flags          = flags
3576 +               };
3577 +               wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
3578 +               if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself wins */
3579 +                       err = wkq_err;
3580 +       }
3581 +
3582 +       dput(parent);
3583 +       return err;
3584 +}
3585 +
3586 +/* ---------------------------------------------------------------------- */
3587 +
3588 +/*
3589 + * copyup the deleted file for writing, with @wh_dentry as the target on @bdst.
3590 + */
3591 +static int au_do_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst,
3592 +                        struct dentry *wh_dentry, struct file *file,
3593 +                        loff_t len)
3594 +{
3595 +       int err;
3596 +       aufs_bindex_t bstart;
3597 +       struct au_dinfo *dinfo;
3598 +       struct dentry *h_d_dst, *h_d_start;
3599 +
3600 +       dinfo = au_di(dentry);
3601 +       AuRwMustWriteLock(&dinfo->di_rwsem);
3602 +
3603 +       bstart = dinfo->di_bstart;
3604 +       h_d_dst = dinfo->di_hdentry[0 + bdst].hd_dentry;
3605 +       dinfo->di_bstart = bdst; /* temporary, restored before returning */
3606 +       dinfo->di_hdentry[0 + bdst].hd_dentry = wh_dentry;
3607 +       h_d_start = dinfo->di_hdentry[0 + bstart].hd_dentry;
3608 +       if (file) /* copy from the dentry the file is really opened on */
3609 +               dinfo->di_hdentry[0 + bstart].hd_dentry
3610 +                       = au_h_fptr(file, au_fbstart(file))->f_dentry;
3611 +       err = au_cpup_single(dentry, bdst, bstart, len, !AuCpup_DTIME,
3612 +                            /*h_parent*/NULL);
3613 +       if (!err && file)
3614 +               err = au_reopen_nondir(file);
3615 +       /* undo the swap even on failure; a no-op when @file is NULL */
3616 +       dinfo->di_hdentry[0 + bstart].hd_dentry = h_d_start;
3617 +       dinfo->di_hdentry[0 + bdst].hd_dentry = h_d_dst;
3618 +       dinfo->di_bstart = bstart;
3619 +
3620 +       return err;
3621 +}
3622 +/* copyup into the dentry from au_whtmp_lkup(), unlink it and pass it to au_set_hi_wh() */
3623 +static int au_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3624 +                     struct file *file)
3625 +{
3626 +       int err;
3627 +       struct au_dtime dt;
3628 +       struct dentry *parent, *h_parent, *wh_dentry;
3629 +       struct au_branch *br;
3630 +       struct path h_path;
3631 +
3632 +       br = au_sbr(dentry->d_sb, bdst);
3633 +       parent = dget_parent(dentry);
3634 +       h_parent = au_h_dptr(parent, bdst);
3635 +       wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
3636 +       err = PTR_ERR(wh_dentry);
3637 +       if (IS_ERR(wh_dentry))
3638 +               goto out;
3639 +
3640 +       h_path.dentry = h_parent;
3641 +       h_path.mnt = br->br_mnt;
3642 +       au_dtime_store(&dt, parent, &h_path);
3643 +       err = au_do_cpup_wh(dentry, bdst, wh_dentry, file, len);
3644 +       if (unlikely(err))
3645 +               goto out_wh;
3646 +
3647 +       dget(wh_dentry);
3648 +       h_path.dentry = wh_dentry;
3649 +       err = vfsub_unlink(h_parent->d_inode, &h_path, /*force*/0);
3650 +       if (unlikely(err)) {
3651 +               AuIOErr("failed remove copied-up tmp file %.*s(%d)\n",
3652 +                       AuDLNPair(wh_dentry), err);
3653 +               err = -EIO;
3654 +       }
3655 +       au_dtime_revert(&dt); /* the times of the lower parent dir saved above */
3656 +       au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
3657 +
3658 + out_wh:
3659 +       dput(wh_dentry);
3660 + out:
3661 +       dput(parent);
3662 +       return err;
3663 +}
3664 +/* the arguments of au_cpup_wh() and where to store its result (*errp) */
3665 +struct au_cpup_wh_args {
3666 +       int *errp;
3667 +       struct dentry *dentry;
3668 +       aufs_bindex_t bdst;
3669 +       loff_t len;
3670 +       struct file *file;
3671 +};
3672 +/* callback given to au_wkq_wait(): call au_cpup_wh() and store the result in *errp */
3673 +static void au_call_cpup_wh(void *args)
3674 +{
3675 +       struct au_cpup_wh_args *a = args;
3676 +       *a->errp = au_cpup_wh(a->dentry, a->bdst, a->len, a->file);
3677 +}
3678 +/* au_cpup_wh(), directly or via au_wkq_wait(); a lower parent with no nlink is swapped for wbr_orph */
3679 +int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3680 +                  struct file *file)
3681 +{
3682 +       int err, wkq_err;
3683 +       struct dentry *parent, *h_orph, *h_parent, *h_dentry;
3684 +       struct inode *dir, *h_dir, *h_tmpdir, *h_inode;
3685 +       struct au_wbr *wbr;
3686 +
3687 +       parent = dget_parent(dentry);
3688 +       dir = parent->d_inode;
3689 +       h_orph = NULL;
3690 +       h_parent = NULL;
3691 +       h_dir = au_igrab(au_h_iptr(dir, bdst));
3692 +       h_tmpdir = h_dir;
3693 +       if (!h_dir->i_nlink) { /* use wbr_orph as the lower parent for a while */
3694 +               wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
3695 +               h_orph = wbr->wbr_orph;
3696 +
3697 +               h_parent = dget(au_h_dptr(parent, bdst));
3698 +               au_set_h_dptr(parent, bdst, NULL);
3699 +               au_set_h_dptr(parent, bdst, dget(h_orph));
3700 +               h_tmpdir = h_orph->d_inode;
3701 +               au_set_h_iptr(dir, bdst, NULL, 0);
3702 +               au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
3703 +
3704 +               /* this temporary unlock is safe */
3705 +               if (file)
3706 +                       h_dentry = au_h_fptr(file, au_fbstart(file))->f_dentry;
3707 +               else
3708 +                       h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
3709 +               h_inode = h_dentry->d_inode;
3710 +               IMustLock(h_inode);
3711 +               mutex_unlock(&h_inode->i_mutex);
3712 +               mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
3713 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
3714 +       }
3715 +
3716 +       if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE))
3717 +               err = au_cpup_wh(dentry, bdst, len, file);
3718 +       else {
3719 +               struct au_cpup_wh_args args = {
3720 +                       .errp   = &err,
3721 +                       .dentry = dentry,
3722 +                       .bdst   = bdst,
3723 +                       .len    = len,
3724 +                       .file   = file
3725 +               };
3726 +               wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
3727 +               if (unlikely(wkq_err)) /* a failure of au_wkq_wait() itself wins */
3728 +                       err = wkq_err;
3729 +       }
3730 +
3731 +       if (h_orph) { /* put the original lower parent back */
3732 +               mutex_unlock(&h_tmpdir->i_mutex);
3733 +               au_set_h_iptr(dir, bdst, NULL, 0);
3734 +               au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
3735 +               au_set_h_dptr(parent, bdst, NULL);
3736 +               au_set_h_dptr(parent, bdst, h_parent);
3737 +       }
3738 +       iput(h_dir);
3739 +       dput(parent);
3740 +
3741 +       return err;
3742 +}
3743 +
3744 +/* ---------------------------------------------------------------------- */
3745 +
3746 +/*
3747 + * generic routine for both copy-up and copy-down.
3748 + */
3749 +/* cf. revalidate function in file.c */
3750 +int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
3751 +              int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
3752 +                        struct dentry *h_parent, void *arg),
3753 +              void *arg)
3754 +{
3755 +       int err;
3756 +       struct au_pin pin;
3757 +       struct dentry *d, *parent, *h_parent, *real_parent;
3758 +
3759 +       err = 0;
3760 +       parent = dget_parent(dentry);
3761 +       if (IS_ROOT(parent))
3762 +               goto out;
3763 +
3764 +       au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
3765 +                   au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
3766 +
3767 +       /* do not use au_dpage */
3768 +       real_parent = parent;
3769 +       while (1) {
3770 +               dput(parent);
3771 +               parent = dget_parent(dentry);
3772 +               h_parent = au_h_dptr(parent, bdst);
3773 +               if (h_parent)
3774 +                       goto out; /* success */
3775 +
3776 +               /* find top dir which is necessary to cpup */
3777 +               do {
3778 +                       d = parent;
3779 +                       dput(parent);
3780 +                       parent = dget_parent(d);
3781 +                       di_read_lock_parent3(parent, !AuLock_IR);
3782 +                       h_parent = au_h_dptr(parent, bdst);
3783 +                       di_read_unlock(parent, !AuLock_IR);
3784 +               } while (!h_parent);
3785 +
3786 +               if (d != real_parent)
3787 +                       di_write_lock_child3(d);
3788 +
3789 +               /* somebody else might create while we were sleeping */
3790 +               if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
3791 +                       if (au_h_dptr(d, bdst))
3792 +                               au_update_dbstart(d);
3793 +
3794 +                       au_pin_set_dentry(&pin, d);
3795 +                       err = au_do_pin(&pin);
3796 +                       if (!err) {
3797 +                               err = cp(d, bdst, h_parent, arg);
3798 +                               au_unpin(&pin);
3799 +                       }
3800 +               }
3801 +
3802 +               if (d != real_parent)
3803 +                       di_write_unlock(d);
3804 +               if (unlikely(err))
3805 +                       break;
3806 +       }
3807 +
3808 + out:
3809 +       dput(parent);
3810 +       return err;
3811 +}
3812 +
3813 +static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
3814 +                      struct dentry *h_parent __maybe_unused ,
3815 +                      void *arg __maybe_unused)
3816 +{
3817 +       return au_sio_cpup_simple(dentry, bdst, -1, AuCpup_DTIME);
3818 +}
3819 +
3820 +int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
3821 +{
3822 +       return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
3823 +}
3824 +
3825 +int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
3826 +{
3827 +       int err;
3828 +       struct dentry *parent;
3829 +       struct inode *dir;
3830 +
3831 +       parent = dget_parent(dentry);
3832 +       dir = parent->d_inode;
3833 +       err = 0;
3834 +       if (au_h_iptr(dir, bdst))
3835 +               goto out;
3836 +
3837 +       di_read_unlock(parent, AuLock_IR);
3838 +       di_write_lock_parent(parent);
3839 +       /* someone else might change our inode while we were sleeping */
3840 +       if (!au_h_iptr(dir, bdst))
3841 +               err = au_cpup_dirs(dentry, bdst);
3842 +       di_downgrade_lock(parent, AuLock_IR);
3843 +
3844 + out:
3845 +       dput(parent);
3846 +       return err;
3847 +}
3848 diff -uprN -x .git linux-2.6.31/fs/aufs/cpup.h aufs2-2.6.git/fs/aufs/cpup.h
3849 --- linux-2.6.31/fs/aufs/cpup.h 1970-01-01 00:00:00.000000000 +0000
3850 +++ aufs2-2.6.git/fs/aufs/cpup.h        2009-09-21 21:49:23.377863284 +0000
3851 @@ -0,0 +1,81 @@
3852 +/*
3853 + * Copyright (C) 2005-2009 Junjiro R. Okajima
3854 + *
3855 + * This program, aufs is free software; you can redistribute it and/or modify
3856 + * it under the terms of the GNU General Public License as published by
3857 + * the Free Software Foundation; either version 2 of the License, or
3858 + * (at your option) any later version.
3859 + *
3860 + * This program is distributed in the hope that it will be useful,
3861 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3862 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3863 + * GNU General Public License for more details.
3864 + *
3865 + * You should have received a copy of the GNU General Public License
3866 + * along with this program; if not, write to the Free Software
3867 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
3868 + */
3869 +
3870 +/*
3871 + * copy-up/down functions
3872 + */
3873 +
3874 +#ifndef __AUFS_CPUP_H__
3875 +#define __AUFS_CPUP_H__
3876 +
3877 +#ifdef __KERNEL__
3878 +
3879 +#include <linux/path.h>
3880 +#include <linux/time.h>
3881 +#include <linux/aufs_type.h>
3882 +
3883 +struct inode;
3884 +struct file;
3885 +
3886 +void au_cpup_attr_flags(struct inode *dst, struct inode *src);
3887 +void au_cpup_attr_timesizes(struct inode *inode);
3888 +void au_cpup_attr_nlink(struct inode *inode, int force);
3889 +void au_cpup_attr_changeable(struct inode *inode);
3890 +void au_cpup_igen(struct inode *inode, struct inode *h_inode);
3891 +void au_cpup_attr_all(struct inode *inode, int force);
3892 +
3893 +/* ---------------------------------------------------------------------- */
3894 +
3895 +/* cpup flags */
3896 +#define AuCpup_DTIME   1               /* do dtime_store/revert */
3897 +#define AuCpup_KEEPLINO        (1 << 1)        /* do not clear the lower xino,
3898 +                                          for link(2) */
3899 +#define au_ftest_cpup(flags, name)     ((flags) & AuCpup_##name)
3900 +#define au_fset_cpup(flags, name)      { (flags) |= AuCpup_##name; }
3901 +#define au_fclr_cpup(flags, name)      { (flags) &= ~AuCpup_##name; }
3902 +
3903 +int au_copy_file(struct file *dst, struct file *src, loff_t len);
3904 +int au_sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst,
3905 +                      aufs_bindex_t bsrc, loff_t len, unsigned int flags,
3906 +                      struct dentry *dst_parent);
3907 +int au_sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3908 +                      unsigned int flags);
3909 +int au_sio_cpup_wh(struct dentry *dentry, aufs_bindex_t bdst, loff_t len,
3910 +                  struct file *file);
3911 +
3912 +int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
3913 +              int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
3914 +                        struct dentry *h_parent, void *arg),
3915 +              void *arg);
3916 +int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
3917 +int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
3918 +
3919 +/* ---------------------------------------------------------------------- */
3920 +
3921 +/* keep timestamps when copyup */
3922 +struct au_dtime {
3923 +       struct dentry *dt_dentry;
3924 +       struct path dt_h_path;
3925 +       struct timespec dt_atime, dt_mtime;
3926 +};
3927 +void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
3928 +                   struct path *h_path);
3929 +void au_dtime_revert(struct au_dtime *dt);
3930 +
3931 +#endif /* __KERNEL__ */
3932 +#endif /* __AUFS_CPUP_H__ */
3933 diff -uprN -x .git linux-2.6.31/fs/aufs/dbgaufs.c aufs2-2.6.git/fs/aufs/dbgaufs.c
3934 --- linux-2.6.31/fs/aufs/dbgaufs.c      1970-01-01 00:00:00.000000000 +0000
3935 +++ aufs2-2.6.git/fs/aufs/dbgaufs.c     2009-09-21 21:49:23.377863284 +0000
3936 @@ -0,0 +1,331 @@
3937 +/*
3938 + * Copyright (C) 2005-2009 Junjiro R. Okajima
3939 + *
3940 + * This program, aufs is free software; you can redistribute it and/or modify
3941 + * it under the terms of the GNU General Public License as published by
3942 + * the Free Software Foundation; either version 2 of the License, or
3943 + * (at your option) any later version.
3944 + *
3945 + * This program is distributed in the hope that it will be useful,
3946 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3947 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3948 + * GNU General Public License for more details.
3949 + *
3950 + * You should have received a copy of the GNU General Public License
3951 + * along with this program; if not, write to the Free Software
3952 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
3953 + */
3954 +
3955 +/*
3956 + * debugfs interface
3957 + */
3958 +
3959 +#include <linux/debugfs.h>
3960 +#include "aufs.h"
3961 +
3962 +#ifndef CONFIG_SYSFS
3963 +#error DEBUG_FS depends upon SYSFS
3964 +#endif
3965 +
3966 +static struct dentry *dbgaufs;
3967 +static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
3968 +
3969 +/* text shown for an XINO file; 20 is the max decimal digits of a 64-bit ulong */
3970 +struct dbgaufs_arg {
3971 +       int n; /* length of the text in a[], taken from snprintf()'s return value */
3972 +       char a[20 * 4]; /* at most four numbers are printed, see dbgaufs_xi_open() */
3973 +};
3974 +
3975 +/*
3976 + * common function for all XINO files
3977 + */
3978 +static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
3979 +                             struct file *file)
3980 +{
3981 +       kfree(file->private_data);
3982 +       return 0;
3983 +}
3984 +
3985 +static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
3986 +{
3987 +       int err;
3988 +       struct kstat st;
3989 +       struct dbgaufs_arg *p;
3990 +
3991 +       err = -ENOMEM;
3992 +       p = kmalloc(sizeof(*p), GFP_NOFS);
3993 +       if (unlikely(!p))
3994 +               goto out;
3995 +
3996 +       err = 0;
3997 +       p->n = 0;
3998 +       file->private_data = p;
3999 +       if (!xf)
4000 +               goto out;
4001 +
4002 +       err = vfs_getattr(xf->f_vfsmnt, xf->f_dentry, &st);
4003 +       if (!err) {
4004 +               if (do_fcnt)
4005 +                       p->n = snprintf
4006 +                               (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
4007 +                                (long)file_count(xf), st.blocks, st.blksize,
4008 +                                (long long)st.size);
4009 +               else
4010 +                       p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
4011 +                                       st.blocks, st.blksize,
4012 +                                       (long long)st.size);
4013 +               AuDebugOn(p->n >= sizeof(p->a));
4014 +       } else {
4015 +               p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
4016 +               err = 0;
4017 +       }
4018 +
4019 + out:
4020 +       return err;
4021 +
4022 +}
4023 +
4024 +static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
4025 +                              size_t count, loff_t *ppos)
4026 +{
4027 +       struct dbgaufs_arg *p;
4028 +
4029 +       p = file->private_data;
4030 +       return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
4031 +}
4032 +
4033 +/* ---------------------------------------------------------------------- */
4034 +
4035 +static int dbgaufs_xib_open(struct inode *inode, struct file *file)
4036 +{
4037 +       int err;
4038 +       struct au_sbinfo *sbinfo;
4039 +       struct super_block *sb;
4040 +
4041 +       sbinfo = inode->i_private;
4042 +       sb = sbinfo->si_sb;
4043 +       si_noflush_read_lock(sb);
4044 +       err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
4045 +       si_read_unlock(sb);
4046 +       return err;
4047 +}
4048 +
4049 +static const struct file_operations dbgaufs_xib_fop = {
4050 +       .open           = dbgaufs_xib_open,
4051 +       .release        = dbgaufs_xi_release,
4052 +       .read           = dbgaufs_xi_read
4053 +};
4054 +
4055 +/* ---------------------------------------------------------------------- */
4056 +
4057 +#define DbgaufsXi_PREFIX "xi"
4058 +
4059 +static int dbgaufs_xino_open(struct inode *inode, struct file *file)
4060 +{
4061 +       int err;
4062 +       long l;
4063 +       struct au_sbinfo *sbinfo;
4064 +       struct super_block *sb;
4065 +       struct file *xf;
4066 +       struct qstr *name;
4067 +
4068 +       err = -ENOENT;
4069 +       xf = NULL;
4070 +       name = &file->f_dentry->d_name;
4071 +       if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
4072 +                    || memcmp(name->name, DbgaufsXi_PREFIX,
4073 +                              sizeof(DbgaufsXi_PREFIX) - 1)))
4074 +               goto out; /* the name must be the prefix plus at least one char */
4075 +       err = strict_strtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
4076 +       if (unlikely(err))
4077 +               goto out; /* what follows the prefix is not a decimal number */
4078 +
4079 +       sbinfo = inode->i_private;
4080 +       sb = sbinfo->si_sb;
4081 +       si_noflush_read_lock(sb);
4082 +       if (l >= 0 && l <= au_sbend(sb)) { /* the parse is signed, reject l < 0 too */
4083 +               xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
4084 +               err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
4085 +       } else
4086 +               err = -ENOENT;
4087 +       si_read_unlock(sb);
4088 +
4089 + out:
4090 +       return err;
4091 +}
4092 +
4093 +static const struct file_operations dbgaufs_xino_fop = {
4094 +       .open           = dbgaufs_xino_open,
4095 +       .release        = dbgaufs_xi_release,
4096 +       .read           = dbgaufs_xi_read
4097 +};
4098 +
4099 +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4100 +{
4101 +       aufs_bindex_t bend;
4102 +       struct au_branch *br;
4103 +       struct au_xino_file *xi;
4104 +
4105 +       if (!au_sbi(sb)->si_dbgaufs)
4106 +               return;
4107 +
4108 +       bend = au_sbend(sb);
4109 +       for (; bindex <= bend; bindex++) {
4110 +               br = au_sbr(sb, bindex);
4111 +               xi = &br->br_xino;
4112 +               if (xi->xi_dbgaufs) {
4113 +                       debugfs_remove(xi->xi_dbgaufs);
4114 +                       xi->xi_dbgaufs = NULL;
4115 +               }
4116 +       }
4117 +}
4118 +
4119 +void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4120 +{
4121 +       struct au_sbinfo *sbinfo;
4122 +       struct dentry *parent;
4123 +       struct au_branch *br;
4124 +       struct au_xino_file *xi;
4125 +       aufs_bindex_t bend;
4126 +       char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" + bindex digits + NUL */
4127 +
4128 +       sbinfo = au_sbi(sb);
4129 +       parent = sbinfo->si_dbgaufs;
4130 +       if (!parent)
4131 +               return;
4132 +
4133 +       bend = au_sbend(sb);
4134 +       for (; bindex <= bend; bindex++) {
4135 +               snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
4136 +               br = au_sbr(sb, bindex);
4137 +               xi = &br->br_xino;
4138 +               AuDebugOn(xi->xi_dbgaufs);
4139 +               xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
4140 +                                                    sbinfo, &dbgaufs_xino_fop);
4141 +               /* a failure is not fatal here, it is only warned about */
4142 +               if (unlikely(!xi->xi_dbgaufs))
4143 +                       AuWarn1("failed %s under debugfs\n", name);
4144 +       }
4145 +}
4146 +
4147 +/* ---------------------------------------------------------------------- */
4148 +
4149 +#ifdef CONFIG_AUFS_EXPORT
4150 +static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
4151 +{
4152 +       int err;
4153 +       struct au_sbinfo *sbinfo;
4154 +       struct super_block *sb;
4155 +
4156 +       sbinfo = inode->i_private;
4157 +       sb = sbinfo->si_sb;
4158 +       si_noflush_read_lock(sb);
4159 +       err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
4160 +       si_read_unlock(sb);
4161 +       return err;
4162 +}
4163 +
4164 +static const struct file_operations dbgaufs_xigen_fop = {
4165 +       .open           = dbgaufs_xigen_open,
4166 +       .release        = dbgaufs_xi_release,
4167 +       .read           = dbgaufs_xi_read
4168 +};
4169 +
4170 +static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4171 +{
4172 +       int err;
4173 +
4174 +       /*
4175 +        * This function is a dynamic '__init' function actually,
4176 +        * so the tiny check for si_rwsem is unnecessary.
4177 +        */
4178 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4179 +
4180 +       err = -EIO;
4181 +       sbinfo->si_dbgaufs_xigen = debugfs_create_file
4182 +               ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4183 +                &dbgaufs_xigen_fop);
4184 +       if (sbinfo->si_dbgaufs_xigen)
4185 +               err = 0;
4186 +
4187 +       return err;
4188 +}
4189 +#else
4190 +static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
4191 +{
4192 +       return 0;
4193 +}
4194 +#endif /* CONFIG_AUFS_EXPORT */
4195 +
4196 +/* ---------------------------------------------------------------------- */
4197 +
4198 +void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4199 +{
4200 +       /*
4201 +        * This function is a dynamic '__init' function actually,
4202 +        * so the tiny check for si_rwsem is unnecessary.
4203 +        */
4204 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4205 +
4206 +       debugfs_remove_recursive(sbinfo->si_dbgaufs);
4207 +       sbinfo->si_dbgaufs = NULL;
4208 +       kobject_put(&sbinfo->si_kobj); /* pairs with kobject_get() in dbgaufs_si_init() */
4209 +}
4210 +
4211 +int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4212 +{
4213 +       int err;
4214 +       char name[SysaufsSiNameLen];
4215 +
4216 +       /*
4217 +        * This function is a dynamic '__init' function actually,
4218 +        * so the tiny check for si_rwsem is unnecessary.
4219 +        */
4220 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
4221 +
4222 +       err = -ENOENT;
4223 +       if (!dbgaufs) {
4224 +               AuErr1("/debug/aufs is uninitialized\n");
4225 +               goto out;
4226 +       }
4227 +
4228 +       err = -EIO;
4229 +       sysaufs_name(sbinfo, name);
4230 +       sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
4231 +       if (unlikely(!sbinfo->si_dbgaufs))
4232 +               goto out; /* nothing to undo yet */
4233 +       kobject_get(&sbinfo->si_kobj); /* dropped by dbgaufs_si_fin() */
4234 +
4235 +       sbinfo->si_dbgaufs_xib = debugfs_create_file
4236 +               ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
4237 +                &dbgaufs_xib_fop);
4238 +       if (unlikely(!sbinfo->si_dbgaufs_xib))
4239 +               goto out_dir;
4240 +
4241 +       err = dbgaufs_xigen_init(sbinfo);
4242 +       if (!err)
4243 +               goto out; /* success */
4244 +
4245 + out_dir:
4246 +       dbgaufs_si_fin(sbinfo); /* removes the dir with its files and puts the kobject */
4247 + out:
4248 +       return err;
4249 +}
4250 +
4251 +/* ---------------------------------------------------------------------- */
4252 +
4253 +void dbgaufs_fin(void)
4254 +{
4255 +       debugfs_remove(dbgaufs);
4256 +}
4257 +
4258 +int __init dbgaufs_init(void)
4259 +{
4260 +       int err;
4261 +
4262 +       err = -EIO;
4263 +       dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
4264 +       if (dbgaufs)
4265 +               err = 0;
4266 +       return err;
4267 +}
4268 diff -uprN -x .git linux-2.6.31/fs/aufs/dbgaufs.h aufs2-2.6.git/fs/aufs/dbgaufs.h
4269 --- linux-2.6.31/fs/aufs/dbgaufs.h      1970-01-01 00:00:00.000000000 +0000
4270 +++ aufs2-2.6.git/fs/aufs/dbgaufs.h     2009-09-21 21:49:23.377863284 +0000
4271 @@ -0,0 +1,79 @@
4272 +/*
4273 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4274 + *
4275 + * This program, aufs is free software; you can redistribute it and/or modify
4276 + * it under the terms of the GNU General Public License as published by
4277 + * the Free Software Foundation; either version 2 of the License, or
4278 + * (at your option) any later version.
4279 + *
4280 + * This program is distributed in the hope that it will be useful,
4281 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4282 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4283 + * GNU General Public License for more details.
4284 + *
4285 + * You should have received a copy of the GNU General Public License
4286 + * along with this program; if not, write to the Free Software
4287 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4288 + */
4289 +
4290 +/*
4291 + * debugfs interface
4292 + */
4293 +
4294 +#ifndef __DBGAUFS_H__
4295 +#define __DBGAUFS_H__
4296 +
4297 +#ifdef __KERNEL__
4298 +
4299 +#include <linux/init.h>
4300 +#include <linux/aufs_type.h>
4301 +
4302 +struct super_block;
4303 +struct au_sbinfo;
4304 +
4305 +#ifdef CONFIG_DEBUG_FS
4306 +/* dbgaufs.c */
4307 +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
4308 +void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
4309 +void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
4310 +int dbgaufs_si_init(struct au_sbinfo *sbinfo);
4311 +void dbgaufs_fin(void);
4312 +int __init dbgaufs_init(void);
4313 +
4314 +#else
4315 +
4316 +static inline
4317 +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
4318 +{
4319 +       /* empty */
4320 +}
4321 +
4322 +static inline
4323 +void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
4324 +{
4325 +       /* empty */
4326 +}
4327 +
4328 +static inline
4329 +void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
4330 +{
4331 +       /* empty */
4332 +}
4333 +
4334 +static inline
4335 +int dbgaufs_si_init(struct au_sbinfo *sbinfo)
4336 +{
4337 +       return 0;
4338 +}
4339 +
4340 +#define dbgaufs_fin()  do {} while (0)
4341 +
4342 +static inline
4343 +int __init dbgaufs_init(void)
4344 +{
4345 +       return 0;
4346 +}
4347 +#endif /* CONFIG_DEBUG_FS */
4348 +
4349 +#endif /* __KERNEL__ */
4350 +#endif /* __DBGAUFS_H__ */
4351 diff -uprN -x .git linux-2.6.31/fs/aufs/dcsub.c aufs2-2.6.git/fs/aufs/dcsub.c
4352 --- linux-2.6.31/fs/aufs/dcsub.c        1970-01-01 00:00:00.000000000 +0000
4353 +++ aufs2-2.6.git/fs/aufs/dcsub.c       2009-09-21 21:49:23.377863284 +0000
4354 @@ -0,0 +1,223 @@
4355 +/*
4356 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4357 + *
4358 + * This program, aufs is free software; you can redistribute it and/or modify
4359 + * it under the terms of the GNU General Public License as published by
4360 + * the Free Software Foundation; either version 2 of the License, or
4361 + * (at your option) any later version.
4362 + *
4363 + * This program is distributed in the hope that it will be useful,
4364 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4365 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4366 + * GNU General Public License for more details.
4367 + *
4368 + * You should have received a copy of the GNU General Public License
4369 + * along with this program; if not, write to the Free Software
4370 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4371 + */
4372 +
4373 +/*
4374 + * sub-routines for dentry cache
4375 + */
4376 +
4377 +#include "aufs.h"
4378 +
4379 +static void au_dpage_free(struct au_dpage *dpage)
4380 +{
4381 +       int i;
4382 +       struct dentry **p;
4383 +
4384 +       p = dpage->dentries;
4385 +       for (i = 0; i < dpage->ndentry; i++)
4386 +               dput(*p++);
4387 +       free_page((unsigned long)dpage->dentries);
4388 +}
4389 +
4390 +int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
4391 +{
4392 +       int err;
4393 +       void *p;
4394 +
4395 +       err = -ENOMEM;
4396 +       dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
4397 +       if (unlikely(!dpages->dpages))
4398 +               goto out;
4399 +
4400 +       p = (void *)__get_free_page(gfp);
4401 +       if (unlikely(!p))
4402 +               goto out_dpages;
4403 +
4404 +       dpages->dpages[0].ndentry = 0;
4405 +       dpages->dpages[0].dentries = p;
4406 +       dpages->ndpage = 1;
4407 +       return 0; /* success */
4408 +
4409 + out_dpages:
4410 +       kfree(dpages->dpages);
4411 + out:
4412 +       return err;
4413 +}
4414 +
4415 +void au_dpages_free(struct au_dcsub_pages *dpages)
4416 +{
4417 +       int i;
4418 +       struct au_dpage *p;
4419 +
4420 +       p = dpages->dpages;
4421 +       for (i = 0; i < dpages->ndpage; i++)
4422 +               au_dpage_free(p++);
4423 +       kfree(dpages->dpages);
4424 +}
4425 +
4426 +static int au_dpages_append(struct au_dcsub_pages *dpages,
4427 +                           struct dentry *dentry, gfp_t gfp)
4428 +{
4429 +       int err, sz;
4430 +       struct au_dpage *dpage;
4431 +       void *p;
4432 +
4433 +       dpage = dpages->dpages + dpages->ndpage - 1; /* the last page in use */
4434 +       sz = PAGE_SIZE / sizeof(dentry); /* dentry pointers which fit in one page */
4435 +       if (unlikely(dpage->ndentry >= sz)) {
4436 +               AuLabel(new dpage);
4437 +               err = -ENOMEM;
4438 +               sz = dpages->ndpage * sizeof(*dpages->dpages); /* reused: current array size in bytes */
4439 +               p = au_kzrealloc(dpages->dpages, sz,
4440 +                                sz + sizeof(*dpages->dpages), gfp);
4441 +               if (unlikely(!p))
4442 +                       goto out;
4443 +
4444 +               dpages->dpages = p;
4445 +               dpage = dpages->dpages + dpages->ndpage;
4446 +               p = (void *)__get_free_page(gfp);
4447 +               if (unlikely(!p))
4448 +                       goto out; /* the enlarged array is kept, ndpage is unchanged */
4449 +
4450 +               dpage->ndentry = 0;
4451 +               dpage->dentries = p;
4452 +               dpages->ndpage++;
4453 +       }
4454 +
4455 +       dpage->dentries[dpage->ndentry++] = dget(dentry); /* au_dpage_free() does dput() */
4456 +       return 0; /* success */
4457 +
4458 + out:
4459 +       return err;
4460 +}
4461 +
4462 +int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4463 +                  au_dpages_test test, void *arg)
4464 +{
4465 +       int err;
4466 +       struct dentry *this_parent = root;
4467 +       struct list_head *next;
4468 +       struct super_block *sb = root->d_sb;
4469 +
4470 +       err = 0;
4471 +       spin_lock(&dcache_lock);
4472 + repeat:
4473 +       next = this_parent->d_subdirs.next;
4474 + resume:
4475 +       if (this_parent->d_sb == sb
4476 +           && !IS_ROOT(this_parent)
4477 +           && atomic_read(&this_parent->d_count)
4478 +           && this_parent->d_inode
4479 +           && (!test || test(this_parent, arg))) {
4480 +               err = au_dpages_append(dpages, this_parent, GFP_ATOMIC);
4481 +               if (unlikely(err))
4482 +                       goto out;
4483 +       }
4484 +
4485 +       while (next != &this_parent->d_subdirs) {
4486 +               struct list_head *tmp = next;
4487 +               struct dentry *dentry = list_entry(tmp, struct dentry,
4488 +                                                  d_u.d_child);
4489 +               next = tmp->next;
4490 +               if (/*d_unhashed(dentry) || */!dentry->d_inode)
4491 +                       continue;
4492 +               if (!list_empty(&dentry->d_subdirs)) {
4493 +                       this_parent = dentry;
4494 +                       goto repeat;
4495 +               }
4496 +               if (dentry->d_sb == sb
4497 +                   && atomic_read(&dentry->d_count)
4498 +                   && (!test || test(dentry, arg))) {
4499 +                       err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4500 +                       if (unlikely(err))
4501 +                               goto out;
4502 +               }
4503 +       }
4504 +
4505 +       if (this_parent != root) {
4506 +               next = this_parent->d_u.d_child.next;
4507 +               this_parent = this_parent->d_parent; /* dcache_lock is locked */
4508 +               goto resume;
4509 +       }
4510 + out:
4511 +       spin_unlock(&dcache_lock);
4512 +       return err;
4513 +}
4514 +
4515 +int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4516 +                      int do_include, au_dpages_test test, void *arg)
4517 +{
4518 +       int err;
4519 +
4520 +       err = 0;
4521 +       spin_lock(&dcache_lock);
4522 +       if (do_include && (!test || test(dentry, arg))) {
4523 +               err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4524 +               if (unlikely(err))
4525 +                       goto out;
4526 +       }
4527 +       while (!IS_ROOT(dentry)) {
4528 +               dentry = dentry->d_parent; /* dcache_lock is locked */
4529 +               if (!test || test(dentry, arg)) {
4530 +                       err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
4531 +                       if (unlikely(err))
4532 +                               break;
4533 +               }
4534 +       }
4535 +
4536 + out:
4537 +       spin_unlock(&dcache_lock);
4538 +
4539 +       return err;
4540 +}
4541 +
4542 +struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2)
4543 +{
4544 +       struct dentry *trap, **dentries;
4545 +       int err, i, j;
4546 +       struct au_dcsub_pages dpages;
4547 +       struct au_dpage *dpage;
4548 +
4549 +       trap = ERR_PTR(-ENOMEM); /* returned when either call below fails */
4550 +       err = au_dpages_init(&dpages, GFP_NOFS);
4551 +       if (unlikely(err))
4552 +               goto out;
4553 +       err = au_dcsub_pages_rev(&dpages, d1, /*do_include*/1, NULL, NULL);
4554 +       if (unlikely(err))
4555 +               goto out_dpages;
4556 +
4557 +       trap = d1;
4558 +       for (i = 0; !err && i < dpages.ndpage; i++) { /* d1 first, then its ancestors */
4559 +               dpage = dpages.dpages + i;
4560 +               dentries = dpage->dentries;
4561 +               for (j = 0; !err && j < dpage->ndentry; j++) {
4562 +                       struct dentry *d;
4563 +
4564 +                       d = dentries[j];
4565 +                       err = (d == d2); /* err is reused as a 'found' flag */
4566 +                       if (!err)
4567 +                               trap = d; /* the last dentry visited before d2 */
4568 +               }
4569 +       }
4570 +       if (!err)
4571 +               trap = NULL; /* d2 is neither d1 nor one of its ancestors */
4572 +
4573 + out_dpages:
4574 +       au_dpages_free(&dpages);
4575 + out:
4576 +       return trap;
4577 +}
4578 diff -uprN -x .git linux-2.6.31/fs/aufs/dcsub.h aufs2-2.6.git/fs/aufs/dcsub.h
4579 --- linux-2.6.31/fs/aufs/dcsub.h        1970-01-01 00:00:00.000000000 +0000
4580 +++ aufs2-2.6.git/fs/aufs/dcsub.h       2009-09-21 21:49:23.377863284 +0000
4581 @@ -0,0 +1,54 @@
4582 +/*
4583 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4584 + *
4585 + * This program, aufs is free software; you can redistribute it and/or modify
4586 + * it under the terms of the GNU General Public License as published by
4587 + * the Free Software Foundation; either version 2 of the License, or
4588 + * (at your option) any later version.
4589 + *
4590 + * This program is distributed in the hope that it will be useful,
4591 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4592 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4593 + * GNU General Public License for more details.
4594 + *
4595 + * You should have received a copy of the GNU General Public License
4596 + * along with this program; if not, write to the Free Software
4597 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4598 + */
4599 +
4600 +/*
4601 + * sub-routines for dentry cache
4602 + */
4603 +
4604 +#ifndef __AUFS_DCSUB_H__
4605 +#define __AUFS_DCSUB_H__
4606 +
4607 +#ifdef __KERNEL__
4608 +
4609 +#include <linux/types.h>
4610 +
4611 +struct dentry;
4612 +
4613 +struct au_dpage {
4614 +       int ndentry;                    /* valid entries in dentries[] */
4615 +       struct dentry **dentries;       /* array walked as dentries[0..ndentry) */
4616 +};
4617 +
4618 +struct au_dcsub_pages {
4619 +       int ndpage;                     /* valid entries in dpages[] */
4620 +       struct au_dpage *dpages;        /* array walked as dpages[0..ndpage) */
4621 +};
4622 +
4623 +/* ---------------------------------------------------------------------- */
4624 +
4625 +int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
4626 +void au_dpages_free(struct au_dcsub_pages *dpages);
4627 +typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
4628 +int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
4629 +                  au_dpages_test test, void *arg);
4630 +int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
4631 +                      int do_include, au_dpages_test test, void *arg);
4632 +struct dentry *au_test_subdir(struct dentry *d1, struct dentry *d2);
4633 +
4634 +#endif /* __KERNEL__ */
4635 +#endif /* __AUFS_DCSUB_H__ */
4636 diff -uprN -x .git linux-2.6.31/fs/aufs/debug.c aufs2-2.6.git/fs/aufs/debug.c
4637 --- linux-2.6.31/fs/aufs/debug.c        1970-01-01 00:00:00.000000000 +0000
4638 +++ aufs2-2.6.git/fs/aufs/debug.c       2009-09-21 21:49:23.377863284 +0000
4639 @@ -0,0 +1,431 @@
4640 +/*
4641 + * Copyright (C) 2005-2009 Junjiro R. Okajima
4642 + *
4643 + * This program, aufs is free software; you can redistribute it and/or modify
4644 + * it under the terms of the GNU General Public License as published by
4645 + * the Free Software Foundation; either version 2 of the License, or
4646 + * (at your option) any later version.
4647 + *
4648 + * This program is distributed in the hope that it will be useful,
4649 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4650 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4651 + * GNU General Public License for more details.
4652 + *
4653 + * You should have received a copy of the GNU General Public License
4654 + * along with this program; if not, write to the Free Software
4655 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
4656 + */
4657 +
4658 +/*
4659 + * debug print functions
4660 + */
4661 +
4662 +#include <linux/module.h>
4663 +#include <linux/vt_kern.h>
4664 +#include "aufs.h"
4665 +
4666 +int aufs_debug; /* read by au_debug_test(); exported as parameter "debug" */
4667 +MODULE_PARM_DESC(debug, "debug print");
4668 +module_param_named(debug, aufs_debug, int, S_IRUGO | S_IWUSR | S_IWGRP);
4669 +       /* printk level string that dpri() prepends to every message */
4670 +char *au_plevel = KERN_DEBUG;
4671 +#define dpri(fmt, arg...) do { /* prints only while au_debug_test() */ \
4672 +       if (au_debug_test()) \
4673 +               printk("%s" fmt, au_plevel, ##arg); \
4674 +} while (0)
4675 +
4676 +/* ---------------------------------------------------------------------- */
4677 +/* print branch index, name and length of every entry hashed in @whlist */
4678 +void au_dpri_whlist(struct au_nhash *whlist)
4679 +{
4680 +       unsigned long ul, n;
4681 +       struct hlist_head *head;
4682 +       struct au_vdir_wh *tpos;
4683 +       struct hlist_node *pos;
4684 +
4685 +       n = whlist->nh_num; /* number of hash heads to walk */
4686 +       head = whlist->nh_head;
4687 +       for (ul = 0; ul < n; ul++) {
4688 +               hlist_for_each_entry(tpos, pos, head, wh_hash)
4689 +                       dpri("b%d, %.*s, %d\n",
4690 +                            tpos->wh_bindex,
4691 +                            tpos->wh_str.len, tpos->wh_str.name,
4692 +                            tpos->wh_str.len);
4693 +               head++; /* next hash head */
4694 +       }
4695 +}
4696 +/* print the summary fields of @vdir and the address of each of its deblks */
4697 +void au_dpri_vdir(struct au_vdir *vdir)
4698 +{
4699 +       unsigned long ul;
4700 +       union au_vdir_deblk_p p;
4701 +       unsigned char *o;
4702 +
4703 +       if (!vdir || IS_ERR(vdir)) {
4704 +               dpri("err %ld\n", PTR_ERR(vdir)); /* NULL shows as "err 0" */
4705 +               return;
4706 +       }
4707 +
4708 +       dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
4709 +            vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
4710 +            vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
4711 +       for (ul = 0; ul < vdir->vd_nblk; ul++) {
4712 +               p.deblk = vdir->vd_deblk[ul];
4713 +               o = p.deblk; /* only the block address is printed */
4714 +               dpri("[%lu]: %p\n", ul, o);
4715 +       }
4716 +}
4717 +/* print one inode plus the name of @wh if given; -1 if @inode is NULL/error */
4718 +static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode,
4719 +                       struct dentry *wh)
4720 +{
4721 +       char *n = NULL;
4722 +       int l = 0;
4723 +
4724 +       if (!inode || IS_ERR(inode)) {
4725 +               dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
4726 +               return -1;
4727 +       }
4728 +
4729 +       /* the type of i_blocks depends upon CONFIG_LSF */
4730 +       BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
4731 +                    && sizeof(inode->i_blocks) != sizeof(u64));
4732 +       if (wh) {
4733 +               n = (void *)wh->d_name.name; /* cast drops the const qualifier */
4734 +               l = wh->d_name.len;
4735 +       }
4736 +
4737 +       dpri("i%d: i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
4738 +            " ct %lld, np %lu, st 0x%lx, f 0x%x, g %x%s%.*s\n",
4739 +            bindex,
4740 +            inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
4741 +            atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
4742 +            i_size_read(inode), (unsigned long long)inode->i_blocks,
4743 +            (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, /* low 16 bits */
4744 +            inode->i_mapping ? inode->i_mapping->nrpages : 0,
4745 +            inode->i_state, inode->i_flags, inode->i_generation,
4746 +            l ? ", wh " : "", l, n); /* with l == 0, "%.*s" prints nothing */
4747 +       return 0;
4748 +}
4749 +/* print @inode, then every ii_hinode[] entry in [ii_bstart, ii_bend] */
4750 +void au_dpri_inode(struct inode *inode)
4751 +{
4752 +       struct au_iinfo *iinfo;
4753 +       aufs_bindex_t bindex;
4754 +       int err;
4755 +
4756 +       err = do_pri_inode(-1, inode, NULL); /* -1: not a per-branch entry */
4757 +       if (err || !au_test_aufs(inode->i_sb))
4758 +               return;
4759 +
4760 +       iinfo = au_ii(inode);
4761 +       if (!iinfo)
4762 +               return;
4763 +       dpri("i-1: bstart %d, bend %d, gen %d\n",
4764 +            iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode));
4765 +       if (iinfo->ii_bstart < 0)
4766 +               return; /* negative start index: nothing to iterate */
4767 +       for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++)
4768 +               do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode,
4769 +                            iinfo->ii_hinode[0 + bindex].hi_whdentry);
4770 +}
4771 +/* print @dentry and its inode; -1 if @dentry is NULL or an error pointer */
4772 +static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
4773 +{
4774 +       struct dentry *wh = NULL;
4775 +
4776 +       if (!dentry || IS_ERR(dentry)) {
4777 +               dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
4778 +               return -1;
4779 +       }
4780 +       /* do not call dget_parent() here */
4781 +       dpri("d%d: %.*s?/%.*s, %s, cnt %d, flags 0x%x\n",
4782 +            bindex,
4783 +            AuDLNPair(dentry->d_parent), AuDLNPair(dentry),
4784 +            dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
4785 +            atomic_read(&dentry->d_count), dentry->d_flags);
4786 +       if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
4787 +               struct au_iinfo *iinfo = au_ii(dentry->d_inode);
4788 +               if (iinfo) /* pick the hi_whdentry stored for this index */
4789 +                       wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
4790 +       }
4791 +       do_pri_inode(bindex, dentry->d_inode, wh); /* result is ignored */
4792 +       return 0;
4793 +}
4794 +/* print @dentry, then every di_hdentry[] entry in [di_bstart, di_bend] */
4795 +void au_dpri_dentry(struct dentry *dentry)
4796 +{
4797 +       struct au_dinfo *dinfo;
4798 +       aufs_bindex_t bindex;
4799 +       int err;
4800 +
4801 +       err = do_pri_dentry(-1, dentry); /* -1: not a per-branch entry */
4802 +       if (err || !au_test_aufs(dentry->d_sb))
4803 +               return;
4804 +
4805 +       dinfo = au_di(dentry);
4806 +       if (!dinfo)
4807 +               return;
4808 +       dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n",
4809 +            dinfo->di_bstart, dinfo->di_bend,
4810 +            dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry));
4811 +       if (dinfo->di_bstart < 0)
4812 +               return; /* negative start index: nothing to iterate */
4813 +       for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4814 +               do_pri_dentry(bindex, dinfo->di_hdentry[0 + bindex].hd_dentry);
4815 +}
4816 +/* print @file and its dentry; -1 if @file is NULL or an error pointer */
4817 +static int do_pri_file(aufs_bindex_t bindex, struct file *file)
4818 +{
4819 +       char a[32]; /* optional ", mmapped N" suffix */
4820 +
4821 +       if (!file || IS_ERR(file)) {
4822 +               dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
4823 +               return -1;
4824 +       }
4825 +       a[0] = 0; /* suffix stays empty unless the test below passes */
4826 +       if (bindex < 0
4827 +           && file->f_dentry
4828 +           && au_test_aufs(file->f_dentry->d_sb)
4829 +           && au_fi(file))
4830 +               snprintf(a, sizeof(a), ", mmapped %d", au_test_mmapped(file));
4831 +       dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, pos %llu%s\n",
4832 +            bindex, file->f_mode, file->f_flags, (long)file_count(file),
4833 +            file->f_pos, a);
4834 +       if (file->f_dentry)
4835 +               do_pri_dentry(bindex, file->f_dentry);
4836 +       return 0;
4837 +}
4838 +/* print @file, then every fi_hfile[] entry in [fi_bstart, fi_bend] */
4839 +void au_dpri_file(struct file *file)
4840 +{
4841 +       struct au_finfo *finfo;
4842 +       aufs_bindex_t bindex;
4843 +       int err;
4844 +
4845 +       err = do_pri_file(-1, file); /* -1: not a per-branch entry */
4846 +       if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb))
4847 +               return;
4848 +
4849 +       finfo = au_fi(file);
4850 +       if (!finfo)
4851 +               return;
4852 +       if (finfo->fi_bstart < 0)
4853 +               return; /* negative start index: nothing to iterate */
4854 +       for (bindex = finfo->fi_bstart; bindex <= finfo->fi_bend; bindex++) {
4855 +               struct au_hfile *hf;
4856 +
4857 +               hf = finfo->fi_hfile + bindex;
4858 +               do_pri_file(bindex, hf ? hf->hf_file : NULL); /* NOTE(review): hf is fi_hfile + bindex, NULL test looks redundant */
4859 +       }
4860 +}
4861 +/* print branch @br and its super_block; -1 if br, its mnt or its sb is bad */
4862 +static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
4863 +{
4864 +       struct vfsmount *mnt;
4865 +       struct super_block *sb;
4866 +
4867 +       if (!br || IS_ERR(br))
4868 +               goto out;
4869 +       mnt = br->br_mnt;
4870 +       if (!mnt || IS_ERR(mnt))
4871 +               goto out;
4872 +       sb = mnt->mnt_sb;
4873 +       if (!sb || IS_ERR(sb))
4874 +               goto out;
4875 +
4876 +       dpri("s%d: {perm 0x%x, cnt %d, wbr %p}, "
4877 +            "%s, dev 0x%02x%02x, flags 0x%lx, cnt(BIAS) %d, active %d, "
4878 +            "xino %d\n",
4879 +            bindex, br->br_perm, atomic_read(&br->br_count), br->br_wbr,
4880 +            au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
4881 +            sb->s_flags, sb->s_count - S_BIAS,
4882 +            atomic_read(&sb->s_active), !!br->br_xino.xi_file);
4883 +       return 0;
4884 +
4885 + out:
4886 +       dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); /* NOTE(review): prints br even when mnt or sb was the bad pointer */
4887 +       return -1;
4888 +}
4889 +/* print @sb through a temporary fake branch, then si_branch[0..si_bend] */
4890 +void au_dpri_sb(struct super_block *sb)
4891 +{
4892 +       struct au_sbinfo *sbinfo;
4893 +       aufs_bindex_t bindex;
4894 +       int err;
4895 +       /* to reduce stack size */
4896 +       struct {
4897 +               struct vfsmount mnt;
4898 +               struct au_branch fake;
4899 +       } *a;
4900 +
4901 +       /* this function can be called from magic sysrq */
4902 +       a = kzalloc(sizeof(*a), GFP_ATOMIC);
4903 +       if (unlikely(!a)) {
4904 +               dpri("no memory\n");
4905 +               return;
4906 +       }
4907 +
4908 +       a->mnt.mnt_sb = sb; /* the fake branch exists only to reuse do_pri_br() */
4909 +       a->fake.br_perm = 0;
4910 +       a->fake.br_mnt = &a->mnt;
4911 +       a->fake.br_xino.xi_file = NULL;
4912 +       atomic_set(&a->fake.br_count, 0);
4913 +       smp_mb(); /* atomic_set */
4914 +       err = do_pri_br(-1, &a->fake);
4915 +       kfree(a);
4916 +       dpri("dev 0x%x\n", sb->s_dev);
4917 +       if (err || !au_test_aufs(sb))
4918 +               return;
4919 +
4920 +       sbinfo = au_sbi(sb);
4921 +       if (!sbinfo)
4922 +               return;
4923 +       dpri("nw %d, gen %u, kobj %d\n",
4924 +            atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
4925 +            atomic_read(&sbinfo->si_kobj.kref.refcount));
4926 +       for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
4927 +               do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
4928 +}
4929 +
4930 +/* ---------------------------------------------------------------------- */
4931 +/* keep sleeping (uninterruptible) until the remaining timeout reaches 0 */
4932 +void au_dbg_sleep_jiffy(int jiffy)
4933 +{
4934 +       while (jiffy)
4935 +               jiffy = schedule_timeout_uninterruptible(jiffy);
4936 +}
4937 +/* print the name of every ATTR_* bit set in ia->ia_valid, then ia->ia_file */
4938 +void au_dbg_iattr(struct iattr *ia)
4939 +{
4940 +#define AuBit(name)    if (ia->ia_valid & ATTR_ ## name) \
4941 +                               dpri(#name "\n")
4942 +       AuBit(MODE);
4943 +       AuBit(UID);
4944 +       AuBit(GID);
4945 +       AuBit(SIZE);
4946 +       AuBit(ATIME);
4947 +       AuBit(MTIME);
4948 +       AuBit(CTIME);
4949 +       AuBit(ATIME_SET);
4950 +       AuBit(MTIME_SET);
4951 +       AuBit(FORCE);
4952 +       AuBit(ATTR_FLAG);
4953 +       AuBit(KILL_SUID);
4954 +       AuBit(KILL_SGID);
4955 +       AuBit(FILE);
4956 +       AuBit(KILL_PRIV);
4957 +       AuBit(OPEN);
4958 +       AuBit(TIMES_SET);
4959 +#undef AuBit
4960 +       dpri("ia_file %p\n", ia->ia_file);
4961 +}
4962 +
4963 +/* ---------------------------------------------------------------------- */
4964 +/* assert @dentry is a non-root dir and au_digen(its parent) == @sigen */
4965 +void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen)
4966 +{
4967 +       struct dentry *parent;
4968 +
4969 +       parent = dget_parent(dentry); /* reference dropped by dput() below */
4970 +       AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)
4971 +                 || IS_ROOT(dentry)
4972 +                 || au_digen(parent) != sigen);
4973 +       dput(parent);
4974 +}
4975 +/* assert @dentry is not a directory and au_digen(its parent) == @sigen */
4976 +void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen)
4977 +{
4978 +       struct dentry *parent;
4979 +
4980 +       parent = dget_parent(dentry); /* reference dropped by dput() below */
4981 +       AuDebugOn(S_ISDIR(dentry->d_inode->i_mode)
4982 +                 || au_digen(parent) != sigen);
4983 +       dput(parent);
4984 +}
4985 +/* assert au_digen() == @sigen for each dentry gathered from @parent */
4986 +void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
4987 +{
4988 +       int err, i, j;
4989 +       struct au_dcsub_pages dpages;
4990 +       struct au_dpage *dpage;
4991 +       struct dentry **dentries;
4992 +
4993 +       err = au_dpages_init(&dpages, GFP_NOFS);
4994 +       AuDebugOn(err); /* allocation failure is treated as fatal here */
4995 +       err = au_dcsub_pages_rev(&dpages, parent, /*do_include*/1, NULL, NULL);
4996 +       AuDebugOn(err);
4997 +       for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* last page first */
4998 +               dpage = dpages.dpages + i;
4999 +               dentries = dpage->dentries;
5000 +               for (j = dpage->ndentry - 1; !err && j >= 0; j--)
5001 +                       AuDebugOn(au_digen(dentries[j]) != sigen);
5002 +       }
5003 +       au_dpages_free(&dpages);
5004 +}
5005 +/* assert hf_file and hf_br are unset in fi_hfile[fi_bstart..fi_bend] */
5006 +void au_dbg_verify_hf(struct au_finfo *finfo)
5007 +{
5008 +       struct au_hfile *hf;
5009 +       aufs_bindex_t bend, bindex;
5010 +
5011 +       if (finfo->fi_bstart >= 0) { /* negative start: nothing to check */
5012 +               bend = finfo->fi_bend;
5013 +               for (bindex = finfo->fi_bstart; bindex <= bend; bindex++) {
5014 +                       hf = finfo->fi_hfile + bindex;
5015 +                       AuDebugOn(hf->hf_file || hf->hf_br);
5016 +               }
5017 +       }
5018 +}
5019 +/* BUG(), after au_dbg_blocked(), when au_test_wkq(current) is true */
5020 +void au_dbg_verify_kthread(void)
5021 +{
5022 +       if (au_test_wkq(current)) {
5023 +               au_dbg_blocked();
5024 +               BUG();
5025 +       }
5026 +}
5027 +
5028 +/* ---------------------------------------------------------------------- */
5029 +/* apply whichever compile-time AuForce* overrides are defined to @sbinfo */
5030 +void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused)
5031 +{
5032 +#ifdef AuForceNoPlink
5033 +       au_opt_clr(sbinfo->si_mntflags, PLINK);
5034 +#endif
5035 +#ifdef AuForceNoXino
5036 +       au_opt_clr(sbinfo->si_mntflags, XINO);
5037 +#endif
5038 +#ifdef AuForceNoRefrof
5039 +       au_opt_clr(sbinfo->si_mntflags, REFROF);
5040 +#endif
5041 +#ifdef AuForceHinotify
5042 +       au_opt_set_udba(sbinfo->si_mntflags, UDBA_HINOTIFY);
5043 +#endif
5044 +#ifdef AuForceRd0
5045 +       sbinfo->si_rdblk = 0;
5046 +       sbinfo->si_rdhash = 0;
5047 +#endif
5048 +}
5049 +/* init-time sanity checks on two integer types; always returns 0 */
5050 +int __init au_debug_init(void)
5051 +{
5052 +       aufs_bindex_t bindex;
5053 +       struct au_vdir_destr destr;
5054 +
5055 +       bindex = -1;
5056 +       AuDebugOn(bindex >= 0); /* fires if aufs_bindex_t cannot hold -1 */
5057 +
5058 +       destr.len = -1;
5059 +       AuDebugOn(destr.len < NAME_MAX); /* fires if len cannot reach NAME_MAX */
5060 +
5061 +#ifdef CONFIG_4KSTACKS
5062 +       AuWarn("CONFIG_4KSTACKS is defined.\n");
5063 +#endif
5064 +
5065 +#ifdef AuForceNoBrs
5066 +       sysaufs_brs = 0;
5067 +#endif
5068 +
5069 +       return 0;
5070 +}
5071 diff -uprN -x .git linux-2.6.31/fs/aufs/debug.h aufs2-2.6.git/fs/aufs/debug.h
5072 --- linux-2.6.31/fs/aufs/debug.h        1970-01-01 00:00:00.000000000 +0000
5073 +++ aufs2-2.6.git/fs/aufs/debug.h       2009-09-21 21:49:23.377863284 +0000
5074 @@ -0,0 +1,261 @@
5075 +/*
5076 + * Copyright (C) 2005-2009 Junjiro R. Okajima
5077 + *
5078 + * This program, aufs is free software; you can redistribute it and/or modify
5079 + * it under the terms of the GNU General Public License as published by
5080 + * the Free Software Foundation; either version 2 of the License, or
5081 + * (at your option) any later version.
5082 + *
5083 + * This program is distributed in the hope that it will be useful,
5084 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5085 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
5086 + * GNU General Public License for more details.
5087 + *
5088 + * You should have received a copy of the GNU General Public License
5089 + * along with this program; if not, write to the Free Software
5090 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
5091 + */
5092 +
5093 +/*
5094 + * debug print functions
5095 + */
5096 +
5097 +#ifndef __AUFS_DEBUG_H__
5098 +#define __AUFS_DEBUG_H__
5099 +
5100 +#ifdef __KERNEL__
5101 +
5102 +#include <asm/system.h>
5103 +#include <linux/bug.h>
5104 +/* #include <linux/err.h> */
5105 +#include <linux/init.h>
5106 +/* #include <linux/kernel.h> */
5107 +#include <linux/delay.h>
5108 +/* #include <linux/kd.h> */
5109 +/* #include <linux/vt_kern.h> */
5110 +#include <linux/sysrq.h>
5111 +#include <linux/aufs_type.h>
5112 +
5113 +#ifdef CONFIG_AUFS_DEBUG
5114 +#define AuDebugOn(a)           BUG_ON(a)
5115 +
5116 +/* module parameter */
5117 +extern int aufs_debug;
5118 +static inline void au_debug(int n)
5119 +{
5120 +       aufs_debug = n;
5121 +       smp_mb();
5122 +}
5123 +
5124 +static inline int au_debug_test(void)
5125 +{
5126 +       return aufs_debug;
5127 +}
5128 +#else
5129 +#define AuDebugOn(a)           do {} while (0)
5130 +#define au_debug()             do {} while (0)
5131 +static inline int au_debug_test(void)
5132 +{
5133 +       return 0;
5134 +}
5135 +#endif /* CONFIG_AUFS_DEBUG */
5136 +
5137 +/* ---------------------------------------------------------------------- */
5138 +
5139 +/* debug print */
5140 +       /* AuDpri: printk tagged with function, line, task comm and pid */
5141 +#define AuDpri(lvl, fmt, arg...) \
5142 +       printk(lvl AUFS_NAME " %s:%d:%s[%d]: " fmt, \
5143 +              __func__, __LINE__, current->comm, current->pid, ##arg)
5144 +#define AuDbg(fmt, arg...) do { /* prints only while au_debug_test() */ \
5145 +       if (au_debug_test()) \
5146 +               AuDpri(KERN_DEBUG, "DEBUG: " fmt, ##arg); \
5147 +} while (0)
5148 +#define AuLabel(l)             AuDbg(#l "\n")
5149 +#define AuInfo(fmt, arg...)    AuDpri(KERN_INFO, fmt, ##arg)
5150 +#define AuWarn(fmt, arg...)    AuDpri(KERN_WARNING, fmt, ##arg)
5151 +#define AuErr(fmt, arg...)     AuDpri(KERN_ERR, fmt, ##arg)
5152 +#define AuIOErr(fmt, arg...)   AuErr("I/O Error, " fmt, ##arg)
5153 +#define AuWarn1(fmt, arg...) do { \
5154 +       static unsigned char _c; /* NOTE(review): wraps, so fires again after 256 calls */ \
5155 +       if (!_c++) \
5156 +               AuWarn(fmt, ##arg); \
5157 +} while (0)
5158 +       /* AuErr1 and AuIOErr1 use the same per-call-site counter gate */
5159 +#define AuErr1(fmt, arg...) do { \
5160 +       static unsigned char _c; \
5161 +       if (!_c++) \
5162 +               AuErr(fmt, ##arg); \
5163 +} while (0)
5164 +
5165 +#define AuIOErr1(fmt, arg...) do { \
5166 +       static unsigned char _c; \
5167 +       if (!_c++) \
5168 +               AuIOErr(fmt, ##arg); \
5169 +} while (0)
5170 +       /* AuUnsupport: log the fixed message plus @fmt, then dump the stack */
5171 +#define AuUnsupportMsg "This operation is not supported." \
5172 +                       " Please report this application to aufs-users ML."
5173 +#define AuUnsupport(fmt, args...) do { \
5174 +       AuErr(AuUnsupportMsg "\n" fmt, ##args); \
5175 +       dump_stack(); \
5176 +} while (0)
5177 +       /* AuTraceErr: debug-print @e only when it is negative */
5178 +#define AuTraceErr(e) do { \
5179 +       if (unlikely((e) < 0)) \
5180 +               AuDbg("err %d\n", (int)(e)); \
5181 +} while (0)
5182 +       /* AuTraceErrPtr: debug-print @p only when IS_ERR(p) */
5183 +#define AuTraceErrPtr(p) do { \
5184 +       if (IS_ERR(p)) \
5185 +               AuDbg("err %ld\n", PTR_ERR(p)); \
5186 +} while (0)
5187 +
5188 +/* dirty macros for debug print, use with "%.*s" and caution */
5189 +#define AuLNPair(qstr)         (qstr)->len, (qstr)->name /* expands to TWO args */
5190 +#define AuDLNPair(d)           AuLNPair(&(d)->d_name)
5191 +
5192 +/* ---------------------------------------------------------------------- */
5193 +
5194 +struct au_sbinfo;
5195 +struct au_finfo;
5196 +struct dentry;
5197 +#ifdef CONFIG_AUFS_DEBUG
5198 +extern char *au_plevel;
5199 +struct au_nhash;
5200 +void au_dpri_whlist(struct au_nhash *whlist);
5201 +struct au_vdir;
5202 +void au_dpri_vdir(struct au_vdir *vdir);
5203 +struct inode;
5204 +void au_dpri_inode(struct inode *inode);
5205 +void au_dpri_dentry(struct dentry *dentry);
5206 +struct file;
5207 +void au_dpri_file(struct file *filp);
5208 +struct super_block;
5209 +void au_dpri_sb(struct super_block *sb);
5210 +
5211 +void au_dbg_sleep_jiffy(int jiffy);
5212 +struct iattr;
5213 +void au_dbg_iattr(struct iattr *ia);
5214 +
5215 +void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen);
5216 +void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen);
5217 +void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
5218 +void au_dbg_verify_hf(struct au_finfo *finfo);
5219 +void au_dbg_verify_kthread(void);
5220 +
5221 +int __init au_debug_init(void);
5222 +void au_debug_sbinfo_init(struct au_sbinfo *sbinfo);
5223 +#define AuDbgWhlist(w) do { \
5224 +       AuDbg(#w "\n"); \
5225 +       au_dpri_whlist(w); \
5226 +} while (0)
5227 +
5228 +#define AuDbgVdir(v) do { \
5229 +       AuDbg(#v "\n"); \
5230 +       au_dpri_vdir(v); \
5231 +} while (0)
5232 +
5233 +#define AuDbgInode(i) do { \
5234 +       AuDbg(#i "\n"); \
5235 +       au_dpri_inode(i); \
5236 +} while (0)
5237 +
5238 +#define AuDbgDentry(d) do { \
5239 +       AuDbg(#d "\n"); \
5240 +       au_dpri_dentry(d); \
5241 +} while (0)
5242 +
5243 +#define AuDbgFile(f) do { \
5244 +       AuDbg(#f "\n"); \
5245 +       au_dpri_file(f); \
5246 +} while (0)
5247 +
5248 +#define AuDbgSb(sb) do { \
5249 +       AuDbg(#sb "\n"); \
5250 +       au_dpri_sb(sb); \
5251 +} while (0)
5252 +
5253 +#define AuDbgSleep(sec) do { \
5254 +       AuDbg("sleep %d sec\n", sec); \
5255 +       ssleep(sec); \
5256 +} while (0)
5257 +
5258 +#define AuDbgSleepJiffy(jiffy) do { \
5259 +       AuDbg("sleep %d jiffies\n", jiffy); \
5260 +       au_dbg_sleep_jiffy(jiffy); \
5261 +} while (0)
5262 +
5263 +#define AuDbgIAttr(ia) do { \
5264 +       AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \
5265 +       au_dbg_iattr(ia); \
5266 +} while (0)
5267 +#else
5268 +static inline void au_dbg_verify_dir_parent(struct dentry *dentry,
5269 +                                           unsigned int sigen)
5270 +{
5271 +       /* empty */
5272 +}
5273 +static inline void au_dbg_verify_nondir_parent(struct dentry *dentry,
5274 +                                              unsigned int sigen)
5275 +{
5276 +       /* empty */
5277 +}
5278 +static inline void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
5279 +{
5280 +       /* empty */
5281 +}
5282 +static inline void au_dbg_verify_hf(struct au_finfo *finfo)
5283 +{
5284 +       /* empty */
5285 +}
5286 +static inline void au_dbg_verify_kthread(void)
5287 +{
5288 +       /* empty */
5289 +}
5290 +
5291 +static inline int au_debug_init(void)
5292 +{
5293 +       return 0;
5294 +}
5295 +static inline void au_debug_sbinfo_init(struct au_sbinfo *sbinfo)
5296 +{
5297 +       /* empty */
5298 +}
5299 +#define AuDbgWhlist(w)         do {} while (0)
5300 +#define AuDbgVdir(v)           do {} while (0)
5301 +#define AuDbgInode(i)          do {} while (0)
5302 +#define AuDbgDentry(d)         do {} while (0)
5303 +#define AuDbgFile(f)           do {} while (0)
5304 +#define AuDbgSb(sb)            do {} while (0)
5305 +#define AuDbgSleep(sec)                do {} while (0)
5306 +#define AuDbgSleepJiffy(jiffy) do {} while (0)
5307 +#define AuDbgIAttr(ia)         do {} while (0)
5308 +#endif /* CONFIG_AUFS_DEBUG */
5309 +
5310 +/* ---------------------------------------------------------------------- */
5311 +
5312 +#ifdef CONFIG_AUFS_MAGIC_SYSRQ
5313 +int __init au_sysrq_init(void);
5314 +void au_sysrq_fin(void);
5315 +       /* au_dbg_blocked: warn, then feed sysrq key 'w' to the fg console tty */
5316 +#ifdef CONFIG_HW_CONSOLE
5317 +#define au_dbg_blocked() do { \
5318 +       WARN_ON(1); \
5319 +       handle_sysrq('w', vc_cons[fg_console].d->vc_tty); \
5320 +} while (0)
5321 +#else /* !CONFIG_HW_CONSOLE */
5322 +#define au_dbg_blocked()       do {} while (0)
5323 +#endif /* CONFIG_HW_CONSOLE */
5324 +
5325 +#else /* !CONFIG_AUFS_MAGIC_SYSRQ: no-op stubs */
5326 +static inline int au_sysrq_init(void)
5327 +{
5328 +       return 0;
5329 +}
5330 +#define au_sysrq_fin()         do {} while (0)
5331 +#define au_dbg_blocked()       do {} while (0)
5332 +#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
5333 +
5334 +#endif /* __KERNEL__ */
5335 +#endif /* __AUFS_DEBUG_H__ */
5336 diff -uprN -x .git linux-2.6.31/fs/aufs/dentry.c aufs2-2.6.git/fs/aufs/dentry.c
5337 --- linux-2.6.31/fs/aufs/dentry.c       1970-01-01 00:00:00.000000000 +0000
5338 +++ aufs2-2.6.git/fs/aufs/dentry.c      2009-09-21 21:49:23.377863284 +0000
5339 @@ -0,0 +1,879 @@
5340 +/*
5341 + * Copyright (C) 2005-2009 Junjiro R. Okajima
5342 + *
5343 + * This program, aufs is free software; you can redistribute it and/or modify
5344 + * it under the terms of the GNU General Public License as published by
5345 + * the Free Software Foundation; either version 2 of the License, or
5346 + * (at your option) any later version.
5347 + *
5348 + * This program is distributed in the hope that it will be useful,
5349 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5350 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
5351 + * GNU General Public License for more details.
5352 + *
5353 + * You should have received a copy of the GNU General Public License
5354 + * along with this program; if not, write to the Free Software
5355 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
5356 + */
5357 +
5358 +/*
5359 + * lookup and dentry operations
5360 + */
5361 +
5362 +#include <linux/namei.h>
5363 +#include "aufs.h"
5364 +/* fill @h_nd from @nd with some lookup flags cleared, or zero it if !nd */
5365 +static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd)
5366 +{
5367 +       if (nd) {
5368 +               *h_nd = *nd; /* struct copy; the caller's @nd is not modified */
5369 +
5370 +               /*
5371 +                * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
5372 +                * due to whiteout and branch permission.
5373 +                */
5374 +               h_nd->flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
5375 +                                | LOOKUP_FOLLOW);
5376 +               /* unnecessary? */
5377 +               h_nd->intent.open.file = NULL;
5378 +       } else
5379 +               memset(h_nd, 0, sizeof(*h_nd));
5380 +}
5381 +
5382 +struct au_lkup_one_args { /* argument bundle for au_call_lkup_one() */
5383 +       struct dentry **errp; /* receives the au_lkup_one() return value */
5384 +       struct qstr *name;
5385 +       struct dentry *h_parent;
5386 +       struct au_branch *br;
5387 +       struct nameidata *nd;
5388 +};
5389 +/* look up @name under @h_parent on @br; returns a dentry or an ERR_PTR */
5390 +struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
5391 +                          struct au_branch *br, struct nameidata *nd)
5392 +{
5393 +       struct dentry *h_dentry;
5394 +       int err;
5395 +       struct nameidata h_nd;
5396 +
5397 +       if (au_test_fs_null_nd(h_parent->d_sb)) /* bypass the nameidata path */
5398 +               return vfsub_lookup_one_len(name->name, h_parent, name->len);
5399 +
5400 +       au_h_nd(&h_nd, nd);
5401 +       h_nd.path.dentry = h_parent;
5402 +       h_nd.path.mnt = br->br_mnt;
5403 +
5404 +       err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
5405 +       h_dentry = ERR_PTR(err);
5406 +       if (!err) {
5407 +               path_get(&h_nd.path); /* hold the path across the hash lookup */
5408 +               h_dentry = vfsub_lookup_hash(&h_nd);
5409 +               path_put(&h_nd.path);
5410 +       }
5411 +
5412 +       return h_dentry;
5413 +}
5414 +/* void* adapter: unpack struct au_lkup_one_args and call au_lkup_one() */
5415 +static void au_call_lkup_one(void *args)
5416 +{
5417 +       struct au_lkup_one_args *a = args;
5418 +       *a->errp = au_lkup_one(a->name, a->h_parent, a->br, a->nd);
5419 +}
5420 +
5421 +#define AuLkup_ALLOW_NEG       1 /* flag bit tested in au_do_lookup() */
5422 +#define au_ftest_lkup(flags, name)     ((flags) & AuLkup_##name)
5423 +#define au_fset_lkup(flags, name)      { (flags) |= AuLkup_##name; }
5424 +#define au_fclr_lkup(flags, name)      { (flags) &= ~AuLkup_##name; }
5425 +
5426 +struct au_do_lookup_args {
5427 +       unsigned int            flags; /* AuLkup_* bits */
5428 +       mode_t                  type; /* compared with i_mode & S_IFMT; 0 skips it */
5429 +       struct nameidata        *nd; /* passed through to au_lkup_one() */
5430 +};
5431 +
5432 +/*
5433 + * returns positive/negative dentry, NULL or an error.
5434 + * NULL means whiteout-ed or not-found.
5435 + */
5436 +static struct dentry*
5437 +au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
5438 +            aufs_bindex_t bindex, struct qstr *wh_name,
5439 +            struct au_do_lookup_args *args)
5440 +{
5441 +       struct dentry *h_dentry;
5442 +       struct inode *h_inode, *inode;
5443 +       struct qstr *name;
5444 +       struct au_branch *br;
5445 +       int wh_found, opq;
5446 +       unsigned char wh_able;
5447 +       const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
5448 +
5449 +       name = &dentry->d_name;
5450 +       wh_found = 0;
5451 +       br = au_sbr(dentry->d_sb, bindex);
5452 +       wh_able = !!au_br_whable(br->br_perm);
5453 +       if (wh_able) /* the whiteout test is skipped on other branches */
5454 +               wh_found = au_wh_test(h_parent, wh_name, br, /*try_sio*/0);
5455 +       h_dentry = ERR_PTR(wh_found); /* only returned when wh_found < 0 */
5456 +       if (!wh_found)
5457 +               goto real_lookup;
5458 +       if (unlikely(wh_found < 0))
5459 +               goto out;
5460 +
5461 +       /* We found a whiteout */
5462 +       /* au_set_dbend(dentry, bindex); */
5463 +       au_set_dbwh(dentry, bindex);
5464 +       if (!allow_neg)
5465 +               return NULL; /* success */
5466 +
5467 + real_lookup:
5468 +       h_dentry = au_lkup_one(name, h_parent, br, args->nd);
5469 +       if (IS_ERR(h_dentry))
5470 +               goto out;
5471 +
5472 +       h_inode = h_dentry->d_inode;
5473 +       if (!h_inode) {
5474 +               if (!allow_neg)
5475 +                       goto out_neg; /* negative result not wanted */
5476 +       } else if (wh_found
5477 +                  || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
5478 +               goto out_neg; /* whiteout-ed, or type differs from args->type */
5479 +
5480 +       if (au_dbend(dentry) <= bindex)
5481 +               au_set_dbend(dentry, bindex);
5482 +       if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
5483 +               au_set_dbstart(dentry, bindex);
5484 +       au_set_h_dptr(dentry, bindex, h_dentry);
5485 +
5486 +       inode = dentry->d_inode;
5487 +       if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able
5488 +           || (inode && !S_ISDIR(inode->i_mode)))
5489 +               goto out; /* success */
5490 +       /* directory on a wh-able branch: run au_diropq_test() under i_mutex */
5491 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
5492 +       opq = au_diropq_test(h_dentry, br);
5493 +       mutex_unlock(&h_inode->i_mutex);
5494 +       if (opq > 0)
5495 +               au_set_dbdiropq(dentry, bindex);
5496 +       else if (unlikely(opq < 0)) {
5497 +               au_set_h_dptr(dentry, bindex, NULL); /* undo the set above */
5498 +               h_dentry = ERR_PTR(opq);
5499 +       }
5500 +       goto out;
5501 +
5502 + out_neg:
5503 +       dput(h_dentry); /* drop the looked-up dentry and report NULL */
5504 +       h_dentry = NULL;
5505 + out:
5506 +       return h_dentry;
5507 +}
5508 +
5509 +static int au_test_shwh(struct super_block *sb, const struct qstr *name)
5510 +{
5511 +       if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
5512 +                    && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
5513 +               return -EPERM;
5514 +       return 0;
5515 +}
5516 +
5517 +/*
5518 + * returns the number of lower positive dentries,
5519 + * otherwise an error.
5520 + * can be called at unlinking with @type being zero.
5521 + */
5522 +int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
5523 +                  struct nameidata *nd)
5524 +{
5525 +       int npositive, err;
5526 +       aufs_bindex_t bindex, btail, bdiropq;
5527 +       unsigned char isdir;
5528 +       struct qstr whname;
5529 +       struct au_do_lookup_args args = {
5530 +               .flags  = 0,
5531 +               .type   = type,
5532 +               .nd     = nd
5533 +       };
5534 +       const struct qstr *name = &dentry->d_name;
5535 +       struct dentry *parent;
5536 +       struct inode *inode;
5537 +
5538 +       parent = dget_parent(dentry);
5539 +       err = au_test_shwh(dentry->d_sb, name);
5540 +       if (unlikely(err))
5541 +               goto out;
5542 +
5543 +       err = au_wh_name_alloc(&whname, name);
5544 +       if (unlikely(err))
5545 +               goto out;
5546 +
5547 +       inode = dentry->d_inode;
5548 +       isdir = !!(inode && S_ISDIR(inode->i_mode));
5549 +       if (!type)
5550 +               au_fset_lkup(args.flags, ALLOW_NEG);
5551 +
5552 +       npositive = 0;
5553 +       btail = au_dbtaildir(parent);
5554 +       for (bindex = bstart; bindex <= btail; bindex++) {
5555 +               struct dentry *h_parent, *h_dentry;
5556 +               struct inode *h_inode, *h_dir;
5557 +
5558 +               h_dentry = au_h_dptr(dentry, bindex);
5559 +               if (h_dentry) {
5560 +                       if (h_dentry->d_inode)
5561 +                               npositive++;
5562 +                       if (type != S_IFDIR)
5563 +                               break;
5564 +                       continue;
5565 +               }
5566 +               h_parent = au_h_dptr(parent, bindex);
5567 +               if (!h_parent)
5568 +                       continue;
5569 +               h_dir = h_parent->d_inode;
5570 +               if (!h_dir || !S_ISDIR(h_dir->i_mode))
5571 +                       continue;
5572 +
5573 +               mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
5574 +               h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
5575 +                                       &args);
5576 +               mutex_unlock(&h_dir->i_mutex);
5577 +               err = PTR_ERR(h_dentry);
5578 +               if (IS_ERR(h_dentry))
5579 +                       goto out_wh;
5580 +               au_fclr_lkup(args.flags, ALLOW_NEG);
5581 +
5582 +               if (au_dbwh(dentry) >= 0)
5583 +                       break;
5584 +               if (!h_dentry)
5585 +                       continue;
5586 +               h_inode = h_dentry->d_inode;
5587 +               if (!h_inode)
5588 +                       continue;
5589 +               npositive++;
5590 +               if (!args.type)
5591 +                       args.type = h_inode->i_mode & S_IFMT;
5592 +               if (args.type != S_IFDIR)
5593 +                       break;
5594 +               else if (isdir) {
5595 +                       /* the type of lower may be different */
5596 +                       bdiropq = au_dbdiropq(dentry);
5597 +                       if (bdiropq >= 0 && bdiropq <= bindex)
5598 +                               break;
5599 +               }
5600 +       }
5601 +
5602 +       if (npositive) {
5603 +               AuLabel(positive);
5604 +               au_update_dbstart(dentry);
5605 +       }
5606 +       err = npositive;
5607 +       if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
5608 +                    && au_dbstart(dentry) < 0))
5609 +               /* both a real entry and a whiteout were found */
5610 +               err = -EIO;
5611 +
5612 + out_wh:
5613 +       kfree(whname.name);
5614 + out:
5615 +       dput(parent);
5616 +       return err;
5617 +}
5618 +
5619 +struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
5620 +                              struct au_branch *br)
5621 +{
5622 +       struct dentry *dentry;
5623 +       int wkq_err;
5624 +
5625 +       if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
5626 +               dentry = au_lkup_one(name, parent, br, /*nd*/NULL);
5627 +       else {
5628 +               struct au_lkup_one_args args = {
5629 +                       .errp           = &dentry,
5630 +                       .name           = name,
5631 +                       .h_parent       = parent,
5632 +                       .br             = br,
5633 +                       .nd             = NULL
5634 +               };
5635 +
5636 +               wkq_err = au_wkq_wait(au_call_lkup_one, &args);
5637 +               if (unlikely(wkq_err))
5638 +                       dentry = ERR_PTR(wkq_err);
5639 +       }
5640 +
5641 +       return dentry;
5642 +}
5643 +
5644 +/*
5645 + * look up @dentry on branch @bindex, where it is expected to be negative.
5646 + */
5647 +int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex)
5648 +{
5649 +       int err;
5650 +       struct dentry *parent, *h_parent, *h_dentry;
5651 +       struct qstr *name;
5652 +
5653 +       name = &dentry->d_name;
5654 +       parent = dget_parent(dentry);
5655 +       h_parent = au_h_dptr(parent, bindex);
5656 +       h_dentry = au_sio_lkup_one(name, h_parent,
5657 +                                  au_sbr(dentry->d_sb, bindex));
5658 +       err = PTR_ERR(h_dentry);
5659 +       if (IS_ERR(h_dentry))
5660 +               goto out;
5661 +       if (unlikely(h_dentry->d_inode)) {
5662 +               err = -EIO;
5663 +               AuIOErr("b%d %.*s should be negative.\n",
5664 +                       bindex, AuDLNPair(h_dentry));
5665 +               dput(h_dentry);
5666 +               goto out;
5667 +       }
5668 +
5669 +       if (bindex < au_dbstart(dentry))
5670 +               au_set_dbstart(dentry, bindex);
5671 +       if (au_dbend(dentry) < bindex)
5672 +               au_set_dbend(dentry, bindex);
5673 +       au_set_h_dptr(dentry, bindex, h_dentry);
5674 +       err = 0;
5675 +
5676 + out:
5677 +       dput(parent);
5678 +       return err;
5679 +}
5680 +
5681 +/* ---------------------------------------------------------------------- */
5682 +
5683 +/* subset of struct inode */
5684 +struct au_iattr {
5685 +       unsigned long           i_ino;
5686 +       /* unsigned int         i_nlink; */
5687 +       uid_t                   i_uid;
5688 +       gid_t                   i_gid;
5689 +       u64                     i_version;
5690 +/*
5691 +       loff_t                  i_size;
5692 +       blkcnt_t                i_blocks;
5693 +*/
5694 +       umode_t                 i_mode;
5695 +};
5696 +
5697 +static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
5698 +{
5699 +       ia->i_ino = h_inode->i_ino;
5700 +       /* ia->i_nlink = h_inode->i_nlink; */
5701 +       ia->i_uid = h_inode->i_uid;
5702 +       ia->i_gid = h_inode->i_gid;
5703 +       ia->i_version = h_inode->i_version;
5704 +/*
5705 +       ia->i_size = h_inode->i_size;
5706 +       ia->i_blocks = h_inode->i_blocks;
5707 +*/
5708 +       ia->i_mode = (h_inode->i_mode & S_IFMT);
5709 +}
5710 +
5711 +static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
5712 +{
5713 +       return ia->i_ino != h_inode->i_ino
5714 +               /* || ia->i_nlink != h_inode->i_nlink */
5715 +               || ia->i_uid != h_inode->i_uid
5716 +               || ia->i_gid != h_inode->i_gid
5717 +               || ia->i_version != h_inode->i_version
5718 +/*
5719 +               || ia->i_size != h_inode->i_size
5720 +               || ia->i_blocks != h_inode->i_blocks
5721 +*/
5722 +               || ia->i_mode != (h_inode->i_mode & S_IFMT);
5723 +}
5724 +
5725 +static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
5726 +                             struct au_branch *br)
5727 +{
5728 +       int err;
5729 +       struct au_iattr ia;
5730 +       struct inode *h_inode;
5731 +       struct dentry *h_d;
5732 +       struct super_block *h_sb;
5733 +
5734 +       err = 0;
5735 +       memset(&ia, -1, sizeof(ia));
5736 +       h_sb = h_dentry->d_sb;
5737 +       h_inode = h_dentry->d_inode;
5738 +       if (h_inode)
5739 +               au_iattr_save(&ia, h_inode);
5740 +       else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
5741 +               /* nfs d_revalidate may return 0 for negative dentry */
5742 +               /* fuse d_revalidate always returns 0 for negative dentry */
5743 +               goto out;
5744 +
5745 +       /* main purpose is namei.c:cached_lookup() and d_revalidate */
5746 +       h_d = au_lkup_one(&h_dentry->d_name, h_parent, br, /*nd*/NULL);
5747 +       err = PTR_ERR(h_d);
5748 +       if (IS_ERR(h_d))
5749 +               goto out;
5750 +
5751 +       err = 0;
5752 +       if (unlikely(h_d != h_dentry
5753 +                    || h_d->d_inode != h_inode
5754 +                    || (h_inode && au_iattr_test(&ia, h_inode))))
5755 +               err = au_busy_or_stale();
5756 +       dput(h_d);
5757 +
5758 + out:
5759 +       AuTraceErr(err);
5760 +       return err;
5761 +}
5762 +
5763 +int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
5764 +               struct dentry *h_parent, struct au_branch *br)
5765 +{
5766 +       int err;
5767 +
5768 +       err = 0;
5769 +       if (udba == AuOpt_UDBA_REVAL) {
5770 +               IMustLock(h_dir);
5771 +               err = (h_dentry->d_parent->d_inode != h_dir);
5772 +       } else if (udba == AuOpt_UDBA_HINOTIFY)
5773 +               err = au_h_verify_dentry(h_dentry, h_parent, br);
5774 +
5775 +       return err;
5776 +}
5777 +
5778 +/* ---------------------------------------------------------------------- */
5779 +
5780 +static void au_do_refresh_hdentry(struct au_hdentry *p, struct au_dinfo *dinfo,
5781 +                                 struct dentry *parent)
5782 +{
5783 +       struct dentry *h_d, *h_dp;
5784 +       struct au_hdentry tmp, *q;
5785 +       struct super_block *sb;
5786 +       aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
5787 +
5788 +       AuRwMustWriteLock(&dinfo->di_rwsem);
5789 +
5790 +       bend = dinfo->di_bend;
5791 +       bwh = dinfo->di_bwh;
5792 +       bdiropq = dinfo->di_bdiropq;
5793 +       for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
5794 +               h_d = p->hd_dentry;
5795 +               if (!h_d)
5796 +                       continue;
5797 +
5798 +               h_dp = dget_parent(h_d);
5799 +               if (h_dp == au_h_dptr(parent, bindex)) {
5800 +                       dput(h_dp);
5801 +                       continue;
5802 +               }
5803 +
5804 +               new_bindex = au_find_dbindex(parent, h_dp);
5805 +               dput(h_dp);
5806 +               if (dinfo->di_bwh == bindex)
5807 +                       bwh = new_bindex;
5808 +               if (dinfo->di_bdiropq == bindex)
5809 +                       bdiropq = new_bindex;
5810 +               if (new_bindex < 0) {
5811 +                       au_hdput(p);
5812 +                       p->hd_dentry = NULL;
5813 +                       continue;
5814 +               }
5815 +
5816 +               /* swap two lower dentries, and loop again */
5817 +               q = dinfo->di_hdentry + new_bindex;
5818 +               tmp = *q;
5819 +               *q = *p;
5820 +               *p = tmp;
5821 +               if (tmp.hd_dentry) {
5822 +                       bindex--;
5823 +                       p--;
5824 +               }
5825 +       }
5826 +
5827 +       sb = parent->d_sb;
5828 +       dinfo->di_bwh = -1;
5829 +       if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
5830 +               dinfo->di_bwh = bwh;
5831 +
5832 +       dinfo->di_bdiropq = -1;
5833 +       if (bdiropq >= 0
5834 +           && bdiropq <= au_sbend(sb)
5835 +           && au_sbr_whable(sb, bdiropq))
5836 +               dinfo->di_bdiropq = bdiropq;
5837 +
5838 +       bend = au_dbend(parent);
5839 +       p = dinfo->di_hdentry;
5840 +       for (bindex = 0; bindex <= bend; bindex++, p++)
5841 +               if (p->hd_dentry) {
5842 +                       dinfo->di_bstart = bindex;
5843 +                       break;
5844 +               }
5845 +
5846 +       p = dinfo->di_hdentry + bend;
5847 +       for (bindex = bend; bindex >= 0; bindex--, p--)
5848 +               if (p->hd_dentry) {
5849 +                       dinfo->di_bend = bindex;
5850 +                       break;
5851 +               }
5852 +}
5853 +
5854 +/*
5855 + * returns the number of found lower positive dentries,
5856 + * otherwise an error.
5857 + */
5858 +int au_refresh_hdentry(struct dentry *dentry, mode_t type)
5859 +{
5860 +       int npositive, err;
5861 +       unsigned int sigen;
5862 +       aufs_bindex_t bstart;
5863 +       struct au_dinfo *dinfo;
5864 +       struct super_block *sb;
5865 +       struct dentry *parent;
5866 +
5867 +       DiMustWriteLock(dentry);
5868 +
5869 +       sb = dentry->d_sb;
5870 +       AuDebugOn(IS_ROOT(dentry));
5871 +       sigen = au_sigen(sb);
5872 +       parent = dget_parent(dentry);
5873 +       AuDebugOn(au_digen(parent) != sigen
5874 +                 || au_iigen(parent->d_inode) != sigen);
5875 +
5876 +       dinfo = au_di(dentry);
5877 +       err = au_di_realloc(dinfo, au_sbend(sb) + 1);
5878 +       npositive = err;
5879 +       if (unlikely(err))
5880 +               goto out;
5881 +       au_do_refresh_hdentry(dinfo->di_hdentry + dinfo->di_bstart, dinfo,
5882 +                             parent);
5883 +
5884 +       npositive = 0;
5885 +       bstart = au_dbstart(parent);
5886 +       if (type != S_IFDIR && dinfo->di_bstart == bstart)
5887 +               goto out_dgen; /* success */
5888 +
5889 +       npositive = au_lkup_dentry(dentry, bstart, type, /*nd*/NULL);
5890 +       if (npositive < 0)
5891 +               goto out;
5892 +       if (dinfo->di_bwh >= 0 && dinfo->di_bwh <= dinfo->di_bstart)
5893 +               d_drop(dentry);
5894 +
5895 + out_dgen:
5896 +       au_update_digen(dentry);
5897 + out:
5898 +       dput(parent);
5899 +       AuTraceErr(npositive);
5900 +       return npositive;
5901 +}
5902 +
5903 +static noinline_for_stack
5904 +int au_do_h_d_reval(struct dentry *h_dentry, struct nameidata *nd,
5905 +                   struct dentry *dentry, aufs_bindex_t bindex)
5906 +{
5907 +       int err, valid;
5908 +       int (*reval)(struct dentry *, struct nameidata *);
5909 +
5910 +       err = 0;
5911 +       reval = NULL;
5912 +       if (h_dentry->d_op)
5913 +               reval = h_dentry->d_op->d_revalidate;
5914 +       if (!reval)
5915 +               goto out;
5916 +
5917 +       AuDbg("b%d\n", bindex);
5918 +       if (au_test_fs_null_nd(h_dentry->d_sb))
5919 +               /* it may return tri-state */
5920 +               valid = reval(h_dentry, NULL);
5921 +       else {
5922 +               struct nameidata h_nd;
5923 +               int locked;
5924 +               struct dentry *parent;
5925 +
5926 +               au_h_nd(&h_nd, nd);
5927 +               parent = nd->path.dentry;
5928 +               locked = (nd && nd->path.dentry != dentry);
5929 +               if (locked)
5930 +                       di_read_lock_parent(parent, AuLock_IR);
5931 +               BUG_ON(bindex > au_dbend(parent));
5932 +               h_nd.path.dentry = au_h_dptr(parent, bindex);
5933 +               BUG_ON(!h_nd.path.dentry);
5934 +               h_nd.path.mnt = au_sbr(parent->d_sb, bindex)->br_mnt;
5935 +               path_get(&h_nd.path);
5936 +               valid = reval(h_dentry, &h_nd);
5937 +               path_put(&h_nd.path);
5938 +               if (locked)
5939 +                       di_read_unlock(parent, AuLock_IR);
5940 +       }
5941 +
5942 +       if (unlikely(valid < 0))
5943 +               err = valid;
5944 +       else if (!valid)
5945 +               err = -EINVAL;
5946 +
5947 + out:
5948 +       AuTraceErr(err);
5949 +       return err;
5950 +}
5951 +
5952 +/* todo: remove this */
5953 +static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
5954 +                         struct nameidata *nd, int do_udba)
5955 +{
5956 +       int err;
5957 +       umode_t mode, h_mode;
5958 +       aufs_bindex_t bindex, btail, bstart, ibs, ibe;
5959 +       unsigned char plus, unhashed, is_root, h_plus;
5960 +       struct inode *first, *h_inode, *h_cached_inode;
5961 +       struct dentry *h_dentry;
5962 +       struct qstr *name, *h_name;
5963 +
5964 +       err = 0;
5965 +       plus = 0;
5966 +       mode = 0;
5967 +       first = NULL;
5968 +       ibs = -1;
5969 +       ibe = -1;
5970 +       unhashed = !!d_unhashed(dentry);
5971 +       is_root = !!IS_ROOT(dentry);
5972 +       name = &dentry->d_name;
5973 +
5974 +       /*
5975 +        * Theoretically, REVAL test should be unnecessary in case of INOTIFY.
5976 +        * But inotify doesn't fire some necessary events,
5977 +        *      IN_ATTRIB for atime/nlink/pageio
5978 +        *      IN_DELETE for NFS dentry
5979 +        * Let's do REVAL test too.
5980 +        */
5981 +       if (do_udba && inode) {
5982 +               mode = (inode->i_mode & S_IFMT);
5983 +               plus = (inode->i_nlink > 0);
5984 +               first = au_h_iptr(inode, au_ibstart(inode));
5985 +               ibs = au_ibstart(inode);
5986 +               ibe = au_ibend(inode);
5987 +       }
5988 +
5989 +       bstart = au_dbstart(dentry);
5990 +       btail = bstart;
5991 +       if (inode && S_ISDIR(inode->i_mode))
5992 +               btail = au_dbtaildir(dentry);
5993 +       for (bindex = bstart; bindex <= btail; bindex++) {
5994 +               h_dentry = au_h_dptr(dentry, bindex);
5995 +               if (!h_dentry)
5996 +                       continue;
5997 +
5998 +               AuDbg("b%d, %.*s\n", bindex, AuDLNPair(h_dentry));
5999 +               h_name = &h_dentry->d_name;
6000 +               if (unlikely(do_udba
6001 +                            && !is_root
6002 +                            && (unhashed != !!d_unhashed(h_dentry)
6003 +                                || name->len != h_name->len
6004 +                                || memcmp(name->name, h_name->name, name->len))
6005 +                           )) {
6006 +                       AuDbg("unhash 0x%x 0x%x, %.*s %.*s\n",
6007 +                                 unhashed, d_unhashed(h_dentry),
6008 +                                 AuDLNPair(dentry), AuDLNPair(h_dentry));
6009 +                       goto err;
6010 +               }
6011 +
6012 +               err = au_do_h_d_reval(h_dentry, nd, dentry, bindex);
6013 +               if (unlikely(err))
6014 +                       /* do not goto err, to keep the errno */
6015 +                       break;
6016 +
6017 +               /* todo: plink too? */
6018 +               if (!do_udba)
6019 +                       continue;
6020 +
6021 +               /* UDBA tests */
6022 +               h_inode = h_dentry->d_inode;
6023 +               if (unlikely(!!inode != !!h_inode))
6024 +                       goto err;
6025 +
6026 +               h_plus = plus;
6027 +               h_mode = mode;
6028 +               h_cached_inode = h_inode;
6029 +               if (h_inode) {
6030 +                       h_mode = (h_inode->i_mode & S_IFMT);
6031 +                       h_plus = (h_inode->i_nlink > 0);
6032 +               }
6033 +               if (inode && ibs <= bindex && bindex <= ibe)
6034 +                       h_cached_inode = au_h_iptr(inode, bindex);
6035 +
6036 +               if (unlikely(plus != h_plus
6037 +                            || mode != h_mode
6038 +                            || h_cached_inode != h_inode))
6039 +                       goto err;
6040 +               continue;
6041 +
6042 +       err:
6043 +               err = -EINVAL;
6044 +               break;
6045 +       }
6046 +
6047 +       return err;
6048 +}
6049 +
6050 +static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
6051 +{
6052 +       int err;
6053 +       struct dentry *parent;
6054 +       struct inode *inode;
6055 +
6056 +       inode = dentry->d_inode;
6057 +       if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
6058 +               return 0;
6059 +
6060 +       parent = dget_parent(dentry);
6061 +       di_read_lock_parent(parent, AuLock_IR);
6062 +       AuDebugOn(au_digen(parent) != sigen
6063 +                 || au_iigen(parent->d_inode) != sigen);
6064 +       au_dbg_verify_gen(parent, sigen);
6065 +
6066 +       /* returns the number of positive dentries */
6067 +       err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
6068 +       if (err >= 0)
6069 +               err = au_refresh_hinode(inode, dentry);
6070 +
6071 +       di_read_unlock(parent, AuLock_IR);
6072 +       dput(parent);
6073 +       return err;
6074 +}
6075 +
6076 +int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
6077 +{
6078 +       int err;
6079 +       struct dentry *d, *parent;
6080 +       struct inode *inode;
6081 +
6082 +       if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS))
6083 +               return simple_reval_dpath(dentry, sigen);
6084 +
6085 +       /* slow loop, keep it simple and stupid. cf: au_cpup_dirs() */
6086 +       /* refresh the topmost stale ancestor first, then work downwards */
6087 +       err = 0;
6088 +       parent = NULL;
6089 +       while (au_digen(dentry) != sigen
6090 +              || au_iigen(dentry->d_inode) != sigen) {
6091 +               d = dentry;
6092 +               while (1) {
6093 +                       dput(parent); /* release the previous one, if any */
6094 +                       parent = dget_parent(d);
6095 +                       if (au_digen(parent) == sigen
6096 +                           && au_iigen(parent->d_inode) == sigen)
6097 +                               break;
6098 +                       d = parent;
6099 +               }
6100 +
6101 +               inode = d->d_inode;
6102 +               if (d != dentry)
6103 +                       di_write_lock_child(d);
6104 +
6105 +               /* someone might update our dentry while we were sleeping */
6106 +               if (au_digen(d) != sigen || au_iigen(d->d_inode) != sigen) {
6107 +                       di_read_lock_parent(parent, AuLock_IR);
6108 +                       /* returns the number of positive dentries */
6109 +                       err = au_refresh_hdentry(d, inode->i_mode & S_IFMT);
6110 +                       if (err >= 0)
6111 +                               err = au_refresh_hinode(inode, d);
6112 +                       di_read_unlock(parent, AuLock_IR);
6113 +               }
6114 +
6115 +               if (d != dentry)
6116 +                       di_write_unlock(d);
6117 +               if (unlikely(err))
6118 +                       break;
6119 +       }
6120 +       dput(parent); /* the inner loop drops all but the last reference */
6121 +
6122 +       return err;
6123 +}
6124 +
6125 +/*
6126 + * returns 1 if valid, otherwise 0.
6127 + */
6128 +static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
6129 +{
6130 +       int valid, err;
6131 +       unsigned int sigen;
6132 +       unsigned char do_udba;
6133 +       struct super_block *sb;
6134 +       struct inode *inode;
6135 +
6136 +       err = -EINVAL;
6137 +       sb = dentry->d_sb;
6138 +       inode = dentry->d_inode;
6139 +       aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW);
6140 +       sigen = au_sigen(sb);
6141 +       if (au_digen(dentry) != sigen) {
6142 +               AuDebugOn(IS_ROOT(dentry));
6143 +               if (inode)
6144 +                       err = au_reval_dpath(dentry, sigen);
6145 +               if (unlikely(err))
6146 +                       goto out_dgrade;
6147 +               AuDebugOn(au_digen(dentry) != sigen);
6148 +       }
6149 +       if (inode && au_iigen(inode) != sigen) {
6150 +               AuDebugOn(IS_ROOT(dentry));
6151 +               err = au_refresh_hinode(inode, dentry);
6152 +               if (unlikely(err))
6153 +                       goto out_dgrade;
6154 +               AuDebugOn(au_iigen(inode) != sigen);
6155 +       }
6156 +       di_downgrade_lock(dentry, AuLock_IR);
6157 +
6158 +       AuDebugOn(au_digen(dentry) != sigen);
6159 +       AuDebugOn(inode && au_iigen(inode) != sigen);
6160 +       err = -EINVAL;
6161 +       do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
6162 +       if (do_udba && inode) {
6163 +               aufs_bindex_t bstart = au_ibstart(inode);
6164 +
6165 +               if (bstart >= 0
6166 +                   && au_test_higen(inode, au_h_iptr(inode, bstart)))
6167 +                       goto out;
6168 +       }
6169 +
6170 +       err = h_d_revalidate(dentry, inode, nd, do_udba);
6171 +       if (unlikely(!err && do_udba && au_dbstart(dentry) < 0))
6172 +               /* both a real entry and a whiteout were found */
6173 +               err = -EIO;
6174 +       goto out;
6175 +
6176 + out_dgrade:
6177 +       di_downgrade_lock(dentry, AuLock_IR);
6178 + out:
6179 +       aufs_read_unlock(dentry, AuLock_IR);
6180 +       AuTraceErr(err);
6181 +       valid = !err;
6182 +       if (!valid)
6183 +               AuDbg("%.*s invalid\n", AuDLNPair(dentry));
6184 +       return valid;
6185 +}
6186 +
6187 +static void aufs_d_release(struct dentry *dentry)
6188 +{
6189 +       struct au_dinfo *dinfo;
6190 +       aufs_bindex_t bend, bindex;
6191 +
6192 +       dinfo = dentry->d_fsdata;
6193 +       if (!dinfo)
6194 +               return;
6195 +
6196 +       /* dentry may not be revalidated */
6197 +       bindex = dinfo->di_bstart;
6198 +       if (bindex >= 0) {
6199 +               struct au_hdentry *p;
6200 +
6201 +               bend = dinfo->di_bend;
6202 +               p = dinfo->di_hdentry + bindex;
6203 +               while (bindex++ <= bend) {
6204 +                       if (p->hd_dentry)
6205 +                               au_hdput(p);
6206 +                       p++;
6207 +               }
6208 +       }
6209 +       kfree(dinfo->di_hdentry);
6210 +       AuRwDestroy(&dinfo->di_rwsem);
6211 +       au_cache_free_dinfo(dinfo);
6212 +       au_hin_di_reinit(dentry);
6213 +}
6214 +
6215 +struct dentry_operations aufs_dop = {
6216 +       .d_revalidate   = aufs_d_revalidate,
6217 +       .d_release      = aufs_d_release
6218 +};
6219 diff -uprN -x .git linux-2.6.31/fs/aufs/dentry.h aufs2-2.6.git/fs/aufs/dentry.h
6220 --- linux-2.6.31/fs/aufs/dentry.h       1970-01-01 00:00:00.000000000 +0000
6221 +++ aufs2-2.6.git/fs/aufs/dentry.h      2009-09-21 21:49:23.399892755 +0000
6222 @@ -0,0 +1,231 @@
6223 +/*
6224 + * Copyright (C) 2005-2009 Junjiro R. Okajima
6225 + *
6226 + * This program, aufs is free software; you can redistribute it and/or modify
6227 + * it under the terms of the GNU General Public License as published by
6228 + * the Free Software Foundation; either version 2 of the License, or
6229 + * (at your option) any later version.
6230 + *
6231 + * This program is distributed in the hope that it will be useful,
6232 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6233 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6234 + * GNU General Public License for more details.
6235 + *
6236 + * You should have received a copy of the GNU General Public License
6237 + * along with this program; if not, write to the Free Software
6238 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
6239 + */
6240 +
6241 +/*
6242 + * lookup and dentry operations
6243 + */
6244 +
6245 +#ifndef __AUFS_DENTRY_H__
6246 +#define __AUFS_DENTRY_H__
6247 +
6248 +#ifdef __KERNEL__
6249 +
6250 +#include <linux/dcache.h>
6251 +#include <linux/aufs_type.h>
6252 +#include "rwsem.h"
6253 +
6254 +/* make a single member structure for future use */
6255 +/* todo: remove this structure */
6256 +struct au_hdentry {
6257 +       struct dentry           *hd_dentry;
6258 +};
6259 +
6260 +struct au_dinfo {
6261 +       atomic_t                di_generation;
6262 +
6263 +       struct au_rwsem         di_rwsem;
6264 +       aufs_bindex_t           di_bstart, di_bend, di_bwh, di_bdiropq;
6265 +       struct au_hdentry       *di_hdentry;
6266 +};
6267 +
6268 +/* ---------------------------------------------------------------------- */
6269 +
6270 +/* dentry.c */
6271 +extern struct dentry_operations aufs_dop;
6272 +struct au_branch;
6273 +struct dentry *au_lkup_one(struct qstr *name, struct dentry *h_parent,
6274 +                          struct au_branch *br, struct nameidata *nd);
6275 +struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent,
6276 +                              struct au_branch *br);
6277 +int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
6278 +               struct dentry *h_parent, struct au_branch *br);
6279 +
6280 +int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type,
6281 +                  struct nameidata *nd);
6282 +int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex);
6283 +int au_refresh_hdentry(struct dentry *dentry, mode_t type);
6284 +int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
6285 +
6286 +/* dinfo.c */
6287 +int au_alloc_dinfo(struct dentry *dentry);
6288 +int au_di_realloc(struct au_dinfo *dinfo, int nbr);
6289 +
6290 +void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
6291 +void di_read_unlock(struct dentry *d, int flags);
6292 +void di_downgrade_lock(struct dentry *d, int flags);
6293 +void di_write_lock(struct dentry *d, unsigned int lsc);
6294 +void di_write_unlock(struct dentry *d);
6295 +void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
6296 +void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
6297 +void di_write_unlock2(struct dentry *d1, struct dentry *d2);
6298 +
6299 +struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
6300 +aufs_bindex_t au_dbtail(struct dentry *dentry);
6301 +aufs_bindex_t au_dbtaildir(struct dentry *dentry);
6302 +
6303 +void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
6304 +                  struct dentry *h_dentry);
6305 +void au_update_digen(struct dentry *dentry);
6306 +void au_update_dbrange(struct dentry *dentry, int do_put_zero);
6307 +void au_update_dbstart(struct dentry *dentry);
6308 +void au_update_dbend(struct dentry *dentry);
6309 +int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
6310 +
6311 +/* ---------------------------------------------------------------------- */
6312 +
6313 +static inline struct au_dinfo *au_di(struct dentry *dentry)
6314 +{
6315 +       return dentry->d_fsdata;
6316 +}
6317 +
6318 +/* ---------------------------------------------------------------------- */
6319 +
6320 +/* lock subclass for dinfo */
6321 +enum {
6322 +       AuLsc_DI_CHILD,         /* child first */
6323 +       AuLsc_DI_CHILD2,        /* rename(2), link(2), and cpup at hinotify */
6324 +       AuLsc_DI_CHILD3,        /* copyup dirs */
6325 +       AuLsc_DI_PARENT,
6326 +       AuLsc_DI_PARENT2,
6327 +       AuLsc_DI_PARENT3
6328 +};
6329 +
6330 +/*
6331 + * di_read_lock_child, di_write_lock_child,
6332 + * di_read_lock_child2, di_write_lock_child2,
6333 + * di_read_lock_child3, di_write_lock_child3,
6334 + * di_read_lock_parent, di_write_lock_parent,
6335 + * di_read_lock_parent2, di_write_lock_parent2,
6336 + * di_read_lock_parent3, di_write_lock_parent3,
6337 + */
6338 +#define AuReadLockFunc(name, lsc) \
6339 +static inline void di_read_lock_##name(struct dentry *d, int flags) \
6340 +{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
6341 +
6342 +#define AuWriteLockFunc(name, lsc) \
6343 +static inline void di_write_lock_##name(struct dentry *d) \
6344 +{ di_write_lock(d, AuLsc_DI_##lsc); }
6345 +
6346 +#define AuRWLockFuncs(name, lsc) \
6347 +       AuReadLockFunc(name, lsc) \
6348 +       AuWriteLockFunc(name, lsc)
6349 +
6350 +AuRWLockFuncs(child, CHILD);
6351 +AuRWLockFuncs(child2, CHILD2);
6352 +AuRWLockFuncs(child3, CHILD3);
6353 +AuRWLockFuncs(parent, PARENT);
6354 +AuRWLockFuncs(parent2, PARENT2);
6355 +AuRWLockFuncs(parent3, PARENT3);
6356 +
6357 +#undef AuReadLockFunc
6358 +#undef AuWriteLockFunc
6359 +#undef AuRWLockFuncs
6360 +
6361 +#define DiMustNoWaiters(d)     AuRwMustNoWaiters(&au_di(d)->di_rwsem)
6362 +#define DiMustAnyLock(d)       AuRwMustAnyLock(&au_di(d)->di_rwsem)
6363 +#define DiMustWriteLock(d)     AuRwMustWriteLock(&au_di(d)->di_rwsem)
6364 +
6365 +/* ---------------------------------------------------------------------- */
6366 +
6367 +/* todo: memory barrier? */
6368 +static inline unsigned int au_digen(struct dentry *d)
6369 +{
6370 +       return atomic_read(&au_di(d)->di_generation);
6371 +}
6372 +
6373 +static inline void au_h_dentry_init(struct au_hdentry *hdentry)
6374 +{
6375 +       hdentry->hd_dentry = NULL;
6376 +}
6377 +
6378 +static inline void au_hdput(struct au_hdentry *hd)
6379 +{
6380 +       dput(hd->hd_dentry);
6381 +}
6382 +
6383 +static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
6384 +{
6385 +       DiMustAnyLock(dentry);
6386 +       return au_di(dentry)->di_bstart;
6387 +}
6388 +
6389 +static inline aufs_bindex_t au_dbend(struct dentry *dentry)
6390 +{
6391 +       DiMustAnyLock(dentry);
6392 +       return au_di(dentry)->di_bend;
6393 +}
6394 +
6395 +static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
6396 +{
6397 +       DiMustAnyLock(dentry);
6398 +       return au_di(dentry)->di_bwh;
6399 +}
6400 +
6401 +static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
6402 +{
6403 +       DiMustAnyLock(dentry);
6404 +       return au_di(dentry)->di_bdiropq;
6405 +}
6406 +
6407 +/* todo: hard/soft set? */
6408 +static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
6409 +{
6410 +       DiMustWriteLock(dentry);
6411 +       au_di(dentry)->di_bstart = bindex;
6412 +}
6413 +
6414 +static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
6415 +{
6416 +       DiMustWriteLock(dentry);
6417 +       au_di(dentry)->di_bend = bindex;
6418 +}
6419 +
6420 +static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
6421 +{
6422 +       DiMustWriteLock(dentry);
6423 +       /* dbwh can be outside of bstart - bend range */
6424 +       au_di(dentry)->di_bwh = bindex;
6425 +}
6426 +
6427 +static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
6428 +{
6429 +       DiMustWriteLock(dentry);
6430 +       au_di(dentry)->di_bdiropq = bindex;
6431 +}
6432 +
6433 +/* ---------------------------------------------------------------------- */
6434 +
6435 +#ifdef CONFIG_AUFS_HINOTIFY
6436 +static inline void au_digen_dec(struct dentry *d)
6437 +{
6438 +       atomic_dec_return(&au_di(d)->di_generation);
6439 +}
6440 +
6441 +static inline void au_hin_di_reinit(struct dentry *dentry)
6442 +{
6443 +       dentry->d_fsdata = NULL;
6444 +}
6445 +#else
6446 +static inline void au_hin_di_reinit(struct dentry *dentry __maybe_unused)
6447 +{
6448 +       /* empty */
6449 +}
6450 +#endif /* CONFIG_AUFS_HINOTIFY */
6451 +
6452 +#endif /* __KERNEL__ */
6453 +#endif /* __AUFS_DENTRY_H__ */
6454 diff -uprN -x .git linux-2.6.31/fs/aufs/dinfo.c aufs2-2.6.git/fs/aufs/dinfo.c
6455 --- linux-2.6.31/fs/aufs/dinfo.c        1970-01-01 00:00:00.000000000 +0000
6456 +++ aufs2-2.6.git/fs/aufs/dinfo.c       2009-09-21 21:49:23.399892755 +0000
6457 @@ -0,0 +1,367 @@
6458 +/*
6459 + * Copyright (C) 2005-2009 Junjiro R. Okajima
6460 + *
6461 + * This program, aufs is free software; you can redistribute it and/or modify
6462 + * it under the terms of the GNU General Public License as published by
6463 + * the Free Software Foundation; either version 2 of the License, or
6464 + * (at your option) any later version.
6465 + *
6466 + * This program is distributed in the hope that it will be useful,
6467 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6468 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6469 + * GNU General Public License for more details.
6470 + *
6471 + * You should have received a copy of the GNU General Public License
6472 + * along with this program; if not, write to the Free Software
6473 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
6474 + */
6475 +
6476 +/*
6477 + * dentry private data
6478 + */
6479 +
6480 +#include "aufs.h"
6481 +
6482 +int au_alloc_dinfo(struct dentry *dentry)
6483 +{
6484 +       struct au_dinfo *dinfo;
6485 +       struct super_block *sb;
6486 +       int nbr;
6487 +
6488 +       dinfo = au_cache_alloc_dinfo();
6489 +       if (unlikely(!dinfo))
6490 +               goto out;
6491 +
6492 +       sb = dentry->d_sb;
6493 +       nbr = au_sbend(sb) + 1;
6494 +       if (nbr <= 0)
6495 +               nbr = 1;
6496 +       dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
6497 +       if (unlikely(!dinfo->di_hdentry))
6498 +               goto out_dinfo;
6499 +
6500 +       atomic_set(&dinfo->di_generation, au_sigen(sb));
6501 +       /* smp_mb(); */ /* atomic_set */
6502 +       au_rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD);
6503 +       dinfo->di_bstart = -1;
6504 +       dinfo->di_bend = -1;
6505 +       dinfo->di_bwh = -1;
6506 +       dinfo->di_bdiropq = -1;
6507 +
6508 +       dentry->d_fsdata = dinfo;
6509 +       dentry->d_op = &aufs_dop;
6510 +       return 0; /* success */
6511 +
6512 + out_dinfo:
6513 +       au_cache_free_dinfo(dinfo);
6514 + out:
6515 +       return -ENOMEM;
6516 +}
6517 +
6518 +int au_di_realloc(struct au_dinfo *dinfo, int nbr)
6519 +{
6520 +       int err, sz;
6521 +       struct au_hdentry *hdp;
6522 +
6523 +       AuRwMustWriteLock(&dinfo->di_rwsem);
6524 +
6525 +       err = -ENOMEM;
6526 +       sz = sizeof(*hdp) * (dinfo->di_bend + 1);
6527 +       if (!sz)
6528 +               sz = sizeof(*hdp);
6529 +       hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
6530 +       if (hdp) {
6531 +               dinfo->di_hdentry = hdp;
6532 +               err = 0;
6533 +       }
6534 +
6535 +       return err;
6536 +}
6537 +
6538 +/* ---------------------------------------------------------------------- */
6539 +
6540 +static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
6541 +{
6542 +       switch (lsc) {
6543 +       case AuLsc_DI_CHILD:
6544 +               ii_write_lock_child(inode);
6545 +               break;
6546 +       case AuLsc_DI_CHILD2:
6547 +               ii_write_lock_child2(inode);
6548 +               break;
6549 +       case AuLsc_DI_CHILD3:
6550 +               ii_write_lock_child3(inode);
6551 +               break;
6552 +       case AuLsc_DI_PARENT:
6553 +               ii_write_lock_parent(inode);
6554 +               break;
6555 +       case AuLsc_DI_PARENT2:
6556 +               ii_write_lock_parent2(inode);
6557 +               break;
6558 +       case AuLsc_DI_PARENT3:
6559 +               ii_write_lock_parent3(inode);
6560 +               break;
6561 +       default:
6562 +               BUG();
6563 +       }
6564 +}
6565 +
6566 +static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
6567 +{
6568 +       switch (lsc) {
6569 +       case AuLsc_DI_CHILD:
6570 +               ii_read_lock_child(inode);
6571 +               break;
6572 +       case AuLsc_DI_CHILD2:
6573 +               ii_read_lock_child2(inode);
6574 +               break;
6575 +       case AuLsc_DI_CHILD3:
6576 +               ii_read_lock_child3(inode);
6577 +               break;
6578 +       case AuLsc_DI_PARENT:
6579 +               ii_read_lock_parent(inode);
6580 +               break;
6581 +       case AuLsc_DI_PARENT2:
6582 +               ii_read_lock_parent2(inode);
6583 +               break;
6584 +       case AuLsc_DI_PARENT3:
6585 +               ii_read_lock_parent3(inode);
6586 +               break;
6587 +       default:
6588 +               BUG();
6589 +       }
6590 +}
6591 +
6592 +void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
6593 +{
6594 +       au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
6595 +       if (d->d_inode) {
6596 +               if (au_ftest_lock(flags, IW))
6597 +                       do_ii_write_lock(d->d_inode, lsc);
6598 +               else if (au_ftest_lock(flags, IR))
6599 +                       do_ii_read_lock(d->d_inode, lsc);
6600 +       }
6601 +}
6602 +
6603 +void di_read_unlock(struct dentry *d, int flags)
6604 +{
6605 +       if (d->d_inode) {
6606 +               if (au_ftest_lock(flags, IW))
6607 +                       ii_write_unlock(d->d_inode);
6608 +               else if (au_ftest_lock(flags, IR))
6609 +                       ii_read_unlock(d->d_inode);
6610 +       }
6611 +       au_rw_read_unlock(&au_di(d)->di_rwsem);
6612 +}
6613 +
6614 +void di_downgrade_lock(struct dentry *d, int flags)
6615 +{
6616 +       if (d->d_inode && au_ftest_lock(flags, IR))
6617 +               ii_downgrade_lock(d->d_inode);
6618 +       au_rw_dgrade_lock(&au_di(d)->di_rwsem);
6619 +}
6620 +
6621 +void di_write_lock(struct dentry *d, unsigned int lsc)
6622 +{
6623 +       au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
6624 +       if (d->d_inode)
6625 +               do_ii_write_lock(d->d_inode, lsc);
6626 +}
6627 +
6628 +void di_write_unlock(struct dentry *d)
6629 +{
6630 +       if (d->d_inode)
6631 +               ii_write_unlock(d->d_inode);
6632 +       au_rw_write_unlock(&au_di(d)->di_rwsem);
6633 +}
6634 +
6635 +void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
6636 +{
6637 +       AuDebugOn(d1 == d2
6638 +                 || d1->d_inode == d2->d_inode
6639 +                 || d1->d_sb != d2->d_sb);
6640 +
6641 +       if (isdir && au_test_subdir(d1, d2)) {
6642 +               di_write_lock_child(d1);
6643 +               di_write_lock_child2(d2);
6644 +       } else {
6645 +               /* there should be no races */
6646 +               di_write_lock_child(d2);
6647 +               di_write_lock_child2(d1);
6648 +       }
6649 +}
6650 +
6651 +void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
6652 +{
6653 +       AuDebugOn(d1 == d2
6654 +                 || d1->d_inode == d2->d_inode
6655 +                 || d1->d_sb != d2->d_sb);
6656 +
6657 +       if (isdir && au_test_subdir(d1, d2)) {
6658 +               di_write_lock_parent(d1);
6659 +               di_write_lock_parent2(d2);
6660 +       } else {
6661 +               /* there should be no races */
6662 +               di_write_lock_parent(d2);
6663 +               di_write_lock_parent2(d1);
6664 +       }
6665 +}
6666 +
6667 +void di_write_unlock2(struct dentry *d1, struct dentry *d2)
6668 +{
6669 +       di_write_unlock(d1);
6670 +       if (d1->d_inode == d2->d_inode)
6671 +               au_rw_write_unlock(&au_di(d2)->di_rwsem);
6672 +       else
6673 +               di_write_unlock(d2);
6674 +}
6675 +
6676 +/* ---------------------------------------------------------------------- */
6677 +
6678 +struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
6679 +{
6680 +       struct dentry *d;
6681 +
6682 +       DiMustAnyLock(dentry);
6683 +
6684 +       if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
6685 +               return NULL;
6686 +       AuDebugOn(bindex < 0);
6687 +       d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
6688 +       AuDebugOn(d && (atomic_read(&d->d_count) <= 0));
6689 +       return d;
6690 +}
6691 +
6692 +aufs_bindex_t au_dbtail(struct dentry *dentry)
6693 +{
6694 +       aufs_bindex_t bend, bwh;
6695 +
6696 +       bend = au_dbend(dentry);
6697 +       if (0 <= bend) {
6698 +               bwh = au_dbwh(dentry);
6699 +               if (!bwh)
6700 +                       return bwh;
6701 +               if (0 < bwh && bwh < bend)
6702 +                       return bwh - 1;
6703 +       }
6704 +       return bend;
6705 +}
6706 +
6707 +aufs_bindex_t au_dbtaildir(struct dentry *dentry)
6708 +{
6709 +       aufs_bindex_t bend, bopq;
6710 +
6711 +       bend = au_dbtail(dentry);
6712 +       if (0 <= bend) {
6713 +               bopq = au_dbdiropq(dentry);
6714 +               if (0 <= bopq && bopq < bend)
6715 +                       bend = bopq;
6716 +       }
6717 +       return bend;
6718 +}
6719 +
6720 +/* ---------------------------------------------------------------------- */
6721 +
6722 +void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
6723 +                  struct dentry *h_dentry)
6724 +{
6725 +       struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
6726 +
6727 +       DiMustWriteLock(dentry);
6728 +
6729 +       if (hd->hd_dentry)
6730 +               au_hdput(hd);
6731 +       hd->hd_dentry = h_dentry;
6732 +}
6733 +
6734 +void au_update_digen(struct dentry *dentry)
6735 +{
6736 +       atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
6737 +       /* smp_mb(); */ /* atomic_set */
6738 +}
6739 +
6740 +void au_update_dbrange(struct dentry *dentry, int do_put_zero)
6741 +{
6742 +       struct au_dinfo *dinfo;
6743 +       struct dentry *h_d;
6744 +
6745 +       DiMustWriteLock(dentry);
6746 +
6747 +       dinfo = au_di(dentry);
6748 +       if (!dinfo || dinfo->di_bstart < 0)
6749 +               return; /* no dinfo, or an empty range: nothing to fix */
6750 +
6751 +       if (do_put_zero) { /* first release lower dentries without inode */
6752 +               aufs_bindex_t bindex, bend;
6753 +
6754 +               bend = dinfo->di_bend;
6755 +               for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
6756 +                       h_d = dinfo->di_hdentry[0 + bindex].hd_dentry;
6757 +                       if (h_d && !h_d->d_inode)
6758 +                               au_set_h_dptr(dentry, bindex, NULL);
6759 +               }
6760 +       }
6761 +
6762 +       dinfo->di_bstart = -1; /* rescan upward, starting at index 0 */
6763 +       while (++dinfo->di_bstart <= dinfo->di_bend)
6764 +               if (dinfo->di_hdentry[0 + dinfo->di_bstart].hd_dentry)
6765 +                       break; /* first populated slot is the new bstart */
6766 +       if (dinfo->di_bstart > dinfo->di_bend) { /* no slot is populated */
6767 +               dinfo->di_bstart = -1;
6768 +               dinfo->di_bend = -1;
6769 +               return;
6770 +       }
6771 +
6772 +       dinfo->di_bend++; /* undone by the pre-decrement in the scan below */
6773 +       while (0 <= --dinfo->di_bend)
6774 +               if (dinfo->di_hdentry[0 + dinfo->di_bend].hd_dentry)
6775 +                       break; /* last populated slot is the new bend */
6776 +       AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
6777 +}
6778 +
6779 +void au_update_dbstart(struct dentry *dentry)
6780 +{
6781 +       aufs_bindex_t bindex, bend;
6782 +       struct dentry *h_dentry;
6783 +
6784 +       bend = au_dbend(dentry);
6785 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
6786 +               h_dentry = au_h_dptr(dentry, bindex);
6787 +               if (!h_dentry)
6788 +                       continue;
6789 +               if (h_dentry->d_inode) {
6790 +                       au_set_dbstart(dentry, bindex);
6791 +                       return;
6792 +               }
6793 +               au_set_h_dptr(dentry, bindex, NULL);
6794 +       }
6795 +}
6796 +
6797 +void au_update_dbend(struct dentry *dentry)
6798 +{
6799 +       aufs_bindex_t bindex, bstart;
6800 +       struct dentry *h_dentry;
6801 +
6802 +       bstart = au_dbstart(dentry); /* lower bound of the downward scan */
6803 +       for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
6804 +               h_dentry = au_h_dptr(dentry, bindex);
6805 +               if (!h_dentry)
6806 +                       continue; /* nothing stored for this index */
6807 +               if (h_dentry->d_inode) {
6808 +                       au_set_dbend(dentry, bindex); /* last positive one */
6809 +                       return;
6810 +               }
6811 +               au_set_h_dptr(dentry, bindex, NULL); /* drop negative one */
6812 +       }
6813 +}
6814 +
6815 +int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
6816 +{
6817 +       aufs_bindex_t bindex, bend;
6818 +
6819 +       bend = au_dbend(dentry);
6820 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
6821 +               if (au_h_dptr(dentry, bindex) == h_dentry)
6822 +                       return bindex;
6823 +       return -1;
6824 +}
6825 diff -uprN -x .git linux-2.6.31/fs/aufs/dir.c aufs2-2.6.git/fs/aufs/dir.c
6826 --- linux-2.6.31/fs/aufs/dir.c  1970-01-01 00:00:00.000000000 +0000
6827 +++ aufs2-2.6.git/fs/aufs/dir.c 2009-09-21 21:49:23.399892755 +0000
6828 @@ -0,0 +1,593 @@
6829 +/*
6830 + * Copyright (C) 2005-2009 Junjiro R. Okajima
6831 + *
6832 + * This program, aufs is free software; you can redistribute it and/or modify
6833 + * it under the terms of the GNU General Public License as published by
6834 + * the Free Software Foundation; either version 2 of the License, or
6835 + * (at your option) any later version.
6836 + *
6837 + * This program is distributed in the hope that it will be useful,
6838 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6839 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6840 + * GNU General Public License for more details.
6841 + *
6842 + * You should have received a copy of the GNU General Public License
6843 + * along with this program; if not, write to the Free Software
6844 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
6845 + */
6846 +
6847 +/*
6848 + * directory operations
6849 + */
6850 +
6851 +#include <linux/file.h>
6852 +#include <linux/fs_stack.h>
6853 +#include "aufs.h"
6854 +
6855 +void au_add_nlink(struct inode *dir, struct inode *h_dir)
6856 +{
6857 +       AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
6858 +
6859 +       dir->i_nlink += h_dir->i_nlink - 2;
6860 +       if (h_dir->i_nlink < 2)
6861 +               dir->i_nlink += 2;
6862 +}
6863 +
6864 +void au_sub_nlink(struct inode *dir, struct inode *h_dir)
6865 +{
6866 +       AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
6867 +
6868 +       dir->i_nlink -= h_dir->i_nlink - 2;
6869 +       if (h_dir->i_nlink < 2)
6870 +               dir->i_nlink -= 2;
6871 +}
6872 +
6873 +loff_t au_dir_size(struct file *file, struct dentry *dentry)
6874 +{
6875 +       loff_t sz;
6876 +       aufs_bindex_t bindex, bend;
6877 +       struct file *h_file;
6878 +       struct dentry *h_dentry;
6879 +
6880 +       sz = 0; /* running sum of the lower directories' i_size */
6881 +       if (file) { /* a file was given: walk its lower files */
6882 +               AuDebugOn(!file->f_dentry);
6883 +               AuDebugOn(!file->f_dentry->d_inode);
6884 +               AuDebugOn(!S_ISDIR(file->f_dentry->d_inode->i_mode));
6885 +
6886 +               bend = au_fbend(file);
6887 +               for (bindex = au_fbstart(file);
6888 +                    bindex <= bend && sz < KMALLOC_MAX_SIZE;
6889 +                    bindex++) {
6890 +                       h_file = au_h_fptr(file, bindex);
6891 +                       if (h_file
6892 +                           && h_file->f_dentry
6893 +                           && h_file->f_dentry->d_inode)
6894 +                               sz += i_size_read(h_file->f_dentry->d_inode);
6895 +               }
6896 +       } else { /* no file: walk the lower dentries instead */
6897 +               AuDebugOn(!dentry);
6898 +               AuDebugOn(!dentry->d_inode);
6899 +               AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode));
6900 +
6901 +               bend = au_dbtaildir(dentry);
6902 +               for (bindex = au_dbstart(dentry);
6903 +                    bindex <= bend && sz < KMALLOC_MAX_SIZE;
6904 +                    bindex++) {
6905 +                       h_dentry = au_h_dptr(dentry, bindex);
6906 +                       if (h_dentry && h_dentry->d_inode)
6907 +                               sz += i_size_read(h_dentry->d_inode);
6908 +               }
6909 +       }
6910 +       if (0 < sz && sz < KMALLOC_MAX_SIZE) /* pow2 of 0 is undefined */
6911 +               sz = roundup_pow_of_two(sz);
6912 +       if (sz > KMALLOC_MAX_SIZE)
6913 +               sz = KMALLOC_MAX_SIZE; /* upper clamp */
6914 +       else if (sz < NAME_MAX) {
6915 +               BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
6916 +               sz = AUFS_RDBLK_DEF; /* tiny or zero sum: use the default */
6917 +       }
6918 +       return sz;
6919 +}
6920 +
6921 +/* ---------------------------------------------------------------------- */
6922 +
6923 +static int reopen_dir(struct file *file)
6924 +{
6925 +       int err;
6926 +       unsigned int flags;
6927 +       aufs_bindex_t bindex, btail, bstart;
6928 +       struct dentry *dentry, *h_dentry;
6929 +       struct file *h_file;
6930 +
6931 +       /* open all lower dirs */
6932 +       dentry = file->f_dentry;
6933 +       bstart = au_dbstart(dentry);
6934 +       for (bindex = au_fbstart(file); bindex < bstart; bindex++)
6935 +               au_set_h_fptr(file, bindex, NULL);
6936 +       au_set_fbstart(file, bstart);
6937 +
6938 +       btail = au_dbtaildir(dentry);
6939 +       for (bindex = au_fbend(file); btail < bindex; bindex--)
6940 +               au_set_h_fptr(file, bindex, NULL);
6941 +       au_set_fbend(file, btail);
6942 +
6943 +       spin_lock(&file->f_lock);
6944 +       flags = file->f_flags;
6945 +       spin_unlock(&file->f_lock);
6946 +       for (bindex = bstart; bindex <= btail; bindex++) {
6947 +               h_dentry = au_h_dptr(dentry, bindex);
6948 +               if (!h_dentry)
6949 +                       continue;
6950 +               h_file = au_h_fptr(file, bindex);
6951 +               if (h_file)
6952 +                       continue;
6953 +
6954 +               h_file = au_h_open(dentry, bindex, flags, file);
6955 +               err = PTR_ERR(h_file);
6956 +               if (IS_ERR(h_file))
6957 +                       goto out; /* close all? */
6958 +               au_set_h_fptr(file, bindex, h_file);
6959 +       }
6960 +       au_update_figen(file);
6961 +       /* todo: necessary? */
6962 +       /* file->f_ra = h_file->f_ra; */
6963 +       err = 0;
6964 +
6965 + out:
6966 +       return err;
6967 +}
6968 +
6969 +static int do_open_dir(struct file *file, int flags)
6970 +{
6971 +       int err;
6972 +       aufs_bindex_t bindex, btail;
6973 +       struct dentry *dentry, *h_dentry;
6974 +       struct file *h_file;
6975 +
6976 +       FiMustWriteLock(file);
6977 +
6978 +       err = 0;
6979 +       dentry = file->f_dentry;
6980 +       au_set_fvdir_cache(file, NULL);
6981 +       au_fi(file)->fi_maintain_plink = 0;
6982 +       file->f_version = dentry->d_inode->i_version;
6983 +       bindex = au_dbstart(dentry);
6984 +       au_set_fbstart(file, bindex);
6985 +       btail = au_dbtaildir(dentry);
6986 +       au_set_fbend(file, btail);
6987 +       for (; !err && bindex <= btail; bindex++) {
6988 +               h_dentry = au_h_dptr(dentry, bindex);
6989 +               if (!h_dentry)
6990 +                       continue;
6991 +
6992 +               h_file = au_h_open(dentry, bindex, flags, file);
6993 +               if (IS_ERR(h_file)) {
6994 +                       err = PTR_ERR(h_file);
6995 +                       break;
6996 +               }
6997 +               au_set_h_fptr(file, bindex, h_file);
6998 +       }
6999 +       au_update_figen(file);
7000 +       /* todo: necessary? */
7001 +       /* file->f_ra = h_file->f_ra; */
7002 +       if (!err)
7003 +               return 0; /* success */
7004 +
7005 +       /* close all */
7006 +       for (bindex = au_fbstart(file); bindex <= btail; bindex++)
7007 +               au_set_h_fptr(file, bindex, NULL);
7008 +       au_set_fbstart(file, -1);
7009 +       au_set_fbend(file, -1);
7010 +       return err;
7011 +}
7012 +
7013 +static int aufs_open_dir(struct inode *inode __maybe_unused,
7014 +                        struct file *file)
7015 +{
7016 +       return au_do_open(file, do_open_dir);
7017 +}
7018 +
7019 +static int aufs_release_dir(struct inode *inode __maybe_unused,
7020 +                           struct file *file)
7021 +{
7022 +       struct au_vdir *vdir_cache;
7023 +       struct super_block *sb;
7024 +       struct au_sbinfo *sbinfo;
7025 +
7026 +       sb = file->f_dentry->d_sb;
7027 +       si_noflush_read_lock(sb);
7028 +       fi_write_lock(file);
7029 +       vdir_cache = au_fvdir_cache(file);
7030 +       if (vdir_cache)
7031 +               au_vdir_free(vdir_cache);
7032 +       if (au_fi(file)->fi_maintain_plink) {
7033 +               sbinfo = au_sbi(sb);
7034 +               /* clear the flag without write-lock */
7035 +               sbinfo->au_si_status &= ~AuSi_MAINTAIN_PLINK;
7036 +               smp_mb();
7037 +               wake_up_all(&sbinfo->si_plink_wq);
7038 +       }
7039 +       fi_write_unlock(file);
7040 +       au_finfo_fin(file);
7041 +       si_read_unlock(sb);
7042 +       return 0;
7043 +}
7044 +
7045 +/* ---------------------------------------------------------------------- */
7046 +
7047 +static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
7048 +{
7049 +       int err;
7050 +       aufs_bindex_t bend, bindex;
7051 +       struct inode *inode;
7052 +       struct super_block *sb;
7053 +
7054 +       err = 0;
7055 +       sb = dentry->d_sb;
7056 +       inode = dentry->d_inode;
7057 +       IMustLock(inode);
7058 +       bend = au_dbend(dentry);
7059 +       for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
7060 +               struct path h_path;
7061 +               struct inode *h_inode;
7062 +
7063 +               if (au_test_ro(sb, bindex, inode))
7064 +                       continue; /* skipped per au_test_ro() */
7065 +               h_path.dentry = au_h_dptr(dentry, bindex);
7066 +               if (!h_path.dentry)
7067 +                       continue; /* no lower dentry at this index */
7068 +               h_inode = h_path.dentry->d_inode;
7069 +               if (!h_inode)
7070 +                       continue; /* negative lower dentry */
7071 +
7072 +               /* no mnt_want_write() */
7073 +               /* cf. fs/nfsd/vfs.c and fs/nfsd/nfs4recover.c */
7074 +               /* todo: inotify fired? */
7075 +               h_path.mnt = au_sbr_mnt(sb, bindex);
7076 +               mutex_lock(&h_inode->i_mutex);
7077 +               err = filemap_fdatawrite(h_inode->i_mapping); /* start I/O */
7078 +               AuDebugOn(!h_inode->i_fop);
7079 +               if (!err && h_inode->i_fop->fsync)
7080 +                       err = h_inode->i_fop->fsync(NULL, h_path.dentry,
7081 +                                                   datasync);
7082 +               if (!err) /* wait for the writeback started above */
7083 +                       err = filemap_fdatawait(h_inode->i_mapping);
7084 +               if (!err)
7085 +                       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
7086 +               mutex_unlock(&h_inode->i_mutex);
7087 +       }
7088 +
7089 +       return err; /* 0, or the first error; the loop stops on error */
7090 +}
7091 +
7092 +static int au_do_fsync_dir(struct file *file, int datasync)
7093 +{
7094 +       int err;
7095 +       aufs_bindex_t bend, bindex;
7096 +       struct file *h_file;
7097 +       struct super_block *sb;
7098 +       struct inode *inode;
7099 +       struct mutex *h_mtx;
7100 +
7101 +       err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7102 +       if (unlikely(err))
7103 +               goto out;
7104 +
7105 +       sb = file->f_dentry->d_sb;
7106 +       inode = file->f_dentry->d_inode;
7107 +       bend = au_fbend(file);
7108 +       for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
7109 +               h_file = au_h_fptr(file, bindex);
7110 +               if (!h_file || au_test_ro(sb, bindex, inode))
7111 +                       continue;
7112 +
7113 +               err = vfs_fsync(h_file, h_file->f_dentry, datasync);
7114 +               if (!err) {
7115 +                       h_mtx = &h_file->f_dentry->d_inode->i_mutex;
7116 +                       mutex_lock(h_mtx);
7117 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
7118 +                       /*ignore*/
7119 +                       mutex_unlock(h_mtx);
7120 +               }
7121 +       }
7122 +
7123 + out:
7124 +       return err;
7125 +}
7126 +
7127 +/*
7128 + * @file may be NULL
7129 + */
7130 +static int aufs_fsync_dir(struct file *file, struct dentry *dentry,
7131 +                         int datasync)
7132 +{
7133 +       int err;
7134 +       struct super_block *sb;
7135 +
7136 +       IMustLock(dentry->d_inode);
7137 +
7138 +       err = 0;
7139 +       sb = dentry->d_sb;
7140 +       si_noflush_read_lock(sb);
7141 +       if (file) /* presumably locks fi/di via au_reval_and_lock_fdi() */
7142 +               err = au_do_fsync_dir(file, datasync);
7143 +       else { /* no file: take the dentry write lock here */
7144 +               di_write_lock_child(dentry);
7145 +               err = au_do_fsync_dir_no_file(dentry, datasync);
7146 +       }
7147 +       au_cpup_attr_timesizes(dentry->d_inode);
7148 +       di_write_unlock(dentry); /* NOTE(review): reached on error too; */
7149 +       if (file) /* confirm au_do_fsync_dir() leaves locks held then */
7150 +               fi_write_unlock(file);
7151 +
7152 +       si_read_unlock(sb);
7153 +       return err;
7154 +}
7155 +
7156 +/* ---------------------------------------------------------------------- */
7157 +
7158 +static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir)
7159 +{
7160 +       int err;
7161 +       struct dentry *dentry;
7162 +       struct inode *inode;
7163 +       struct super_block *sb;
7164 +
7165 +       dentry = file->f_dentry;
7166 +       inode = dentry->d_inode;
7167 +       IMustLock(inode);
7168 +
7169 +       sb = dentry->d_sb;
7170 +       si_read_lock(sb, AuLock_FLUSH);
7171 +       err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
7172 +       if (unlikely(err))
7173 +               goto out;
7174 +       err = au_vdir_init(file);
7175 +       di_downgrade_lock(dentry, AuLock_IR);
7176 +       if (unlikely(err))
7177 +               goto out_unlock;
7178 +
7179 +       if (!au_test_nfsd(current)) {
7180 +               err = au_vdir_fill_de(file, dirent, filldir);
7181 +               fsstack_copy_attr_atime(inode,
7182 +                                       au_h_iptr(inode, au_ibstart(inode)));
7183 +       } else {
7184 +               /*
7185 +                * nfsd filldir may call lookup_one_len(), vfs_getattr(),
7186 +                * encode_fh() and others.
7187 +                */
7188 +               struct inode *h_inode = au_h_iptr(inode, au_ibstart(inode));
7189 +
7190 +               di_read_unlock(dentry, AuLock_IR);
7191 +               si_read_unlock(sb);
7192 +               lockdep_off();
7193 +               err = au_vdir_fill_de(file, dirent, filldir);
7194 +               lockdep_on();
7195 +               fsstack_copy_attr_atime(inode, h_inode);
7196 +               fi_write_unlock(file);
7197 +
7198 +               AuTraceErr(err);
7199 +               return err;
7200 +       }
7201 +
7202 + out_unlock:
7203 +       di_read_unlock(dentry, AuLock_IR);
7204 +       fi_write_unlock(file);
7205 + out:
7206 +       si_read_unlock(sb);
7207 +       return err;
7208 +}
7209 +
7210 +/* ---------------------------------------------------------------------- */
7211 +/* test_empty_arg.flags bits; fset/fclr expand to { }, avoid before 'else' */
7212 +#define AuTestEmpty_WHONLY     1
7213 +#define AuTestEmpty_CALLED     (1 << 1)
7214 +#define AuTestEmpty_SHWH       (1 << 2)
7215 +#define au_ftest_testempty(flags, name)        ((flags) & AuTestEmpty_##name)
7216 +#define au_fset_testempty(flags, name) { (flags) |= AuTestEmpty_##name; }
7217 +#define au_fclr_testempty(flags, name) { (flags) &= ~AuTestEmpty_##name; }
7218 +/* without CONFIG_AUFS_SHWH the SHWH bit is 0, testing it always yields 0 */
7219 +#ifndef CONFIG_AUFS_SHWH
7220 +#undef AuTestEmpty_SHWH
7221 +#define AuTestEmpty_SHWH       0
7222 +#endif
7223 +
7224 +struct test_empty_arg {
7225 +       struct au_nhash *whlist; /* whiteout names met so far */
7226 +       unsigned int flags; /* AuTestEmpty_* bits */
7227 +       int err; /* set by every test_empty_cb() call */
7228 +       aufs_bindex_t bindex; /* branch index scanned by do_test_empty() */
7229 +};
7230 +/* readdir callback: collect whiteouts, or (WHONLY) reject visible names */
7231 +static int test_empty_cb(void *__arg, const char *__name, int namelen,
7232 +                        loff_t offset __maybe_unused, u64 ino,
7233 +                        unsigned int d_type)
7234 +{
7235 +       struct test_empty_arg *arg = __arg;
7236 +       char *name = (void *)__name;
7237 +
7238 +       arg->err = 0;
7239 +       au_fset_testempty(arg->flags, CALLED); /* do_test_empty() loops on it */
7240 +       /* smp_mb(); */
7241 +       if (name[0] == '.'
7242 +           && (namelen == 1 || (name[1] == '.' && namelen == 2)))
7243 +               goto out; /* success, "." and ".." are ignored */
7244 +
7245 +       if (namelen <= AUFS_WH_PFX_LEN
7246 +           || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { /* not a whiteout */
7247 +               if (au_ftest_testempty(arg->flags, WHONLY)
7248 +                   && !au_nhash_test_known_wh(arg->whlist, name, namelen))
7249 +                       arg->err = -ENOTEMPTY; /* name is not in whlist */
7250 +               goto out;
7251 +       }
7252 +
7253 +       name += AUFS_WH_PFX_LEN; /* drop the whiteout prefix */
7254 +       namelen -= AUFS_WH_PFX_LEN;
7255 +       if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
7256 +               arg->err = au_nhash_append_wh
7257 +                       (arg->whlist, name, namelen, ino, d_type, arg->bindex,
7258 +                        au_ftest_testempty(arg->flags, SHWH));
7259 +
7260 + out:
7261 +       /* smp_mb(); */
7262 +       AuTraceErr(arg->err);
7263 +       return arg->err;
7264 +}
7265 +/* run test_empty_cb() over the dir opened on branch arg->bindex */
7266 +static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
7267 +{
7268 +       int err;
7269 +       struct file *h_file;
7270 +
7271 +       h_file = au_h_open(dentry, arg->bindex,
7272 +                          O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
7273 +                          /*file*/NULL);
7274 +       err = PTR_ERR(h_file);
7275 +       if (IS_ERR(h_file))
7276 +               goto out;
7277 +
7278 +       err = 0;
7279 +       if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
7280 +           && !h_file->f_dentry->d_inode->i_nlink)
7281 +               goto out_put; /* i_nlink is 0, return 0 without reading */
7282 +
7283 +       do { /* repeat until an error or a pass without any callback */
7284 +               arg->err = 0;
7285 +               au_fclr_testempty(arg->flags, CALLED);
7286 +               /* smp_mb(); */
7287 +               err = vfsub_readdir(h_file, test_empty_cb, arg);
7288 +               if (err >= 0)
7289 +                       err = arg->err; /* the callback's verdict */
7290 +       } while (!err && au_ftest_testempty(arg->flags, CALLED));
7291 +
7292 + out_put:
7293 +       fput(h_file);
7294 +       au_sbr_put(dentry->d_sb, arg->bindex);
7295 + out:
7296 +       return err;
7297 +}
7298 +/* arguments packed for call_do_test_empty() */
7299 +struct do_test_empty_args {
7300 +       int *errp; /* where the result of do_test_empty() is stored */
7301 +       struct dentry *dentry;
7302 +       struct test_empty_arg *arg;
7303 +};
7304 +/* au_wkq_wait() callback: do_test_empty() with the result in *a->errp */
7305 +static void call_do_test_empty(void *args)
7306 +{
7307 +       struct do_test_empty_args *a = args;
7308 +       *a->errp = do_test_empty(a->dentry, a->arg);
7309 +}
7310 +/* do_test_empty(); run via au_wkq_wait() if au_test_h_perm_sio() fails */
7311 +static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
7312 +{
7313 +       int err, wkq_err;
7314 +       struct dentry *h_dentry;
7315 +       struct inode *h_inode;
7316 +
7317 +       h_dentry = au_h_dptr(dentry, arg->bindex);
7318 +       h_inode = h_dentry->d_inode;
7319 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
7320 +       err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
7321 +       mutex_unlock(&h_inode->i_mutex);
7322 +       if (!err)
7323 +               err = do_test_empty(dentry, arg); /* in this context */
7324 +       else {
7325 +               struct do_test_empty_args args = {
7326 +                       .errp   = &err,
7327 +                       .dentry = dentry,
7328 +                       .arg    = arg
7329 +               };
7330 +               unsigned int flags = arg->flags; /* restored after the call */
7331 +
7332 +               wkq_err = au_wkq_wait(call_do_test_empty, &args);
7333 +               if (unlikely(wkq_err))
7334 +                       err = wkq_err; /* replaces the value in err */
7335 +               arg->flags = flags;
7336 +       }
7337 +
7338 +       return err;
7339 +}
7340 +/* emptiness test using a whiteout list allocated and freed locally */
7341 +int au_test_empty_lower(struct dentry *dentry)
7342 +{
7343 +       int err;
7344 +       unsigned int rdhash;
7345 +       aufs_bindex_t bindex, bstart, btail;
7346 +       struct au_nhash whlist;
7347 +       struct test_empty_arg arg;
7348 +
7349 +       SiMustAnyLock(dentry->d_sb);
7350 +
7351 +       rdhash = au_sbi(dentry->d_sb)->si_rdhash;
7352 +       if (!rdhash) /* si_rdhash is 0, estimate one from the dir size */
7353 +               rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
7354 +       err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
7355 +       if (unlikely(err))
7356 +               goto out;
7357 +
7358 +       arg.flags = 0;
7359 +       arg.whlist = &whlist;
7360 +       bstart = au_dbstart(dentry);
7361 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7362 +               au_fset_testempty(arg.flags, SHWH);
7363 +       arg.bindex = bstart;
7364 +       err = do_test_empty(dentry, &arg); /* bstart, WHONLY is not set yet */
7365 +       if (unlikely(err))
7366 +               goto out_whlist;
7367 +
7368 +       au_fset_testempty(arg.flags, WHONLY); /* for the branches after bstart */
7369 +       btail = au_dbtaildir(dentry);
7370 +       for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
7371 +               struct dentry *h_dentry;
7372 +
7373 +               h_dentry = au_h_dptr(dentry, bindex);
7374 +               if (h_dentry && h_dentry->d_inode) { /* positive ones only */
7375 +                       arg.bindex = bindex;
7376 +                       err = do_test_empty(dentry, &arg);
7377 +               }
7378 +       }
7379 +
7380 + out_whlist:
7381 +       au_nhash_wh_free(&whlist);
7382 + out:
7383 +       return err;
7384 +}
7385 +/* WHONLY test of each positive lower dentry, using the caller's whlist */
7386 +int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
7387 +{
7388 +       int err;
7389 +       struct test_empty_arg arg;
7390 +       aufs_bindex_t bindex, btail;
7391 +
7392 +       err = 0;
7393 +       arg.whlist = whlist;
7394 +       arg.flags = AuTestEmpty_WHONLY;
7395 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
7396 +               au_fset_testempty(arg.flags, SHWH);
7397 +       btail = au_dbtaildir(dentry);
7398 +       for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
7399 +               struct dentry *h_dentry;
7400 +
7401 +               h_dentry = au_h_dptr(dentry, bindex);
7402 +               if (h_dentry && h_dentry->d_inode) { /* positive ones only */
7403 +                       arg.bindex = bindex;
7404 +                       err = sio_test_empty(dentry, &arg);
7405 +               }
7406 +       }
7407 +
7408 +       return err; /* 0, or the first error which stopped the loop */
7409 +}
7410 +
7411 +/* ---------------------------------------------------------------------- */
7412 +/* the file_operations table declared as aufs_dir_fop in dir.h */
7413 +const struct file_operations aufs_dir_fop = {
7414 +       .read           = generic_read_dir,
7415 +       .readdir        = aufs_readdir,
7416 +       .unlocked_ioctl = aufs_ioctl_dir,
7417 +       .open           = aufs_open_dir,
7418 +       .release        = aufs_release_dir,
7419 +       .flush          = aufs_flush,
7420 +       .fsync          = aufs_fsync_dir
7421 +};
7422 diff -uprN -x .git linux-2.6.31/fs/aufs/dir.h aufs2-2.6.git/fs/aufs/dir.h
7423 --- linux-2.6.31/fs/aufs/dir.h  1970-01-01 00:00:00.000000000 +0000
7424 +++ aufs2-2.6.git/fs/aufs/dir.h 2009-09-21 21:49:23.399892755 +0000
7425 @@ -0,0 +1,127 @@
7426 +/*
7427 + * Copyright (C) 2005-2009 Junjiro R. Okajima
7428 + *
7429 + * This program, aufs is free software; you can redistribute it and/or modify
7430 + * it under the terms of the GNU General Public License as published by
7431 + * the Free Software Foundation; either version 2 of the License, or
7432 + * (at your option) any later version.
7433 + *
7434 + * This program is distributed in the hope that it will be useful,
7435 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7436 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
7437 + * GNU General Public License for more details.
7438 + *
7439 + * You should have received a copy of the GNU General Public License
7440 + * along with this program; if not, write to the Free Software
7441 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
7442 + */
7443 +
7444 +/*
7445 + * directory operations
7446 + */
7447 +
7448 +#ifndef __AUFS_DIR_H__
7449 +#define __AUFS_DIR_H__
7450 +
7451 +#ifdef __KERNEL__
7452 +
7453 +#include <linux/fs.h>
7454 +#include <linux/aufs_type.h>
7455 +
7456 +/* ---------------------------------------------------------------------- */
7457 +
7458 +/* need to be faster and smaller */
7459 +
7460 +struct au_nhash {
7461 +       unsigned int            nh_num; /* presumably # of nh_head[], see vdir.c */
7462 +       struct hlist_head       *nh_head;
7463 +};
7464 +
7465 +struct au_vdir_destr {
7466 +       unsigned char   len; /* one byte, so 255 at most */
7467 +       unsigned char   name[0]; /* zero-length array, takes no room here */
7468 +} __packed;
7469 +
7470 +struct au_vdir_dehstr {
7471 +       struct hlist_node       hash;
7472 +       struct au_vdir_destr    *str; /* a pointer, the name lives elsewhere */
7473 +};
7474 +
7475 +struct au_vdir_de {
7476 +       ino_t                   de_ino;
7477 +       unsigned char           de_type;
7478 +       /* caution: packed, and de_str ends with name[0] so keep it last */
7479 +       struct au_vdir_destr    de_str;
7480 +} __packed;
7481 +
7482 +struct au_vdir_wh {
7483 +       struct hlist_node       wh_hash;
7484 +#ifdef CONFIG_AUFS_SHWH /* wh_ino and wh_type exist only in this case */
7485 +       ino_t                   wh_ino;
7486 +       aufs_bindex_t           wh_bindex;
7487 +       unsigned char           wh_type;
7488 +#else
7489 +       aufs_bindex_t           wh_bindex;
7490 +#endif
7491 +       /* caution: packed, and wh_str ends with name[0] so keep it last */
7492 +       struct au_vdir_destr    wh_str;
7493 +} __packed;
7494 +
7495 +union au_vdir_deblk_p {
7496 +       unsigned char           *deblk; /* the address as a byte pointer */
7497 +       struct au_vdir_de       *de; /* the same address as an entry */
7498 +};
7499 +
7500 +struct au_vdir {
7501 +       unsigned char   **vd_deblk;
7502 +       unsigned long   vd_nblk; /* presumably # of vd_deblk[], see vdir.c */
7503 +       struct {
7504 +               unsigned long           ul;
7505 +               union au_vdir_deblk_p   p;
7506 +       } vd_last;
7507 +
7508 +       unsigned long   vd_version;
7509 +       unsigned int    vd_deblk_sz; /* presumably bytes per block -- confirm */
7510 +       unsigned long   vd_jiffy;
7511 +};
7512 +
7513 +/* ---------------------------------------------------------------------- */
7514 +
7515 +/* dir.c */
7516 +extern const struct file_operations aufs_dir_fop;
7517 +void au_add_nlink(struct inode *dir, struct inode *h_dir);
7518 +void au_sub_nlink(struct inode *dir, struct inode *h_dir);
7519 +loff_t au_dir_size(struct file *file, struct dentry *dentry);
7520 +int au_test_empty_lower(struct dentry *dentry); /* uses its own whlist */
7521 +int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
7522 +
7523 +/* vdir.c */
7524 +unsigned int au_rdhash_est(loff_t sz);
7525 +int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
7526 +void au_nhash_wh_free(struct au_nhash *whlist);
7527 +int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
7528 +                           int limit);
7529 +int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
7530 +int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
7531 +                      unsigned int d_type, aufs_bindex_t bindex,
7532 +                      unsigned char shwh);
7533 +void au_vdir_free(struct au_vdir *vdir);
7534 +int au_vdir_init(struct file *file);
7535 +int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir);
7536 +
7537 +/* ioctl.c */
7538 +long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
7539 +
7540 +#ifdef CONFIG_AUFS_RDU
7541 +/* rdu.c */
7542 +long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
7543 +#else /* stub used when CONFIG_AUFS_RDU is not defined */
7544 +static inline long au_rdu_ioctl(struct file *file, unsigned int cmd,
7545 +                               unsigned long arg)
7546 +{
7547 +       return -EINVAL; /* every cmd is rejected */
7548 +}
7549 +#endif
7550 +
7551 +#endif /* __KERNEL__ */
7552 +#endif /* __AUFS_DIR_H__ */
7553 diff -uprN -x .git linux-2.6.31/fs/aufs/export.c aufs2-2.6.git/fs/aufs/export.c
7554 --- linux-2.6.31/fs/aufs/export.c       1970-01-01 00:00:00.000000000 +0000
7555 +++ aufs2-2.6.git/fs/aufs/export.c      2009-09-21 21:49:23.399892755 +0000
7556 @@ -0,0 +1,746 @@
7557 +/*
7558 + * Copyright (C) 2005-2009 Junjiro R. Okajima
7559 + *
7560 + * This program, aufs is free software; you can redistribute it and/or modify
7561 + * it under the terms of the GNU General Public License as published by
7562 + * the Free Software Foundation; either version 2 of the License, or
7563 + * (at your option) any later version.
7564 + *
7565 + * This program is distributed in the hope that it will be useful,
7566 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7567 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
7568 + * GNU General Public License for more details.
7569 + *
7570 + * You should have received a copy of the GNU General Public License
7571 + * along with this program; if not, write to the Free Software
7572 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
7573 + */
7574 +
7575 +/*
7576 + * export via nfs
7577 + */
7578 +
7579 +#include <linux/exportfs.h>
7580 +#include <linux/file.h>
7581 +#include <linux/mnt_namespace.h>
7582 +#include <linux/namei.h>
7583 +#include <linux/nsproxy.h>
7584 +#include <linux/random.h>
7585 +#include "aufs.h"
7586 +/* overlays ino_t on the __u32 words used by decode_ino()/encode_ino() */
7587 +union conv {
7588 +#ifdef CONFIG_AUFS_INO_T_64
7589 +       __u32 a[2];
7590 +#else
7591 +       __u32 a[1];
7592 +#endif
7593 +       ino_t ino;
7594 +};
7595 +/* rebuild ino_t from a[0] (and a[1] with CONFIG_AUFS_INO_T_64) */
7596 +static ino_t decode_ino(__u32 *a)
7597 +{
7598 +       union conv u;
7599 +
7600 +       BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); /* both views must match */
7601 +       u.a[0] = a[0];
7602 +#ifdef CONFIG_AUFS_INO_T_64
7603 +       u.a[1] = a[1];
7604 +#endif
7605 +       return u.ino;
7606 +}
7607 +/* store ino into a[0] (and a[1] with CONFIG_AUFS_INO_T_64) */
7608 +static void encode_ino(__u32 *a, ino_t ino)
7609 +{
7610 +       union conv u;
7611 +
7612 +       u.ino = ino;
7613 +       a[0] = u.a[0];
7614 +#ifdef CONFIG_AUFS_INO_T_64
7615 +       a[1] = u.a[1];
7616 +#endif
7617 +}
7618 +
7619 +/* NFS file handle, these are indices into its array of __u32 words */
7620 +enum {
7621 +       Fh_br_id,
7622 +       Fh_sigen,
7623 +#ifdef CONFIG_AUFS_INO_T_64
7624 +       /* support 64bit inode number */
7625 +       Fh_ino1,
7626 +       Fh_ino2,
7627 +       Fh_dir_ino1,
7628 +       Fh_dir_ino2,
7629 +#else
7630 +       Fh_ino1,
7631 +       Fh_dir_ino1,
7632 +#endif
7633 +       Fh_igen,
7634 +       Fh_h_type, /* the type given to exportfs_decode_fh() */
7635 +       Fh_tail, /* the words from here are given to exportfs_decode_fh() */
7636 +
7637 +       Fh_ino = Fh_ino1, /* first word of the inode number */
7638 +       Fh_dir_ino = Fh_dir_ino1 /* first word of the parent dir's one */
7639 +};
7640 +/* returns 1 when DCACHE_DISCONNECTED is set in d_flags, otherwise 0 */
7641 +static int au_test_anon(struct dentry *dentry)
7642 +{
7643 +       return !!(dentry->d_flags & DCACHE_DISCONNECTED);
7644 +}
7645 +
7646 +/* ---------------------------------------------------------------------- */
7647 +/* inode generation external table */
7648 +/* write i_generation + 1 to the slot of this inode in the xigen file */
7649 +int au_xigen_inc(struct inode *inode)
7650 +{
7651 +       int err;
7652 +       loff_t pos;
7653 +       ssize_t sz;
7654 +       __u32 igen;
7655 +       struct super_block *sb;
7656 +       struct au_sbinfo *sbinfo;
7657 +
7658 +       err = 0;
7659 +       sb = inode->i_sb;
7660 +       sbinfo = au_sbi(sb);
7661 +       /*
7662 +        * temporary workaround for escaping from SiMustAnyLock() in
7663 +        * au_mntflags(), since this function is called from au_iinfo_fin().
7664 +        */
7665 +       if (unlikely(!au_opt_test(sbinfo->si_mntflags, XINO)))
7666 +               goto out; /* nothing to do, returns 0 */
7667 +
7668 +       pos = inode->i_ino;
7669 +       pos *= sizeof(igen); /* byte offset of the slot */
7670 +       igen = inode->i_generation + 1; /* inode->i_generation is untouched */
7671 +       sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
7672 +                        sizeof(igen), &pos);
7673 +       if (sz == sizeof(igen))
7674 +               goto out; /* success */
7675 +
7676 +       err = sz;
7677 +       if (unlikely(sz >= 0)) { /* a short write becomes -EIO */
7678 +               err = -EIO;
7679 +               AuIOErr("xigen error (%zd)\n", sz);
7680 +       }
7681 +
7682 + out:
7683 +       return err;
7684 +}
7685 +/* set inode->i_generation, read from the xigen file or newly numbered */
7686 +int au_xigen_new(struct inode *inode)
7687 +{
7688 +       int err;
7689 +       loff_t pos;
7690 +       ssize_t sz;
7691 +       struct super_block *sb;
7692 +       struct au_sbinfo *sbinfo;
7693 +       struct file *file;
7694 +
7695 +       err = 0;
7696 +       /* todo: dirty, at mount time */
7697 +       if (inode->i_ino == AUFS_ROOT_INO)
7698 +               goto out;
7699 +       sb = inode->i_sb;
7700 +       SiMustAnyLock(sb);
7701 +       if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
7702 +               goto out; /* nothing to do, returns 0 */
7703 +
7704 +       err = -EFBIG;
7705 +       pos = inode->i_ino;
7706 +       if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
7707 +               AuIOErr1("too large i%lld\n", pos);
7708 +               goto out;
7709 +       }
7710 +       pos *= sizeof(inode->i_generation); /* byte offset of the slot */
7711 +
7712 +       err = 0;
7713 +       sbinfo = au_sbi(sb);
7714 +       file = sbinfo->si_xigen;
7715 +       BUG_ON(!file);
7716 +
7717 +       if (i_size_read(file->f_dentry->d_inode)
7718 +           < pos + sizeof(inode->i_generation)) { /* the file ends before it */
7719 +               inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
7720 +               sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
7721 +                                sizeof(inode->i_generation), &pos);
7722 +       } else /* the slot is within the file, read it */
7723 +               sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
7724 +                               sizeof(inode->i_generation), &pos);
7725 +       if (sz == sizeof(inode->i_generation))
7726 +               goto out; /* success */
7727 +
7728 +       err = sz;
7729 +       if (unlikely(sz >= 0)) { /* a short read/write becomes -EIO */
7730 +               err = -EIO;
7731 +               AuIOErr("xigen error (%zd)\n", sz);
7732 +       }
7733 +
7734 + out:
7735 +       return err;
7736 +}
7737 +/* replace si_xigen by a file obtained from au_xino_create2() */
7738 +int au_xigen_set(struct super_block *sb, struct file *base)
7739 +{
7740 +       int err;
7741 +       struct au_sbinfo *sbinfo;
7742 +       struct file *file;
7743 +
7744 +       SiMustWriteLock(sb);
7745 +
7746 +       sbinfo = au_sbi(sb);
7747 +       file = au_xino_create2(base, sbinfo->si_xigen);
7748 +       err = PTR_ERR(file);
7749 +       if (IS_ERR(file))
7750 +               goto out; /* si_xigen is left as it was */
7751 +       err = 0;
7752 +       if (sbinfo->si_xigen)
7753 +               fput(sbinfo->si_xigen); /* drop the previous one */
7754 +       sbinfo->si_xigen = file;
7755 +
7756 + out:
7757 +       return err;
7758 +}
7759 +/* fput() si_xigen if it is set, and reset it to NULL */
7760 +void au_xigen_clr(struct super_block *sb)
7761 +{
7762 +       struct au_sbinfo *sbinfo;
7763 +
7764 +       SiMustWriteLock(sb);
7765 +
7766 +       sbinfo = au_sbi(sb);
7767 +       if (sbinfo->si_xigen) {
7768 +               fput(sbinfo->si_xigen);
7769 +               sbinfo->si_xigen = NULL;
7770 +       }
7771 +}
7772 +
7773 +/* ---------------------------------------------------------------------- */
7774 +/* ilookup() ino and pick an alias; NULL, ERR_PTR(-ESTALE) or a dentry */
7775 +static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
7776 +                                   ino_t dir_ino)
7777 +{
7778 +       struct dentry *dentry, *d;
7779 +       struct inode *inode;
7780 +       unsigned int sigen;
7781 +
7782 +       dentry = NULL;
7783 +       inode = ilookup(sb, ino);
7784 +       if (!inode)
7785 +               goto out; /* NULL, ilookup() found nothing */
7786 +
7787 +       dentry = ERR_PTR(-ESTALE);
7788 +       sigen = au_sigen(sb);
7789 +       if (unlikely(is_bad_inode(inode)
7790 +                    || IS_DEADDIR(inode)
7791 +                    || sigen != au_iigen(inode)))
7792 +               goto out_iput;
7793 +
7794 +       dentry = NULL;
7795 +       if (!dir_ino || S_ISDIR(inode->i_mode)) /* the parent does not matter */
7796 +               dentry = d_find_alias(inode);
7797 +       else { /* take the connected alias whose parent is dir_ino */
7798 +               spin_lock(&dcache_lock);
7799 +               list_for_each_entry(d, &inode->i_dentry, d_alias)
7800 +                       if (!au_test_anon(d)
7801 +                           && d->d_parent->d_inode->i_ino == dir_ino) {
7802 +                               dentry = dget_locked(d);
7803 +                               break;
7804 +                       }
7805 +               spin_unlock(&dcache_lock);
7806 +       }
7807 +       if (unlikely(dentry && sigen != au_digen(dentry))) { /* stale */
7808 +               dput(dentry);
7809 +               dentry = ERR_PTR(-ESTALE);
7810 +       }
7811 +
7812 + out_iput:
7813 +       iput(inode);
7814 + out:
7815 +       return dentry;
7816 +}
7817 +
7818 +/* ---------------------------------------------------------------------- */
7819 +/* mntget() the first mount of sb listed in current's mount namespace */
7820 +/* todo: dirty? */
7821 +/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
7822 +static struct vfsmount *au_mnt_get(struct super_block *sb)
7823 +{
7824 +       struct mnt_namespace *ns;
7825 +       struct vfsmount *pos, *mnt;
7826 +
7827 +       spin_lock(&vfsmount_lock);
7828 +       /* no get/put ?? */
7829 +       AuDebugOn(!current->nsproxy);
7830 +       ns = current->nsproxy->mnt_ns;
7831 +       AuDebugOn(!ns);
7832 +       mnt = NULL;
7833 +       /* the order (reverse) will not be a problem */
7834 +       list_for_each_entry(pos, &ns->list, mnt_list)
7835 +               if (pos->mnt_sb == sb) {
7836 +                       mnt = mntget(pos); /* the caller has to mntput() it */
7837 +                       break;
7838 +               }
7839 +       spin_unlock(&vfsmount_lock);
7840 +       AuDebugOn(!mnt); /* mnt is still NULL when no mount matched */
7841 +
7842 +       return mnt;
7843 +}
7844 +/* input of si_nfsd_read_lock() */
7845 +struct au_nfsd_si_lock {
7846 +       const unsigned int sigen; /* compared with au_sigen(sb) */
7847 +       const aufs_bindex_t br_id; /* converted by au_br_index() */
7848 +       unsigned char force_lock; /* keep si locked even on failure */
7849 +};
7850 +/* si_read_lock() and convert br_id; returns the branch index or -1 */
7851 +static aufs_bindex_t si_nfsd_read_lock(struct super_block *sb,
7852 +                                      struct au_nfsd_si_lock *nsi_lock)
7853 +{
7854 +       aufs_bindex_t bindex;
7855 +
7856 +       si_read_lock(sb, AuLock_FLUSH);
7857 +
7858 +       /* branch id may be wrapped around */
7859 +       bindex = au_br_index(sb, nsi_lock->br_id);
7860 +       if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
7861 +               goto out; /* success */
7862 +
7863 +       if (!nsi_lock->force_lock) /* otherwise -1 is returned with si locked */
7864 +               si_read_unlock(sb);
7865 +       bindex = -1;
7866 +
7867 + out:
7868 +       return bindex;
7869 +}
7870 +/* context of the find_name_by_ino() readdir callback */
7871 +struct find_name_by_ino {
7872 +       int called, found; /* # of callback calls, and 1 when ino matched */
7873 +       ino_t ino; /* the inode number to search for */
7874 +       char *name; /* buffer given by the caller, filled on a match */
7875 +       int namelen; /* valid bytes in name, set on a match */
7876 +};
7877 +/* readdir callback: copy the name of the entry whose ino equals a->ino */
7878 +static int
7879 +find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset,
7880 +                u64 ino, unsigned int d_type)
7881 +{
7882 +       struct find_name_by_ino *a = arg;
7883 +
7884 +       a->called++;
7885 +       if (a->ino != ino)
7886 +               return 0; /* not this one */
7887 +
7888 +       memcpy(a->name, name, namelen); /* no NUL is added, see a->namelen */
7889 +       a->namelen = namelen;
7890 +       a->found = 1;
7891 +       return 1; /* found */
7892 +}
7893 +/* find the name of ino by readdir on path, then look it up under path */
7894 +static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
7895 +                                    struct au_nfsd_si_lock *nsi_lock)
7896 +{
7897 +       struct dentry *dentry, *parent;
7898 +       struct file *file;
7899 +       struct inode *dir;
7900 +       struct find_name_by_ino arg;
7901 +       int err;
7902 +
7903 +       parent = path->dentry;
7904 +       if (nsi_lock) /* si is unlocked here and relocked at out: */
7905 +               si_read_unlock(parent->d_sb);
7906 +       path_get(path);
7907 +       file = vfsub_dentry_open(path, au_dir_roflags, current_cred());
7908 +       dentry = (void *)file;
7909 +       if (IS_ERR(file))
7910 +               goto out; /* the error pointer is returned as is */
7911 +
7912 +       dentry = ERR_PTR(-ENOMEM);
7913 +       arg.name = __getname();
7914 +       if (unlikely(!arg.name))
7915 +               goto out_file;
7916 +       arg.ino = ino;
7917 +       arg.found = 0;
7918 +       do { /* until found, an error, or a pass without any callback */
7919 +               arg.called = 0;
7920 +               /* smp_mb(); */
7921 +               err = vfsub_readdir(file, find_name_by_ino, &arg);
7922 +       } while (!err && !arg.found && arg.called);
7923 +       dentry = ERR_PTR(err);
7924 +       if (unlikely(err))
7925 +               goto out_name;
7926 +       dentry = ERR_PTR(-ENOENT);
7927 +       if (!arg.found)
7928 +               goto out_name;
7929 +
7930 +       /* do not call au_lkup_one() */
7931 +       dir = parent->d_inode;
7932 +       mutex_lock(&dir->i_mutex);
7933 +       dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
7934 +       mutex_unlock(&dir->i_mutex);
7935 +       AuTraceErrPtr(dentry);
7936 +       if (IS_ERR(dentry))
7937 +               goto out_name;
7938 +       AuDebugOn(au_test_anon(dentry));
7939 +       if (unlikely(!dentry->d_inode)) { /* a negative dentry is -ENOENT */
7940 +               dput(dentry);
7941 +               dentry = ERR_PTR(-ENOENT);
7942 +       }
7943 +
7944 + out_name:
7945 +       __putname(arg.name);
7946 + out_file:
7947 +       fput(file);
7948 + out:
7949 +       if (unlikely(nsi_lock
7950 +                    && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
7951 +               if (!IS_ERR(dentry)) { /* relock failed, drop the result */
7952 +                       dput(dentry);
7953 +                       dentry = ERR_PTR(-ESTALE);
7954 +               }
7955 +       AuTraceErrPtr(dentry);
7956 +       return dentry;
7957 +}
7958 +/* au_lkup_by_ino() under the dir found by decode_by_ino(), or the root */
7959 +static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
7960 +                                       ino_t dir_ino,
7961 +                                       struct au_nfsd_si_lock *nsi_lock)
7962 +{
7963 +       struct dentry *dentry;
7964 +       struct path path;
7965 +
7966 +       if (dir_ino != AUFS_ROOT_INO) {
7967 +               path.dentry = decode_by_ino(sb, dir_ino, 0);
7968 +               dentry = path.dentry;
7969 +               if (!path.dentry || IS_ERR(path.dentry))
7970 +                       goto out; /* NULL or the error is returned as is */
7971 +               AuDebugOn(au_test_anon(path.dentry));
7972 +       } else
7973 +               path.dentry = dget(sb->s_root);
7974 +
7975 +       path.mnt = au_mnt_get(sb);
7976 +       dentry = au_lkup_by_ino(&path, ino, nsi_lock);
7977 +       path_put(&path);
7978 +
7979 + out:
7980 +       AuTraceErrPtr(dentry);
7981 +       return dentry;
7982 +}
7983 +
7984 +/* ---------------------------------------------------------------------- */
7985 +/* callback given to exportfs_decode_fh(), it always returns 1 */
7986 +static int h_acceptable(void *expv, struct dentry *dentry)
7987 +{
7988 +       return 1;
7989 +}
7990 +/* compose a pathname in buf out of three d_path() results */
7991 +static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
7992 +                          char *buf, int len, struct super_block *sb)
7993 +{
7994 +       char *p;
7995 +       int n;
7996 +       struct path path;
7997 +
7998 +       p = d_path(h_rootpath, buf, len);
7999 +       if (IS_ERR(p))
8000 +               goto out;
8001 +       n = strlen(p); /* length of the name of h_rootpath */
8002 +
8003 +       path.mnt = h_rootpath->mnt;
8004 +       path.dentry = h_parent;
8005 +       p = d_path(&path, buf, len); /* reuses buf, only n is kept */
8006 +       if (IS_ERR(p))
8007 +               goto out;
8008 +       if (n != 1) /* presumably skips the h_rootpath prefix -- confirm */
8009 +               p += n;
8010 +
8011 +       path.mnt = au_mnt_get(sb);
8012 +       path.dentry = sb->s_root;
8013 +       p = d_path(&path, buf, len - strlen(p)); /* buf shortened by the rest */
8014 +       mntput(path.mnt);
8015 +       if (IS_ERR(p))
8016 +               goto out;
8017 +       if (n != 1) /* overwrite the NUL, presumably joining both parts */
8018 +               p[strlen(p)] = '/';
8019 +
8020 + out:
8021 +       AuTraceErrPtr(p);
8022 +       return p;
8023 +}
8024 +/* decode the words after Fh_tail on the branch, then look up by pathname */
8025 +static
8026 +struct dentry *decode_by_path(struct super_block *sb, aufs_bindex_t bindex,
8027 +                             ino_t ino, __u32 *fh, int fh_len,
8028 +                             struct au_nfsd_si_lock *nsi_lock)
8029 +{
8030 +       struct dentry *dentry, *h_parent, *root;
8031 +       struct super_block *h_sb;
8032 +       char *pathname, *p;
8033 +       struct vfsmount *h_mnt;
8034 +       struct au_branch *br;
8035 +       int err;
8036 +       struct path path;
8037 +
8038 +       br = au_sbr(sb, bindex);
8039 +       /* au_br_get(br); */
8040 +       h_mnt = br->br_mnt;
8041 +       h_sb = h_mnt->mnt_sb;
8042 +       /* todo: call lower fh_to_dentry()? fh_to_parent()? */
8043 +       h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
8044 +                                     fh_len - Fh_tail, fh[Fh_h_type],
8045 +                                     h_acceptable, /*context*/NULL);
8046 +       dentry = h_parent;
8047 +       if (unlikely(!h_parent || IS_ERR(h_parent))) { /* returned as is */
8048 +               AuWarn1("%s decode_fh failed, %ld\n",
8049 +                       au_sbtype(h_sb), PTR_ERR(h_parent));
8050 +               goto out;
8051 +       }
8052 +       dentry = NULL;
8053 +       if (unlikely(au_test_anon(h_parent))) { /* NULL is returned */
8054 +               AuWarn1("%s decode_fh returned a disconnected dentry\n",
8055 +                       au_sbtype(h_sb));
8056 +               goto out_h_parent;
8057 +       }
8058 +
8059 +       dentry = ERR_PTR(-ENOMEM);
8060 +       pathname = (void *)__get_free_page(GFP_NOFS);
8061 +       if (unlikely(!pathname))
8062 +               goto out_h_parent;
8063 +
8064 +       root = sb->s_root;
8065 +       path.mnt = h_mnt;
8066 +       di_read_lock_parent(root, !AuLock_IR);
8067 +       path.dentry = au_h_dptr(root, bindex);
8068 +       di_read_unlock(root, !AuLock_IR);
8069 +       p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
8070 +       dentry = (void *)p;
8071 +       if (IS_ERR(p))
8072 +               goto out_pathname;
8073 +
8074 +       si_read_unlock(sb); /* relocked at out_relock: */
8075 +       err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
8076 +       dentry = ERR_PTR(err);
8077 +       if (unlikely(err))
8078 +               goto out_relock;
8079 +
8080 +       dentry = ERR_PTR(-ENOENT);
8081 +       AuDebugOn(au_test_anon(path.dentry));
8082 +       if (unlikely(!path.dentry->d_inode))
8083 +               goto out_path;
8084 +
8085 +       if (ino != path.dentry->d_inode->i_ino) /* search ino under path */
8086 +               dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
8087 +       else /* the path itself has the inode number */
8088 +               dentry = dget(path.dentry);
8089 +
8090 + out_path:
8091 +       path_put(&path);
8092 + out_relock:
8093 +       if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
8094 +               if (!IS_ERR(dentry)) { /* relock failed, drop the result */
8095 +                       dput(dentry);
8096 +                       dentry = ERR_PTR(-ESTALE);
8097 +               }
8098 + out_pathname:
8099 +       free_page((unsigned long)pathname);
8100 + out_h_parent:
8101 +       dput(h_parent);
8102 + out:
8103 +       /* au_br_put(br); */
8104 +       AuTraceErrPtr(dentry);
8105 +       return dentry;
8106 +}
8107 +
8108 +/* ---------------------------------------------------------------------- */
8109 +/* decode an NFS file handle (Fh_* words) to a dentry, NULL or ERR_PTR() */
8110 +static struct dentry *
8111 +aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
8112 +                 int fh_type)
8113 +{
8114 +       struct dentry *dentry;
8115 +       __u32 *fh = fid->raw;
8116 +       ino_t ino, dir_ino;
8117 +       aufs_bindex_t bindex;
8118 +       struct au_nfsd_si_lock nsi_lock = {
8119 +               .sigen          = fh[Fh_sigen],
8120 +               .br_id          = fh[Fh_br_id],
8121 +               .force_lock     = 0
8122 +       };
8123 +
8124 +       dentry = ERR_PTR(-ESTALE);
8125 +       if (unlikely(fh_len < Fh_tail))
8126 +               goto out; /* handle is too short, nsi_lock is left unused */
8127 +       /* branch id may be wrapped around */
8128 +       bindex = si_nfsd_read_lock(sb, &nsi_lock);
8129 +       if (unlikely(bindex < 0))
8130 +               goto out; /* si is not locked since force_lock was 0 */
8131 +       nsi_lock.force_lock = 1; /* from now on si stays locked on failure */
8132 +
8133 +       /* is this inode still cached? */
8134 +       ino = decode_ino(fh + Fh_ino);
8135 +       AuDebugOn(ino == AUFS_ROOT_INO);
8136 +       dir_ino = decode_ino(fh + Fh_dir_ino);
8137 +       dentry = decode_by_ino(sb, ino, dir_ino);
8138 +       if (IS_ERR(dentry))
8139 +               goto out_unlock;
8140 +       if (dentry)
8141 +               goto accept;
8142 +
8143 +       /* is the parent dir cached? */
8144 +       dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
8145 +       if (IS_ERR(dentry))
8146 +               goto out_unlock;
8147 +       if (dentry)
8148 +               goto accept;
8149 +
8150 +       /* lookup path */
8151 +       dentry = decode_by_path(sb, bindex, ino, fh, fh_len, &nsi_lock);
8152 +       if (IS_ERR(dentry))
8153 +               goto out_unlock;
8154 +       if (unlikely(!dentry))
8155 +               /* todo?: make it ESTALE */
8156 +               goto out_unlock;
8157 +
8158 + accept:
8159 +       if (dentry->d_inode->i_generation == fh[Fh_igen])
8160 +               goto out_unlock; /* success */
8161 +
8162 +       dput(dentry); /* the generation differs from the handle's one */
8163 +       dentry = ERR_PTR(-ESTALE);
8164 + out_unlock:
8165 +       si_read_unlock(sb);
8166 + out:
8167 +       AuTraceErrPtr(dentry);
8168 +       return dentry;
8169 +}
8170 +
8171 +#if 0 /* reserved for future use */
8172 +/* support subtreecheck option; decode_by_path() call lacks nsi_lock arg */
8173 +static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
8174 +                                       int fh_len, int fh_type)
8175 +{
8176 +       struct dentry *parent;
8177 +       __u32 *fh = fid->raw;
8178 +       ino_t dir_ino;
8179 +
8180 +       dir_ino = decode_ino(fh + Fh_dir_ino);
8181 +       parent = decode_by_ino(sb, dir_ino, 0);
8182 +       if (IS_ERR(parent))
8183 +               goto out;
8184 +       if (!parent)
8185 +               parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
8186 +                                       dir_ino, fh, fh_len);
8187 +
8188 + out:
8189 +       AuTraceErrPtr(parent);
8190 +       return parent;
8191 +}
8192 +#endif
8193 +
8194 +/* ---------------------------------------------------------------------- */
8195 +
8196 +static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
8197 +                         int connectable)
8198 +{ /* fill fh with the aufs fields followed by the lower parent's handle */
8199 +       int err;
8200 +       aufs_bindex_t bindex, bend;
8201 +       struct super_block *sb, *h_sb;
8202 +       struct inode *inode;
8203 +       struct dentry *parent, *h_parent;
8204 +       struct au_branch *br;
8205 +
8206 +       AuDebugOn(au_test_anon(dentry));
8207 +
8208 +       parent = NULL;
8209 +       err = -ENOSPC; /* any negative err is returned as 255 at out: */
8210 +       if (unlikely(*max_len <= Fh_tail)) {
8211 +               AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
8212 +               goto out;
8213 +       }
8214 +
8215 +       err = FILEID_ROOT;
8216 +       if (IS_ROOT(dentry)) {
8217 +               AuDebugOn(dentry->d_inode->i_ino != AUFS_ROOT_INO);
8218 +               goto out;
8219 +       }
8220 +
8221 +       err = -EIO;
8222 +       h_parent = NULL;
8223 +       sb = dentry->d_sb;
8224 +       aufs_read_lock(dentry, AuLock_FLUSH | AuLock_IR);
8225 +       parent = dget_parent(dentry);
8226 +       di_read_lock_parent(parent, !AuLock_IR);
8227 +       inode = dentry->d_inode;
8228 +       AuDebugOn(!inode);
8229 +#ifdef CONFIG_AUFS_DEBUG
8230 +       if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
8231 +               AuWarn1("NFS-exporting requires xino\n");
8232 +#endif
8233 +
8234 +       bend = au_dbtaildir(parent);
8235 +       for (bindex = au_dbstart(parent); bindex <= bend; bindex++) {
8236 +               h_parent = au_h_dptr(parent, bindex);
8237 +               if (h_parent) {
8238 +                       dget(h_parent);
8239 +                       break; /* the first lower parent found is used */
8240 +               }
8241 +       }
8242 +       if (unlikely(!h_parent))
8243 +               goto out_unlock;
8244 +
8245 +       err = -EPERM;
8246 +       br = au_sbr(sb, bindex);
8247 +       h_sb = br->br_mnt->mnt_sb;
8248 +       if (unlikely(!h_sb->s_export_op)) {
8249 +               AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
8250 +               goto out_dput;
8251 +       }
8252 +
8253 +       fh[Fh_br_id] = br->br_id;
8254 +       fh[Fh_sigen] = au_sigen(sb);
8255 +       encode_ino(fh + Fh_ino, inode->i_ino);
8256 +       encode_ino(fh + Fh_dir_ino, parent->d_inode->i_ino);
8257 +       fh[Fh_igen] = inode->i_generation;
8258 +
8259 +       *max_len -= Fh_tail; /* what is left after the aufs fields */
8260 +       fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
8261 +                                          max_len,
8262 +                                          /*connectable or subtreecheck*/0);
8263 +       err = fh[Fh_h_type];
8264 +       *max_len += Fh_tail; /* count the aufs fields again */
8265 +       /* todo: macros? 99 is returned on success, 255 on failure */
8266 +       if (err != 255)
8267 +               err = 99;
8268 +       else
8269 +               AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
8270 +
8271 + out_dput:
8272 +       dput(h_parent);
8273 + out_unlock:
8274 +       di_read_unlock(parent, !AuLock_IR);
8275 +       dput(parent);
8276 +       aufs_read_unlock(dentry, AuLock_IR);
8277 + out:
8278 +       if (unlikely(err < 0))
8279 +               err = 255;
8280 +       return err;
8281 +}
8282 +
8283 +/* ---------------------------------------------------------------------- */
8284 +
8285 +static struct export_operations aufs_export_op = {
8286 +       .fh_to_dentry   = aufs_fh_to_dentry,
8287 +       /* .fh_to_parent = aufs_fh_to_parent, (defined under #if 0) */
8288 +       .encode_fh      = aufs_encode_fh
8289 +};
8290 +
8291 +void au_export_init(struct super_block *sb)
8292 +{ /* hook aufs_export_op into sb and initialize the xigen fields */
8293 +       struct au_sbinfo *sbinfo;
8294 +       __u32 u;
8295 +
8296 +       sb->s_export_op = &aufs_export_op;
8297 +       sbinfo = au_sbi(sb);
8298 +       sbinfo->si_xigen = NULL;
8299 +       get_random_bytes(&u, sizeof(u));
8300 +       BUILD_BUG_ON(sizeof(u) != sizeof(int));
8301 +       atomic_set(&sbinfo->si_xigen_next, u); /* random start value */
8302 +}
8303 diff -uprN -x .git linux-2.6.31/fs/aufs/f_op.c aufs2-2.6.git/fs/aufs/f_op.c
8304 --- linux-2.6.31/fs/aufs/f_op.c 1970-01-01 00:00:00.000000000 +0000
8305 +++ aufs2-2.6.git/fs/aufs/f_op.c        2009-09-21 21:49:23.401607657 +0000
8306 @@ -0,0 +1,826 @@
8307 +/*
8308 + * Copyright (C) 2005-2009 Junjiro R. Okajima
8309 + *
8310 + * This program, aufs is free software; you can redistribute it and/or modify
8311 + * it under the terms of the GNU General Public License as published by
8312 + * the Free Software Foundation; either version 2 of the License, or
8313 + * (at your option) any later version.
8314 + *
8315 + * This program is distributed in the hope that it will be useful,
8316 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8317 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
8318 + * GNU General Public License for more details.
8319 + *
8320 + * You should have received a copy of the GNU General Public License
8321 + * along with this program; if not, write to the Free Software
8322 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
8323 + */
8324 +
8325 +/*
8326 + * file and vm operations
8327 + */
8328 +
8329 +#include <linux/file.h>
8330 +#include <linux/fs_stack.h>
8331 +#include <linux/ima.h>
8332 +#include <linux/mman.h>
8333 +#include <linux/mm.h>
8334 +#include <linux/security.h>
8335 +#include "aufs.h"
8336 +
8337 +/* common to regular file and dir: ->flush lower files until one fails */
8338 +int aufs_flush(struct file *file, fl_owner_t id)
8339 +{
8340 +       int err;
8341 +       aufs_bindex_t bindex, bend;
8342 +       struct dentry *dentry;
8343 +       struct file *h_file;
8344 +
8345 +       dentry = file->f_dentry;
8346 +       si_noflush_read_lock(dentry->d_sb);
8347 +       fi_read_lock(file);
8348 +       di_read_lock_child(dentry, AuLock_IW);
8349 +
8350 +       err = 0;
8351 +       bend = au_fbend(file);
8352 +       for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
8353 +               h_file = au_h_fptr(file, bindex);
8354 +               if (!h_file || !h_file->f_op || !h_file->f_op->flush)
8355 +                       continue; /* nothing to flush on this branch */
8356 +
8357 +               err = h_file->f_op->flush(h_file, id);
8358 +               if (!err)
8359 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
8360 +               /*ignore*/
8361 +       }
8362 +       au_cpup_attr_timesizes(dentry->d_inode);
8363 +
8364 +       di_read_unlock(dentry, AuLock_IW);
8365 +       fi_read_unlock(file);
8366 +       si_read_unlock(dentry->d_sb);
8367 +       return err;
8368 +}
8369 +
8370 +/* ---------------------------------------------------------------------- */
8371 +
8372 +static int do_open_nondir(struct file *file, int flags)
8373 +{ /* open the lower file at au_dbstart(dentry) and record it in file */
8374 +       int err;
8375 +       aufs_bindex_t bindex;
8376 +       struct file *h_file;
8377 +       struct dentry *dentry;
8378 +       struct au_finfo *finfo;
8379 +
8380 +       FiMustWriteLock(file);
8381 +
8382 +       err = 0;
8383 +       dentry = file->f_dentry;
8384 +       finfo = au_fi(file);
8385 +       finfo->fi_h_vm_ops = NULL; /* both are set up later by aufs_mmap() */
8386 +       finfo->fi_vm_ops = NULL;
8387 +       bindex = au_dbstart(dentry);
8388 +       /* O_TRUNC is processed already */
8389 +       BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode)
8390 +              && (flags & O_TRUNC));
8391 +
8392 +       h_file = au_h_open(dentry, bindex, flags, file);
8393 +       if (IS_ERR(h_file))
8394 +               err = PTR_ERR(h_file);
8395 +       else {
8396 +               au_set_fbstart(file, bindex); /* a single branch: start == end */
8397 +               au_set_fbend(file, bindex);
8398 +               au_set_h_fptr(file, bindex, h_file);
8399 +               au_update_figen(file);
8400 +               /* todo: necessary? */
8401 +               /* file->f_ra = h_file->f_ra; */
8402 +       }
8403 +       return err;
8404 +}
8405 +
8406 +static int aufs_open_nondir(struct inode *inode __maybe_unused,
8407 +                           struct file *file)
8408 +{ /* thin wrapper: au_do_open() with do_open_nondir() */
8409 +       return au_do_open(file, do_open_nondir);
8410 +}
8411 +
8412 +static int aufs_release_nondir(struct inode *inode __maybe_unused,
8413 +                              struct file *file)
8414 +{ /* free the per-file vm_ops copy and the finfo; always returns 0 */
8415 +       struct super_block *sb = file->f_dentry->d_sb;
8416 +
8417 +       si_noflush_read_lock(sb);
8418 +       kfree(au_fi(file)->fi_vm_ops); /* NULL or from au_custom_vm_ops() */
8419 +       au_finfo_fin(file);
8420 +       si_read_unlock(sb);
8421 +       return 0;
8422 +}
8423 +
8424 +/* ---------------------------------------------------------------------- */
8425 +
8426 +static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
8427 +                        loff_t *ppos)
8428 +{ /* read via the lower file, then copy its atime to the aufs inode */
8429 +       ssize_t err;
8430 +       struct dentry *dentry;
8431 +       struct file *h_file;
8432 +       struct super_block *sb;
8433 +
8434 +       dentry = file->f_dentry;
8435 +       sb = dentry->d_sb;
8436 +       si_read_lock(sb, AuLock_FLUSH);
8437 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8438 +       if (unlikely(err))
8439 +               goto out; /* only si is unlocked on this path */
8440 +
8441 +       h_file = au_h_fptr(file, au_fbstart(file));
8442 +       err = vfsub_read_u(h_file, buf, count, ppos);
8443 +       /* todo: necessary? */
8444 +       /* file->f_ra = h_file->f_ra; */
8445 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8446 +
8447 +       di_read_unlock(dentry, AuLock_IR);
8448 +       fi_read_unlock(file);
8449 + out:
8450 +       si_read_unlock(sb);
8451 +       return err;
8452 +}
8453 +
8454 +static ssize_t aufs_write(struct file *file, const char __user *ubuf,
8455 +                         size_t count, loff_t *ppos)
8456 +{ /* write to the lower file under i_mutex, then refresh the aufs inode */
8457 +       ssize_t err;
8458 +       aufs_bindex_t bstart;
8459 +       struct au_pin pin;
8460 +       struct dentry *dentry;
8461 +       struct inode *inode;
8462 +       struct super_block *sb;
8463 +       struct file *h_file;
8464 +       char __user *buf = (char __user *)ubuf; /* drop const */
8465 +
8466 +       dentry = file->f_dentry;
8467 +       sb = dentry->d_sb;
8468 +       inode = dentry->d_inode;
8469 +       mutex_lock(&inode->i_mutex);
8470 +       si_read_lock(sb, AuLock_FLUSH);
8471 +
8472 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8473 +       if (unlikely(err))
8474 +               goto out;
8475 +
8476 +       err = au_ready_to_write(file, -1, &pin);
8477 +       di_downgrade_lock(dentry, AuLock_IR); /* done on error too */
8478 +       if (unlikely(err))
8479 +               goto out_unlock;
8480 +
8481 +       bstart = au_fbstart(file);
8482 +       h_file = au_h_fptr(file, bstart);
8483 +       au_unpin(&pin); /* unpinned before the actual write */
8484 +       err = vfsub_write_u(h_file, buf, count, ppos);
8485 +       au_cpup_attr_timesizes(inode);
8486 +       inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8487 +
8488 + out_unlock:
8489 +       di_read_unlock(dentry, AuLock_IR);
8490 +       fi_write_unlock(file);
8491 + out:
8492 +       si_read_unlock(sb);
8493 +       mutex_unlock(&inode->i_mutex);
8494 +       return err;
8495 +}
8496 +
8497 +static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
8498 +                            unsigned long nv, loff_t pos)
8499 +{ /* forward the request to ->aio_read of the lower file */
8500 +       ssize_t err;
8501 +       struct file *file, *h_file;
8502 +       struct dentry *dentry;
8503 +       struct super_block *sb;
8504 +
8505 +       file = kio->ki_filp;
8506 +       dentry = file->f_dentry;
8507 +       sb = dentry->d_sb;
8508 +       si_read_lock(sb, AuLock_FLUSH);
8509 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8510 +       if (unlikely(err))
8511 +               goto out;
8512 +
8513 +       err = -ENOSYS; /* returned when the lower file has no ->aio_read */
8514 +       h_file = au_h_fptr(file, au_fbstart(file));
8515 +       if (h_file->f_op && h_file->f_op->aio_read) {
8516 +               err = security_file_permission(h_file, MAY_READ);
8517 +               if (unlikely(err))
8518 +                       goto out_unlock;
8519 +               if (!is_sync_kiocb(kio)) { /* swap the file reference */
8520 +                       get_file(h_file);
8521 +                       fput(file);
8522 +               }
8523 +               kio->ki_filp = h_file; /* kio now refers to the lower file */
8524 +               err = h_file->f_op->aio_read(kio, iov, nv, pos);
8525 +               /* todo: necessary? */
8526 +               /* file->f_ra = h_file->f_ra; */
8527 +               fsstack_copy_attr_atime(dentry->d_inode,
8528 +                                       h_file->f_dentry->d_inode);
8529 +       } else
8530 +               /* currently there is no such fs */
8531 +               WARN_ON_ONCE(h_file->f_op && h_file->f_op->read);
8532 +
8533 + out_unlock:
8534 +       di_read_unlock(dentry, AuLock_IR);
8535 +       fi_read_unlock(file);
8536 + out:
8537 +       si_read_unlock(sb);
8538 +       return err;
8539 +}
8540 +
8541 +static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
8542 +                             unsigned long nv, loff_t pos)
8543 +{ /* forward the request to ->aio_write of the lower file, under i_mutex */
8544 +       ssize_t err;
8545 +       aufs_bindex_t bstart;
8546 +       struct au_pin pin;
8547 +       struct dentry *dentry;
8548 +       struct inode *inode;
8549 +       struct super_block *sb;
8550 +       struct file *file, *h_file;
8551 +
8552 +       file = kio->ki_filp;
8553 +       dentry = file->f_dentry;
8554 +       sb = dentry->d_sb;
8555 +       inode = dentry->d_inode;
8556 +       mutex_lock(&inode->i_mutex);
8557 +       si_read_lock(sb, AuLock_FLUSH);
8558 +
8559 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8560 +       if (unlikely(err))
8561 +               goto out;
8562 +
8563 +       err = au_ready_to_write(file, -1, &pin);
8564 +       di_downgrade_lock(dentry, AuLock_IR); /* done on error too */
8565 +       if (unlikely(err))
8566 +               goto out_unlock;
8567 +
8568 +       err = -ENOSYS; /* returned when the lower file has no ->aio_write */
8569 +       bstart = au_fbstart(file);
8570 +       h_file = au_h_fptr(file, bstart);
8571 +       au_unpin(&pin);
8572 +       if (h_file->f_op && h_file->f_op->aio_write) {
8573 +               err = security_file_permission(h_file, MAY_WRITE);
8574 +               if (unlikely(err))
8575 +                       goto out_unlock;
8576 +               if (!is_sync_kiocb(kio)) { /* swap the file reference */
8577 +                       get_file(h_file);
8578 +                       fput(file);
8579 +               }
8580 +               kio->ki_filp = h_file; /* kio now refers to the lower file */
8581 +               err = h_file->f_op->aio_write(kio, iov, nv, pos);
8582 +               au_cpup_attr_timesizes(inode);
8583 +               inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8584 +       } else
8585 +               /* currently there is no such fs */
8586 +               WARN_ON_ONCE(h_file->f_op && h_file->f_op->write);
8587 +
8588 + out_unlock:
8589 +       di_read_unlock(dentry, AuLock_IR);
8590 +       fi_write_unlock(file);
8591 + out:
8592 +       si_read_unlock(sb);
8593 +       mutex_unlock(&inode->i_mutex);
8594 +       return err;
8595 +}
8596 +
8597 +static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
8598 +                               struct pipe_inode_info *pipe, size_t len,
8599 +                               unsigned int flags)
8600 +{ /* splice from the lower file into pipe, then copy the lower atime */
8601 +       ssize_t err;
8602 +       struct file *h_file;
8603 +       struct dentry *dentry;
8604 +       struct super_block *sb;
8605 +
8606 +       dentry = file->f_dentry;
8607 +       sb = dentry->d_sb;
8608 +       si_read_lock(sb, AuLock_FLUSH);
8609 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
8610 +       if (unlikely(err))
8611 +               goto out;
8612 +
8613 +       err = -EINVAL; /* NOTE(review): overwritten below, never returned */
8614 +       h_file = au_h_fptr(file, au_fbstart(file));
8615 +       if (au_test_loopback_kthread()) {
8616 +               file->f_mapping = h_file->f_mapping;
8617 +               smp_mb(); /* unnecessary? */
8618 +       }
8619 +       err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
8620 +       /* todo: necessary? */
8621 +       /* file->f_ra = h_file->f_ra; */
8622 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8623 +
8624 +       di_read_unlock(dentry, AuLock_IR);
8625 +       fi_read_unlock(file);
8626 +
8627 + out:
8628 +       si_read_unlock(sb);
8629 +       return err;
8630 +}
8631 +
8632 +static ssize_t
8633 +aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
8634 +                 size_t len, unsigned int flags)
8635 +{ /* splice from pipe into the lower file, then refresh the aufs inode */
8636 +       ssize_t err;
8637 +       struct au_pin pin;
8638 +       struct dentry *dentry;
8639 +       struct inode *inode;
8640 +       struct super_block *sb;
8641 +       struct file *h_file;
8642 +
8643 +       dentry = file->f_dentry;
8644 +       inode = dentry->d_inode;
8645 +       mutex_lock(&inode->i_mutex);
8646 +       sb = dentry->d_sb;
8647 +       si_read_lock(sb, AuLock_FLUSH);
8648 +
8649 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8650 +       if (unlikely(err))
8651 +               goto out;
8652 +
8653 +       err = au_ready_to_write(file, -1, &pin);
8654 +       di_downgrade_lock(dentry, AuLock_IR); /* done on error too */
8655 +       if (unlikely(err))
8656 +               goto out_unlock;
8657 +
8658 +       h_file = au_h_fptr(file, au_fbstart(file));
8659 +       au_unpin(&pin); /* unpinned before the actual write */
8660 +       err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
8661 +       au_cpup_attr_timesizes(inode);
8662 +       inode->i_mode = h_file->f_dentry->d_inode->i_mode;
8663 +
8664 + out_unlock:
8665 +       di_read_unlock(dentry, AuLock_IR);
8666 +       fi_write_unlock(file);
8667 + out:
8668 +       si_read_unlock(sb);
8669 +       mutex_unlock(&inode->i_mutex);
8670 +       return err;
8671 +}
8672 +
8673 +/* ---------------------------------------------------------------------- */
8674 +
8675 +static struct file *au_safe_file(struct vm_area_struct *vma)
8676 +{ /* vma->vm_file if it is an aufs file with private data, else NULL */
8677 +       struct file *file;
8678 +
8679 +       file = vma->vm_file;
8680 +       if (file->private_data && au_test_aufs(file->f_dentry->d_sb))
8681 +               return file;
8682 +       return NULL;
8683 +}
8684 +
8685 +static void au_reset_file(struct vm_area_struct *vma, struct file *file)
8686 +{ /* callers pass the aufs file to undo their vm_file swap */
8687 +       vma->vm_file = file;
8688 +       /* smp_mb(); */ /* flush vm_file */
8689 +}
8690 +
8691 +static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
8692 +{ /* run the lower ->fault with vma->vm_file swapped to the lower file */
8693 +       int err;
8694 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8695 +       struct file *file, *h_file;
8696 +       struct au_finfo *finfo;
8697 +
8698 +       /* todo: non-robr mode, use vm_file as it is? */
8699 +       wait_event(wq, (file = au_safe_file(vma)));
8700 +
8701 +       /* do not revalidate, no si lock */
8702 +       finfo = au_fi(file);
8703 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8704 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8705 +
8706 +       mutex_lock(&finfo->fi_vm_mtx);
8707 +       vma->vm_file = h_file;
8708 +       err = finfo->fi_h_vm_ops->fault(vma, vmf);
8709 +       /* todo: necessary? */
8710 +       /* file->f_ra = h_file->f_ra; */
8711 +       au_reset_file(vma, file); /* vm_file is the aufs file again */
8712 +       mutex_unlock(&finfo->fi_vm_mtx);
8713 +#if 0 /* def CONFIG_SMP */
8714 +       /* wake_up_nr(&wq, online_cpu - 1); */
8715 +       wake_up_all(&wq);
8716 +#else
8717 +       wake_up(&wq); /* let a waiter in wait_event() above recheck */
8718 +#endif
8719 +
8720 +       return err;
8721 +}
8722 +
8723 +static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
8724 +{ /* run the lower ->page_mkwrite with vm_file swapped to the lower file */
8725 +       int err;
8726 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8727 +       struct file *file, *h_file;
8728 +       struct au_finfo *finfo;
8729 +
8730 +       wait_event(wq, (file = au_safe_file(vma)));
8731 +
8732 +       finfo = au_fi(file);
8733 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8734 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8735 +
8736 +       mutex_lock(&finfo->fi_vm_mtx);
8737 +       vma->vm_file = h_file;
8738 +       err = finfo->fi_h_vm_ops->page_mkwrite(vma, vmf);
8739 +       au_reset_file(vma, file); /* vm_file is the aufs file again */
8740 +       mutex_unlock(&finfo->fi_vm_mtx);
8741 +       wake_up(&wq); /* let a waiter in wait_event() above recheck */
8742 +
8743 +       return err;
8744 +}
8745 +
8746 +static void aufs_vm_close(struct vm_area_struct *vma)
8747 +{ /* run the lower ->close with vm_file swapped to the lower file */
8748 +       static DECLARE_WAIT_QUEUE_HEAD(wq);
8749 +       struct file *file, *h_file;
8750 +       struct au_finfo *finfo;
8751 +
8752 +       wait_event(wq, (file = au_safe_file(vma)));
8753 +
8754 +       finfo = au_fi(file);
8755 +       h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file;
8756 +       AuDebugOn(!h_file || !finfo->fi_h_vm_ops);
8757 +
8758 +       mutex_lock(&finfo->fi_vm_mtx);
8759 +       vma->vm_file = h_file;
8760 +       finfo->fi_h_vm_ops->close(vma);
8761 +       au_reset_file(vma, file); /* vm_file is the aufs file again */
8762 +       mutex_unlock(&finfo->fi_vm_mtx);
8763 +       wake_up(&wq); /* let a waiter in wait_event() above recheck */
8764 +}
8765 +
8766 +static struct vm_operations_struct aufs_vm_ops = {
8767 +       /* .close and .page_mkwrite are not set here, see au_custom_vm_ops() */
8768 +       .fault          = aufs_fault,
8769 +};
8770 +
8771 +/* ---------------------------------------------------------------------- */
8772 +
8773 +static unsigned long au_prot_conv(unsigned long flags)
8774 +{ /* translate VM_{READ,WRITE,EXEC} in flags to the PROT_* bits */
8775 +       unsigned long prot;
8776 +
8777 +       prot = 0;
8778 +       if (flags & VM_READ)
8779 +               prot |= PROT_READ;
8780 +       if (flags & VM_WRITE)
8781 +               prot |= PROT_WRITE;
8782 +       if (flags & VM_EXEC)
8783 +               prot |= PROT_EXEC;
8784 +       return prot;
8785 +}
8786 +
8787 +static struct vm_operations_struct *au_vm_ops(struct file *h_file,
8788 +                                             struct vm_area_struct *vma)
8789 +{ /* mmap the lower file once to learn its vm_ops, then unmap it again */
8790 +       struct vm_operations_struct *vm_ops;
8791 +       int err;
8792 +
8793 +       vm_ops = ERR_PTR(-ENODEV); /* the lower file cannot be mmapped */
8794 +       if (!h_file->f_op || !h_file->f_op->mmap)
8795 +               goto out;
8796 +
8797 +       err = ima_file_mmap(h_file, au_prot_conv(vma->vm_flags));
8798 +       vm_ops = ERR_PTR(err);
8799 +       if (err)
8800 +               goto out;
8801 +
8802 +       err = h_file->f_op->mmap(h_file, vma);
8803 +       vm_ops = ERR_PTR(err);
8804 +       if (unlikely(err))
8805 +               goto out;
8806 +
8807 +       vm_ops = vma->vm_ops; /* what the lower ->mmap has installed */
8808 +       err = do_munmap(current->mm, vma->vm_start,
8809 +                       vma->vm_end - vma->vm_start);
8810 +       if (unlikely(err)) {
8811 +               AuIOErr("failed internal unmapping %.*s, %d\n",
8812 +                       AuDLNPair(h_file->f_dentry), err);
8813 +               vm_ops = ERR_PTR(-EIO);
8814 +       }
8815 +
8816 + out:
8817 +       return vm_ops;
8818 +}
8819 +
8820 +static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma)
8821 +{ /* give vma a private copy of aufs_vm_ops when more hooks are needed */
8822 +       int err;
8823 +       struct vm_operations_struct *h_ops;
8824 +
8825 +       AuRwMustAnyLock(&finfo->fi_rwsem);
8826 +
8827 +       err = 0;
8828 +       h_ops = finfo->fi_h_vm_ops;
8829 +       AuDebugOn(!h_ops);
8830 +       if ((!h_ops->page_mkwrite && !h_ops->close)
8831 +           || finfo->fi_vm_ops)
8832 +               goto out; /* NOTE(review): vma->vm_ops is left untouched */
8833 +
8834 +       err = -ENOMEM;
8835 +       finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS);
8836 +       if (unlikely(!finfo->fi_vm_ops))
8837 +               goto out;
8838 +
8839 +       err = 0;
8840 +       if (h_ops->page_mkwrite) /* add only the hooks the lower ops have */
8841 +               finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite;
8842 +       if (h_ops->close)
8843 +               finfo->fi_vm_ops->close = aufs_vm_close;
8844 +
8845 +       vma->vm_ops = finfo->fi_vm_ops;
8846 +
8847 + out:
8848 +       return err;
8849 +}
8850 +
8851 +static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
8852 +{ /* mmap: take vm_ops from the lower file, map with aufs_vm_ops */
8853 +       int err;
8854 +       unsigned char wlock, mmapped;
8855 +       struct dentry *dentry;
8856 +       struct super_block *sb;
8857 +       struct file *h_file;
8858 +       struct vm_operations_struct *vm_ops;
8859 +
8860 +       dentry = file->f_dentry;
8861 +       wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
8862 +       sb = dentry->d_sb;
8863 +       si_read_lock(sb, AuLock_FLUSH);
8864 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8865 +       if (unlikely(err))
8866 +               goto out;
8867 +
8868 +       mmapped = !!au_test_mmapped(file);
8869 +       if (wlock) { /* writable file and VM_SHARED */
8870 +               struct au_pin pin;
8871 +
8872 +               err = au_ready_to_write(file, -1, &pin);
8873 +               di_downgrade_lock(dentry, AuLock_IR);
8874 +               if (unlikely(err))
8875 +                       goto out_unlock;
8876 +               au_unpin(&pin);
8877 +       } else
8878 +               di_downgrade_lock(dentry, AuLock_IR);
8879 +
8880 +       h_file = au_h_fptr(file, au_fbstart(file));
8881 +       if (!mmapped && au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) {
8882 +               /*
8883 +                * by this assignment, f_mapping will differ from aufs inode
8884 +                * i_mapping.
8885 +                * if someone else mixes the use of f_dentry->d_inode and
8886 +                * f_mapping->host, then a problem may arise.
8887 +                */
8888 +               file->f_mapping = h_file->f_mapping;
8889 +       }
8890 +
8891 +       vm_ops = NULL;
8892 +       if (!mmapped) {
8893 +               vm_ops = au_vm_ops(h_file, vma);
8894 +               err = PTR_ERR(vm_ops);
8895 +               if (IS_ERR(vm_ops))
8896 +                       goto out_unlock;
8897 +       }
8898 +
8899 +       /*
8900 +        * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
8901 +        * currently MAP_DENYWRITE from userspace is ignored, but elf loader
8902 +        * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
8903 +        * both of the aufs file and the lower file are deny_write_access()-ed.
8904 +        * finally I hope we can skip handling MAP_DENYWRITE here.
8905 +        */
8906 +       err = generic_file_mmap(file, vma);
8907 +       if (unlikely(err))
8908 +               goto out_unlock;
8909 +
8910 +       vma->vm_ops = &aufs_vm_ops; /* may be replaced by au_custom_vm_ops() */
8911 +       if (!mmapped) {
8912 +               struct au_finfo *finfo = au_fi(file);
8913 +
8914 +               finfo->fi_h_vm_ops = vm_ops; /* used by aufs_fault() etc. */
8915 +               mutex_init(&finfo->fi_vm_mtx);
8916 +       }
8917 +
8918 +       err = au_custom_vm_ops(au_fi(file), vma);
8919 +       if (unlikely(err))
8920 +               goto out_unlock;
8921 +
8922 +       vfsub_file_accessed(h_file);
8923 +       fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
8924 +
8925 + out_unlock:
8926 +       di_read_unlock(dentry, AuLock_IR);
8927 +       fi_write_unlock(file);
8928 + out:
8929 +       si_read_unlock(sb);
8930 +       return err;
8931 +}
8932 +
8933 +/* ---------------------------------------------------------------------- */
8934 +
8935 +static int aufs_fsync_nondir(struct file *file, struct dentry *dentry,
8936 +                            int datasync)
8937 +{ /* fsync the lower file while holding the aufs inode's i_mutex */
8938 +       int err;
8939 +       struct au_pin pin;
8940 +       struct inode *inode;
8941 +       struct file *h_file;
8942 +       struct super_block *sb;
8943 +
8944 +       inode = dentry->d_inode;
8945 +       IMustLock(file->f_mapping->host);
8946 +       if (inode != file->f_mapping->host) { /* cf. aufs_mmap() */
8947 +               mutex_unlock(&file->f_mapping->host->i_mutex);
8948 +               mutex_lock(&inode->i_mutex);
8949 +       }
8950 +       IMustLock(inode);
8951 +
8952 +       sb = dentry->d_sb;
8953 +       si_read_lock(sb, AuLock_FLUSH);
8954 +
8955 +       err = 0; /* -EBADF; */ /* posix? */
8956 +       if (unlikely(!(file->f_mode & FMODE_WRITE)))
8957 +               goto out;
8958 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
8959 +       if (unlikely(err))
8960 +               goto out;
8961 +
8962 +       err = au_ready_to_write(file, -1, &pin);
8963 +       di_downgrade_lock(dentry, AuLock_IR);
8964 +       if (unlikely(err))
8965 +               goto out_unlock;
8966 +       au_unpin(&pin);
8967 +
8968 +       err = -EINVAL; /* returned when the lower file has no ->fsync */
8969 +       h_file = au_h_fptr(file, au_fbstart(file));
8970 +       if (h_file->f_op && h_file->f_op->fsync) {
8971 +               struct dentry *h_d;
8972 +               struct mutex *h_mtx;
8973 +
8974 +               /*
8975 +                * no filemap_fdatawrite() since an aufs file, unlike a dir,
8976 +                * has no mapping of its own.
8977 +                */
8978 +               h_d = h_file->f_dentry;
8979 +               h_mtx = &h_d->d_inode->i_mutex;
8980 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
8981 +               err = h_file->f_op->fsync(h_file, h_d, datasync);
8982 +               if (!err)
8983 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
8984 +               /*ignore*/
8985 +               au_cpup_attr_timesizes(inode);
8986 +               mutex_unlock(h_mtx);
8987 +       }
8988 +
8989 + out_unlock:
8990 +       di_read_unlock(dentry, AuLock_IR);
8991 +       fi_write_unlock(file);
8992 + out:
8993 +       si_read_unlock(sb);
8994 +       if (inode != file->f_mapping->host) { /* restore the entry state */
8995 +               mutex_unlock(&inode->i_mutex);
8996 +               mutex_lock(&file->f_mapping->host->i_mutex);
8997 +       }
8998 +       return err;
8999 +}
9000 +
9001 +/* no one supports this operation currently, so it is compiled out */
9002 +#if 0
9003 +static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
9004 +{
9005 +       int err;
9006 +       struct au_pin pin;
9007 +       struct dentry *dentry;
9008 +       struct inode *inode;
9009 +       struct file *file, *h_file;
9010 +       struct super_block *sb;
9011 +
9012 +       file = kio->ki_filp;
9013 +       dentry = file->f_dentry;
9014 +       inode = dentry->d_inode;
9015 +       mutex_lock(&inode->i_mutex);
9016 +
9017 +       sb = dentry->d_sb;
9018 +       si_read_lock(sb, AuLock_FLUSH);
9019 +
9020 +       err = 0; /* -EBADF; */ /* posix? */
9021 +       if (unlikely(!(file->f_mode & FMODE_WRITE)))
9022 +               goto out;
9023 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
9024 +       if (unlikely(err))
9025 +               goto out;
9026 +
9027 +       err = au_ready_to_write(file, -1, &pin);
9028 +       di_downgrade_lock(dentry, AuLock_IR);
9029 +       if (unlikely(err))
9030 +               goto out_unlock;
9031 +       au_unpin(&pin);
9032 +
9033 +       err = -ENOSYS;
9034 +       h_file = au_h_fptr(file, au_fbstart(file));
9035 +       if (h_file->f_op && h_file->f_op->aio_fsync) {
9036 +               struct dentry *h_d;
9037 +               struct mutex *h_mtx;
9038 +
9039 +               h_d = h_file->f_dentry;
9040 +               h_mtx = &h_d->d_inode->i_mutex;
9041 +               if (!is_sync_kiocb(kio)) {
9042 +                       get_file(h_file);
9043 +                       fput(file);
9044 +               }
9045 +               kio->ki_filp = h_file;
9046 +               err = h_file->f_op->aio_fsync(kio, datasync);
9047 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD); /* after the call */
9048 +               if (!err)
9049 +                       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
9050 +               /*ignore*/
9051 +               au_cpup_attr_timesizes(inode);
9052 +               mutex_unlock(h_mtx);
9053 +       }
9054 +
9055 + out_unlock:
9056 +       di_read_unlock(dentry, AuLock_IR);
9057 +       fi_write_unlock(file);
9058 + out:
9059 +       si_read_unlock(sb);
9060 +       mutex_unlock(&inode->i_mutex);
9061 +       return err;
9062 +}
9063 +#endif
9064 +
9065 +static int aufs_fasync(int fd, struct file *file, int flag)
9066 +{
9067 +       int err; /* revalidation result, then the lower ->fasync() result */
9068 +       struct file *h_file;
9069 +       struct dentry *dentry;
9070 +       struct super_block *sb;
9071 +
9072 +       dentry = file->f_dentry;
9073 +       sb = dentry->d_sb;
9074 +       si_read_lock(sb, AuLock_FLUSH);
9075 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
9076 +       if (unlikely(err))
9077 +               goto out; /* only the superblock lock is released below */
9078 +
9079 +       h_file = au_h_fptr(file, au_fbstart(file)); /* lower file at fi_bstart */
9080 +       if (h_file->f_op && h_file->f_op->fasync) /* else err stays 0 */
9081 +               err = h_file->f_op->fasync(fd, h_file, flag);
9082 +
9083 +       di_read_unlock(dentry, AuLock_IR); /* downgraded since wlock == 0 */
9084 +       fi_read_unlock(file);
9085 +
9086 + out:
9087 +       si_read_unlock(sb);
9088 +       return err;
9089 +}
9090 +
9091 +/* ---------------------------------------------------------------------- */
9092 +
9093 +/* no one supports this operation, currently */
9094 +#if 0
9095 +static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
9096 +                            size_t len, loff_t *pos , int more)
9097 +{
9098 +}
9099 +#endif
9100 +
9101 +/* ---------------------------------------------------------------------- */
9102 +
9103 +const struct file_operations aufs_file_fop = {
9104 +       /*
9105 +        * although generic_file_llseek/_unlocked() don't take the BKL,
9106 +        * don't use them since they operate on file->f_mapping->host.
9107 +        * in aufs, it may be a real file and may confuse users by UDBA.
9108 +        */
9109 +       /* .llseek              = generic_file_llseek, */
9110 +
9111 +       .read           = aufs_read,
9112 +       .write          = aufs_write,
9113 +       .aio_read       = aufs_aio_read,
9114 +       .aio_write      = aufs_aio_write,
9115 +#ifdef CONFIG_AUFS_POLL
9116 +       .poll           = aufs_poll,
9117 +#endif
9118 +       .mmap           = aufs_mmap,
9119 +       .open           = aufs_open_nondir,
9120 +       .flush          = aufs_flush,
9121 +       .release        = aufs_release_nondir,
9122 +       .fsync          = aufs_fsync_nondir,
9123 +       /* .aio_fsync   = aufs_aio_fsync_nondir, */
9124 +       .fasync         = aufs_fasync,
9125 +       /* .sendpage    = aufs_sendpage, */
9126 +       .splice_write   = aufs_splice_write,
9127 +       .splice_read    = aufs_splice_read,
9128 +#if 0
9129 +       .aio_splice_write = aufs_aio_splice_write,
9130 +       .aio_splice_read  = aufs_aio_splice_read
9131 +#endif
9132 +};
9133 diff -uprN -x .git linux-2.6.31/fs/aufs/file.c aufs2-2.6.git/fs/aufs/file.c
9134 --- linux-2.6.31/fs/aufs/file.c 1970-01-01 00:00:00.000000000 +0000
9135 +++ aufs2-2.6.git/fs/aufs/file.c        2009-09-21 21:49:23.401607657 +0000
9136 @@ -0,0 +1,568 @@
9137 +/*
9138 + * Copyright (C) 2005-2009 Junjiro R. Okajima
9139 + *
9140 + * This program, aufs is free software; you can redistribute it and/or modify
9141 + * it under the terms of the GNU General Public License as published by
9142 + * the Free Software Foundation; either version 2 of the License, or
9143 + * (at your option) any later version.
9144 + *
9145 + * This program is distributed in the hope that it will be useful,
9146 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9147 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9148 + * GNU General Public License for more details.
9149 + *
9150 + * You should have received a copy of the GNU General Public License
9151 + * along with this program; if not, write to the Free Software
9152 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
9153 + */
9154 +
9155 +/*
9156 + * handling file/dir, and address_space operation
9157 + */
9158 +
9159 +#include <linux/file.h>
9160 +#include <linux/fsnotify.h>
9161 +#include <linux/namei.h>
9162 +#include <linux/pagemap.h>
9163 +#include "aufs.h"
9164 +
9165 +/* strip the write-related open flags and force O_RDONLY | O_NOATIME */
9166 +unsigned int au_file_roflags(unsigned int flags)
9167 +{
9168 +       flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
9169 +       flags |= O_RDONLY | O_NOATIME;
9170 +       return flags;
9171 +}
9172 +
9173 +/* open the lower file of @dentry on branch @bindex; common to file and dir */
9174 +struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9175 +                      struct file *file) /* @file is not used in the body */
9176 +{
9177 +       struct file *h_file; /* the result: lower file or ERR_PTR() */
9178 +       struct dentry *h_dentry;
9179 +       struct inode *h_inode;
9180 +       struct super_block *sb;
9181 +       struct au_branch *br;
9182 +       int err, exec_flag;
9183 +       struct path h_path;
9184 +
9185 +       /* a race condition can happen between open and unlink/rmdir */
9186 +       h_file = ERR_PTR(-ENOENT); /* result of the early exits below */
9187 +       h_dentry = au_h_dptr(dentry, bindex);
9188 +       if (au_test_nfsd(current) && !h_dentry)
9189 +               goto out;
9190 +       h_inode = h_dentry->d_inode; /* NOTE(review): NULL-checked for nfsd only */
9191 +       if (au_test_nfsd(current) && !h_inode)
9192 +               goto out;
9193 +       if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry))
9194 +                    || !h_inode))
9195 +               goto out;
9196 +
9197 +       sb = dentry->d_sb;
9198 +       br = au_sbr(sb, bindex);
9199 +       h_file = ERR_PTR(-EACCES); /* FMODE_EXEC open on a MNT_NOEXEC branch */
9200 +       exec_flag = flags & vfsub_fmode_to_uint(FMODE_EXEC);
9201 +       if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
9202 +                       goto out;
9203 +
9204 +       /* drop flags for writing */
9205 +       if (au_test_ro(sb, bindex, dentry->d_inode))
9206 +               flags = au_file_roflags(flags);
9207 +       flags &= ~O_CREAT; /* O_CREAT is never passed down */
9208 +       atomic_inc(&br->br_count); /* undone at out_br on failure */
9209 +       h_path.dentry = h_dentry;
9210 +       h_path.mnt = br->br_mnt;
9211 +       path_get(&h_path);
9212 +       h_file = vfsub_dentry_open(&h_path, flags, current_cred());
9213 +       if (IS_ERR(h_file))
9214 +               goto out_br;
9215 +
9216 +       if (exec_flag) {
9217 +               err = deny_write_access(h_file);
9218 +               if (unlikely(err)) {
9219 +                       fput(h_file);
9220 +                       h_file = ERR_PTR(err);
9221 +                       goto out_br;
9222 +               }
9223 +       }
9224 +       fsnotify_open(h_dentry);
9225 +       goto out; /* success */
9226 +
9227 + out_br:
9228 +       atomic_dec(&br->br_count);
9229 + out:
9230 +       return h_file;
9231 +}
9232 +
9233 +int au_do_open(struct file *file, int (*open)(struct file *file, int flags))
9234 +{
9235 +       int err;
9236 +       unsigned int flags;
9237 +       struct dentry *dentry;
9238 +       struct super_block *sb;
9239 +
9240 +       dentry = file->f_dentry;
9241 +       sb = dentry->d_sb;
9242 +       si_read_lock(sb, AuLock_FLUSH);
9243 +       err = au_finfo_init(file);
9244 +       if (unlikely(err))
9245 +               goto out;
9246 +
9247 +       di_read_lock_child(dentry, AuLock_IR);
9248 +       spin_lock(&file->f_lock);
9249 +       flags = file->f_flags; /* snapshot taken under f_lock */
9250 +       spin_unlock(&file->f_lock);
9251 +       err = open(file, flags); /* the caller-supplied open callback */
9252 +       di_read_unlock(dentry, AuLock_IR);
9253 +
9254 +       fi_write_unlock(file); /* presumably locked by au_finfo_init() - confirm */
9255 +       if (unlikely(err))
9256 +               au_finfo_fin(file); /* @open failed: undo au_finfo_init() */
9257 + out:
9258 +       si_read_unlock(sb);
9259 +       return err;
9260 +}
9261 +
9262 +int au_reopen_nondir(struct file *file)
9263 +{
9264 +       int err;
9265 +       unsigned int flags;
9266 +       aufs_bindex_t bstart, bindex, bend;
9267 +       struct dentry *dentry;
9268 +       struct file *h_file, *h_file_tmp;
9269 +
9270 +       dentry = file->f_dentry;
9271 +       bstart = au_dbstart(dentry);
9272 +       h_file_tmp = NULL;
9273 +       if (au_fbstart(file) == bstart) {
9274 +               h_file = au_h_fptr(file, bstart);
9275 +               if (file->f_mode == h_file->f_mode)
9276 +                       return 0; /* success: same branch index, same mode */
9277 +               h_file_tmp = h_file;
9278 +               get_file(h_file_tmp); /* extra reference, dropped at out */
9279 +               au_set_h_fptr(file, bstart, NULL); /* empty the slot first */
9280 +       }
9281 +       AuDebugOn(au_fbstart(file) < bstart
9282 +                 || au_fi(file)->fi_hfile[0 + bstart].hf_file);
9283 +
9284 +       spin_lock(&file->f_lock);
9285 +       flags = file->f_flags & ~O_TRUNC; /* O_TRUNC is not passed on reopen */
9286 +       spin_unlock(&file->f_lock);
9287 +       h_file = au_h_open(dentry, bstart, flags, file);
9288 +       err = PTR_ERR(h_file);
9289 +       if (IS_ERR(h_file))
9290 +               goto out; /* todo: close all? */
9291 +
9292 +       err = 0;
9293 +       au_set_fbstart(file, bstart);
9294 +       au_set_h_fptr(file, bstart, h_file);
9295 +       au_update_figen(file);
9296 +       /* todo: necessary? */
9297 +       /* file->f_ra = h_file->f_ra; */
9298 +
9299 +       /* close the lower files on the indexes after bstart */
9300 +       bend = au_fbend(file);
9301 +       for (bindex = bstart + 1; bindex <= bend; bindex++)
9302 +               au_set_h_fptr(file, bindex, NULL);
9303 +       au_set_fbend(file, bstart); /* a single lower file remains */
9304 +
9305 + out:
9306 +       if (h_file_tmp)
9307 +               fput(h_file_tmp);
9308 +       return err;
9309 +}
9310 +
9311 +/* ---------------------------------------------------------------------- */
9312 +
9313 +static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
9314 +                       struct dentry *hi_wh)
9315 +{
9316 +       int err;
9317 +       aufs_bindex_t bstart; /* saved di_bstart */
9318 +       struct au_dinfo *dinfo;
9319 +       struct dentry *h_dentry; /* saved lower dentry of slot @btgt */
9320 +
9321 +       dinfo = au_di(file->f_dentry);
9322 +       AuRwMustWriteLock(&dinfo->di_rwsem);
9323 +
9324 +       bstart = dinfo->di_bstart;
9325 +       dinfo->di_bstart = btgt; /* temporary, restored below */
9326 +       h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry;
9327 +       dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh; /* temporary as well */
9328 +       err = au_reopen_nondir(file); /* runs against the substituted values */
9329 +       dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry;
9330 +       dinfo->di_bstart = bstart;
9331 +
9332 +       return err;
9333 +}
9334 +
9335 +static int au_ready_to_write_wh(struct file *file, loff_t len,
9336 +                               aufs_bindex_t bcpup)
9337 +{
9338 +       int err;
9339 +       struct inode *inode;
9340 +       struct dentry *dentry, *hi_wh;
9341 +       struct super_block *sb;
9342 +
9343 +       dentry = file->f_dentry;
9344 +       inode = dentry->d_inode;
9345 +       hi_wh = au_hi_wh(inode, bcpup);
9346 +       if (!hi_wh) /* au_hi_wh() has nothing for @bcpup: copy up now */
9347 +               err = au_sio_cpup_wh(dentry, bcpup, len, file);
9348 +       else
9349 +               /* already copied-up after unlink: only reopen on @hi_wh */
9350 +               err = au_reopen_wh(file, bcpup, hi_wh);
9351 +
9352 +       sb = dentry->d_sb;
9353 +       if (!err && inode->i_nlink > 1 && au_opt_test(au_mntflags(sb), PLINK))
9354 +               au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
9355 +
9356 +       return err;
9357 +}
9358 +
9359 +/*
9360 + * prepare the @file for writing, copying it up to another branch if needed.
9361 + */
9362 +int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
9363 +{
9364 +       int err;
9365 +       aufs_bindex_t bstart, bcpup;
9366 +       struct dentry *dentry, *parent, *h_dentry;
9367 +       struct inode *h_inode, *inode;
9368 +       struct super_block *sb;
9369 +
9370 +       dentry = file->f_dentry;
9371 +       sb = dentry->d_sb;
9372 +       bstart = au_fbstart(file);
9373 +       inode = dentry->d_inode;
9374 +       err = au_test_ro(sb, bstart, inode);
9375 +       if (!err && (au_h_fptr(file, bstart)->f_mode & FMODE_WRITE)) {
9376 +               err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
9377 +               goto out; /* no copy-up needed */
9378 +       }
9379 +
9380 +       /* need to cpup: ro branch, or lower file not opened for write */
9381 +       parent = dget_parent(dentry);
9382 +       di_write_lock_parent(parent);
9383 +       err = AuWbrCopyup(au_sbi(sb), dentry);
9384 +       bcpup = err; /* target branch index, or a negative error */
9385 +       if (unlikely(err < 0))
9386 +               goto out_dgrade;
9387 +       err = 0;
9388 +
9389 +       if (!au_h_dptr(parent, bcpup)) { /* no lower parent on bcpup yet */
9390 +               err = au_cpup_dirs(dentry, bcpup);
9391 +               if (unlikely(err))
9392 +                       goto out_dgrade;
9393 +       }
9394 +
9395 +       err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
9396 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
9397 +       if (unlikely(err))
9398 +               goto out_dgrade; /* note: no au_unpin() after a failed au_pin() */
9399 +
9400 +       h_dentry = au_h_fptr(file, bstart)->f_dentry;
9401 +       h_inode = h_dentry->d_inode;
9402 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
9403 +       if (d_unhashed(dentry) /* || d_unhashed(h_dentry) */
9404 +           /* || !h_inode->i_nlink */) {
9405 +               err = au_ready_to_write_wh(file, len, bcpup);
9406 +               di_downgrade_lock(parent, AuLock_IR);
9407 +       } else {
9408 +               di_downgrade_lock(parent, AuLock_IR);
9409 +               if (!au_h_dptr(dentry, bcpup))
9410 +                       err = au_sio_cpup_simple(dentry, bcpup, len,
9411 +                                                AuCpup_DTIME);
9412 +               if (!err)
9413 +                       err = au_reopen_nondir(file);
9414 +       }
9415 +       mutex_unlock(&h_inode->i_mutex);
9416 +
9417 +       if (!err) {
9418 +               au_pin_set_parent_lflag(pin, /*lflag*/0);
9419 +               goto out_dput; /* success: parent's di lock is not dropped here */
9420 +       }
9421 +       au_unpin(pin);
9422 +       goto out_unlock;
9423 +
9424 + out_dgrade:
9425 +       di_downgrade_lock(parent, AuLock_IR);
9426 + out_unlock:
9427 +       di_read_unlock(parent, AuLock_IR);
9428 + out_dput:
9429 +       dput(parent);
9430 + out:
9431 +       return err;
9432 +}
9433 +
9434 +/* ---------------------------------------------------------------------- */
9435 +
9436 +static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
9437 +{
9438 +       int err;
9439 +       aufs_bindex_t bstart;
9440 +       struct au_pin pin;
9441 +       struct au_finfo *finfo;
9442 +       struct dentry *dentry, *parent, *hi_wh;
9443 +       struct inode *inode;
9444 +       struct super_block *sb;
9445 +
9446 +       FiMustWriteLock(file);
9447 +
9448 +       err = 0;
9449 +       finfo = au_fi(file);
9450 +       dentry = file->f_dentry;
9451 +       sb = dentry->d_sb;
9452 +       inode = dentry->d_inode;
9453 +       bstart = au_ibstart(inode);
9454 +       if (bstart == finfo->fi_bstart) /* file and inode agree: nothing to do */
9455 +               goto out;
9456 +
9457 +       parent = dget_parent(dentry);
9458 +       if (au_test_ro(sb, bstart, inode)) {
9459 +               di_read_lock_parent(parent, !AuLock_IR);
9460 +               err = AuWbrCopyup(au_sbi(sb), dentry);
9461 +               bstart = err; /* branch index for the copy-up, or a negative error */
9462 +               di_read_unlock(parent, !AuLock_IR);
9463 +               if (unlikely(err < 0))
9464 +                       goto out_parent;
9465 +               err = 0;
9466 +       }
9467 +
9468 +       di_read_lock_parent(parent, AuLock_IR);
9469 +       hi_wh = au_hi_wh(inode, bstart);
9470 +       if (au_opt_test(au_mntflags(sb), PLINK)
9471 +           && au_plink_test(inode)
9472 +           && !d_unhashed(dentry)) {
9473 +               err = au_test_and_cpup_dirs(dentry, bstart);
9474 +               if (unlikely(err))
9475 +                       goto out_unlock;
9476 +
9477 +               /* always superio. */
9478 +               err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
9479 +                            AuPin_DI_LOCKED | AuPin_MNT_WRITE);
9480 +               if (unlikely(err))
9481 +                       goto out_unlock; /* pin failed: nothing to unpin */
9482 +               err = au_sio_cpup_simple(dentry, bstart, -1, AuCpup_DTIME);
9483 +               au_unpin(&pin);
9484 +       } else if (hi_wh) {
9485 +               /* already copied-up after unlink */
9486 +               err = au_reopen_wh(file, bstart, hi_wh);
9487 +               *need_reopen = 0; /* au_reopen_wh() did the reopen already */
9488 +       }
9489 +
9490 + out_unlock:
9491 +       di_read_unlock(parent, AuLock_IR);
9492 + out_parent:
9493 +       dput(parent);
9494 + out:
9495 +       return err;
9496 +}
9497 +
9498 +static void au_do_refresh_file(struct file *file)
9499 +{
9500 +       aufs_bindex_t bindex, bend, new_bindex, brid;
9501 +       struct au_hfile *p, tmp, *q;
9502 +       struct au_finfo *finfo;
9503 +       struct super_block *sb;
9504 +
9505 +       FiMustWriteLock(file);
9506 +
9507 +       sb = file->f_dentry->d_sb;
9508 +       finfo = au_fi(file);
9509 +       p = finfo->fi_hfile + finfo->fi_bstart;
9510 +       brid = p->hf_br->br_id; /* branch id of the slot at fi_bstart */
9511 +       bend = finfo->fi_bend;
9512 +       for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) {
9513 +               if (!p->hf_file)
9514 +                       continue;
9515 +
9516 +               new_bindex = au_br_index(sb, p->hf_br->br_id);
9517 +               if (new_bindex == bindex) /* slot already sits at its index */
9518 +                       continue;
9519 +               if (new_bindex < 0) { /* no index for that branch id: close */
9520 +                       au_set_h_fptr(file, bindex, NULL);
9521 +                       continue;
9522 +               }
9523 +
9524 +               /* swap two lower file slots; recheck this index if it got a file */
9525 +               q = finfo->fi_hfile + new_bindex;
9526 +               tmp = *q;
9527 +               *q = *p;
9528 +               *p = tmp;
9529 +               if (tmp.hf_file) {
9530 +                       bindex--;
9531 +                       p--;
9532 +               }
9533 +       }
9534 +
9535 +       p = finfo->fi_hfile; /* recompute fi_bstart, scanning up from slot 0 */
9536 +       if (!au_test_mmapped(file) && !d_unhashed(file->f_dentry)) {
9537 +               bend = au_sbend(sb);
9538 +               for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend;
9539 +                    finfo->fi_bstart++, p++)
9540 +                       if (p->hf_file) {
9541 +                               if (p->hf_file->f_dentry
9542 +                                   && p->hf_file->f_dentry->d_inode)
9543 +                                       break;
9544 +                               else
9545 +                                       au_hfput(p, file);
9546 +                       }
9547 +       } else { /* mmapped or unhashed: close all slots before brid's index */
9548 +               bend = au_br_index(sb, brid);
9549 +               for (finfo->fi_bstart = 0; finfo->fi_bstart < bend;
9550 +                    finfo->fi_bstart++, p++)
9551 +                       if (p->hf_file)
9552 +                               au_hfput(p, file);
9553 +               bend = au_sbend(sb);
9554 +       }
9555 +
9556 +       p = finfo->fi_hfile + bend; /* recompute fi_bend, scanning down */
9557 +       for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart;
9558 +            finfo->fi_bend--, p--)
9559 +               if (p->hf_file) {
9560 +                       if (p->hf_file->f_dentry
9561 +                           && p->hf_file->f_dentry->d_inode)
9562 +                               break;
9563 +                       else
9564 +                               au_hfput(p, file);
9565 +               }
9566 +       AuDebugOn(finfo->fi_bend < finfo->fi_bstart);
9567 +}
9568 +
9569 +/*
9570 + * after a branch manipulation, refresh the lower files of @file.
9571 + */
9572 +static int refresh_file(struct file *file, int (*reopen)(struct file *file))
9573 +{
9574 +       int err, need_reopen;
9575 +       struct dentry *dentry;
9576 +       aufs_bindex_t bend, bindex;
9577 +
9578 +       dentry = file->f_dentry;
9579 +       err = au_fi_realloc(au_fi(file), au_sbend(dentry->d_sb) + 1);
9580 +       if (unlikely(err))
9581 +               goto out;
9582 +       au_do_refresh_file(file);
9583 +
9584 +       err = 0;
9585 +       need_reopen = 1; /* au_file_refresh_by_inode() may clear it */
9586 +       if (!au_test_mmapped(file))
9587 +               err = au_file_refresh_by_inode(file, &need_reopen);
9588 +       if (!err && need_reopen && !d_unhashed(dentry))
9589 +               err = reopen(file);
9590 +       if (!err) {
9591 +               au_update_figen(file);
9592 +               return 0; /* success */
9593 +       }
9594 +
9595 +       /* error, close all lower files */
9596 +       bend = au_fbend(file);
9597 +       for (bindex = au_fbstart(file); bindex <= bend; bindex++)
9598 +               au_set_h_fptr(file, bindex, NULL);
9599 +
9600 + out:
9601 +       return err;
9602 +}
9603 +
9604 +/* common to file and dir: revalidate @file, return 0 with fi and di locked */
9605 +int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
9606 +                         int wlock)
9607 +{
9608 +       int err;
9609 +       unsigned int sigen, figen;
9610 +       aufs_bindex_t bstart;
9611 +       unsigned char pseudo_link;
9612 +       struct dentry *dentry;
9613 +
9614 +       err = 0;
9615 +       dentry = file->f_dentry;
9616 +       sigen = au_sigen(dentry->d_sb);
9617 +       fi_write_lock(file);
9618 +       figen = au_figen(file);
9619 +       di_write_lock_child(dentry);
9620 +       bstart = au_dbstart(dentry);
9621 +       pseudo_link = (bstart != au_ibstart(dentry->d_inode));
9622 +       if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
9623 +               if (!wlock) { /* the caller asked for read locks only */
9624 +                       di_downgrade_lock(dentry, AuLock_IR);
9625 +                       fi_downgrade_lock(file);
9626 +               }
9627 +               goto out; /* success */
9628 +       }
9629 +
9630 +       AuDbg("sigen %d, figen %d\n", sigen, figen);
9631 +       if (sigen != au_digen(dentry)
9632 +           || sigen != au_iigen(dentry->d_inode)) {
9633 +               err = au_reval_dpath(dentry, sigen);
9634 +               AuDebugOn(err >= 0 && (au_digen(dentry) != sigen
9635 +                                      || au_iigen(dentry->d_inode) != sigen));
9636 +       }
9637 +
9638 +       /* a failed au_reval_dpath() must reach the unlock branch below */
9639 +       if (likely(err >= 0))
9640 +               err = refresh_file(file, reopen);
9641 +       if (!err) {
9642 +               if (!wlock) {
9643 +                       di_downgrade_lock(dentry, AuLock_IR);
9644 +                       fi_downgrade_lock(file);
9645 +               }
9646 +       } else { /* any failure: return with neither lock held */
9647 +               di_write_unlock(dentry);
9648 +               fi_write_unlock(file);
9649 +       }
9650 +
9651 + out:
9652 +       return err;
9653 +}
9654 +
9655 +/* ---------------------------------------------------------------------- */
9656 +
9657 +/* cf. aufs_nopage() */
9658 +/* for madvise(2): no data is read here, the page is only unlocked */
9659 +static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
9660 +{
9661 +       unlock_page(page);
9662 +       return 0;
9663 +}
9664 +
9665 +/* they will never be called. */
9666 +#ifdef CONFIG_AUFS_DEBUG
9667 +static int aufs_write_begin(struct file *file, struct address_space *mapping,
9668 +                           loff_t pos, unsigned len, unsigned flags,
9669 +                           struct page **pagep, void **fsdata)
9670 +{ AuUnsupport(); return 0; }
9671 +static int aufs_write_end(struct file *file, struct address_space *mapping,
9672 +                         loff_t pos, unsigned len, unsigned copied,
9673 +                         struct page *page, void *fsdata)
9674 +{ AuUnsupport(); return 0; }
9675 +static int aufs_writepage(struct page *page, struct writeback_control *wbc)
9676 +{ AuUnsupport(); return 0; }
9677 +static void aufs_sync_page(struct page *page)
9678 +{ AuUnsupport(); }
9679 +
9680 +static int aufs_set_page_dirty(struct page *page)
9681 +{ AuUnsupport(); return 0; }
9682 +static void aufs_invalidatepage(struct page *page, unsigned long offset)
9683 +{ AuUnsupport(); }
9684 +static int aufs_releasepage(struct page *page, gfp_t gfp)
9685 +{ AuUnsupport(); return 0; }
9686 +static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
9687 +                             const struct iovec *iov, loff_t offset,
9688 +                             unsigned long nr_segs)
9689 +{ AuUnsupport(); return 0; }
9690 +#endif /* CONFIG_AUFS_DEBUG */
9691 +
9692 +struct address_space_operations aufs_aop = {
9693 +       .readpage       = aufs_readpage,
9694 +#ifdef CONFIG_AUFS_DEBUG
9695 +       .writepage      = aufs_writepage,
9696 +       .sync_page      = aufs_sync_page,
9697 +       .set_page_dirty = aufs_set_page_dirty,
9698 +       .write_begin    = aufs_write_begin,
9699 +       .write_end      = aufs_write_end,
9700 +       .invalidatepage = aufs_invalidatepage,
9701 +       .releasepage    = aufs_releasepage,
9702 +       .direct_IO      = aufs_direct_IO,
9703 +#endif /* CONFIG_AUFS_DEBUG */
9704 +};
9705 diff -uprN -x .git linux-2.6.31/fs/aufs/file.h aufs2-2.6.git/fs/aufs/file.h
9706 --- linux-2.6.31/fs/aufs/file.h 1970-01-01 00:00:00.000000000 +0000
9707 +++ aufs2-2.6.git/fs/aufs/file.h        2009-09-21 21:49:23.401607657 +0000
9708 @@ -0,0 +1,175 @@
9709 +/*
9710 + * Copyright (C) 2005-2009 Junjiro R. Okajima
9711 + *
9712 + * This program, aufs is free software; you can redistribute it and/or modify
9713 + * it under the terms of the GNU General Public License as published by
9714 + * the Free Software Foundation; either version 2 of the License, or
9715 + * (at your option) any later version.
9716 + *
9717 + * This program is distributed in the hope that it will be useful,
9718 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9719 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9720 + * GNU General Public License for more details.
9721 + *
9722 + * You should have received a copy of the GNU General Public License
9723 + * along with this program; if not, write to the Free Software
9724 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
9725 + */
9726 +
9727 +/*
9728 + * file operations
9729 + */
9730 +
9731 +#ifndef __AUFS_FILE_H__
9732 +#define __AUFS_FILE_H__
9733 +
9734 +#ifdef __KERNEL__
9735 +
9736 +#include <linux/fs.h>
9737 +#include <linux/poll.h>
9738 +#include <linux/aufs_type.h>
9739 +#include "rwsem.h"
9740 +
9741 +struct au_branch;
9742 +struct au_hfile {
9743 +       struct file             *hf_file;
9744 +       struct au_branch        *hf_br;
9745 +};
9746 +
9747 +struct au_vdir;
9748 +struct au_finfo {
9749 +       atomic_t                fi_generation;
9750 +
9751 +       struct au_rwsem         fi_rwsem;
9752 +       struct au_hfile         *fi_hfile;
9753 +       aufs_bindex_t           fi_bstart, fi_bend;
9754 +
9755 +       union {
9756 +               /* non-dir only */
9757 +               struct {
9758 +                       struct vm_operations_struct     *fi_h_vm_ops;
9759 +                       struct vm_operations_struct     *fi_vm_ops;
9760 +                       struct mutex                    fi_vm_mtx;
9761 +               };
9762 +
9763 +               /* dir only */
9764 +               struct {
9765 +                       struct au_vdir          *fi_vdir_cache;
9766 +                       int                     fi_maintain_plink;
9767 +               };
9768 +       };
9769 +};
9770 +
9771 +/* ---------------------------------------------------------------------- */
9772 +
9773 +/* file.c */
9774 +extern struct address_space_operations aufs_aop;
9775 +unsigned int au_file_roflags(unsigned int flags);
9776 +struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
9777 +                      struct file *file);
9778 +int au_do_open(struct file *file, int (*open)(struct file *file, int flags));
9779 +int au_reopen_nondir(struct file *file);
9780 +struct au_pin;
9781 +int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
9782 +int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
9783 +                         int wlock);
9784 +
9785 +/* poll.c */
9786 +#ifdef CONFIG_AUFS_POLL
9787 +unsigned int aufs_poll(struct file *file, poll_table *wait);
9788 +#endif
9789 +
9790 +/* f_op.c */
9791 +extern const struct file_operations aufs_file_fop;
9792 +int aufs_flush(struct file *file, fl_owner_t id);
9793 +
9794 +/* finfo.c */
9795 +void au_hfput(struct au_hfile *hf, struct file *file);
9796 +void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
9797 +                  struct file *h_file);
9798 +
9799 +void au_update_figen(struct file *file);
9800 +
9801 +void au_finfo_fin(struct file *file);
9802 +int au_finfo_init(struct file *file);
9803 +int au_fi_realloc(struct au_finfo *finfo, int nbr);
9804 +
9805 +/* ---------------------------------------------------------------------- */
9806 +
9807 +static inline struct au_finfo *au_fi(struct file *file)
9808 +{
9809 +       return file->private_data;
9810 +}
9811 +
9812 +/* ---------------------------------------------------------------------- */
9813 +
9814 +/*
9815 + * fi_read_lock, fi_write_lock,
9816 + * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
9817 + */
9818 +AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
9819 +
9820 +#define FiMustNoWaiters(f)     AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
9821 +#define FiMustAnyLock(f)       AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
9822 +#define FiMustWriteLock(f)     AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
9823 +
9824 +/* ---------------------------------------------------------------------- */
9825 +
9826 +/* todo: hard/soft set? */
9827 +static inline aufs_bindex_t au_fbstart(struct file *file)
9828 +{
9829 +       FiMustAnyLock(file);
9830 +       return au_fi(file)->fi_bstart;
9831 +}
9832 +
9833 +static inline aufs_bindex_t au_fbend(struct file *file)
9834 +{
9835 +       FiMustAnyLock(file);
9836 +       return au_fi(file)->fi_bend;
9837 +}
9838 +
9839 +static inline struct au_vdir *au_fvdir_cache(struct file *file)
9840 +{
9841 +       FiMustAnyLock(file);
9842 +       return au_fi(file)->fi_vdir_cache;
9843 +}
9844 +
9845 +static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
9846 +{
9847 +       FiMustWriteLock(file);
9848 +       au_fi(file)->fi_bstart = bindex;
9849 +}
9850 +
9851 +static inline void au_set_fbend(struct file *file, aufs_bindex_t bindex)
9852 +{
9853 +       FiMustWriteLock(file);
9854 +       au_fi(file)->fi_bend = bindex;
9855 +}
9856 +
9857 +static inline void au_set_fvdir_cache(struct file *file,
9858 +                                     struct au_vdir *vdir_cache)
9859 +{
9860 +       FiMustWriteLock(file);
9861 +       au_fi(file)->fi_vdir_cache = vdir_cache;
9862 +}
9863 +
9864 +static inline struct file *au_h_fptr(struct file *file, aufs_bindex_t bindex)
9865 +{
9866 +       FiMustAnyLock(file);
9867 +       return au_fi(file)->fi_hfile[0 + bindex].hf_file;
9868 +}
9869 +
9870 +/* todo: memory barrier? */
9871 +static inline unsigned int au_figen(struct file *f)
9872 +{
9873 +       return atomic_read(&au_fi(f)->fi_generation);
9874 +}
9875 +
9876 +static inline int au_test_mmapped(struct file *f)
9877 +{
9878 +       /* FiMustAnyLock(f); */
9879 +       return !!(au_fi(f)->fi_h_vm_ops);
9880 +}
9881 +
9882 +#endif /* __KERNEL__ */
9883 +#endif /* __AUFS_FILE_H__ */
9884 diff -uprN -x .git linux-2.6.31/fs/aufs/finfo.c aufs2-2.6.git/fs/aufs/finfo.c
9885 --- linux-2.6.31/fs/aufs/finfo.c        1970-01-01 00:00:00.000000000 +0000
9886 +++ aufs2-2.6.git/fs/aufs/finfo.c       2009-09-21 21:49:23.401607657 +0000
9887 @@ -0,0 +1,128 @@
9888 +/*
9889 + * Copyright (C) 2005-2009 Junjiro R. Okajima
9890 + *
9891 + * This program, aufs is free software; you can redistribute it and/or modify
9892 + * it under the terms of the GNU General Public License as published by
9893 + * the Free Software Foundation; either version 2 of the License, or
9894 + * (at your option) any later version.
9895 + *
9896 + * This program is distributed in the hope that it will be useful,
9897 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
9898 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9899 + * GNU General Public License for more details.
9900 + *
9901 + * You should have received a copy of the GNU General Public License
9902 + * along with this program; if not, write to the Free Software
9903 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
9904 + */
9905 +
9906 +/*
9907 + * file private data
9908 + */
9909 +
9910 +#include <linux/file.h>
9911 +#include "aufs.h"
9912 +
9913 +void au_hfput(struct au_hfile *hf, struct file *file)
9914 +{
9915 +       if (file->f_flags & vfsub_fmode_to_uint(FMODE_EXEC))
9916 +               allow_write_access(hf->hf_file);
9917 +       fput(hf->hf_file);
9918 +       hf->hf_file = NULL;
9919 +       atomic_dec_return(&hf->hf_br->br_count);
9920 +       hf->hf_br = NULL;
9921 +}
9922 +
9923 +void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
9924 +{
9925 +       struct au_finfo *finfo = au_fi(file);
9926 +       struct au_hfile *hf;
9927 +
9928 +       hf = finfo->fi_hfile + bindex;
9929 +       if (hf->hf_file)
9930 +               au_hfput(hf, file);
9931 +       if (val) {
9932 +               hf->hf_file = val;
9933 +               hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
9934 +       }
9935 +}
9936 +
9937 +void au_update_figen(struct file *file)
9938 +{
9939 +       atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
9940 +       /* smp_mb(); */ /* atomic_set */
9941 +}
9942 +
9943 +/* ---------------------------------------------------------------------- */
9944 +
9945 +void au_finfo_fin(struct file *file)
9946 +{
9947 +       struct au_finfo *finfo;
9948 +       aufs_bindex_t bindex, bend;
9949 +
9950 +       fi_write_lock(file);
9951 +       bend = au_fbend(file);
9952 +       bindex = au_fbstart(file);
9953 +       if (bindex >= 0)
9954 +               /*
9955 +                * calls fput() instead of filp_close(),
9956 +                * since no dnotify or lock for the lower file.
9957 +                */
9958 +               for (; bindex <= bend; bindex++)
9959 +                       au_set_h_fptr(file, bindex, NULL);
9960 +
9961 +       finfo = au_fi(file);
9962 +       au_dbg_verify_hf(finfo);
9963 +       kfree(finfo->fi_hfile);
9964 +       fi_write_unlock(file);
9965 +       AuRwDestroy(&finfo->fi_rwsem);
9966 +       au_cache_free_finfo(finfo);
9967 +}
9968 +
9969 +int au_finfo_init(struct file *file)
9970 +{
9971 +       struct au_finfo *finfo;
9972 +       struct dentry *dentry;
9973 +
9974 +       dentry = file->f_dentry;
9975 +       finfo = au_cache_alloc_finfo();
9976 +       if (unlikely(!finfo))
9977 +               goto out;
9978 +
9979 +       finfo->fi_hfile = kcalloc(au_sbend(dentry->d_sb) + 1,
9980 +                                 sizeof(*finfo->fi_hfile), GFP_NOFS);
9981 +       if (unlikely(!finfo->fi_hfile))
9982 +               goto out_finfo;
9983 +
9984 +       au_rw_init_wlock(&finfo->fi_rwsem);
9985 +       finfo->fi_bstart = -1;
9986 +       finfo->fi_bend = -1;
9987 +       atomic_set(&finfo->fi_generation, au_digen(dentry));
9988 +       /* smp_mb(); */ /* atomic_set */
9989 +
9990 +       file->private_data = finfo;
9991 +       return 0; /* success */
9992 +
9993 + out_finfo:
9994 +       au_cache_free_finfo(finfo);
9995 + out:
9996 +       return -ENOMEM;
9997 +}
9998 +
9999 +int au_fi_realloc(struct au_finfo *finfo, int nbr)
10000 +{
10001 +       int err, sz;
10002 +       struct au_hfile *hfp;
10003 +
10004 +       err = -ENOMEM;
10005 +       sz = sizeof(*hfp) * (finfo->fi_bend + 1);
10006 +       if (!sz)
10007 +               sz = sizeof(*hfp);
10008 +       hfp = au_kzrealloc(finfo->fi_hfile, sz, sizeof(*hfp) * nbr, GFP_NOFS);
10009 +       if (hfp) {
10010 +               finfo->fi_hfile = hfp;
10011 +               err = 0;
10012 +       }
10013 +
10014 +       return err;
10015 +}
10016 diff -uprN -x .git linux-2.6.31/fs/aufs/fstype.h aufs2-2.6.git/fs/aufs/fstype.h
10017 --- linux-2.6.31/fs/aufs/fstype.h       1970-01-01 00:00:00.000000000 +0000
10018 +++ aufs2-2.6.git/fs/aufs/fstype.h      2009-09-21 21:49:23.401607657 +0000
10019 @@ -0,0 +1,485 @@
10020 +/*
10021 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10022 + *
10023 + * This program, aufs is free software; you can redistribute it and/or modify
10024 + * it under the terms of the GNU General Public License as published by
10025 + * the Free Software Foundation; either version 2 of the License, or
10026 + * (at your option) any later version.
10027 + *
10028 + * This program is distributed in the hope that it will be useful,
10029 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10030 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10031 + * GNU General Public License for more details.
10032 + *
10033 + * You should have received a copy of the GNU General Public License
10034 + * along with this program; if not, write to the Free Software
10035 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
10036 + */
10037 +
10038 +/*
10039 + * judging filesystem type
10040 + */
10041 +
10042 +#ifndef __AUFS_FSTYPE_H__
10043 +#define __AUFS_FSTYPE_H__
10044 +
10045 +#ifdef __KERNEL__
10046 +
10047 +#include <linux/cramfs_fs.h>
10048 +#include <linux/fs.h>
10049 +#include <linux/magic.h>
10050 +#include <linux/romfs_fs.h>
10051 +#include <linux/aufs_type.h>
10052 +
10053 +static inline int au_test_aufs(struct super_block *sb)
10054 +{
10055 +       return sb->s_magic == AUFS_SUPER_MAGIC;
10056 +}
10057 +
10058 +static inline const char *au_sbtype(struct super_block *sb)
10059 +{
10060 +       return sb->s_type->name;
10061 +}
10062 +
10063 +static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
10064 +{
10065 +#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
10066 +       return sb->s_magic == ISOFS_SUPER_MAGIC;
10067 +#else
10068 +       return 0;
10069 +#endif /* CONFIG_ISO9660_FS */
10070 +}
10071 +
10072 +static inline int au_test_romfs(struct super_block *sb __maybe_unused)
10073 +{
10074 +#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
10075 +       return sb->s_magic == ROMFS_MAGIC;
10076 +#else
10077 +       return 0;
10078 +#endif /* CONFIG_ROMFS_FS */
10079 +}
10080 +
10081 +static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
10082 +{
10083 +#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
10084 +       return sb->s_magic == CRAMFS_MAGIC;
10085 +#endif
10086 +       return 0;
10087 +}
10088 +
10089 +static inline int au_test_nfs(struct super_block *sb __maybe_unused)
10090 +{
10091 +#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
10092 +       return sb->s_magic == NFS_SUPER_MAGIC;
10093 +#else
10094 +       return 0;
10095 +#endif
10096 +}
10097 +
10098 +static inline int au_test_fuse(struct super_block *sb __maybe_unused)
10099 +{
10100 +#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
10101 +       return sb->s_magic == FUSE_SUPER_MAGIC;
10102 +#else
10103 +       return 0;
10104 +#endif
10105 +}
10106 +
10107 +static inline int au_test_xfs(struct super_block *sb __maybe_unused)
10108 +{
10109 +#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
10110 +       return sb->s_magic == XFS_SB_MAGIC;
10111 +#else
10112 +       return 0;
10113 +#endif
10114 +}
10115 +
10116 +static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
10117 +{
10118 +#ifdef CONFIG_TMPFS
10119 +       return sb->s_magic == TMPFS_MAGIC;
10120 +#else
10121 +       return 0;
10122 +#endif
10123 +}
10124 +
10125 +static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
10126 +{
10127 +#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
10128 +       return !strcmp(au_sbtype(sb), "ecryptfs");
10129 +#else
10130 +       return 0;
10131 +#endif
10132 +}
10133 +
10134 +static inline int au_test_smbfs(struct super_block *sb __maybe_unused)
10135 +{
10136 +#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE)
10137 +       return sb->s_magic == SMB_SUPER_MAGIC;
10138 +#else
10139 +       return 0;
10140 +#endif
10141 +}
10142 +
10143 +static inline int au_test_ocfs2(struct super_block *sb __maybe_unused)
10144 +{
10145 +#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE)
10146 +       return sb->s_magic == OCFS2_SUPER_MAGIC;
10147 +#else
10148 +       return 0;
10149 +#endif
10150 +}
10151 +
10152 +static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused)
10153 +{
10154 +#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE)
10155 +       return sb->s_magic == DLMFS_MAGIC;
10156 +#else
10157 +       return 0;
10158 +#endif
10159 +}
10160 +
10161 +static inline int au_test_coda(struct super_block *sb __maybe_unused)
10162 +{
10163 +#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE)
10164 +       return sb->s_magic == CODA_SUPER_MAGIC;
10165 +#else
10166 +       return 0;
10167 +#endif
10168 +}
10169 +
10170 +static inline int au_test_v9fs(struct super_block *sb __maybe_unused)
10171 +{
10172 +#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE)
10173 +       return sb->s_magic == V9FS_MAGIC;
10174 +#else
10175 +       return 0;
10176 +#endif
10177 +}
10178 +
10179 +static inline int au_test_ext4(struct super_block *sb __maybe_unused)
10180 +{
10181 +#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE)
10182 +       return sb->s_magic == EXT4_SUPER_MAGIC;
10183 +#else
10184 +       return 0;
10185 +#endif
10186 +}
10187 +
10188 +static inline int au_test_sysv(struct super_block *sb __maybe_unused)
10189 +{
10190 +#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE)
10191 +       return !strcmp(au_sbtype(sb), "sysv");
10192 +#else
10193 +       return 0;
10194 +#endif
10195 +}
10196 +
10197 +static inline int au_test_ramfs(struct super_block *sb)
10198 +{
10199 +       return sb->s_magic == RAMFS_MAGIC;
10200 +}
10201 +
10202 +static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
10203 +{
10204 +#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
10205 +       return sb->s_magic == UBIFS_SUPER_MAGIC;
10206 +#else
10207 +       return 0;
10208 +#endif
10209 +}
10210 +
10211 +static inline int au_test_procfs(struct super_block *sb __maybe_unused)
10212 +{
10213 +#ifdef CONFIG_PROC_FS
10214 +       return sb->s_magic == PROC_SUPER_MAGIC;
10215 +#else
10216 +       return 0;
10217 +#endif
10218 +}
10219 +
10220 +static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
10221 +{
10222 +#ifdef CONFIG_SYSFS
10223 +       return sb->s_magic == SYSFS_MAGIC;
10224 +#else
10225 +       return 0;
10226 +#endif
10227 +}
10228 +
10229 +static inline int au_test_configfs(struct super_block *sb __maybe_unused)
10230 +{
10231 +#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
10232 +       return sb->s_magic == CONFIGFS_MAGIC;
10233 +#else
10234 +       return 0;
10235 +#endif
10236 +}
10237 +
10238 +static inline int au_test_minix(struct super_block *sb __maybe_unused)
10239 +{
10240 +#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
10241 +       return sb->s_magic == MINIX3_SUPER_MAGIC
10242 +               || sb->s_magic == MINIX2_SUPER_MAGIC
10243 +               || sb->s_magic == MINIX2_SUPER_MAGIC2
10244 +               || sb->s_magic == MINIX_SUPER_MAGIC
10245 +               || sb->s_magic == MINIX_SUPER_MAGIC2;
10246 +#else
10247 +       return 0;
10248 +#endif
10249 +}
10250 +
10251 +static inline int au_test_cifs(struct super_block *sb __maybe_unused)
10252 +{
10253 +#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE)
10254 +       return sb->s_magic == CIFS_MAGIC_NUMBER;
10255 +#else
10256 +       return 0;
10257 +#endif
10258 +}
10259 +
10260 +static inline int au_test_fat(struct super_block *sb __maybe_unused)
10261 +{
10262 +#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
10263 +       return sb->s_magic == MSDOS_SUPER_MAGIC;
10264 +#else
10265 +       return 0;
10266 +#endif
10267 +}
10268 +
10269 +static inline int au_test_msdos(struct super_block *sb)
10270 +{
10271 +       return au_test_fat(sb);
10272 +}
10273 +
10274 +static inline int au_test_vfat(struct super_block *sb)
10275 +{
10276 +       return au_test_fat(sb);
10277 +}
10278 +
10279 +static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
10280 +{
10281 +#ifdef CONFIG_SECURITYFS
10282 +       return sb->s_magic == SECURITYFS_MAGIC;
10283 +#else
10284 +       return 0;
10285 +#endif
10286 +}
10287 +
10288 +static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
10289 +{
10290 +#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
10291 +       return sb->s_magic == SQUASHFS_MAGIC;
10292 +#else
10293 +       return 0;
10294 +#endif
10295 +}
10296 +
10297 +static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
10298 +{
10299 +#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
10300 +       return sb->s_magic == BTRFS_SUPER_MAGIC;
10301 +#else
10302 +       return 0;
10303 +#endif
10304 +}
10305 +
10306 +static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
10307 +{
10308 +#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
10309 +       return sb->s_magic == XENFS_SUPER_MAGIC;
10310 +#else
10311 +       return 0;
10312 +#endif
10313 +}
10314 +
10315 +static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
10316 +{
10317 +#ifdef CONFIG_DEBUG_FS
10318 +       return sb->s_magic == DEBUGFS_MAGIC;
10319 +#else
10320 +       return 0;
10321 +#endif
10322 +}
10323 +
10324 +static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
10325 +{
10326 +#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
10327 +       return sb->s_magic == NILFS_SUPER_MAGIC;
10328 +#else
10329 +       return 0;
10330 +#endif
10331 +}
10332 +
10333 +/* ---------------------------------------------------------------------- */
10334 +/*
10335 + * they can't be an aufs branch.
10336 + */
10337 +static inline int au_test_fs_unsuppoted(struct super_block *sb)
10338 +{
10339 +       return
10340 +#ifndef CONFIG_AUFS_BR_RAMFS
10341 +               au_test_ramfs(sb) ||
10342 +#endif
10343 +               au_test_procfs(sb)
10344 +               || au_test_sysfs(sb)
10345 +               || au_test_configfs(sb)
10346 +               || au_test_debugfs(sb)
10347 +               || au_test_securityfs(sb)
10348 +               || au_test_xenfs(sb)
10349 +               || au_test_ecryptfs(sb)
10350 +               /* || !strcmp(au_sbtype(sb), "unionfs") */
10351 +               || au_test_aufs(sb); /* will be supported in next version */
10352 +}
10353 +
10354 +/*
10355 + * If the filesystem supports NFS-export, then it has to support NULL as
10356 + * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate().
10357 + * We can apply this principle when we handle a lower filesystem.
10358 + */
10359 +static inline int au_test_fs_null_nd(struct super_block *sb)
10360 +{
10361 +       return !!sb->s_export_op;
10362 +}
10363 +
10364 +static inline int au_test_fs_remote(struct super_block *sb)
10365 +{
10366 +       return !au_test_tmpfs(sb)
10367 +#ifdef CONFIG_AUFS_BR_RAMFS
10368 +               && !au_test_ramfs(sb)
10369 +#endif
10370 +               && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
10371 +}
10372 +
10373 +/* ---------------------------------------------------------------------- */
10374 +
10375 +/*
10376 + * Note: these functions (below) are created after reading ->getattr() in all
10377 + * filesystems under linux/fs. it means we have to do so in every update...
10378 + */
10379 +
10380 +/*
10381 + * some filesystems require getattr to refresh the inode attributes before
10382 + * referencing.
10383 + * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
10384 + * and leave the work for d_revalidate()
10385 + */
10386 +static inline int au_test_fs_refresh_iattr(struct super_block *sb)
10387 +{
10388 +       return au_test_nfs(sb)
10389 +               || au_test_fuse(sb)
10390 +               /* || au_test_smbfs(sb) */      /* untested */
10391 +               /* || au_test_ocfs2(sb) */      /* untested */
10392 +               /* || au_test_btrfs(sb) */      /* untested */
10393 +               /* || au_test_coda(sb) */       /* untested */
10394 +               /* || au_test_v9fs(sb) */       /* untested */
10395 +               ;
10396 +}
10397 +
10398 +/*
10399 + * filesystems which don't maintain i_size or i_blocks.
10400 + */
10401 +static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
10402 +{
10403 +       return au_test_xfs(sb)
10404 +               /* || au_test_ext4(sb) */       /* untested */
10405 +               /* || au_test_ocfs2(sb) */      /* untested */
10406 +               /* || au_test_ocfs2_dlmfs(sb) */ /* untested */
10407 +               /* || au_test_sysv(sb) */       /* untested */
10408 +               /* || au_test_ubifs(sb) */      /* untested */
10409 +               /* || au_test_minix(sb) */      /* untested */
10410 +               ;
10411 +}
10412 +
10413 +/*
10414 + * filesystems which don't store the correct value in some of their inode
10415 + * attributes.
10416 + */
10417 +static inline int au_test_fs_bad_iattr(struct super_block *sb)
10418 +{
10419 +       return au_test_fs_bad_iattr_size(sb)
10420 +               /* || au_test_cifs(sb) */       /* untested */
10421 +               || au_test_fat(sb)
10422 +               || au_test_msdos(sb)
10423 +               || au_test_vfat(sb);
10424 +}
10425 +
10426 +/* they don't check i_nlink in link(2) */
10427 +static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
10428 +{
10429 +       return au_test_tmpfs(sb)
10430 +#ifdef CONFIG_AUFS_BR_RAMFS
10431 +               || au_test_ramfs(sb)
10432 +#endif
10433 +               || au_test_ubifs(sb);
10434 +}
10435 +
10436 +/*
10437 + * filesystems which set S_NOATIME and S_NOCMTIME.
10438 + */
10439 +static inline int au_test_fs_notime(struct super_block *sb)
10440 +{
10441 +       return au_test_nfs(sb)
10442 +               || au_test_fuse(sb)
10443 +               || au_test_ubifs(sb)
10444 +               /* || au_test_cifs(sb) */       /* untested */
10445 +               ;
10446 +}
10447 +
10448 +/*
10449 + * filesystems which require replacing i_mapping.
10450 + */
10451 +static inline int au_test_fs_bad_mapping(struct super_block *sb)
10452 +{
10453 +       return au_test_fuse(sb)
10454 +               || au_test_ubifs(sb);
10455 +}
10456 +
10457 +/* temporary support for i#1 in cramfs */
10458 +static inline int au_test_fs_unique_ino(struct inode *inode)
10459 +{
10460 +       if (au_test_cramfs(inode->i_sb))
10461 +               return inode->i_ino != 1;
10462 +       return 1;
10463 +}
10464 +
10465 +/* ---------------------------------------------------------------------- */
10466 +
10467 +/*
10468 + * the filesystem where the xino files are placed must support i/o after unlink and
10469 + * maintain i_size and i_blocks.
10470 + */
10471 +static inline int au_test_fs_bad_xino(struct super_block *sb)
10472 +{
10473 +       return au_test_fs_remote(sb)
10474 +               || au_test_fs_bad_iattr_size(sb)
10475 +#ifdef CONFIG_AUFS_BR_RAMFS
10476 +               || !(au_test_ramfs(sb) || au_test_fs_null_nd(sb))
10477 +#else
10478 +               || !au_test_fs_null_nd(sb) /* to keep xino code simple */
10479 +#endif
10480 +               /* don't want unnecessary work for xino */
10481 +               || au_test_aufs(sb)
10482 +               || au_test_ecryptfs(sb)
10483 +               || au_test_nilfs(sb);
10484 +}
10485 +
10486 +static inline int au_test_fs_trunc_xino(struct super_block *sb)
10487 +{
10488 +       return au_test_tmpfs(sb)
10489 +               || au_test_ramfs(sb);
10490 +}
10491 +
10492 +/*
10493 + * test if the @sb is real-readonly.
10494 + */
10495 +static inline int au_test_fs_rr(struct super_block *sb)
10496 +{
10497 +       return au_test_squashfs(sb)
10498 +               || au_test_iso9660(sb)
10499 +               || au_test_cramfs(sb)
10500 +               || au_test_romfs(sb);
10501 +}
10502 +
10503 +#endif /* __KERNEL__ */
10504 +#endif /* __AUFS_FSTYPE_H__ */
10505 diff -uprN -x .git linux-2.6.31/fs/aufs/hinotify.c aufs2-2.6.git/fs/aufs/hinotify.c
10506 --- linux-2.6.31/fs/aufs/hinotify.c     1970-01-01 00:00:00.000000000 +0000
10507 +++ aufs2-2.6.git/fs/aufs/hinotify.c    2009-09-21 21:49:23.401607657 +0000
10508 @@ -0,0 +1,755 @@
10509 +/*
10510 + * Copyright (C) 2005-2009 Junjiro R. Okajima
10511 + *
10512 + * This program, aufs is free software; you can redistribute it and/or modify
10513 + * it under the terms of the GNU General Public License as published by
10514 + * the Free Software Foundation; either version 2 of the License, or
10515 + * (at your option) any later version.
10516 + *
10517 + * This program is distributed in the hope that it will be useful,
10518 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10519 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10520 + * GNU General Public License for more details.
10521 + *
10522 + * You should have received a copy of the GNU General Public License
10523 + * along with this program; if not, write to the Free Software
10524 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
10525 + */
10526 +
10527 +/*
10528 + * inotify for the lower directories
10529 + */
10530 +
10531 +#include "aufs.h"
10532 +
10533 +static const __u32 AuHinMask = (IN_MOVE | IN_DELETE | IN_CREATE);
10534 +static struct inotify_handle *au_hin_handle;
10535 +
10536 +AuCacheFuncs(hinotify, HINOTIFY);
10537 +
10538 +int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
10539 +                struct inode *h_inode)
10540 +{
10541 +       int err;
10542 +       struct au_hinotify *hin;
10543 +       s32 wd;
10544 +
10545 +       err = -ENOMEM;
10546 +       hin = au_cache_alloc_hinotify();
10547 +       if (hin) {
10548 +               AuDebugOn(hinode->hi_notify);
10549 +               hinode->hi_notify = hin;
10550 +               hin->hin_aufs_inode = inode;
10551 +
10552 +               inotify_init_watch(&hin->hin_watch);
10553 +               wd = inotify_add_watch(au_hin_handle, &hin->hin_watch, h_inode,
10554 +                                      AuHinMask);
10555 +               if (wd >= 0)
10556 +                       return 0; /* success */
10557 +
10558 +               err = wd;
10559 +               put_inotify_watch(&hin->hin_watch);
10560 +               au_cache_free_hinotify(hin);
10561 +               hinode->hi_notify = NULL;
10562 +       }
10563 +
10564 +       return err;
10565 +}
10566 +
10567 +void au_hin_free(struct au_hinode *hinode)
10568 +{
10569 +       int err;
10570 +       struct au_hinotify *hin;
10571 +
10572 +       hin = hinode->hi_notify;
10573 +       if (hin) {
10574 +               err = 0;
10575 +               if (atomic_read(&hin->hin_watch.count))
10576 +                       err = inotify_rm_watch(au_hin_handle, &hin->hin_watch);
10577 +               if (unlikely(err))
10578 +                       /* it means the watch is already removed */
10579 +                       AuWarn("failed inotify_rm_watch() %d\n", err);
10580 +               au_cache_free_hinotify(hin);
10581 +               hinode->hi_notify = NULL;
10582 +       }
10583 +}
10584 +
10585 +/* ---------------------------------------------------------------------- */
10586 +
10587 +void au_hin_ctl(struct au_hinode *hinode, int do_set)
10588 +{
10589 +       struct inode *h_inode;
10590 +       struct inotify_watch *watch;
10591 +
10592 +       if (!hinode->hi_notify)
10593 +               return;
10594 +
10595 +       h_inode = hinode->hi_inode;
10596 +       IMustLock(h_inode);
10597 +
10598 +       /* todo: try inotify_find_update_watch()? */
10599 +       watch = &hinode->hi_notify->hin_watch;
10600 +       mutex_lock(&h_inode->inotify_mutex);
10601 +       /* mutex_lock(&watch->ih->mutex); */
10602 +       if (do_set) {
10603 +               AuDebugOn(watch->mask & AuHinMask);
10604 +               watch->mask |= AuHinMask;
10605 +       } else {
10606 +               AuDebugOn(!(watch->mask & AuHinMask));
10607 +               watch->mask &= ~AuHinMask;
10608 +       }
10609 +       /* mutex_unlock(&watch->ih->mutex); */
10610 +       mutex_unlock(&h_inode->inotify_mutex);
10611 +}
10612 +
10613 +void au_reset_hinotify(struct inode *inode, unsigned int flags)
10614 +{
10615 +       aufs_bindex_t bindex, bend;
10616 +       struct inode *hi;
10617 +       struct dentry *iwhdentry;
10618 +
10619 +       bend = au_ibend(inode);
10620 +       for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
10621 +               hi = au_h_iptr(inode, bindex);
10622 +               if (!hi)
10623 +                       continue;
10624 +
10625 +               /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
10626 +               iwhdentry = au_hi_wh(inode, bindex);
10627 +               if (iwhdentry)
10628 +                       dget(iwhdentry);
10629 +               au_igrab(hi);
10630 +               au_set_h_iptr(inode, bindex, NULL, 0);
10631 +               au_set_h_iptr(inode, bindex, au_igrab(hi),
10632 +                             flags & ~AuHi_XINO);
10633 +               iput(hi);
10634 +               dput(iwhdentry);
10635 +               /* mutex_unlock(&hi->i_mutex); */
10636 +       }
10637 +}
10638 +
10639 +/* ---------------------------------------------------------------------- */
10640 +
10641 +static int hin_xino(struct inode *inode, struct inode *h_inode)
10642 +{
10643 +       int err;
10644 +       aufs_bindex_t bindex, bend, bfound, bstart;
10645 +       struct inode *h_i;
10646 +
10647 +       err = 0;
10648 +       if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
10649 +               AuWarn("branch root dir was changed\n");
10650 +               goto out;
10651 +       }
10652 +
10653 +       bfound = -1;
10654 +       bend = au_ibend(inode);
10655 +       bstart = au_ibstart(inode);
10656 +#if 0 /* reserved for future use */
10657 +       if (bindex == bend) {
10658 +               /* keep this ino in rename case */
10659 +               goto out;
10660 +       }
10661 +#endif
10662 +       for (bindex = bstart; bindex <= bend; bindex++) {
10663 +               if (au_h_iptr(inode, bindex) == h_inode) {
10664 +                       bfound = bindex;
10665 +                       break;
10666 +               }
10667 +       }
10668 +       if (bfound < 0)
10669 +               goto out;
10670 +
10671 +       for (bindex = bstart; bindex <= bend; bindex++) {
10672 +               h_i = au_h_iptr(inode, bindex);
10673 +               if (!h_i)
10674 +                       continue;
10675 +
10676 +               err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
10677 +               /* ignore this error */
10678 +               /* bad action? */
10679 +       }
10680 +
10681 +       /* children inode number will be broken */
10682 +
10683 + out:
10684 +       AuTraceErr(err);
10685 +       return err;
10686 +}
10687 +
10688 +static int hin_gen_tree(struct dentry *dentry)
10689 +{
10690 +       int err, i, j, ndentry;
10691 +       struct au_dcsub_pages dpages;
10692 +       struct au_dpage *dpage;
10693 +       struct dentry **dentries;
10694 +
10695 +       err = au_dpages_init(&dpages, GFP_NOFS);
10696 +       if (unlikely(err))
10697 +               goto out;
10698 +       err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
10699 +       if (unlikely(err))
10700 +               goto out_dpages;
10701 +
10702 +       for (i = 0; i < dpages.ndpage; i++) {
10703 +               dpage = dpages.dpages + i;
10704 +               dentries = dpage->dentries;
10705 +               ndentry = dpage->ndentry;
10706 +               for (j = 0; j < ndentry; j++) {
10707 +                       struct dentry *d;
10708 +
10709 +                       d = dentries[j];
10710 +                       if (IS_ROOT(d))
10711 +                               continue;
10712 +
10713 +                       d_drop(d);
10714 +                       au_digen_dec(d);
10715 +                       if (d->d_inode)
10716 +                               /* todo: reset children xino?
10717 +                                  cached children only? */
10718 +                               au_iigen_dec(d->d_inode);
10719 +               }
10720 +       }
10721 +
10722 + out_dpages:
10723 +       au_dpages_free(&dpages);
10724 +
10725 +       /* discard children */
10726 +       dentry_unhash(dentry);
10727 +       dput(dentry);
10728 + out:
10729 +       return err;
10730 +}
10731 +
10732 +/*
10733 + * return 0 if processed (the alias named @name/@nlen was found), else non-zero.
10734 + */
10735 +static int hin_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
10736 +                           const unsigned int isdir)
10737 +{
10738 +       int err;
10739 +       struct dentry *d;
10740 +       struct qstr *dname;
10741 +
10742 +       err = 1;
10743 +       if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
10744 +               AuWarn("branch root dir was changed\n");
10745 +               err = 0;
10746 +               goto out;
10747 +       }
10748 +
10749 +       if (!isdir) {
10750 +               AuDebugOn(!name);
10751 +               au_iigen_dec(inode);
10752 +               spin_lock(&dcache_lock);
10753 +               list_for_each_entry(d, &inode->i_dentry, d_alias) {
10754 +                       dname = &d->d_name;
10755 +                       if (dname->len != nlen
10756 +                           || memcmp(dname->name, name, nlen))
10757 +                               continue; /* not the alias named @name */
10758 +                       err = 0;
10759 +                       spin_lock(&d->d_lock);
10760 +                       __d_drop(d);
10761 +                       au_digen_dec(d);
10762 +                       spin_unlock(&d->d_lock);
10763 +                       break;
10764 +               }
10765 +               spin_unlock(&dcache_lock);
10766 +       } else {
10767 +               au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIRS);
10768 +               d = d_find_alias(inode);
10769 +               if (!d) {
10770 +                       au_iigen_dec(inode);
10771 +                       goto out;
10772 +               }
10773 +
10774 +               dname = &d->d_name;
10775 +               if (dname->len == nlen && !memcmp(dname->name, name, nlen))
10776 +                       err = hin_gen_tree(d);
10777 +               dput(d);
10778 +       }
10779 +
10780 + out:
10781 +       AuTraceErr(err);
10782 +       return err;
10783 +}
10784 +
10785 +/* make @dentry (and its inode, if any) obsolete; @isdir selects how */
10786 +static int hin_gen_by_name(struct dentry *dentry, const unsigned int isdir)
10787 +{
10788 +       int err = 0;
10789 +       struct inode *inode = dentry->d_inode;
10790 +
10791 +       /* a root dentry is only reported, nothing is invalidated for it */
10792 +       if (IS_ROOT(dentry)) {
10793 +               AuWarn("branch root dir was changed\n");
10794 +               return 0;
10795 +       }
10796 +
10797 +       if (isdir) {
10798 +               /* dir: flag the superblock and hand over to hin_gen_tree() */
10799 +               au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIRS);
10800 +               if (inode)
10801 +                       err = hin_gen_tree(dentry);
10802 +       } else {
10803 +               /* non-dir: unhash it, then au_digen_dec()/au_iigen_dec() */
10804 +               d_drop(dentry);
10805 +               au_digen_dec(dentry);
10806 +               if (inode)
10807 +                       au_iigen_dec(inode);
10808 +       }
10809 +
10810 +       AuTraceErr(err);
10811 +       return err;
10812 +}
10813 +
10814 +/* ---------------------------------------------------------------------- */
10815 +
10816 +/* hinotify job flags */
10817 +#define AuHinJob_XINO0         1
10818 +#define AuHinJob_GEN           (1 << 1)
10819 +#define AuHinJob_DIRENT                (1 << 2)
10820 +#define AuHinJob_ISDIR         (1 << 3)
10821 +#define AuHinJob_TRYXINO0      (1 << 4)
10822 +#define AuHinJob_MNTPNT                (1 << 5)
10823 +#define au_ftest_hinjob(flags, name)   ((flags) & AuHinJob_##name)
10824 +#define au_fset_hinjob(flags, name)    { (flags) |= AuHinJob_##name; }
10825 +#define au_fclr_hinjob(flags, name)    { (flags) &= ~AuHinJob_##name; }
10826 +
10827 +struct hin_job_args {
10828 +       unsigned int flags; /* AuHinJob_* bits, tested by hin_job() */
10829 +       struct inode *inode, *h_inode, *dir, *h_dir;
10830 +       struct dentry *dentry; /* may be NULL */
10831 +       char *h_name; /* name handed to hin_gen_by_inode() */
10832 +       int h_nlen; /* length of h_name */
10833 +};
10834 +/* run the jobs selected by a->flags; errors are ignored, always returns 0 */
10835 +static int hin_job(struct hin_job_args *a)
10836 +{
10837 +       const unsigned int isdir = au_ftest_hinjob(a->flags, ISDIR);
10838 +
10839 +       /* XINO0: reset xino via hin_xino() */
10840 +       if (au_ftest_hinjob(a->flags, XINO0) && a->inode)
10841 +               hin_xino(a->inode, a->h_inode); /* ignore this error */
10842 +
10843 +       if (au_ftest_hinjob(a->flags, TRYXINO0)
10844 +           && a->inode
10845 +           && a->h_inode) {
10846 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
10847 +               if (!a->h_inode->i_nlink) /* reset only when no link is left */
10848 +                       hin_xino(a->inode, a->h_inode); /* ignore this error */
10849 +               mutex_unlock(&a->h_inode->i_mutex);
10850 +       }
10851 +
10852 +       /* GEN: make the generation obsolete, by inode first, then by name */
10853 +       if (au_ftest_hinjob(a->flags, GEN)) {
10854 +               int err = -1; /* non-zero: fall back to the dentry below */
10855 +               if (a->inode)
10856 +                       err = hin_gen_by_inode(a->h_name, a->h_nlen, a->inode,
10857 +                                              isdir);
10858 +               if (err && a->dentry)
10859 +                       hin_gen_by_name(a->dentry, isdir);
10860 +               /* ignore this error */
10861 +       }
10862 +
10863 +       /* DIRENT: make dir entries obsolete by clearing vdir->vd_jiffy */
10864 +       if (au_ftest_hinjob(a->flags, DIRENT) && a->inode) {
10865 +               struct au_vdir *vdir;
10866 +
10867 +               vdir = au_ivdir(a->inode);
10868 +               if (vdir)
10869 +                       vdir->vd_jiffy = 0;
10870 +               /* IMustLock(a->inode); */
10871 +               /* a->inode->i_version++; */
10872 +       }
10873 +
10874 +       /* MNTPNT: can do nothing but warn */
10875 +       if (au_ftest_hinjob(a->flags, MNTPNT)
10876 +           && a->dentry
10877 +           && d_mountpoint(a->dentry))
10878 +               AuWarn("mount-point %.*s is removed or renamed\n",
10879 +                      AuDLNPair(a->dentry));
10880 +
10881 +       return 0;
10882 +}
10883 +
10884 +/* ---------------------------------------------------------------------- */
10885 +/* name of the first known inotify event bit set in @mask, for messages */
10886 +static char *in_name(u32 mask)
10887 +{
10888 +#ifdef CONFIG_AUFS_DEBUG
10889 +#define test_ret(flag) if (mask & flag) \
10890 +                               return #flag;
10891 +       test_ret(IN_ACCESS);
10892 +       test_ret(IN_MODIFY);
10893 +       test_ret(IN_ATTRIB);
10894 +       test_ret(IN_CLOSE_WRITE);
10895 +       test_ret(IN_CLOSE_NOWRITE);
10896 +       test_ret(IN_OPEN);
10897 +       test_ret(IN_MOVED_FROM);
10898 +       test_ret(IN_MOVED_TO);
10899 +       test_ret(IN_CREATE);
10900 +       test_ret(IN_DELETE);
10901 +       test_ret(IN_DELETE_SELF);
10902 +       test_ret(IN_MOVE_SELF);
10903 +       test_ret(IN_UNMOUNT);
10904 +       test_ret(IN_Q_OVERFLOW);
10905 +       test_ret(IN_IGNORED);
10906 +       return ""; /* none of the bits tested above is set */
10907 +#undef test_ret
10908 +#else
10909 +       return "??"; /* the names exist only under CONFIG_AUFS_DEBUG */
10910 +#endif
10911 +}
10912 +/* find the child of @dir named @name; returns it dget()-ed and di-locked */
10913 +static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
10914 +                                          struct inode *dir)
10915 +{
10916 +       struct dentry *dentry, *d, *parent;
10917 +       struct qstr *dname;
10918 +
10919 +       parent = d_find_alias(dir);
10920 +       if (!parent)
10921 +               return NULL;
10922 +
10923 +       dentry = NULL;
10924 +       spin_lock(&dcache_lock);
10925 +       list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) {
10926 +               /* AuDbg("%.*s\n", AuDLNPair(d)); */
10927 +               dname = &d->d_name;
10928 +               if (dname->len != nlen || memcmp(dname->name, name, nlen))
10929 +                       continue;
10930 +               if (!atomic_read(&d->d_count) || !d->d_fsdata) {
10931 +                       /* unreferenced or no d_fsdata: unhash, keep looking */
10932 +                       spin_lock(&d->d_lock);
10933 +                       __d_drop(d);
10934 +                       spin_unlock(&d->d_lock);
10935 +                       continue;
10936 +               }
10937 +
10938 +               dentry = dget(d);
10939 +               break;
10940 +       }
10941 +       spin_unlock(&dcache_lock);
10942 +       dput(parent);
10943 +
10944 +       if (dentry)
10945 +               di_write_lock_child(dentry); /* taken outside dcache_lock */
10946 +
10947 +       return dentry;
10948 +}
10948 +
10949 +/*
10950 + * look up the aufs inode which xino maps @h_ino on branch @bindex to.
10951 + * returns it ii_write_lock_child()-ed, or NULL (also for the aufs root).
10952 + */
10953 +static struct inode *lookup_wlock_by_ino(struct super_block *sb,
10954 +                                        aufs_bindex_t bindex, ino_t h_ino)
10955 +{
10956 +       struct inode *found;
10957 +       ino_t au_ino;
10958 +
10959 +       if (au_xino_read(sb, bindex, h_ino, &au_ino) || !au_ino)
10960 +               return NULL;
10961 +
10962 +       found = ilookup(sb, au_ino);
10963 +       if (!found)
10964 +               return NULL;
10965 +
10966 +       if (unlikely(found->i_ino == AUFS_ROOT_INO)) {
10967 +               AuWarn("wrong root branch\n");
10968 +               iput(found);
10969 +               return NULL;
10970 +       }
10971 +
10972 +       ii_write_lock_child(found);
10973 +       return found;
10974 +}
10975 +
10976 +enum { CHILD, PARENT }; /* indices into flags[] below */
10977 +struct postproc_args {
10978 +       struct inode *h_dir, *dir, *h_child_inode; /* iput() in postproc() */
10979 +       u32 mask; /* the inotify event mask */
10980 +       unsigned int flags[2]; /* AuHinJob_* bits per CHILD/PARENT */
10981 +       unsigned int h_child_nlen; /* length of h_child_name, can be 0 */
10982 +       char h_child_name[]; /* allocated together with this struct */
10983 +};
10984 +/* queued by aufs_inotify(): run the child and parent jobs, free @_args */
10985 +static void postproc(void *_args)
10986 +{
10987 +       struct postproc_args *a = _args;
10988 +       struct super_block *sb;
10989 +       aufs_bindex_t bindex, bend, bfound;
10990 +       unsigned char xino, try_iput;
10991 +       int err;
10992 +       struct inode *inode;
10993 +       ino_t h_ino;
10994 +       struct hin_job_args args;
10995 +       struct dentry *dentry;
10996 +       struct au_sbinfo *sbinfo;
10997 +
10998 +       AuDebugOn(!_args);
10999 +       AuDebugOn(!a->h_dir);
11000 +       AuDebugOn(!a->dir);
11001 +       AuDebugOn(!a->mask);
11002 +       AuDbg("mask 0x%x %s, i%lu, hi%lu, hci%lu\n",
11003 +             a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino,
11004 +             a->h_child_inode ? a->h_child_inode->i_ino : 0);
11005 +
11006 +       inode = NULL;
11007 +       dentry = NULL;
11008 +       /*
11009 +        * do not lock a->dir->i_mutex here
11010 +        * because of d_revalidate() may cause a deadlock.
11011 +        */
11012 +       sb = a->dir->i_sb;
11013 +       AuDebugOn(!sb);
11014 +       sbinfo = au_sbi(sb);
11015 +       AuDebugOn(!sbinfo);
11016 +       /* big aufs lock */
11017 +       si_noflush_write_lock(sb);
11018 +
11019 +       ii_read_lock_parent(a->dir);
11020 +       bfound = -1; /* index of the branch whose lower inode is a->h_dir */
11021 +       bend = au_ibend(a->dir);
11022 +       for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
11023 +               if (au_h_iptr(a->dir, bindex) == a->h_dir) {
11024 +                       bfound = bindex;
11025 +                       break;
11026 +               }
11027 +       ii_read_unlock(a->dir);
11028 +       if (unlikely(bfound < 0))
11029 +               goto out; /* a->h_dir is not a lower inode of a->dir */
11030 +
11031 +       xino = !!au_opt_test(au_mntflags(sb), XINO);
11032 +       h_ino = 0;
11033 +       if (a->h_child_inode)
11034 +               h_ino = a->h_child_inode->i_ino;
11035 +
11036 +       if (a->h_child_nlen
11037 +           && (au_ftest_hinjob(a->flags[CHILD], GEN)
11038 +               || au_ftest_hinjob(a->flags[CHILD], MNTPNT)))
11039 +               dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
11040 +                                             a->dir);
11041 +       try_iput = 0; /* set when inode comes from lookup_wlock_by_ino() */
11042 +       if (dentry)
11043 +               inode = dentry->d_inode;
11044 +       if (xino && !inode && h_ino
11045 +           && (au_ftest_hinjob(a->flags[CHILD], XINO0)
11046 +               || au_ftest_hinjob(a->flags[CHILD], TRYXINO0)
11047 +               || au_ftest_hinjob(a->flags[CHILD], GEN))) {
11048 +               inode = lookup_wlock_by_ino(sb, bfound, h_ino);
11049 +               try_iput = 1;
11050 +           }
11051 +
11052 +       args.flags = a->flags[CHILD]; /* first, the jobs for the child */
11053 +       args.dentry = dentry;
11054 +       args.inode = inode;
11055 +       args.h_inode = a->h_child_inode;
11056 +       args.dir = a->dir;
11057 +       args.h_dir = a->h_dir;
11058 +       args.h_name = a->h_child_name;
11059 +       args.h_nlen = a->h_child_nlen;
11060 +       err = hin_job(&args);
11061 +       if (dentry) {
11062 +               if (dentry->d_fsdata)
11063 +                       di_write_unlock(dentry);
11064 +               dput(dentry);
11065 +       }
11066 +       if (inode && try_iput) {
11067 +               ii_write_unlock(inode);
11068 +               iput(inode);
11069 +       }
11070 +
11071 +       ii_write_lock_parent(a->dir); /* then, the jobs for the parent dir */
11072 +       args.flags = a->flags[PARENT];
11073 +       args.dentry = NULL;
11074 +       args.inode = a->dir;
11075 +       args.h_inode = a->h_dir;
11076 +       args.dir = NULL;
11077 +       args.h_dir = NULL;
11078 +       args.h_name = NULL;
11079 +       args.h_nlen = 0;
11080 +       err = hin_job(&args);
11081 +       ii_write_unlock(a->dir);
11082 +
11083 + out:
11084 +       au_nwt_done(&sbinfo->si_nowait);
11085 +       si_write_unlock(sb);
11086 +
11087 +       iput(a->h_child_inode); /* the references taken in aufs_inotify() */
11088 +       iput(a->h_dir);
11089 +       iput(a->dir);
11090 +       kfree(a);
11091 +}
11092 +
11093 +/* ---------------------------------------------------------------------- */
11094 +
11095 +static void aufs_inotify(struct inotify_watch *watch, u32 wd __maybe_unused,
11096 +                        u32 mask, u32 cookie __maybe_unused,
11097 +                        const char *h_child_name, struct inode *h_child_inode)
11098 +{
11099 +       struct au_hinotify *hinotify;
11100 +       struct postproc_args *args;
11101 +       int len, wkq_err;
11102 +       unsigned char isdir, isroot, wh;
11103 +       char *p;
11104 +       struct inode *dir;
11105 +       unsigned int flags[2];
11106 +
11107 +       /* if IN_UNMOUNT happens, there must be another bug */
11108 +       AuDebugOn(mask & IN_UNMOUNT);
11109 +       if (mask & (IN_IGNORED | IN_UNMOUNT)) {
11110 +               put_inotify_watch(watch);
11111 +               return;
11112 +       }
11113 +#ifdef AuDbgHinotify
11114 +       au_debug(1);
11115 +       if (1 || !h_child_name || strcmp(h_child_name, AUFS_XINO_FNAME)) {
11116 +               AuDbg("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s,"
11117 +                     " hi%lu\n",
11118 +                     watch->inode->i_ino, wd, mask, in_name(mask), cookie,
11119 +                     h_child_name ? h_child_name : "",
11120 +                     h_child_inode ? h_child_inode->i_ino : 0);
11121 +               WARN_ON(1);
11122 +       }
11123 +       au_debug(0);
11124 +#endif
11125 +
11126 +       hinotify = container_of(watch, struct au_hinotify, hin_watch);
11127 +       AuDebugOn(!hinotify || !hinotify->hin_aufs_inode);
11128 +       dir = igrab(hinotify->hin_aufs_inode);
11129 +       if (!dir)
11130 +               return;
11131 +
11132 +       isroot = (dir->i_ino == AUFS_ROOT_INO);
11133 +       len = 0;
11134 +       wh = 0;
11135 +       if (h_child_name) {
11136 +               len = strlen(h_child_name);
11137 +               if (!memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
11138 +                       h_child_name += AUFS_WH_PFX_LEN;
11139 +                       len -= AUFS_WH_PFX_LEN;
11140 +                       wh = 1;
11141 +               }
11142 +       }
11143 +
11144 +       isdir = 0;
11145 +       if (h_child_inode)
11146 +               isdir = !!S_ISDIR(h_child_inode->i_mode);
11147 +       flags[PARENT] = AuHinJob_ISDIR;
11148 +       flags[CHILD] = 0;
11149 +       if (isdir)
11150 +               flags[CHILD] = AuHinJob_ISDIR;
11151 +       switch (mask & IN_ALL_EVENTS) {
11152 +       case IN_MOVED_FROM:
11153 +       case IN_MOVED_TO:
11154 +               AuDebugOn(!h_child_name || !h_child_inode);
11155 +               au_fset_hinjob(flags[CHILD], GEN);
11156 +               au_fset_hinjob(flags[CHILD], XINO0);
11157 +               au_fset_hinjob(flags[CHILD], MNTPNT);
11158 +               au_fset_hinjob(flags[PARENT], DIRENT);
11159 +               break;
11160 +
11161 +       case IN_CREATE:
11162 +               AuDebugOn(!h_child_name || !h_child_inode);
11163 +               au_fset_hinjob(flags[PARENT], DIRENT);
11164 +               au_fset_hinjob(flags[CHILD], GEN);
11165 +               break;
11166 +
11167 +       case IN_DELETE:
11168 +               /*
11169 +                * aufs never be able to get this child inode.
11170 +                * revalidation should be in d_revalidate()
11171 +                * by checking i_nlink, i_generation or d_unhashed().
11172 +                */
11173 +               AuDebugOn(!h_child_name);
11174 +               au_fset_hinjob(flags[PARENT], DIRENT);
11175 +               au_fset_hinjob(flags[CHILD], GEN);
11176 +               au_fset_hinjob(flags[CHILD], TRYXINO0);
11177 +               au_fset_hinjob(flags[CHILD], MNTPNT);
11178 +               break;
11179 +
11180 +       default:
11181 +               AuDebugOn(1);
11182 +       }
11183 +
11184 +       if (wh)
11185 +               h_child_inode = NULL;
11186 +
11187 +       /* iput() and kfree() will be called in postproc() */
11188 +       /*
11189 +        * inotify_mutex is already acquired and kmalloc/prune_icache may lock
11190 +        * iprune_mutex. strange.
11191 +        */
11192 +       lockdep_off();
11193 +       args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
11194 +       lockdep_on();
11195 +       if (unlikely(!args)) {
11196 +               AuErr1("no memory\n");
11197 +               iput(dir);
11198 +               return;
11199 +       }
11200 +       args->flags[PARENT] = flags[PARENT];
11201 +       args->flags[CHILD] = flags[CHILD];
11202 +       args->mask = mask;
11203 +       args->dir = dir;
11204 +       args->h_dir = igrab(watch->inode);
11205 +       if (h_child_inode)
11206 +               h_child_inode = igrab(h_child_inode); /* can be NULL */
11207 +       args->h_child_inode = h_child_inode;
11208 +       args->h_child_nlen = len;
11209 +       if (len) {
11210 +               p = (void *)args;
11211 +               p += sizeof(*args);
11212 +               memcpy(p, h_child_name, len + 1);
11213 +       }
11214 +
11215 +       lockdep_off();
11216 +       wkq_err = au_wkq_nowait(postproc, args, dir->i_sb);
11217 +       lockdep_on();
11218 +       if (unlikely(wkq_err))
11219 +               AuErr("wkq %d\n", wkq_err);
11220 +}
11221 +
11222 +static void aufs_inotify_destroy(struct inotify_watch *watch __maybe_unused)
11223 +{
11224 +       /* intentionally empty */
11225 +}
11226 +/* the callbacks handed to inotify_init() by au_hinotify_init() */
11227 +static struct inotify_operations aufs_inotify_ops = {
11228 +       .handle_event   = aufs_inotify,
11229 +       .destroy_watch  = aufs_inotify_destroy
11230 +};
11231 +
11232 +/* ---------------------------------------------------------------------- */
11233 +/* destroy the AuCache_HINOTIFY slab cache and forget its pointer */
11234 +static void au_hin_destroy_cache(void)
11235 +{
11236 +       kmem_cache_destroy(au_cachep[AuCache_HINOTIFY]);
11237 +       au_cachep[AuCache_HINOTIFY] = NULL;
11238 +}
11239 +
11240 +int __init au_hinotify_init(void)
11241 +{
11242 +       int err;
11243 +
11244 +       err = -ENOMEM;
11245 +       au_cachep[AuCache_HINOTIFY] = AuCache(au_hinotify);
11246 +       if (au_cachep[AuCache_HINOTIFY]) {
11247 +               err = 0;
11248 +               au_hin_handle = inotify_init(&aufs_inotify_ops);
11249 +               if (IS_ERR(au_hin_handle)) {
11250 +                       err = PTR_ERR(au_hin_handle);
11251 +                       au_hin_destroy_cache();
11252 +               }
11253 +       }
11254 +       AuTraceErr(err);
11255 +       return err;
11256 +}
11257 +/* release the inotify handle, then the cache if it still exists */
11258 +void au_hinotify_fin(void)
11259 +{
11260 +       inotify_destroy(au_hin_handle);
11261 +       if (au_cachep[AuCache_HINOTIFY])
11262 +               au_hin_destroy_cache();
11263 +}
11264 diff -uprN -x .git linux-2.6.31/fs/aufs/i_op.c aufs2-2.6.git/fs/aufs/i_op.c
11265 --- linux-2.6.31/fs/aufs/i_op.c 1970-01-01 00:00:00.000000000 +0000
11266 +++ aufs2-2.6.git/fs/aufs/i_op.c        2009-09-21 21:49:23.401607657 +0000
11267 @@ -0,0 +1,883 @@
11268 +/*
11269 + * Copyright (C) 2005-2009 Junjiro R. Okajima
11270 + *
11271 + * This program, aufs is free software; you can redistribute it and/or modify
11272 + * it under the terms of the GNU General Public License as published by
11273 + * the Free Software Foundation; either version 2 of the License, or
11274 + * (at your option) any later version.
11275 + *
11276 + * This program is distributed in the hope that it will be useful,
11277 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11278 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11279 + * GNU General Public License for more details.
11280 + *
11281 + * You should have received a copy of the GNU General Public License
11282 + * along with this program; if not, write to the Free Software
11283 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
11284 + */
11285 +
11286 +/*
11287 + * inode operations (except add/del/rename)
11288 + */
11289 +
11290 +#include <linux/device_cgroup.h>
11291 +#include <linux/fs_stack.h>
11292 +#include <linux/mm.h>
11293 +#include <linux/namei.h>
11294 +#include <linux/security.h>
11295 +#include <linux/uaccess.h>
11296 +#include "aufs.h"
11297 +/* test @mask on the lower inode @h_inode; @h_mnt/@brperm are its branch's */
11298 +static int h_permission(struct inode *h_inode, int mask,
11299 +                       struct vfsmount *h_mnt, int brperm)
11300 +{
11301 +       int err;
11302 +       const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
11303 +
11304 +       err = -EACCES;
11305 +       if ((write_mask && IS_IMMUTABLE(h_inode))
11306 +           || ((mask & MAY_EXEC)
11307 +               && S_ISREG(h_inode->i_mode)
11308 +               && ((h_mnt->mnt_flags & MNT_NOEXEC)
11309 +                   || !(h_inode->i_mode & S_IXUGO))))
11310 +               goto out; /* write to immutable, or exec of a non-executable */
11311 +
11312 +       /*
11313 +        * - skip the lower fs test in the case of write to ro branch.
11314 +        * - nfs dir permission write check is optimized, but a policy for
11315 +        *   link/rename requires a real check.
11316 +        */
11317 +       if ((write_mask && !au_br_writable(brperm))
11318 +           || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
11319 +               && write_mask && !(mask & MAY_READ))
11320 +           || !h_inode->i_op->permission) {
11321 +               /* AuLabel(generic_permission); */
11322 +               err = generic_permission(h_inode, mask, NULL);
11323 +       } else {
11324 +               /* AuLabel(h_inode->permission); */
11325 +               err = h_inode->i_op->permission(h_inode, mask);
11326 +               AuTraceErr(err);
11327 +       }
11328 +
11329 +       if (!err) /* then the device cgroup and the security module tests */
11330 +               err = devcgroup_inode_permission(h_inode, mask);
11331 +       if (!err)
11332 +               err = security_inode_permission
11333 +                       (h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC
11334 +                                         | MAY_APPEND));
11335 +
11336 +#if 0
11337 +       if (!err) {
11338 +               /* todo: do we need to call ima_path_check()? */
11339 +               struct path h_path = {
11340 +                       .dentry =
11341 +                       .mnt    = h_mnt
11342 +               };
11343 +               err = ima_path_check(&h_path,
11344 +                                    mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
11345 +                                    IMA_COUNT_LEAVE);
11346 +       }
11347 +#endif
11348 +
11349 + out:
11350 +       return err;
11351 +}
11352 +/* test @mask on the aufs @inode by h_permission() on its lower inode(s) */
11353 +static int aufs_permission(struct inode *inode, int mask)
11354 +{
11355 +       int err;
11356 +       aufs_bindex_t bindex, bend;
11357 +       const unsigned char isdir = !!S_ISDIR(inode->i_mode);
11358 +       const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
11359 +       struct inode *h_inode;
11360 +       struct super_block *sb;
11361 +       struct au_branch *br;
11362 +
11363 +       sb = inode->i_sb;
11364 +       si_read_lock(sb, AuLock_FLUSH);
11365 +       ii_read_lock_child(inode);
11366 +
11367 +       if (!isdir || write_mask) { /* only the inode at au_ibstart() counts */
11368 +               err = au_busy_or_stale();
11369 +               h_inode = au_h_iptr(inode, au_ibstart(inode));
11370 +               if (unlikely(!h_inode
11371 +                            || (h_inode->i_mode & S_IFMT)
11372 +                            != (inode->i_mode & S_IFMT)))
11373 +                       goto out; /* no lower inode, or its file type differs */
11374 +
11375 +               err = 0;
11376 +               bindex = au_ibstart(inode);
11377 +               br = au_sbr(sb, bindex);
11378 +               err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
11379 +               if (write_mask && !err) {
11380 +                       /* test whether the upper writable branch exists */
11381 +                       err = -EROFS;
11382 +                       for (; bindex >= 0; bindex--)
11383 +                               if (!au_br_rdonly(au_sbr(sb, bindex))) {
11384 +                                       err = 0;
11385 +                                       break;
11386 +                               }
11387 +               }
11388 +               goto out;
11389 +       }
11390 +
11391 +       /* non-write to dir: stop at the first lower dir which refuses */
11392 +       err = 0;
11393 +       bend = au_ibend(inode);
11394 +       for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
11395 +               h_inode = au_h_iptr(inode, bindex);
11396 +               if (h_inode) {
11397 +                       err = au_busy_or_stale();
11398 +                       if (unlikely(!S_ISDIR(h_inode->i_mode)))
11399 +                               break; /* with the au_busy_or_stale() error */
11400 +
11401 +                       br = au_sbr(sb, bindex);
11402 +                       err = h_permission(h_inode, mask, br->br_mnt,
11403 +                                          br->br_perm);
11404 +               }
11405 +       }
11406 +
11407 + out:
11408 +       ii_read_unlock(inode);
11409 +       si_read_unlock(sb);
11410 +       return err;
11411 +}
11412 +
11413 +/* ---------------------------------------------------------------------- */
11414 +/* look up @dentry under @dir in the branches, attach an aufs inode if any */
11415 +static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
11416 +                                 struct nameidata *nd)
11417 +{
11418 +       struct dentry *ret, *parent;
11419 +       struct inode *inode, *h_inode;
11420 +       struct mutex *mtx;
11421 +       struct super_block *sb;
11422 +       int err, npositive;
11423 +       aufs_bindex_t bstart;
11424 +
11425 +       IMustLock(dir);
11426 +
11427 +       sb = dir->i_sb;
11428 +       si_read_lock(sb, AuLock_FLUSH);
11429 +       err = au_alloc_dinfo(dentry);
11430 +       ret = ERR_PTR(err);
11431 +       if (unlikely(err))
11432 +               goto out;
11433 +
11434 +       parent = dentry->d_parent; /* dir inode is locked */
11435 +       di_read_lock_parent(parent, AuLock_IR);
11436 +       npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd);
11437 +       di_read_unlock(parent, AuLock_IR);
11438 +       err = npositive;
11439 +       ret = ERR_PTR(err);
11440 +       if (unlikely(err < 0))
11441 +               goto out_unlock;
11442 +
11443 +       inode = NULL; /* stays NULL when au_lkup_dentry() returned zero */
11444 +       if (npositive) {
11445 +               bstart = au_dbstart(dentry);
11446 +               h_inode = au_h_dptr(dentry, bstart)->d_inode;
11447 +               if (!S_ISDIR(h_inode->i_mode)) {
11448 +                       /*
11449 +                        * stop 'race'-ing between hardlinks under different
11450 +                        * parents.
11451 +                        */
11452 +                       mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
11453 +                       mutex_lock(mtx);
11454 +                       inode = au_new_inode(dentry, /*must_new*/0);
11455 +                       mutex_unlock(mtx);
11456 +               } else
11457 +                       inode = au_new_inode(dentry, /*must_new*/0);
11458 +               ret = (void *)inode; /* carries the error if IS_ERR(inode) */
11459 +       }
11460 +       if (IS_ERR(inode))
11461 +               goto out_unlock;
11462 +
11463 +       ret = d_splice_alias(inode, dentry);
11464 +       if (unlikely(IS_ERR(ret) && inode))
11465 +               ii_write_unlock(inode);
11466 +
11467 + out_unlock:
11468 +       di_write_unlock(dentry); /* NOTE(review): lock taken elsewhere, confirm */
11469 + out:
11470 +       si_read_unlock(sb);
11471 +       return ret;
11472 +}
11473 +
11474 +/* ---------------------------------------------------------------------- */
11475 +/* make the parent exist on branch @bcpup; returns @bcpup or an error */
11476 +static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
11477 +                         const unsigned char add_entry, aufs_bindex_t bcpup,
11478 +                         aufs_bindex_t bstart)
11479 +{
11480 +       int err;
11481 +       struct dentry *h_parent;
11482 +       struct inode *h_dir;
11483 +
11484 +       if (add_entry) {
11485 +               au_update_dbstart(dentry);
11486 +               IMustLock(parent->d_inode);
11487 +       } else
11488 +               di_write_lock_parent(parent);
11489 +
11490 +       err = 0;
11491 +       if (!au_h_dptr(parent, bcpup)) { /* no lower parent on @bcpup yet */
11492 +               if (bstart < bcpup)
11493 +                       err = au_cpdown_dirs(dentry, bcpup);
11494 +               else
11495 +                       err = au_cpup_dirs(dentry, bcpup);
11496 +       }
11497 +       if (!err && add_entry) {
11498 +               h_parent = au_h_dptr(parent, bcpup);
11499 +               h_dir = h_parent->d_inode;
11500 +               mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
11501 +               err = au_lkup_neg(dentry, bcpup);
11502 +               /* todo: no unlock here */
11503 +               mutex_unlock(&h_dir->i_mutex);
11504 +               if (bstart < bcpup && au_dbstart(dentry) < 0) {
11505 +                       au_set_dbstart(dentry, 0);
11506 +                       au_update_dbrange(dentry, /*do_put_zero*/0);
11507 +               }
11508 +       }
11509 +
11510 +       if (!add_entry) /* pairs with di_write_lock_parent() above */
11511 +               di_write_unlock(parent);
11512 +       if (!err)
11513 +               err = bcpup; /* success */
11514 +
11515 +       return err;
11516 +}
11517 +
11518 +/*
11519 + * decide the branch and the parent dir where we will create a new entry.
11520 + * returns the chosen branch index, or a negative error.
11521 + * copies the parent dir up (or down) to that branch if needed.
11522 + */
11523 +int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
11524 +             struct au_wr_dir_args *args)
11525 +{
11526 +       int err;
11527 +       aufs_bindex_t bcpup, bstart, src_bstart;
11528 +       const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
11529 +                                                        ADD_ENTRY);
11530 +       struct super_block *sb;
11531 +       struct dentry *parent;
11532 +       struct au_sbinfo *sbinfo;
11533 +
11534 +       sb = dentry->d_sb;
11535 +       sbinfo = au_sbi(sb);
11536 +       parent = dget_parent(dentry);
11537 +       bstart = au_dbstart(dentry);
11538 +       bcpup = bstart;
11539 +       if (args->force_btgt < 0) { /* the caller did not force a branch */
11540 +               if (src_dentry) {
11541 +                       src_bstart = au_dbstart(src_dentry);
11542 +                       if (src_bstart < bstart)
11543 +                               bcpup = src_bstart;
11544 +               } else if (add_entry) {
11545 +                       err = AuWbrCreate(sbinfo, dentry,
11546 +                                         au_ftest_wrdir(args->flags, ISDIR));
11547 +                       bcpup = err;
11548 +               }
11549 +
11550 +               if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
11551 +                       if (add_entry)
11552 +                               err = AuWbrCopyup(sbinfo, dentry);
11553 +                       else {
11554 +                               if (!IS_ROOT(dentry)) {
11555 +                                       di_read_lock_parent(parent, !AuLock_IR);
11556 +                                       err = AuWbrCopyup(sbinfo, dentry);
11557 +                                       di_read_unlock(parent, !AuLock_IR);
11558 +                               } else
11559 +                                       err = AuWbrCopyup(sbinfo, dentry);
11560 +                       }
11561 +                       bcpup = err;
11562 +                       if (unlikely(err < 0))
11563 +                               goto out;
11564 +               }
11565 +       } else {
11566 +               bcpup = args->force_btgt;
11567 +               AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
11568 +       }
11569 +       AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
11570 +       if (bstart < bcpup)
11571 +               au_update_dbrange(dentry, /*do_put_zero*/1);
11572 +
11573 +       err = bcpup;
11574 +       if (bcpup == bstart)
11575 +               goto out; /* success */
11576 +
11577 +       /* copyup the new parent into the branch we process */
11578 +       err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
11579 +
11580 + out:
11581 +       dput(parent); /* pairs with dget_parent() above */
11582 +       return err;
11583 +}
11584 +
11585 +/* ---------------------------------------------------------------------- */
11586 +/* lower dentry of the pinned parent; NULL if @pin or its parent is unset */
11587 +struct dentry *au_pinned_h_parent(struct au_pin *pin)
11588 +{
11589 +       if (!pin || !pin->parent)
11590 +               return NULL;
11591 +       return au_h_dptr(pin->parent, pin->bindex);
11592 +}
11593 +/* undo au_do_pin(): drop mnt write access, unlock and put the parent */
11594 +void au_unpin(struct au_pin *p)
11595 +{
11596 +       if (au_ftest_pin(p->flags, MNT_WRITE))
11597 +               mnt_drop_write(p->h_mnt);
11598 +       if (!p->hdir)
11599 +               return; /* no parent dir was pinned */
11600 +
11601 +       au_hin_imtx_unlock(p->hdir);
11602 +       if (!au_ftest_pin(p->flags, DI_LOCKED))
11603 +               di_read_unlock(p->parent, AuLock_IR);
11604 +       iput(p->hdir->hi_inode);
11605 +       dput(p->parent);
11606 +       p->parent = NULL;
11607 +       p->hdir = NULL;
11608 +       p->h_mnt = NULL;
11609 +}
11610 +/* pin the parent dir of p->dentry on branch p->bindex; returns 0 or au_busy_or_stale() */
11611 +int au_do_pin(struct au_pin *p)
11612 +{
11613 +       int err;
11614 +       struct super_block *sb;
11615 +       struct dentry *h_dentry, *h_parent;
11616 +       struct au_branch *br;
11617 +       struct inode *h_dir;
11618 +
11619 +       err = 0;
11620 +       sb = p->dentry->d_sb;
11621 +       br = au_sbr(sb, p->bindex);
11622 +       if (IS_ROOT(p->dentry)) { /* no parent to pin, only the mnt write access */
11623 +               if (au_ftest_pin(p->flags, MNT_WRITE)) {
11624 +                       p->h_mnt = br->br_mnt;
11625 +                       err = mnt_want_write(p->h_mnt);
11626 +                       if (unlikely(err)) {
11627 +                               au_fclr_pin(p->flags, MNT_WRITE);
11628 +                               goto out_err;
11629 +                       }
11630 +               }
11631 +               goto out;
11632 +       }
11633 +
11634 +       h_dentry = NULL;
11635 +       if (p->bindex <= au_dbend(p->dentry))
11636 +               h_dentry = au_h_dptr(p->dentry, p->bindex);
11637 +
11638 +       p->parent = dget_parent(p->dentry);
11639 +       if (!au_ftest_pin(p->flags, DI_LOCKED)) /* DI_LOCKED: no di lock taken here */
11640 +               di_read_lock(p->parent, AuLock_IR, p->lsc_di);
11641 +
11642 +       h_dir = NULL;
11643 +       h_parent = au_h_dptr(p->parent, p->bindex);
11644 +       p->hdir = au_hi(p->parent->d_inode, p->bindex);
11645 +       if (p->hdir)
11646 +               h_dir = p->hdir->hi_inode;
11647 +
11648 +       /* udba case: the parent has no hinode/h_dir on this branch; undo and fail */
11649 +       if (unlikely(!p->hdir || !h_dir)) {
11650 +               if (!au_ftest_pin(p->flags, DI_LOCKED))
11651 +                       di_read_unlock(p->parent, AuLock_IR);
11652 +               dput(p->parent);
11653 +               p->parent = NULL;
11654 +               goto out_err;
11655 +       }
11656 +
11657 +       au_igrab(h_dir); /* dropped by iput() in au_unpin() */
11658 +       au_hin_imtx_lock_nested(p->hdir, p->lsc_hi);
11659 +
11660 +       if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
11661 +               err = -EBUSY;
11662 +               goto out_unpin; /* NOTE(review): MNT_WRITE still set, p->h_mnt still NULL */
11663 +       }
11664 +       if (h_dentry) {
11665 +               err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
11666 +               if (unlikely(err)) {
11667 +                       au_fclr_pin(p->flags, MNT_WRITE);
11668 +                       goto out_unpin;
11669 +               }
11670 +       }
11671 +
11672 +       if (au_ftest_pin(p->flags, MNT_WRITE)) {
11673 +               p->h_mnt = br->br_mnt;
11674 +               err = mnt_want_write(p->h_mnt);
11675 +               if (unlikely(err)) {
11676 +                       au_fclr_pin(p->flags, MNT_WRITE); /* nothing for au_unpin() to drop */
11677 +                       goto out_unpin;
11678 +               }
11679 +       }
11680 +       goto out; /* success */
11681 +
11682 + out_unpin:
11683 +       au_unpin(p);
11684 + out_err:
11685 +       AuErr("err %d\n", err);
11686 +       err = au_busy_or_stale(); /* the specific err is only passed to AuErr() above */
11687 + out:
11688 +       return err;
11689 +}
11690 +/* record the pin request in @p and clear the fields that au_do_pin() fills in */
11691 +void au_pin_init(struct au_pin *p, struct dentry *dentry,
11692 +                aufs_bindex_t bindex, int lsc_di, int lsc_hi,
11693 +                unsigned int udba, unsigned char flags)
11694 +{
11695 +       p->h_mnt = NULL;
11696 +       p->hdir = NULL;
11697 +       p->parent = NULL;
11698 +
11699 +       p->dentry = dentry;
11700 +       p->bindex = bindex;
11701 +       p->flags = flags;
11702 +       p->udba = udba;
11703 +       p->lsc_di = lsc_di;
11704 +       p->lsc_hi = lsc_hi;
11705 +}
11706 +/* au_pin_init() with the AuLsc_DI_PARENT/AuLsc_I_PARENT2 lock classes, then au_do_pin() */
11707 +int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
11708 +          unsigned int udba, unsigned char flags)
11709 +{
11710 +       au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
11711 +                   udba, flags);
11712 +       return au_do_pin(pin); /* 0 or the error from au_do_pin() */
11713 +}
11714 +
11715 +/* ---------------------------------------------------------------------- */
11716 +/* AuIcpup_* bits kept in au_icpup_args.flags, with test/set/clear helpers */
11717 +#define AuIcpup_DID_CPUP       1 /* au_wr_dir() chose a branch other than dbstart */
11718 +#define au_ftest_icpup(flags, name)    ((flags) & AuIcpup_##name)
11719 +#define au_fset_icpup(flags, name)     { (flags) |= AuIcpup_##name; }
11720 +#define au_fclr_icpup(flags, name)     { (flags) &= ~AuIcpup_##name; }
11721 +/* state handed from au_lock_and_icpup() back to aufs_setattr() */
11722 +struct au_icpup_args {
11723 +       unsigned char flags; /* AuIcpup_* */
11724 +       unsigned char pin_flags; /* AuPin_* passed to au_pin() */
11725 +       aufs_bindex_t btgt; /* branch index returned by au_wr_dir() */
11726 +       struct au_pin pin; /* released by au_unpin() */
11727 +       struct path h_path; /* what vfsub_trunc()/vfsub_notify_change() operate on */
11728 +       struct inode *h_inode; /* h_path.dentry->d_inode; its i_mutex is held on success */
11729 +};
11730 +/* lock, pin and (if needed) copy up @dentry; on success a->h_inode->i_mutex is held */
11731 +static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia,
11732 +                            struct au_icpup_args *a)
11733 +{
11734 +       int err;
11735 +       unsigned int udba;
11736 +       loff_t sz;
11737 +       aufs_bindex_t bstart;
11738 +       struct dentry *hi_wh, *parent;
11739 +       struct inode *inode;
11740 +       struct au_wr_dir_args wr_dir_args = {
11741 +               .force_btgt     = -1,
11742 +               .flags          = 0
11743 +       };
11744 +
11745 +       di_write_lock_child(dentry);
11746 +       bstart = au_dbstart(dentry);
11747 +       inode = dentry->d_inode;
11748 +       if (S_ISDIR(inode->i_mode))
11749 +               au_fset_wrdir(wr_dir_args.flags, ISDIR);
11750 +       /* plink or hi_wh() case */
11751 +       if (bstart != au_ibstart(inode))
11752 +               wr_dir_args.force_btgt = au_ibstart(inode);
11753 +       err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
11754 +       if (unlikely(err < 0))
11755 +               goto out_dentry;
11756 +       a->btgt = err; /* a non-negative return is the target branch index */
11757 +       if (err != bstart)
11758 +               au_fset_icpup(a->flags, DID_CPUP);
11759 +
11760 +       err = 0;
11761 +       a->pin_flags = AuPin_MNT_WRITE;
11762 +       parent = NULL;
11763 +       if (!IS_ROOT(dentry)) {
11764 +               au_fset_pin(a->pin_flags, DI_LOCKED);
11765 +               parent = dget_parent(dentry);
11766 +               di_write_lock_parent(parent);
11767 +       }
11768 +
11769 +       udba = au_opt_udba(dentry->d_sb);
11770 +       if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE))
11771 +               udba = AuOpt_UDBA_NONE;
11772 +       err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags);
11773 +       if (unlikely(err))
11774 +               goto out_parent;
11775 +       a->h_path.dentry = au_h_dptr(dentry, bstart);
11776 +       a->h_inode = a->h_path.dentry->d_inode;
11777 +       mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11778 +       sz = -1;
11779 +       if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
11780 +               sz = ia->ia_size;
11781 +
11782 +       hi_wh = NULL;
11783 +       if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) {
11784 +               hi_wh = au_hi_wh(inode, a->btgt);
11785 +               if (!hi_wh) {
11786 +                       err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
11787 +                       if (unlikely(err))
11788 +                               goto out_unlock;
11789 +                       hi_wh = au_hi_wh(inode, a->btgt);
11790 +                       /* todo: revalidate hi_wh? */
11791 +               }
11792 +       }
11793 +
11794 +       if (parent) {
11795 +               au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
11796 +               di_downgrade_lock(parent, AuLock_IR);
11797 +               dput(parent);
11798 +               parent = NULL; /* from here au_unpin() read-unlocks the parent */
11799 +       }
11800 +       if (!au_ftest_icpup(a->flags, DID_CPUP))
11801 +               goto out; /* success */
11802 +
11803 +       if (!d_unhashed(dentry)) {
11804 +               err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME);
11805 +               if (!err)
11806 +                       a->h_path.dentry = au_h_dptr(dentry, a->btgt);
11807 +       } else if (!hi_wh)
11808 +               a->h_path.dentry = au_h_dptr(dentry, a->btgt);
11809 +       else
11810 +               a->h_path.dentry = hi_wh; /* do not dget here */
11811 +
11812 + out_unlock:
11813 +       mutex_unlock(&a->h_inode->i_mutex);
11814 +       a->h_inode = a->h_path.dentry->d_inode;
11815 +       if (!err) {
11816 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11817 +               goto out; /* success */
11818 +       }
11819 +       au_unpin(&a->pin);
11820 + out_parent:
11821 +       /* non-NULL only while we still hold the parent's write lock and our dget ref */
11822 +       if (parent) {
11823 +               di_write_unlock(parent);
11824 +               dput(parent);
11825 +       }
11826 + out_dentry:
11827 +       di_write_unlock(dentry);
11828 + out:
11829 +       return err;
11830 +}
11831 +/* inode_operations.setattr: apply @ia to the h_path set up by au_lock_and_icpup() */
11832 +static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
11833 +{
11834 +       int err;
11835 +       struct inode *inode;
11836 +       struct super_block *sb;
11837 +       struct file *file;
11838 +       struct au_icpup_args *a;
11839 +
11840 +       err = -ENOMEM;
11841 +       a = kzalloc(sizeof(*a), GFP_NOFS);
11842 +       if (unlikely(!a))
11843 +               goto out;
11844 +
11845 +       inode = dentry->d_inode;
11846 +       IMustLock(inode);
11847 +       sb = dentry->d_sb;
11848 +       si_read_lock(sb, AuLock_FLUSH);
11849 +
11850 +       file = NULL;
11851 +       if (ia->ia_valid & ATTR_FILE) {
11852 +               /* currently ftruncate(2) only */
11853 +               file = ia->ia_file;
11854 +               fi_write_lock(file);
11855 +               ia->ia_file = au_h_fptr(file, au_fbstart(file)); /* restored at out_si */
11856 +       }
11857 +
11858 +       if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
11859 +               ia->ia_valid &= ~ATTR_MODE;
11860 +
11861 +       err = au_lock_and_icpup(dentry, ia, a);
11862 +       if (unlikely(err < 0))
11863 +               goto out_si;
11864 +       if (au_ftest_icpup(a->flags, DID_CPUP)) { /* presumably the h_file is stale now */
11865 +               ia->ia_file = NULL;
11866 +               ia->ia_valid &= ~ATTR_FILE;
11867 +       }
11868 +
11869 +       a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
11870 +       if (ia->ia_valid & ATTR_SIZE) {
11871 +               struct file *f;
11872 +
11873 +               if (ia->ia_size < i_size_read(inode)) {
11874 +                       /* unmap only */
11875 +                       err = vmtruncate(inode, ia->ia_size);
11876 +                       if (unlikely(err))
11877 +                               goto out_unlock;
11878 +               }
11879 +
11880 +               f = NULL;
11881 +               if (ia->ia_valid & ATTR_FILE)
11882 +                       f = ia->ia_file;
11883 +               mutex_unlock(&a->h_inode->i_mutex); /* dropped around vfsub_trunc() only */
11884 +               err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
11885 +               mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
11886 +       } else
11887 +               err = vfsub_notify_change(&a->h_path, ia);
11888 +       if (!err)
11889 +               au_cpup_attr_changeable(inode);
11890 +
11891 + out_unlock:
11892 +       mutex_unlock(&a->h_inode->i_mutex); /* held since au_lock_and_icpup() succeeded */
11893 +       au_unpin(&a->pin);
11894 +       di_write_unlock(dentry);
11895 + out_si:
11896 +       if (file) { /* undo the ia_file/ATTR_FILE changes made above */
11897 +               fi_write_unlock(file);
11898 +               ia->ia_file = file;
11899 +               ia->ia_valid |= ATTR_FILE;
11900 +       }
11901 +       si_read_unlock(sb);
11902 +       kfree(a);
11903 + out:
11904 +       return err;
11905 +}
11906 +/* refresh @dentry if its generation != @sigen; returns 0 read-locked, else unlocked */
11907 +static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen)
11908 +{
11909 +       int err;
11910 +       struct inode *inode;
11911 +       struct dentry *parent;
11912 +
11913 +       err = 0;
11914 +       inode = dentry->d_inode;
11915 +       di_write_lock_child(dentry);
11916 +       if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) {
11917 +               parent = dget_parent(dentry);
11918 +               di_read_lock_parent(parent, AuLock_IR);
11919 +               /* returns a number of positive dentries */
11920 +               err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
11921 +               if (err >= 0)
11922 +                       err = au_refresh_hinode(inode, dentry);
11923 +               di_read_unlock(parent, AuLock_IR);
11924 +               dput(parent);
11925 +       }
11926 +       di_downgrade_lock(dentry, AuLock_IR); /* write lock -> read lock for the caller */
11927 +       if (unlikely(err))
11928 +               di_read_unlock(dentry, AuLock_IR);
11929 +
11930 +       AuTraceErr(err);
11931 +       return err;
11932 +}
11933 +/* copy the attributes in @st to @inode; @nlink is the h_inode's i_nlink (see caller) */
11934 +static void au_refresh_iattr(struct inode *inode, struct kstat *st,
11935 +                            unsigned int nlink)
11936 +{
11937 +       inode->i_mode = st->mode;
11938 +       inode->i_uid = st->uid;
11939 +       inode->i_gid = st->gid;
11940 +       inode->i_atime = st->atime;
11941 +       inode->i_mtime = st->mtime;
11942 +       inode->i_ctime = st->ctime;
11943 +
11944 +       au_cpup_attr_nlink(inode, /*force*/0);
11945 +       if (S_ISDIR(inode->i_mode)) { /* dirs: swap @nlink for the count in @st */
11946 +               inode->i_nlink -= nlink;
11947 +               inode->i_nlink += st->nlink;
11948 +       }
11949 +
11950 +       spin_lock(&inode->i_lock); /* i_blocks and i_size are updated under i_lock */
11951 +       inode->i_blocks = st->blocks;
11952 +       i_size_write(inode, st->size);
11953 +       spin_unlock(&inode->i_lock);
11954 +}
11955 +/* inode_operations.getattr: refresh @inode from the h_dentry at au_ibstart(), fill @st */
11956 +static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
11957 +                       struct dentry *dentry, struct kstat *st)
11958 +{
11959 +       int err;
11960 +       unsigned int mnt_flags;
11961 +       aufs_bindex_t bindex;
11962 +       unsigned char udba_none, positive;
11963 +       struct super_block *sb, *h_sb;
11964 +       struct inode *inode;
11965 +       struct vfsmount *h_mnt;
11966 +       struct dentry *h_dentry;
11967 +
11968 +       err = 0;
11969 +       sb = dentry->d_sb;
11970 +       inode = dentry->d_inode;
11971 +       si_read_lock(sb, AuLock_FLUSH);
11972 +       mnt_flags = au_mntflags(sb);
11973 +       udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
11974 +
11975 +       /* support fstat(2) */
11976 +       if (!d_unhashed(dentry) && !udba_none) {
11977 +               unsigned int sigen = au_sigen(sb);
11978 +               if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
11979 +                       di_read_lock_child(dentry, AuLock_IR);
11980 +               else {
11981 +                       AuDebugOn(IS_ROOT(dentry));
11982 +                       err = au_getattr_lock_reval(dentry, sigen);
11983 +                       if (unlikely(err))
11984 +                               goto out; /* dentry is already unlocked on error */
11985 +               }
11986 +       } else
11987 +               di_read_lock_child(dentry, AuLock_IR);
11988 +
11989 +       bindex = au_ibstart(inode);
11990 +       h_mnt = au_sbr_mnt(sb, bindex);
11991 +       h_sb = h_mnt->mnt_sb;
11992 +       if (!au_test_fs_bad_iattr(h_sb) && udba_none)
11993 +               goto out_fill; /* success */
11994 +
11995 +       h_dentry = NULL;
11996 +       if (au_dbstart(dentry) == bindex)
11997 +               h_dentry = dget(au_h_dptr(dentry, bindex));
11998 +       else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
11999 +               h_dentry = au_plink_lkup(inode, bindex);
12000 +               if (IS_ERR(h_dentry))
12001 +                       goto out_fill; /* pretending success */
12002 +       }
12003 +       /* illegally overlapped or something */
12004 +       if (unlikely(!h_dentry))
12005 +               goto out_fill; /* pretending success */
12006 +
12007 +       positive = !!h_dentry->d_inode;
12008 +       if (positive)
12009 +               err = vfs_getattr(h_mnt, h_dentry, st);
12010 +       if (!err && positive)
12011 +               au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
12012 +       /* drop our reference only after the last use of h_dentry */
12013 +       dput(h_dentry);
12014 +       if (!err)
12015 +               goto out_fill; /* success */
12016 +       goto out_unlock;
12017 +
12018 + out_fill:
12019 +       generic_fillattr(inode, st);
12020 + out_unlock:
12021 +       di_read_unlock(dentry, AuLock_IR);
12022 + out:
12023 +       si_read_unlock(sb);
12024 +       return err;
12025 +}
12026 +
12027 +/* ---------------------------------------------------------------------- */
12028 +/* call ->readlink of the h_dentry at @bindex into @buf; returns its result or -errno */
12029 +static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
12030 +                     int bufsiz)
12031 +{
12032 +       int err;
12033 +       struct super_block *sb;
12034 +       struct dentry *h_dentry;
12035 +
12036 +       err = -EINVAL;
12037 +       h_dentry = au_h_dptr(dentry, bindex);
12038 +       if (unlikely(/* !h_dentry
12039 +                    || !h_dentry->d_inode
12040 +                    || !h_dentry->d_inode->i_op
12041 +                    || */ !h_dentry->d_inode->i_op->readlink))
12042 +               goto out;
12043 +
12044 +       err = security_inode_readlink(h_dentry);
12045 +       if (unlikely(err))
12046 +               goto out;
12047 +
12048 +       sb = dentry->d_sb;
12049 +       if (!au_test_ro(sb, bindex, dentry->d_inode)) { /* atime is touched only if not ro */
12050 +               vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
12051 +               fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
12052 +       }
12053 +       err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
12054 +
12055 + out:
12056 +       return err;
12057 +}
12058 +/* inode_operations.readlink: h_readlink() at au_dbstart() under aufs_read_lock() */
12059 +static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
12060 +{
12061 +       int err;
12062 +
12063 +       aufs_read_lock(dentry, AuLock_IR);
12064 +       err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
12065 +       aufs_read_unlock(dentry, AuLock_IR);
12066 +
12067 +       return err;
12068 +}
12069 +/* inode_operations.follow_link: read the link into a __getname() buffer for @nd */
12070 +static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
12071 +{
12072 +       int err;
12073 +       char *buf;
12074 +       mm_segment_t old_fs;
12075 +
12076 +       err = -ENOMEM;
12077 +       buf = __getname();
12078 +       if (unlikely(!buf))
12079 +               goto out;
12080 +
12081 +       aufs_read_lock(dentry, AuLock_IR);
12082 +       old_fs = get_fs();
12083 +       set_fs(KERNEL_DS); /* buf is a kernel buffer passed through a __user interface */
12084 +       err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf,
12085 +                        PATH_MAX - 1); /* leave room for the NUL stored below */
12086 +       set_fs(old_fs);
12087 +       aufs_read_unlock(dentry, AuLock_IR);
12088 +
12089 +       if (err >= 0) {
12090 +               buf[err] = 0;
12091 +               /* will be freed by put_link */
12092 +               nd_set_link(nd, buf);
12093 +               return NULL; /* success */
12094 +       }
12095 +       __putname(buf);
12096 +
12097 + out:
12098 +       path_put(&nd->path);
12099 +       AuTraceErr(err);
12100 +       return ERR_PTR(err);
12101 +}
12102 +/* inode_operations.put_link: free the buffer aufs_follow_link() gave to nd_set_link() */
12103 +static void aufs_put_link(struct dentry *dentry __maybe_unused,
12104 +                         struct nameidata *nd, void *cookie __maybe_unused)
12105 +{
12106 +       __putname(nd_get_link(nd));
12107 +}
12108 +
12109 +/* ---------------------------------------------------------------------- */
12110 +/* inode_operations.truncate_range: ignores its arguments, only calls AuUnsupport() */
12111 +static void aufs_truncate_range(struct inode *inode __maybe_unused,
12112 +                               loff_t start __maybe_unused,
12113 +                               loff_t end __maybe_unused)
12114 +{
12115 +       AuUnsupport();
12116 +}
12117 +
12118 +/* ---------------------------------------------------------------------- */
12119 +/* inode operations with the readlink/follow_link/put_link entries (symlinks) */
12120 +struct inode_operations aufs_symlink_iop = {
12121 +       .permission     = aufs_permission,
12122 +       .setattr        = aufs_setattr,
12123 +       .getattr        = aufs_getattr,
12124 +       .readlink       = aufs_readlink,
12125 +       .follow_link    = aufs_follow_link,
12126 +       .put_link       = aufs_put_link
12127 +};
12128 +/* inode operations with the entry add/remove/rename and lookup entries (directories) */
12129 +struct inode_operations aufs_dir_iop = {
12130 +       .create         = aufs_create,
12131 +       .lookup         = aufs_lookup,
12132 +       .link           = aufs_link,
12133 +       .unlink         = aufs_unlink,
12134 +       .symlink        = aufs_symlink,
12135 +       .mkdir          = aufs_mkdir,
12136 +       .rmdir          = aufs_rmdir,
12137 +       .mknod          = aufs_mknod,
12138 +       .rename         = aufs_rename,
12139 +
12140 +       .permission     = aufs_permission,
12141 +       .setattr        = aufs_setattr,
12142 +       .getattr        = aufs_getattr
12143 +};
12144 +/* inode operations without dir/symlink entries; the only table with truncate_range */
12145 +struct inode_operations aufs_iop = {
12146 +       .permission     = aufs_permission,
12147 +       .setattr        = aufs_setattr,
12148 +       .getattr        = aufs_getattr,
12149 +       .truncate_range = aufs_truncate_range
12150 +};
12151 diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_add.c aufs2-2.6.git/fs/aufs/i_op_add.c
12152 --- linux-2.6.31/fs/aufs/i_op_add.c     1970-01-01 00:00:00.000000000 +0000
12153 +++ aufs2-2.6.git/fs/aufs/i_op_add.c    2009-09-21 21:49:23.401607657 +0000
12154 @@ -0,0 +1,649 @@
12155 +/*
12156 + * Copyright (C) 2005-2009 Junjiro R. Okajima
12157 + *
12158 + * This program, aufs is free software; you can redistribute it and/or modify
12159 + * it under the terms of the GNU General Public License as published by
12160 + * the Free Software Foundation; either version 2 of the License, or
12161 + * (at your option) any later version.
12162 + *
12163 + * This program is distributed in the hope that it will be useful,
12164 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
12165 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12166 + * GNU General Public License for more details.
12167 + *
12168 + * You should have received a copy of the GNU General Public License
12169 + * along with this program; if not, write to the Free Software
12170 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
12171 + */
12172 +
12173 +/*
12174 + * inode operations (add entry)
12175 + */
12176 +
12177 +#include "aufs.h"
12178 +
12179 +/*
12180 + * Final step of adding a new entry (not for link(2)): unlink the whiteout
12181 + * @wh_dentry if given, instantiate @dentry with a new inode, copy up the
12182 + * parent dir's times and size and bump its i_version. Returns 0 or -errno.
12183 + * If the new inode cannot be made, the removed whiteout is re-created.
12184 + */
12185 +static int epilog(struct inode *dir, aufs_bindex_t bindex,
12186 +                 struct dentry *wh_dentry, struct dentry *dentry)
12187 +{
12188 +       int err, rerr;
12189 +       aufs_bindex_t bwh;
12190 +       struct path h_path;
12191 +       struct inode *inode, *h_dir;
12192 +       struct dentry *wh;
12193 +
12194 +       bwh = -1;
12195 +       if (wh_dentry) {
12196 +               h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
12197 +               IMustLock(h_dir);
12198 +               AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
12199 +               bwh = au_dbwh(dentry); /* remembered for the revert below */
12200 +               h_path.dentry = wh_dentry;
12201 +               h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
12202 +               err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
12203 +                                         dentry);
12204 +               if (unlikely(err))
12205 +                       goto out;
12206 +       }
12207 +
12208 +       inode = au_new_inode(dentry, /*must_new*/1);
12209 +       if (!IS_ERR(inode)) {
12210 +               d_instantiate(dentry, inode);
12211 +               dir = dentry->d_parent->d_inode; /* dir inode is locked */
12212 +               IMustLock(dir);
12213 +               if (au_ibstart(dir) == au_dbstart(dentry))
12214 +                       au_cpup_attr_timesizes(dir);
12215 +               dir->i_version++;
12216 +               return 0; /* success */
12217 +       }
12218 +
12219 +       err = PTR_ERR(inode);
12220 +       if (!wh_dentry)
12221 +               goto out; /* no whiteout was removed, nothing to revert */
12222 +
12223 +       /* revert */
12224 +       /* dir inode is locked */
12225 +       wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
12226 +       rerr = PTR_ERR(wh);
12227 +       if (IS_ERR(wh)) {
12228 +               AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
12229 +                       AuDLNPair(dentry), err, rerr);
12230 +               err = -EIO; /* both the add and its revert failed */
12231 +       } else
12232 +               dput(wh);
12233 +
12234 + out:
12235 +       return err;
12236 +}
12237 +
12238 +/*
12239 + * Simple sanity tests before adding an entry, following the checks in vfs,
12240 + * plus the parent-child relationship of the h_dentry. Returns 0 or -errno.
12241 + */
12242 +int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
12243 +              struct dentry *h_parent, int isdir)
12244 +{
12245 +       int err;
12246 +       umode_t h_mode;
12247 +       struct dentry *h_dentry;
12248 +       struct inode *h_inode;
12249 +
12250 +       h_dentry = au_h_dptr(dentry, bindex);
12251 +       h_inode = h_dentry->d_inode;
12252 +       if (!dentry->d_inode) { /* negative dentry: the h_dentry must be negative too */
12253 +               err = -EEXIST;
12254 +               if (unlikely(h_inode))
12255 +                       goto out;
12256 +       } else {
12257 +               /* rename(2) case */
12258 +               err = -EIO;
12259 +               if (unlikely(!h_inode || !h_inode->i_nlink))
12260 +                       goto out;
12261 +
12262 +               h_mode = h_inode->i_mode;
12263 +               if (!isdir) { /* the file type must agree with @isdir */
12264 +                       err = -EISDIR;
12265 +                       if (unlikely(S_ISDIR(h_mode)))
12266 +                               goto out;
12267 +               } else if (unlikely(!S_ISDIR(h_mode))) {
12268 +                       err = -ENOTDIR;
12269 +                       goto out;
12270 +               }
12271 +       }
12272 +
12273 +       err = -EIO;
12274 +       /* expected parent dir is locked */
12275 +       if (unlikely(h_parent != h_dentry->d_parent))
12276 +               goto out;
12277 +       err = 0;
12278 +
12279 + out:
12280 +       return err;
12281 +}
12282 +
12283 +/*
12284 + * Initial step of adding a new entry: choose the branch by au_wr_dir(), pin
12285 + * the parent dir on it and look up the whiteout for the new entry.
12286 + * Returns NULL or the au_wh_lkup() result; on ERR_PTR nothing stays pinned.
12287 + */
12288 +static struct dentry*
12289 +lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
12290 +                 struct dentry *src_dentry, struct au_pin *pin,
12291 +                 struct au_wr_dir_args *wr_dir_args)
12292 +{
12293 +       struct dentry *wh_dentry, *h_parent;
12294 +       struct super_block *sb;
12295 +       struct au_branch *br;
12296 +       int err;
12297 +       unsigned int udba;
12298 +       aufs_bindex_t bcpup;
12299 +
12300 +       err = au_wr_dir(dentry, src_dentry, wr_dir_args);
12301 +       bcpup = err; /* a non-negative return is used as the branch index */
12302 +       wh_dentry = ERR_PTR(err);
12303 +       if (unlikely(err < 0))
12304 +               goto out;
12305 +
12306 +       sb = dentry->d_sb;
12307 +       udba = au_opt_udba(sb);
12308 +       err = au_pin(pin, dentry, bcpup, udba,
12309 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12310 +       wh_dentry = ERR_PTR(err);
12311 +       if (unlikely(err))
12312 +               goto out;
12313 +
12314 +       h_parent = au_pinned_h_parent(pin);
12315 +       if (udba != AuOpt_UDBA_NONE
12316 +           && au_dbstart(dentry) == bcpup) {
12317 +               err = au_may_add(dentry, bcpup, h_parent,
12318 +                                au_ftest_wrdir(wr_dir_args->flags, ISDIR));
12319 +               wh_dentry = ERR_PTR(err);
12320 +               if (unlikely(err))
12321 +                       goto out_unpin;
12322 +       }
12323 +
12324 +       br = au_sbr(sb, bcpup);
12325 +       if (dt) { /* @dt is optional: store the parent's times only when asked */
12326 +               struct path tmp = {
12327 +                       .dentry = h_parent,
12328 +                       .mnt    = br->br_mnt
12329 +               };
12330 +               au_dtime_store(dt, au_pinned_parent(pin), &tmp);
12331 +       }
12332 +
12333 +       wh_dentry = NULL;
12334 +       if (bcpup != au_dbwh(dentry))
12335 +               goto out; /* success */
12336 +
12337 +       wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
12338 +
12339 + out_unpin:
12340 +       if (IS_ERR(wh_dentry)) /* keep the pin only when returning success */
12341 +               au_unpin(pin);
12342 + out:
12343 +       return wh_dentry;
12344 +}
12345 +
12346 +/* ---------------------------------------------------------------------- */
12347 +/* which operation add_simple() performs (type), plus its arguments (u) */
12348 +enum { Mknod, Symlink, Creat };
12349 +struct simple_arg {
12350 +       int type; /* Mknod, Symlink or Creat */
12351 +       union {
12352 +               struct { /* Creat */
12353 +                       int mode;
12354 +                       struct nameidata *nd; /* not read by add_simple() */
12355 +               } c;
12356 +               struct { /* Symlink */
12357 +                       const char *symname;
12358 +               } s;
12359 +               struct { /* Mknod */
12360 +                       int mode;
12361 +                       dev_t dev;
12362 +               } m;
12363 +       } u;
12364 +};
12365 +/* common body of aufs_create/symlink/mknod: vfsub_*() per arg->type, then epilog() */
12366 +static int add_simple(struct inode *dir, struct dentry *dentry,
12367 +                     struct simple_arg *arg)
12368 +{
12369 +       int err;
12370 +       aufs_bindex_t bstart;
12371 +       unsigned char created;
12372 +       struct au_dtime dt;
12373 +       struct au_pin pin;
12374 +       struct path h_path;
12375 +       struct dentry *wh_dentry, *parent;
12376 +       struct inode *h_dir;
12377 +       struct au_wr_dir_args wr_dir_args = {
12378 +               .force_btgt     = -1,
12379 +               .flags          = AuWrDir_ADD_ENTRY
12380 +       };
12381 +
12382 +       IMustLock(dir);
12383 +
12384 +       parent = dentry->d_parent; /* dir inode is locked */
12385 +       aufs_read_lock(dentry, AuLock_DW);
12386 +       di_write_lock_parent(parent);
12387 +       wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
12388 +                                     &wr_dir_args);
12389 +       err = PTR_ERR(wh_dentry);
12390 +       if (IS_ERR(wh_dentry))
12391 +               goto out; /* nothing is pinned on this path */
12392 +
12393 +       bstart = au_dbstart(dentry);
12394 +       h_path.dentry = au_h_dptr(dentry, bstart);
12395 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
12396 +       h_dir = au_pinned_h_dir(&pin);
12397 +       switch (arg->type) {
12398 +       case Creat:
12399 +               err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
12400 +               break;
12401 +       case Symlink:
12402 +               err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
12403 +               break;
12404 +       case Mknod:
12405 +               err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
12406 +               break;
12407 +       default:
12408 +               BUG();
12409 +       }
12410 +       created = !err; /* remembered because err is reused for epilog() */
12411 +       if (!err)
12412 +               err = epilog(dir, bstart, wh_dentry, dentry);
12413 +
12414 +       /* revert: epilog() failed after the h_dentry had been created */
12415 +       if (unlikely(created && err && h_path.dentry->d_inode)) {
12416 +               int rerr;
12417 +               rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
12418 +               if (rerr) {
12419 +                       AuIOErr("%.*s revert failure(%d, %d)\n",
12420 +                               AuDLNPair(dentry), err, rerr);
12421 +                       err = -EIO;
12422 +               }
12423 +               au_dtime_revert(&dt);
12424 +               d_drop(dentry);
12425 +       }
12426 +
12427 +       au_unpin(&pin);
12428 +       dput(wh_dentry); /* may be NULL when no whiteout was looked up */
12429 +
12430 + out:
12431 +       if (unlikely(err)) {
12432 +               au_update_dbstart(dentry);
12433 +               d_drop(dentry);
12434 +       }
12435 +       di_write_unlock(parent);
12436 +       aufs_read_unlock(dentry, AuLock_DW);
12437 +       return err;
12438 +}
12439 +/* inode_operations.mknod: pack @mode/@dev as a Mknod request for add_simple() */
12440 +int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
12441 +{
12442 +       struct simple_arg arg = {
12443 +               .type = Mknod,
12444 +               .u.m = {
12445 +                       .mode   = mode,
12446 +                       .dev    = dev
12447 +               }
12448 +       };
12449 +       return add_simple(dir, dentry, &arg);
12450 +}
12451 +/* inode_operations.symlink: pack @symname as a Symlink request for add_simple() */
12452 +int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
12453 +{
12454 +       struct simple_arg arg = {
12455 +               .type = Symlink,
12456 +               .u.s.symname = symname
12457 +       };
12458 +       return add_simple(dir, dentry, &arg);
12459 +}
12460 +/* inode_operations.create: pack @mode/@nd as a Creat request for add_simple() */
12461 +int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
12462 +               struct nameidata *nd)
12463 +{
12464 +       struct simple_arg arg = {
12465 +               .type = Creat,
12466 +               .u.c = {
12467 +                       .mode   = mode,
12468 +                       .nd     = nd
12469 +               }
12470 +       };
12471 +       return add_simple(dir, dentry, &arg);
12472 +}
12473 +
12474 +/* ---------------------------------------------------------------------- */
12475 +/* arguments shared by au_cpup_before_link() and au_cpup_or_link() */
12476 +struct au_link_args {
12477 +       aufs_bindex_t bdst, bsrc; /* branch indexes: copy-up target / where src is read */
12478 +       struct au_pin pin;
12479 +       struct path h_path;
12480 +       struct dentry *src_parent, *parent;
12481 +};
12482 +/* au_test_and_cpup_dirs(), then pinned au_sio_cpup_simple() of @src_dentry to a->bdst */
12483 +static int au_cpup_before_link(struct dentry *src_dentry,
12484 +                              struct au_link_args *a)
12485 +{
12486 +       int err;
12487 +       struct dentry *h_src_dentry;
12488 +       struct mutex *h_mtx;
12489 +
12490 +       di_read_lock_parent(a->src_parent, AuLock_IR);
12491 +       err = au_test_and_cpup_dirs(src_dentry, a->bdst);
12492 +       if (unlikely(err))
12493 +               goto out;
12494 +
12495 +       h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
12496 +       h_mtx = &h_src_dentry->d_inode->i_mutex; /* taken only after the pin succeeded */
12497 +       err = au_pin(&a->pin, src_dentry, a->bdst,
12498 +                    au_opt_udba(src_dentry->d_sb),
12499 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12500 +       if (unlikely(err))
12501 +               goto out;
12502 +       mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12503 +       err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
12504 +                                AuCpup_DTIME /* | AuCpup_KEEPLINO */);
12505 +       mutex_unlock(h_mtx);
12506 +       au_unpin(&a->pin); /* the pin is needed for the copy-up only */
12507 +
12508 + out:
12509 +       di_read_unlock(a->src_parent, AuLock_IR);
12510 +       return err;
12511 +}
12512 +
12513 +static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
12514 +{ /* link by copyup, or by vfsub_link() of an inode already on a->bdst */
12515 +       int err;
12516 +       unsigned char plink;
12517 +       struct inode *h_inode, *inode;
12518 +       struct dentry *h_src_dentry;
12519 +       struct super_block *sb;
12520 +
12521 +       plink = 0;
12522 +       h_inode = NULL;
12523 +       sb = src_dentry->d_sb; /* NOTE(review): sb is not used below */
12524 +       inode = src_dentry->d_inode;
12525 +       if (au_ibstart(inode) <= a->bdst)
12526 +               h_inode = au_h_iptr(inode, a->bdst);
12527 +       if (!h_inode || !h_inode->i_nlink) {
12528 +               /* copyup src_dentry as the name of dentry. */
12529 +               au_set_dbstart(src_dentry, a->bdst); /* temporary, see below */
12530 +               au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
12531 +               h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
12532 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
12533 +               err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1,
12534 +                                        AuCpup_KEEPLINO, a->parent);
12535 +               mutex_unlock(&h_inode->i_mutex);
12536 +               au_set_h_dptr(src_dentry, a->bdst, NULL); /* undo the setup */
12537 +               au_set_dbstart(src_dentry, a->bsrc);
12538 +       } else {
12539 +               /* the inode of src_dentry already exists on a.bdst branch */
12540 +               h_src_dentry = d_find_alias(h_inode);
12541 +               if (!h_src_dentry && au_plink_test(inode)) {
12542 +                       plink = 1; /* suppresses au_plink_append() below */
12543 +                       h_src_dentry = au_plink_lkup(inode, a->bdst);
12544 +                       err = PTR_ERR(h_src_dentry);
12545 +                       if (IS_ERR(h_src_dentry))
12546 +                               goto out;
12547 +
12548 +                       if (unlikely(!h_src_dentry->d_inode)) {
12549 +                               dput(h_src_dentry); /* negative: as not found */
12550 +                               h_src_dentry = NULL;
12551 +                       }
12552 +
12553 +               }
12554 +               if (h_src_dentry) {
12555 +                       err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
12556 +                                        &a->h_path);
12557 +                       dput(h_src_dentry);
12558 +               } else {
12559 +                       AuIOErr("no dentry found for hi%lu on b%d\n",
12560 +                               h_inode->i_ino, a->bdst);
12561 +                       err = -EIO;
12562 +               }
12563 +       }
12564 +
12565 +       if (!err && !plink)
12566 +               au_plink_append(inode, a->bdst, a->h_path.dentry);
12567 +
12568 +out:
12569 +       return err;
12570 +}
12571 +
12572 +int aufs_link(struct dentry *src_dentry, struct inode *dir,
12573 +             struct dentry *dentry)
12574 +{ /* link @src_dentry as @dentry in @dir; returns 0 on success, else err */
12575 +       int err, rerr;
12576 +       struct au_dtime dt;
12577 +       struct au_link_args *a;
12578 +       struct dentry *wh_dentry, *h_src_dentry;
12579 +       struct inode *inode;
12580 +       struct super_block *sb;
12581 +       struct au_wr_dir_args wr_dir_args = {
12582 +               /* .force_btgt  = -1, */
12583 +               .flags          = AuWrDir_ADD_ENTRY
12584 +       };
12585 +
12586 +       IMustLock(dir);
12587 +       inode = src_dentry->d_inode;
12588 +       IMustLock(inode);
12589 +
12590 +       err = -ENOENT;
12591 +       if (unlikely(!inode->i_nlink))
12592 +               goto out;
12593 +
12594 +       err = -ENOMEM;
12595 +       a = kzalloc(sizeof(*a), GFP_NOFS); /* freed before the out: label */
12596 +       if (unlikely(!a))
12597 +               goto out;
12598 +
12599 +       a->parent = dentry->d_parent; /* dir inode is locked */
12600 +       aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0);
12601 +       a->src_parent = dget_parent(src_dentry); /* dput() at out_unlock */
12602 +       wr_dir_args.force_btgt = au_dbstart(src_dentry);
12603 +
12604 +       di_write_lock_parent(a->parent);
12605 +       wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
12606 +       wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
12607 +                                     &wr_dir_args);
12608 +       err = PTR_ERR(wh_dentry);
12609 +       if (IS_ERR(wh_dentry))
12610 +               goto out_unlock;
12611 +
12612 +       err = 0;
12613 +       sb = dentry->d_sb;
12614 +       a->bdst = au_dbstart(dentry);
12615 +       a->h_path.dentry = au_h_dptr(dentry, a->bdst);
12616 +       a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
12617 +       a->bsrc = au_dbstart(src_dentry);
12618 +       if (au_opt_test(au_mntflags(sb), PLINK)) {
12619 +               if (a->bdst < a->bsrc
12620 +                   /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
12621 +                       err = au_cpup_or_link(src_dentry, a);
12622 +               else {
12623 +                       h_src_dentry = au_h_dptr(src_dentry, a->bdst);
12624 +                       err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
12625 +                                        &a->h_path);
12626 +               }
12627 +       } else {
12628 +               /*
12629 +                * copyup src_dentry to the branch we process,
12630 +                * and then link(2) to it.
12631 +                */
12632 +               if (a->bdst < a->bsrc
12633 +                   /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
12634 +                       au_unpin(&a->pin); /* au_cpup_before_link() reuses it */
12635 +                       di_write_unlock(a->parent);
12636 +                       err = au_cpup_before_link(src_dentry, a);
12637 +                       di_write_lock_parent(a->parent);
12638 +                       if (!err)
12639 +                               err = au_pin(&a->pin, dentry, a->bdst,
12640 +                                            au_opt_udba(sb),
12641 +                                            AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12642 +                       if (unlikely(err))
12643 +                               goto out_wh; /* skips au_unpin() */
12644 +               }
12645 +               if (!err) {
12646 +                       h_src_dentry = au_h_dptr(src_dentry, a->bdst);
12647 +                       err = -ENOENT; /* kept when no positive h_src_dentry */
12648 +                       if (h_src_dentry && h_src_dentry->d_inode)
12649 +                               err = vfsub_link(h_src_dentry,
12650 +                                                au_pinned_h_dir(&a->pin),
12651 +                                                &a->h_path);
12652 +               }
12653 +       }
12654 +       if (unlikely(err))
12655 +               goto out_unpin;
12656 +
12657 +       if (wh_dentry) {
12658 +               a->h_path.dentry = wh_dentry; /* h_path now names the whiteout */
12659 +               err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
12660 +                                         dentry);
12661 +               if (unlikely(err))
12662 +                       goto out_revert;
12663 +       }
12664 +
12665 +       dir->i_version++;
12666 +       if (au_ibstart(dir) == au_dbstart(dentry))
12667 +               au_cpup_attr_timesizes(dir);
12668 +       inc_nlink(inode);
12669 +       inode->i_ctime = dir->i_ctime;
12670 +       if (!d_unhashed(a->h_path.dentry))
12671 +               d_instantiate(dentry, au_igrab(inode));
12672 +       else
12673 +               /* some filesystem calls d_drop() */
12674 +               d_drop(dentry);
12675 +       goto out_unpin; /* success */
12676 +
12677 + out_revert: /* NOTE(review): a->h_path.dentry is wh_dentry here; confirm */
12678 +       rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
12679 +       if (!rerr)
12680 +               goto out_dt;
12681 +       AuIOErr("%.*s reverting failed(%d, %d)\n",
12682 +               AuDLNPair(dentry), err, rerr);
12683 +       err = -EIO; /* the original err is only logged above */
12684 + out_dt:
12685 +       d_drop(dentry);
12686 +       au_dtime_revert(&dt);
12687 + out_unpin:
12688 +       au_unpin(&a->pin);
12689 + out_wh:
12690 +       dput(wh_dentry);
12691 + out_unlock:
12692 +       if (unlikely(err)) {
12693 +               au_update_dbstart(dentry);
12694 +               d_drop(dentry);
12695 +       }
12696 +       di_write_unlock(a->parent);
12697 +       dput(a->src_parent);
12698 +       aufs_read_and_write_unlock2(dentry, src_dentry);
12699 +       kfree(a);
12700 + out:
12701 +       return err;
12702 +}
12703 +
12704 +int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
12705 +{ /* mkdir @dentry under @dir; the branch dir is removed again on failure */
12706 +       int err, rerr;
12707 +       aufs_bindex_t bindex;
12708 +       unsigned char diropq;
12709 +       struct path h_path;
12710 +       struct dentry *wh_dentry, *parent, *opq_dentry;
12711 +       struct mutex *h_mtx;
12712 +       struct super_block *sb;
12713 +       struct {
12714 +               struct au_pin pin;
12715 +               struct au_dtime dt;
12716 +       } *a; /* reduce the stack usage */
12717 +       struct au_wr_dir_args wr_dir_args = {
12718 +               .force_btgt     = -1,
12719 +               .flags          = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
12720 +       };
12721 +
12722 +       IMustLock(dir);
12723 +
12724 +       err = -ENOMEM;
12725 +       a = kmalloc(sizeof(*a), GFP_NOFS); /* freed before the out: label */
12726 +       if (unlikely(!a))
12727 +               goto out;
12728 +
12729 +       aufs_read_lock(dentry, AuLock_DW);
12730 +       parent = dentry->d_parent; /* dir inode is locked */
12731 +       di_write_lock_parent(parent);
12732 +       wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
12733 +                                     &a->pin, &wr_dir_args);
12734 +       err = PTR_ERR(wh_dentry);
12735 +       if (IS_ERR(wh_dentry))
12736 +               goto out_free;
12737 +
12738 +       sb = dentry->d_sb;
12739 +       bindex = au_dbstart(dentry);
12740 +       h_path.dentry = au_h_dptr(dentry, bindex);
12741 +       h_path.mnt = au_sbr_mnt(sb, bindex);
12742 +       err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
12743 +       if (unlikely(err))
12744 +               goto out_unlock;
12745 +
12746 +       /* make the dir opaque */
12747 +       diropq = 0; /* becomes 1 once au_diropq_create() succeeded */
12748 +       h_mtx = &h_path.dentry->d_inode->i_mutex; /* i_mutex of the new dir */
12749 +       if (wh_dentry
12750 +           || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
12751 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12752 +               opq_dentry = au_diropq_create(dentry, bindex);
12753 +               mutex_unlock(h_mtx);
12754 +               err = PTR_ERR(opq_dentry);
12755 +               if (IS_ERR(opq_dentry))
12756 +                       goto out_dir;
12757 +               dput(opq_dentry);
12758 +               diropq = 1;
12759 +       }
12760 +
12761 +       err = epilog(dir, bindex, wh_dentry, dentry);
12762 +       if (!err) {
12763 +               inc_nlink(dir);
12764 +               goto out_unlock; /* success */
12765 +       }
12766 +
12767 +       /* revert */
12768 +       if (diropq) {
12769 +               AuLabel(revert opq);
12770 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
12771 +               rerr = au_diropq_remove(dentry, bindex);
12772 +               mutex_unlock(h_mtx);
12773 +               if (rerr) {
12774 +                       AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
12775 +                               AuDLNPair(dentry), err, rerr);
12776 +                       err = -EIO;
12777 +               }
12778 +       }
12779 +
12780 + out_dir: /* reached on failure only: remove the dir created above */
12781 +       AuLabel(revert dir);
12782 +       rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
12783 +       if (rerr) {
12784 +               AuIOErr("%.*s reverting dir failed(%d, %d)\n",
12785 +                       AuDLNPair(dentry), err, rerr);
12786 +               err = -EIO;
12787 +       }
12788 +       d_drop(dentry);
12789 +       au_dtime_revert(&a->dt);
12790 + out_unlock:
12791 +       au_unpin(&a->pin);
12792 +       dput(wh_dentry);
12793 + out_free:
12794 +       if (unlikely(err)) {
12795 +               au_update_dbstart(dentry);
12796 +               d_drop(dentry);
12797 +       }
12798 +       di_write_unlock(parent);
12799 +       aufs_read_unlock(dentry, AuLock_DW);
12800 +       kfree(a);
12801 + out:
12802 +       return err;
12803 +}
12804 diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_del.c aufs2-2.6.git/fs/aufs/i_op_del.c
12805 --- linux-2.6.31/fs/aufs/i_op_del.c     1970-01-01 00:00:00.000000000 +0000
12806 +++ aufs2-2.6.git/fs/aufs/i_op_del.c    2009-09-21 21:49:23.401607657 +0000
12807 @@ -0,0 +1,468 @@
12808 +/*
12809 + * Copyright (C) 2005-2009 Junjiro R. Okajima
12810 + *
12811 + * This program, aufs is free software; you can redistribute it and/or modify
12812 + * it under the terms of the GNU General Public License as published by
12813 + * the Free Software Foundation; either version 2 of the License, or
12814 + * (at your option) any later version.
12815 + *
12816 + * This program is distributed in the hope that it will be useful,
12817 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
12818 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12819 + * GNU General Public License for more details.
12820 + *
12821 + * You should have received a copy of the GNU General Public License
12822 + * along with this program; if not, write to the Free Software
12823 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
12824 + */
12825 +
12826 +/*
12827 + * inode operations (del entry)
12828 + */
12829 +
12830 +#include "aufs.h"
12831 +
12832 +/*
12833 + * decide if a new whiteout for @dentry is necessary or not.
12834 + * when it is necessary, prepare the parent dir for the upper branch whose
12835 + * branch index is @bcpup for creation. the actual creation of the whiteout will
12836 + * be done by caller. when *@bcpup is negative, the branch is chosen here.
12837 + * return value:
12838 + * 0: wh is unnecessary
12839 + * plus: wh is necessary
12840 + * minus: error
12841 + */
12842 +int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
12843 +{
12844 +       int need_wh, err;
12845 +       aufs_bindex_t bstart;
12846 +       struct super_block *sb;
12847 +
12848 +       sb = dentry->d_sb;
12849 +       bstart = au_dbstart(dentry);
12850 +       if (*bcpup < 0) {
12851 +               *bcpup = bstart; /* default: the branch @dentry starts on */
12852 +               if (au_test_ro(sb, bstart, dentry->d_inode)) {
12853 +                       err = AuWbrCopyup(au_sbi(sb), dentry);
12854 +                       *bcpup = err; /* a branch index, or negative on error */
12855 +                       if (unlikely(err < 0))
12856 +                               goto out;
12857 +               }
12858 +       } else
12859 +               AuDebugOn(bstart < *bcpup
12860 +                         || au_test_ro(sb, *bcpup, dentry->d_inode));
12861 +       AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
12862 +
12863 +       if (*bcpup != bstart) {
12864 +               err = au_cpup_dirs(dentry, *bcpup);
12865 +               if (unlikely(err))
12866 +                       goto out;
12867 +               need_wh = 1; /* operating on another branch than bstart */
12868 +       } else {
12869 +               aufs_bindex_t old_bend, new_bend, bdiropq = -1;
12870 +
12871 +               old_bend = au_dbend(dentry);
12872 +               if (isdir) {
12873 +                       bdiropq = au_dbdiropq(dentry);
12874 +                       au_set_dbdiropq(dentry, -1); /* restored after lookup */
12875 +               }
12876 +               need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
12877 +                                        /*nd*/NULL);
12878 +               err = need_wh;
12879 +               if (isdir)
12880 +                       au_set_dbdiropq(dentry, bdiropq);
12881 +               if (unlikely(err < 0))
12882 +                       goto out;
12883 +               new_bend = au_dbend(dentry);
12884 +               if (!need_wh && old_bend != new_bend) {
12885 +                       au_set_h_dptr(dentry, new_bend, NULL);
12886 +                       au_set_dbend(dentry, old_bend); /* back to old bend */
12887 +               }
12888 +       }
12889 +       AuDbg("need_wh %d\n", need_wh);
12890 +       err = need_wh;
12891 +
12892 + out:
12893 +       return err;
12894 +}
12895 +
12896 +/*
12897 + * simple tests for the del-entry operations.
12898 + * following the checks in vfs, plus the parent-child relationship.
12899 + */
12900 +int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
12901 +              struct dentry *h_parent, int isdir)
12902 +{ /* returns 0 when every test passes, else a negative errno */
12903 +       int err;
12904 +       umode_t h_mode;
12905 +       struct dentry *h_dentry, *h_latest;
12906 +       struct inode *h_inode;
12907 +
12908 +       h_dentry = au_h_dptr(dentry, bindex);
12909 +       h_inode = h_dentry->d_inode;
12910 +       if (dentry->d_inode) {
12911 +               err = -ENOENT;
12912 +               if (unlikely(!h_inode || !h_inode->i_nlink))
12913 +                       goto out;
12914 +
12915 +               h_mode = h_inode->i_mode;
12916 +               if (!isdir) {
12917 +                       err = -EISDIR;
12918 +                       if (unlikely(S_ISDIR(h_mode)))
12919 +                               goto out;
12920 +               } else if (unlikely(!S_ISDIR(h_mode))) {
12921 +                       err = -ENOTDIR;
12922 +                       goto out;
12923 +               }
12924 +       } else {
12925 +               /* rename(2) case */
12926 +               err = -EIO;
12927 +               if (unlikely(h_inode))
12928 +                       goto out;
12929 +       }
12930 +
12931 +       err = -ENOENT;
12932 +       /* expected parent dir is locked */
12933 +       if (unlikely(h_parent != h_dentry->d_parent))
12934 +               goto out;
12935 +       err = 0; /* NOTE(review): dead store, overwritten just below */
12936 +
12937 +       /*
12938 +        * rmdir a dir may break the consistency on some filesystem.
12939 +        * let's try heavy test.
12940 +        */
12941 +       err = -EACCES;
12942 +       if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
12943 +               goto out;
12944 +
12945 +       h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
12946 +                                  au_sbr(dentry->d_sb, bindex));
12947 +       err = -EIO;
12948 +       if (IS_ERR(h_latest))
12949 +               goto out;
12950 +       if (h_latest == h_dentry) /* the lookup must yield the same dentry */
12951 +               err = 0;
12952 +       dput(h_latest);
12953 +
12954 + out:
12955 +       return err;
12956 +}
12957 +
12958 +/*
12959 + * decide the branch where we operate for @dentry. the branch index will be set
12960 + * to @rbcpup. after deciding it, 'pin' it and store the timestamps of the
12961 + * parent dir for reverting. when a new whiteout is necessary, create it.
12962 + * returns the whiteout dentry, NULL when none is needed, or ERR_PTR().
12963 + */
12964 +static struct dentry*
12965 +lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
12966 +                   struct au_dtime *dt, struct au_pin *pin)
12967 +{
12968 +       struct dentry *wh_dentry;
12969 +       struct super_block *sb;
12970 +       struct path h_path;
12971 +       int err, need_wh;
12972 +       unsigned int udba;
12973 +       aufs_bindex_t bcpup;
12974 +
12975 +       need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
12976 +       wh_dentry = ERR_PTR(need_wh); /* returned only when need_wh < 0 */
12977 +       if (unlikely(need_wh < 0))
12978 +               goto out;
12979 +
12980 +       sb = dentry->d_sb;
12981 +       udba = au_opt_udba(sb);
12982 +       bcpup = *rbcpup;
12983 +       err = au_pin(pin, dentry, bcpup, udba,
12984 +                    AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12985 +       wh_dentry = ERR_PTR(err);
12986 +       if (unlikely(err))
12987 +               goto out;
12988 +
12989 +       h_path.dentry = au_pinned_h_parent(pin);
12990 +       if (udba != AuOpt_UDBA_NONE
12991 +           && au_dbstart(dentry) == bcpup) {
12992 +               err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
12993 +               wh_dentry = ERR_PTR(err);
12994 +               if (unlikely(err))
12995 +                       goto out_unpin;
12996 +       }
12997 +
12998 +       h_path.mnt = au_sbr_mnt(sb, bcpup);
12999 +       au_dtime_store(dt, au_pinned_parent(pin), &h_path);
13000 +       wh_dentry = NULL;
13001 +       if (!need_wh)
13002 +               goto out; /* success, no need to create whiteout */
13003 +
13004 +       wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
13005 +       if (!IS_ERR(wh_dentry))
13006 +               goto out; /* success */
13007 +       /* returns with the parent is locked and wh_dentry is dget-ed */
13008 +
13009 + out_unpin: /* error paths only: both successes above jump to out */
13010 +       au_unpin(pin);
13011 + out:
13012 +       return wh_dentry;
13013 +}
13014 +
13015 +/*
13016 + * when removing a dir, rename it to a unique temporary whiteout-ed name first
13017 + * in order to be revertible and save time for removing many child whiteouts
13018 + * under the dir.
13019 + * returns 1 when there are too many child whiteouts and caller should remove
13020 + * them asynchronously. returns 0 when the number of children is small enough
13021 + * to remove now or the branch fs is a remote fs.
13022 + * otherwise return an error.
13023 + */
13024 +static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
13025 +                          struct au_nhash *whlist, struct inode *dir)
13026 +{
13027 +       int rmdir_later, err, dirwh;
13028 +       struct dentry *h_dentry;
13029 +       struct super_block *sb;
13030 +
13031 +       sb = dentry->d_sb;
13032 +       SiMustAnyLock(sb);
13033 +       h_dentry = au_h_dptr(dentry, bindex);
13034 +       err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
13035 +       if (unlikely(err))
13036 +               goto out;
13037 +
13038 +       /* stop monitoring */
13039 +       au_hin_free(au_hi(dentry->d_inode, bindex));
13040 +
13041 +       if (!au_test_fs_remote(h_dentry->d_sb)) {
13042 +               dirwh = au_sbi(sb)->si_dirwh;
13043 +               rmdir_later = (dirwh <= 1); /* si_dirwh <= 1: always later */
13044 +               if (!rmdir_later)
13045 +                       rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
13046 +                                                             dirwh);
13047 +               if (rmdir_later)
13048 +                       return rmdir_later; /* the dir is renamed only */
13049 +       }
13050 +
13051 +       err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
13052 +       if (unlikely(err)) {
13053 +               AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
13054 +                       AuDLNPair(h_dentry), bindex, err);
13055 +               err = 0; /* logged above and deliberately ignored */
13056 +       }
13057 +
13058 + out:
13059 +       return err;
13060 +}
13061 +
13062 +/*
13063 + * final procedure for deleting an entry.
13064 + * maintain dentry and iattr.
13065 + */
13066 +static void epilog(struct inode *dir, struct dentry *dentry,
13067 +                  aufs_bindex_t bindex)
13068 +{
13069 +       struct inode *inode;
13070 +
13071 +       inode = dentry->d_inode;
13072 +       d_drop(dentry);
13073 +       inode->i_ctime = dir->i_ctime;
13074 +
13075 +       if (atomic_read(&dentry->d_count) == 1) { /* only one reference left */
13076 +               au_set_h_dptr(dentry, au_dbstart(dentry), NULL);
13077 +               au_update_dbstart(dentry);
13078 +       }
13079 +       if (au_ibstart(dir) == bindex) /* @dir starts on the branch we used */
13080 +               au_cpup_attr_timesizes(dir);
13081 +       dir->i_version++;
13082 +}
13083 +
13084 +/*
13085 + * when an error happened, remove the created whiteout and revert everything.
13086 + */
13087 +static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh,
13088 +                    struct dentry *wh_dentry, struct dentry *dentry,
13089 +                    struct au_dtime *dt)
13090 +{
13091 +       int rerr;
13092 +       struct path h_path = {
13093 +               .dentry = wh_dentry,
13094 +               .mnt    = au_sbr_mnt(dir->i_sb, bwh)
13095 +       };
13096 +
13097 +       rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry);
13098 +       if (!rerr) {
13099 +               au_set_dbwh(dentry, bwh);
13100 +               au_dtime_revert(dt);
13101 +               return 0; /* reverted; the caller keeps its own @err */
13102 +       }
13103 +
13104 +       AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
13105 +               AuDLNPair(dentry), err, rerr);
13106 +       return -EIO; /* @err and rerr are only logged above */
13107 +}
13108 +
13109 +/* ---------------------------------------------------------------------- */
13110 +
13111 +int aufs_unlink(struct inode *dir, struct dentry *dentry)
13112 +{ /* unlink @dentry from @dir; returns 0 on success, else an error */
13113 +       int err;
13114 +       aufs_bindex_t bwh, bindex, bstart;
13115 +       struct au_dtime dt;
13116 +       struct au_pin pin;
13117 +       struct path h_path;
13118 +       struct inode *inode, *h_dir;
13119 +       struct dentry *parent, *wh_dentry;
13120 +
13121 +       IMustLock(dir);
13122 +       inode = dentry->d_inode;
13123 +       if (unlikely(!inode))
13124 +               return -ENOENT; /* possible? */
13125 +       IMustLock(inode);
13126 +
13127 +       aufs_read_lock(dentry, AuLock_DW);
13128 +       parent = dentry->d_parent; /* dir inode is locked */
13129 +       di_write_lock_parent(parent);
13130 +
13131 +       bstart = au_dbstart(dentry);
13132 +       bwh = au_dbwh(dentry); /* saved for do_revert() */
13133 +       bindex = -1; /* negative: lock_hdir_create_wh() decides the branch */
13134 +       wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
13135 +       err = PTR_ERR(wh_dentry);
13136 +       if (IS_ERR(wh_dentry))
13137 +               goto out;
13138 +
13139 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
13140 +       h_path.dentry = au_h_dptr(dentry, bstart);
13141 +       dget(h_path.dentry); /* dropped at out_unlock */
13142 +       if (bindex == bstart) {
13143 +               h_dir = au_pinned_h_dir(&pin);
13144 +               err = vfsub_unlink(h_dir, &h_path, /*force*/0);
13145 +       } else {
13146 +               /* dir inode is locked */
13147 +               h_dir = wh_dentry->d_parent->d_inode;
13148 +               IMustLock(h_dir);
13149 +               err = 0;
13150 +       }
13151 +
13152 +       if (!err) {
13153 +               drop_nlink(inode);
13154 +               epilog(dir, dentry, bindex);
13155 +
13156 +               /* update target timestamps */
13157 +               if (bindex == bstart) {
13158 +                       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
13159 +                       inode->i_ctime = h_path.dentry->d_inode->i_ctime;
13160 +               } else
13161 +                       /* todo: this timestamp may be reverted later */
13162 +                       inode->i_ctime = h_dir->i_ctime;
13163 +               goto out_unlock; /* success */
13164 +       }
13165 +
13166 +       /* revert */
13167 +       if (wh_dentry) {
13168 +               int rerr;
13169 +
13170 +               rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13171 +               if (rerr)
13172 +                       err = rerr; /* a failed revert replaces the error */
13173 +       }
13174 +
13175 + out_unlock:
13176 +       au_unpin(&pin);
13177 +       dput(wh_dentry);
13178 +       dput(h_path.dentry);
13179 + out:
13180 +       di_write_unlock(parent);
13181 +       aufs_read_unlock(dentry, AuLock_DW);
13182 +       return err;
13183 +}
13184 +
13185 +int aufs_rmdir(struct inode *dir, struct dentry *dentry)
13186 +{ /* remove dir @dentry from @dir; returns 0 on success, else an error */
13187 +       int err, rmdir_later;
13188 +       aufs_bindex_t bwh, bindex, bstart;
13189 +       struct au_dtime dt;
13190 +       struct au_pin pin;
13191 +       struct inode *inode;
13192 +       struct dentry *parent, *wh_dentry, *h_dentry;
13193 +       struct au_whtmp_rmdir *args;
13194 +
13195 +       IMustLock(dir);
13196 +       inode = dentry->d_inode;
13197 +       err = -ENOENT; /* possible? */
13198 +       if (unlikely(!inode))
13199 +               goto out;
13200 +       IMustLock(inode);
13201 +
13202 +       aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH);
13203 +       err = -ENOMEM;
13204 +       args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
13205 +       if (unlikely(!args))
13206 +               goto out_unlock;
13207 +
13208 +       parent = dentry->d_parent; /* dir inode is locked */
13209 +       di_write_lock_parent(parent);
13210 +       err = au_test_empty(dentry, &args->whlist);
13211 +       if (unlikely(err))
13212 +               goto out_args;
13213 +
13214 +       bstart = au_dbstart(dentry);
13215 +       bwh = au_dbwh(dentry); /* saved for do_revert() */
13216 +       bindex = -1; /* negative: lock_hdir_create_wh() decides the branch */
13217 +       wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
13218 +       err = PTR_ERR(wh_dentry);
13219 +       if (IS_ERR(wh_dentry))
13220 +               goto out_args;
13221 +
13222 +       h_dentry = au_h_dptr(dentry, bstart);
13223 +       dget(h_dentry); /* dropped at out_unpin */
13224 +       rmdir_later = 0;
13225 +       if (bindex == bstart) {
13226 +               err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
13227 +               if (err > 0) { /* positive: remove the children later */
13228 +                       rmdir_later = err;
13229 +                       err = 0;
13230 +               }
13231 +       } else {
13232 +               /* stop monitoring */
13233 +               au_hin_free(au_hi(inode, bstart));
13234 +
13235 +               /* dir inode is locked */
13236 +               IMustLock(wh_dentry->d_parent->d_inode);
13237 +               err = 0;
13238 +       }
13239 +
13240 +       if (!err) {
13241 +               clear_nlink(inode);
13242 +               au_set_dbdiropq(dentry, -1);
13243 +               epilog(dir, dentry, bindex);
13244 +
13245 +               if (rmdir_later) {
13246 +                       au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
13247 +                       args = NULL; /* skips au_whtmp_rmdir_free() below */
13248 +               }
13249 +
13250 +               goto out_unpin; /* success */
13251 +       }
13252 +
13253 +       /* revert */
13254 +       AuLabel(revert);
13255 +       if (wh_dentry) {
13256 +               int rerr;
13257 +
13258 +               rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
13259 +               if (rerr)
13260 +                       err = rerr; /* a failed revert replaces the error */
13261 +       }
13262 +
13263 + out_unpin:
13264 +       au_unpin(&pin);
13265 +       dput(wh_dentry);
13266 +       dput(h_dentry);
13267 + out_args:
13268 +       di_write_unlock(parent);
13269 +       if (args)
13270 +               au_whtmp_rmdir_free(args);
13271 + out_unlock:
13272 +       aufs_read_unlock(dentry, AuLock_DW);
13273 + out:
13274 +       return err;
13275 +}
13276 diff -uprN -x .git linux-2.6.31/fs/aufs/i_op_ren.c aufs2-2.6.git/fs/aufs/i_op_ren.c
13277 --- linux-2.6.31/fs/aufs/i_op_ren.c     1970-01-01 00:00:00.000000000 +0000
13278 +++ aufs2-2.6.git/fs/aufs/i_op_ren.c    2009-09-21 21:49:23.404940801 +0000
13279 @@ -0,0 +1,957 @@
13280 +/*
13281 + * Copyright (C) 2005-2009 Junjiro R. Okajima
13282 + *
13283 + * This program, aufs is free software; you can redistribute it and/or modify
13284 + * it under the terms of the GNU General Public License as published by
13285 + * the Free Software Foundation; either version 2 of the License, or
13286 + * (at your option) any later version.
13287 + *
13288 + * This program is distributed in the hope that it will be useful,
13289 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
13290 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13291 + * GNU General Public License for more details.
13292 + *
13293 + * You should have received a copy of the GNU General Public License
13294 + * along with this program; if not, write to the Free Software
13295 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
13296 + */
13297 +
13298 +/*
13299 + * inode operation (rename entry)
13300 + * todo: this is crazy monster
13301 + */
13302 +
13303 +#include "aufs.h"
13304 +
13305 +enum { AuSRC, AuDST, AuSrcDst };
13306 +enum { AuPARENT, AuCHILD, AuParentChild };
13307 +
13308 +#define AuRen_ISDIR    1
13309 +#define AuRen_ISSAMEDIR        (1 << 1)
13310 +#define AuRen_WHSRC    (1 << 2)
13311 +#define AuRen_WHDST    (1 << 3)
13312 +#define AuRen_MNT_WRITE        (1 << 4)
13313 +#define AuRen_DT_DSTDIR        (1 << 5)
13314 +#define AuRen_DIROPQ   (1 << 6)
13315 +#define AuRen_CPUP     (1 << 7)
13316 +#define au_ftest_ren(flags, name)      ((flags) & AuRen_##name)
13317 +#define au_fset_ren(flags, name)       { (flags) |= AuRen_##name; }
13318 +#define au_fclr_ren(flags, name)       { (flags) &= ~AuRen_##name; }
13319 +
13320 +struct au_ren_args {
13321 +       struct {
13322 +               struct dentry *dentry, *h_dentry, *parent, *h_parent,
13323 +                       *wh_dentry;
13324 +               struct inode *dir, *inode;
13325 +               struct au_hinode *hdir;
13326 +               struct au_dtime dt[AuParentChild];
13327 +               aufs_bindex_t bstart;
13328 +       } sd[AuSrcDst];
13329 +
13330 +#define src_dentry     sd[AuSRC].dentry
13331 +#define src_dir                sd[AuSRC].dir
13332 +#define src_inode      sd[AuSRC].inode
13333 +#define src_h_dentry   sd[AuSRC].h_dentry
13334 +#define src_parent     sd[AuSRC].parent
13335 +#define src_h_parent   sd[AuSRC].h_parent
13336 +#define src_wh_dentry  sd[AuSRC].wh_dentry
13337 +#define src_hdir       sd[AuSRC].hdir
13338 +#define src_h_dir      sd[AuSRC].hdir->hi_inode
13339 +#define src_dt         sd[AuSRC].dt
13340 +#define src_bstart     sd[AuSRC].bstart
13341 +
13342 +#define dst_dentry     sd[AuDST].dentry
13343 +#define dst_dir                sd[AuDST].dir
13344 +#define dst_inode      sd[AuDST].inode
13345 +#define dst_h_dentry   sd[AuDST].h_dentry
13346 +#define dst_parent     sd[AuDST].parent
13347 +#define dst_h_parent   sd[AuDST].h_parent
13348 +#define dst_wh_dentry  sd[AuDST].wh_dentry
13349 +#define dst_hdir       sd[AuDST].hdir
13350 +#define dst_h_dir      sd[AuDST].hdir->hi_inode
13351 +#define dst_dt         sd[AuDST].dt
13352 +#define dst_bstart     sd[AuDST].bstart
13353 +
13354 +       struct dentry *h_trap;
13355 +       struct au_branch *br;
13356 +       struct au_hinode *src_hinode;
13357 +       struct path h_path;
13358 +       struct au_nhash whlist;
13359 +       aufs_bindex_t btgt;
13360 +
13361 +       unsigned int flags;
13362 +
13363 +       struct au_whtmp_rmdir *thargs;
13364 +       struct dentry *h_dst;
13365 +};
13366 +
13367 +/* ---------------------------------------------------------------------- */
13368 +
13369 +/*
13370 + * functions for reverting.
13371 + * when an error happened in a single rename system call, we should revert
13372 + * everything as if nothing happened.
13373 + * we don't need to revert the copied-up/down the parent dir since they are
13374 + * harmless.
13375 + */
13376 +
13377 +#define RevertFailure(fmt, args...) do { \
13378 +       AuIOErr("revert failure: " fmt " (%d, %d)\n", \
13379 +               ##args, err, rerr); \
13380 +       err = -EIO; \
13381 +} while (0)
13382 +
13383 +static void au_ren_rev_diropq(int err, struct au_ren_args *a)
13384 +{
13385 +       int rerr;
13386 +
13387 +       au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
13388 +       rerr = au_diropq_remove(a->src_dentry, a->btgt);
13389 +       au_hin_imtx_unlock(a->src_hinode);
13390 +       if (rerr)
13391 +               RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
13392 +}
13393 +
13394 +
13395 +static void au_ren_rev_rename(int err, struct au_ren_args *a)
13396 +{
13397 +       int rerr;
13398 +
13399 +       a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
13400 +                                      a->br, /*nd*/NULL);
13401 +       rerr = PTR_ERR(a->h_path.dentry);
13402 +       if (IS_ERR(a->h_path.dentry)) {
13403 +               RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
13404 +               return;
13405 +       }
13406 +
13407 +       rerr = vfsub_rename(a->dst_h_dir,
13408 +                           au_h_dptr(a->src_dentry, a->btgt),
13409 +                           a->src_h_dir, &a->h_path);
13410 +       d_drop(a->h_path.dentry);
13411 +       dput(a->h_path.dentry);
13412 +       /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
13413 +       if (rerr)
13414 +               RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
13415 +}
13416 +
13417 +static void au_ren_rev_cpup(int err, struct au_ren_args *a)
13418 +{
13419 +       int rerr;
13420 +
13421 +       a->h_path.dentry = a->dst_h_dentry;
13422 +       rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
13423 +       au_set_h_dptr(a->src_dentry, a->btgt, NULL);
13424 +       au_set_dbstart(a->src_dentry, a->src_bstart);
13425 +       if (rerr)
13426 +               RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
13427 +}
13428 +
13429 +
13430 +static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
13431 +{
13432 +       int rerr;
13433 +
13434 +       a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
13435 +                                      a->br, /*nd*/NULL);
13436 +       rerr = PTR_ERR(a->h_path.dentry);
13437 +       if (IS_ERR(a->h_path.dentry)) {
13438 +               RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
13439 +               return;
13440 +       }
13441 +       if (a->h_path.dentry->d_inode) {
13442 +               d_drop(a->h_path.dentry);
13443 +               dput(a->h_path.dentry);
13444 +               return;
13445 +       }
13446 +
13447 +       rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
13448 +       d_drop(a->h_path.dentry);
13449 +       dput(a->h_path.dentry);
13450 +       if (!rerr) {
13451 +               au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
13452 +               au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
13453 +       } else
13454 +               RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
13455 +}
13456 +
13457 +static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
13458 +{
13459 +       int rerr;
13460 +
13461 +       a->h_path.dentry = a->src_wh_dentry;
13462 +       rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
13463 +       if (rerr)
13464 +               RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
13465 +}
13466 +
13467 +static void au_ren_rev_drop(struct au_ren_args *a)
13468 +{
13469 +       struct dentry *d, *h_d;
13470 +       int i;
13471 +       aufs_bindex_t bend, bindex;
13472 +
13473 +       for (i = 0; i < AuSrcDst; i++) {
13474 +               d = a->sd[i].dentry;
13475 +               d_drop(d);
13476 +               bend = au_dbend(d);
13477 +               for (bindex = au_dbstart(d); bindex <= bend; bindex++) {
13478 +                       h_d = au_h_dptr(d, bindex);
13479 +                       if (h_d)
13480 +                               d_drop(h_d);
13481 +               }
13482 +       }
13483 +
13484 +       au_update_dbstart(a->dst_dentry);
13485 +       if (a->thargs)
13486 +               d_drop(a->h_dst);
13487 +}
13488 +#undef RevertFailure
13489 +
13490 +/* ---------------------------------------------------------------------- */
13491 +
13492 +/*
13493 + * when we have to copyup the renaming entry, do it with the rename-target name
13494 + * in order to minimize the cost (the later actual rename is unnecessary).
13495 + * otherwise rename it on the target branch.
13496 + */
13497 +static int au_ren_or_cpup(struct au_ren_args *a)
13498 +{
13499 +       int err;
13500 +       struct dentry *d;
13501 +
13502 +       d = a->src_dentry;
13503 +       if (au_dbstart(d) == a->btgt) {
13504 +               a->h_path.dentry = a->dst_h_dentry;
13505 +               if (au_ftest_ren(a->flags, DIROPQ)
13506 +                   && au_dbdiropq(d) == a->btgt)
13507 +                       au_fclr_ren(a->flags, DIROPQ);
13508 +               AuDebugOn(au_dbstart(d) != a->btgt);
13509 +               err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
13510 +                                  a->dst_h_dir, &a->h_path);
13511 +       } else {
13512 +               struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
13513 +
13514 +               au_fset_ren(a->flags, CPUP);
13515 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13516 +               au_set_dbstart(d, a->btgt);
13517 +               au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
13518 +               err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
13519 +                                        !AuCpup_DTIME, a->dst_parent);
13520 +               if (unlikely(err)) {
13521 +                       au_set_h_dptr(d, a->btgt, NULL);
13522 +                       au_set_dbstart(d, a->src_bstart);
13523 +               }
13524 +               mutex_unlock(h_mtx);
13525 +       }
13526 +
13527 +       return err;
13528 +}
13529 +
13530 +/* cf. aufs_rmdir() */
13531 +static int au_ren_del_whtmp(struct au_ren_args *a)
13532 +{
13533 +       int err;
13534 +       struct inode *dir;
13535 +
13536 +       dir = a->dst_dir;
13537 +       SiMustAnyLock(dir->i_sb);
13538 +       if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
13539 +                                    au_sbi(dir->i_sb)->si_dirwh)
13540 +           || au_test_fs_remote(a->h_dst->d_sb)) {
13541 +               err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
13542 +               if (unlikely(err))
13543 +                       AuWarn("failed removing whtmp dir %.*s (%d), "
13544 +                              "ignored.\n", AuDLNPair(a->h_dst), err);
13545 +       } else {
13546 +               au_nhash_wh_free(&a->thargs->whlist);
13547 +               a->thargs->whlist = a->whlist;
13548 +               a->whlist.nh_num = 0;
13549 +               au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
13550 +               dput(a->h_dst);
13551 +               a->thargs = NULL;
13552 +       }
13553 +
13554 +       return 0;
13555 +}
13556 +
13557 +/* make the renamed dir 'opaque' on a->btgt; returns 0 or a negative errno. */
13558 +static int au_ren_diropq(struct au_ren_args *a)
13559 +{
13560 +       int err;
13561 +       struct dentry *diropq;
13562 +
13563 +       err = 0;
13564 +       a->src_hinode = au_hi(a->src_inode, a->btgt);
13565 +       au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
13566 +       diropq = au_diropq_create(a->src_dentry, a->btgt);
13567 +       au_hin_imtx_unlock(a->src_hinode);
13568 +       if (IS_ERR(diropq))
13569 +               err = PTR_ERR(diropq);
13570 +       else
13571 +               dput(diropq); /* only a real dentry may be passed to dput() */
13572 +       return err;
13573 +}
13574 +
13575 +static int do_rename(struct au_ren_args *a)
13576 +{
13577 +       int err;
13578 +       struct dentry *d, *h_d;
13579 +
13580 +       /* prepare workqueue args for asynchronous rmdir */
13581 +       h_d = a->dst_h_dentry;
13582 +       if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
13583 +               err = -ENOMEM;
13584 +               a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
13585 +               if (unlikely(!a->thargs))
13586 +                       goto out;
13587 +               a->h_dst = dget(h_d);
13588 +       }
13589 +
13590 +       /* create whiteout for src_dentry */
13591 +       if (au_ftest_ren(a->flags, WHSRC)) {
13592 +               a->src_wh_dentry
13593 +                       = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
13594 +               err = PTR_ERR(a->src_wh_dentry);
13595 +               if (IS_ERR(a->src_wh_dentry))
13596 +                       goto out_thargs;
13597 +       }
13598 +
13599 +       /* lookup whiteout for dentry */
13600 +       if (au_ftest_ren(a->flags, WHDST)) {
13601 +               h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
13602 +                                a->br);
13603 +               err = PTR_ERR(h_d);
13604 +               if (IS_ERR(h_d))
13605 +                       goto out_whsrc;
13606 +               if (!h_d->d_inode)
13607 +                       dput(h_d);
13608 +               else
13609 +                       a->dst_wh_dentry = h_d;
13610 +       }
13611 +
13612 +       /* rename dentry to tmpwh */
13613 +       if (a->thargs) {
13614 +               err = au_whtmp_ren(a->dst_h_dentry, a->br);
13615 +               if (unlikely(err))
13616 +                       goto out_whdst;
13617 +
13618 +               d = a->dst_dentry;
13619 +               au_set_h_dptr(d, a->btgt, NULL);
13620 +               err = au_lkup_neg(d, a->btgt);
13621 +               if (unlikely(err))
13622 +                       goto out_whtmp;
13623 +               a->dst_h_dentry = au_h_dptr(d, a->btgt);
13624 +       }
13625 +
13626 +       /* cpup src */
13627 +       if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
13628 +               struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
13629 +
13630 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13631 +               err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
13632 +                                        !AuCpup_DTIME);
13633 +               mutex_unlock(h_mtx);
13634 +               if (unlikely(err))
13635 +                       goto out_whtmp;
13636 +       }
13637 +
13638 +       /* rename by vfs_rename or cpup */
13639 +       d = a->dst_dentry;
13640 +       if (au_ftest_ren(a->flags, ISDIR)
13641 +           && (a->dst_wh_dentry
13642 +               || au_dbdiropq(d) == a->btgt
13643 +               /* hide the lower to keep xino */
13644 +               || a->btgt < au_dbend(d)
13645 +               || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
13646 +               au_fset_ren(a->flags, DIROPQ);
13647 +       err = au_ren_or_cpup(a);
13648 +       if (unlikely(err))
13649 +               /* leave the copied-up one */
13650 +               goto out_whtmp;
13651 +
13652 +       /* make dir opaque */
13653 +       if (au_ftest_ren(a->flags, DIROPQ)) {
13654 +               err = au_ren_diropq(a);
13655 +               if (unlikely(err))
13656 +                       goto out_rename;
13657 +       }
13658 +
13659 +       /* update target timestamps */
13660 +       AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
13661 +       a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
13662 +       vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
13663 +       a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
13664 +
13665 +       /* remove whiteout for dentry */
13666 +       if (a->dst_wh_dentry) {
13667 +               a->h_path.dentry = a->dst_wh_dentry;
13668 +               err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
13669 +                                         a->dst_dentry);
13670 +               if (unlikely(err))
13671 +                       goto out_diropq;
13672 +       }
13673 +
13674 +       /* remove whtmp */
13675 +       if (a->thargs)
13676 +               au_ren_del_whtmp(a); /* ignore this error */
13677 +
13678 +       err = 0;
13679 +       goto out_success;
13680 +
13681 + out_diropq:
13682 +       if (au_ftest_ren(a->flags, DIROPQ))
13683 +               au_ren_rev_diropq(err, a);
13684 + out_rename:
13685 +       if (!au_ftest_ren(a->flags, CPUP))
13686 +               au_ren_rev_rename(err, a);
13687 +       else
13688 +               au_ren_rev_cpup(err, a);
13689 + out_whtmp:
13690 +       if (a->thargs)
13691 +               au_ren_rev_whtmp(err, a);
13692 + out_whdst:
13693 +       dput(a->dst_wh_dentry);
13694 +       a->dst_wh_dentry = NULL;
13695 + out_whsrc:
13696 +       if (a->src_wh_dentry)
13697 +               au_ren_rev_whsrc(err, a);
13698 +       au_ren_rev_drop(a);
13699 + out_success:
13700 +       dput(a->src_wh_dentry);
13701 +       dput(a->dst_wh_dentry);
13702 + out_thargs:
13703 +       if (a->thargs) {
13704 +               dput(a->h_dst);
13705 +               au_whtmp_rmdir_free(a->thargs);
13706 +               a->thargs = NULL;
13707 +       }
13708 + out:
13709 +       return err;
13710 +}
13711 +
13712 +/* ---------------------------------------------------------------------- */
13713 +
13714 +/*
13715 + * test if @dentry dir can be rename destination or not.
13716 + * success means, it is a logically empty dir.
13717 + */
13718 +static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
13719 +{
13720 +       return au_test_empty(dentry, whlist);
13721 +}
13722 +
13723 +/*
13724 + * test if @dentry dir can be rename source or not.
13725 + * if it can, return 0.
13726 + * success means,
13727 + * - it is a logically empty dir.
13728 + * - or, it exists on writable branch and has no children including whiteouts
13729 + *       on the lower branch.
13730 + */
13731 +static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
13732 +{
13733 +       int err;
13734 +       unsigned int rdhash;
13735 +       aufs_bindex_t bstart;
13736 +
13737 +       bstart = au_dbstart(dentry);
13738 +       if (bstart != btgt) {
13739 +               struct au_nhash whlist;
13740 +
13741 +               SiMustAnyLock(dentry->d_sb);
13742 +               rdhash = au_sbi(dentry->d_sb)->si_rdhash;
13743 +               if (!rdhash)
13744 +                       rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
13745 +                                                          dentry));
13746 +               err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
13747 +               if (unlikely(err))
13748 +                       goto out;
13749 +               err = au_test_empty(dentry, &whlist);
13750 +               au_nhash_wh_free(&whlist);
13751 +               goto out;
13752 +       }
13753 +
13754 +       if (bstart == au_dbtaildir(dentry))
13755 +               return 0; /* success */
13756 +
13757 +       err = au_test_empty_lower(dentry);
13758 +
13759 + out:
13760 +       if (err == -ENOTEMPTY) {
13761 +               AuWarn1("renaming dir who has child(ren) on multiple branches,"
13762 +                       " is not supported\n");
13763 +               err = -EXDEV;
13764 +       }
13765 +       return err;
13766 +}
13767 +
13768 +/* side effect: sets whlist and h_dentry */
13769 +static int au_ren_may_dir(struct au_ren_args *a)
13770 +{
13771 +       int err;
13772 +       unsigned int rdhash;
13773 +       struct dentry *d;
13774 +
13775 +       d = a->dst_dentry;
13776 +       SiMustAnyLock(d->d_sb);
13777 +
13778 +       err = 0;
13779 +       if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
13780 +               rdhash = au_sbi(d->d_sb)->si_rdhash;
13781 +               if (!rdhash)
13782 +                       rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
13783 +               err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
13784 +               if (unlikely(err))
13785 +                       goto out;
13786 +
13787 +               au_set_dbstart(d, a->dst_bstart);
13788 +               err = may_rename_dstdir(d, &a->whlist);
13789 +               au_set_dbstart(d, a->btgt);
13790 +       }
13791 +       a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
13792 +       if (unlikely(err))
13793 +               goto out;
13794 +
13795 +       d = a->src_dentry;
13796 +       a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
13797 +       if (au_ftest_ren(a->flags, ISDIR)) {
13798 +               err = may_rename_srcdir(d, a->btgt);
13799 +               if (unlikely(err)) {
13800 +                       au_nhash_wh_free(&a->whlist);
13801 +                       a->whlist.nh_num = 0;
13802 +               }
13803 +       }
13804 + out:
13805 +       return err;
13806 +}
13807 +
13808 +/* ---------------------------------------------------------------------- */
13809 +
13810 +/*
13811 + * simple tests for rename.
13812 + * following the checks in vfs, plus the parent-child relationship.
13813 + */
13814 +static int au_may_ren(struct au_ren_args *a)
13815 +{
13816 +       int err, isdir;
13817 +       struct inode *h_inode;
13818 +
13819 +       if (a->src_bstart == a->btgt) {
13820 +               err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
13821 +                                au_ftest_ren(a->flags, ISDIR));
13822 +               if (unlikely(err))
13823 +                       goto out;
13824 +               err = -EINVAL;
13825 +               if (unlikely(a->src_h_dentry == a->h_trap))
13826 +                       goto out;
13827 +       }
13828 +
13829 +       err = 0;
13830 +       if (a->dst_bstart != a->btgt)
13831 +               goto out;
13832 +
13833 +       err = -EIO;
13834 +       h_inode = a->dst_h_dentry->d_inode;
13835 +       isdir = !!au_ftest_ren(a->flags, ISDIR);
13836 +       if (!a->dst_dentry->d_inode) {
13837 +               if (unlikely(h_inode))
13838 +                       goto out;
13839 +               err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
13840 +                                isdir);
13841 +       } else {
13842 +               if (unlikely(!h_inode || !h_inode->i_nlink))
13843 +                       goto out;
13844 +               err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
13845 +                                isdir);
13846 +               if (unlikely(err))
13847 +                       goto out;
13848 +               err = -ENOTEMPTY;
13849 +               if (unlikely(a->dst_h_dentry == a->h_trap))
13850 +                       goto out;
13851 +               err = 0;
13852 +       }
13853 +
13854 + out:
13855 +       if (unlikely(err == -ENOENT || err == -EEXIST))
13856 +               err = -EIO;
13857 +       return err;
13858 +}
13859 +
13860 +/* ---------------------------------------------------------------------- */
13861 +
13862 +/*
13863 + * locking order
13864 + * (VFS)
13865 + * - src_dir and dir by lock_rename()
13866 + * - inode if exists
13867 + * (aufs)
13868 + * - lock all
13869 + *   + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
13870 + *     + si_read_lock
13871 + *     + di_write_lock2_child()
13872 + *       + di_write_lock_child()
13873 + *        + ii_write_lock_child()
13874 + *       + di_write_lock_child2()
13875 + *        + ii_write_lock_child2()
13876 + *     + src_parent and parent
13877 + *       + di_write_lock_parent()
13878 + *        + ii_write_lock_parent()
13879 + *       + di_write_lock_parent2()
13880 + *        + ii_write_lock_parent2()
13881 + *   + lower src_dir and dir by vfsub_lock_rename()
13882 + *   + verify every relationship between child and parent. if any
13883 + *     of them fails, unlock all and return -EBUSY.
13884 + */
13885 +static void au_ren_unlock(struct au_ren_args *a)
13886 +{
13887 +       /*
13888 +        * drop the branch write access only if MNT_WRITE is set, then unlock
13889 +        */
13890 +       if (au_ftest_ren(a->flags, MNT_WRITE))
13891 +               mnt_drop_write(a->br->br_mnt);
13892 +       vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
13893 +                           a->dst_h_parent, a->dst_hdir);
13894 +}
13895 +
13896 +static int au_ren_lock(struct au_ren_args *a)
13897 +{
13898 +       int err;
13899 +       unsigned int udba;
13900 +
13901 +       err = 0;
13902 +       a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
13903 +       a->src_hdir = au_hi(a->src_dir, a->btgt);
13904 +       a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
13905 +       a->dst_hdir = au_hi(a->dst_dir, a->btgt);
13906 +       a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
13907 +                                     a->dst_h_parent, a->dst_hdir);
13908 +       udba = au_opt_udba(a->src_dentry->d_sb);
13909 +       if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
13910 +                    || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
13911 +               err = au_busy_or_stale();
13912 +       if (!err && au_dbstart(a->src_dentry) == a->btgt)
13913 +               err = au_h_verify(a->src_h_dentry, udba,
13914 +                                 a->src_h_parent->d_inode, a->src_h_parent,
13915 +                                 a->br);
13916 +       if (!err && au_dbstart(a->dst_dentry) == a->btgt)
13917 +               err = au_h_verify(a->dst_h_dentry, udba,
13918 +                                 a->dst_h_parent->d_inode, a->dst_h_parent,
13919 +                                 a->br);
13920 +       if (!err) {
13921 +               err = mnt_want_write(a->br->br_mnt);
13922 +               if (unlikely(err))
13923 +                       goto out_unlock;
13924 +               au_fset_ren(a->flags, MNT_WRITE);
13925 +               goto out; /* success */
13926 +       }
13927 +
13928 +       err = au_busy_or_stale();
13929 +
13930 + out_unlock:
13931 +       au_ren_unlock(a);
13932 + out:
13933 +       return err;
13934 +}
13935 +
13936 +/* ---------------------------------------------------------------------- */
13937 +
13938 +static void au_ren_refresh_dir(struct au_ren_args *a)
13939 +{
13940 +       struct inode *dir;
13941 +
13942 +       dir = a->dst_dir;
13943 +       dir->i_version++;
13944 +       if (au_ftest_ren(a->flags, ISDIR)) {
13945 +               /* is this updating defined in POSIX? */
13946 +               au_cpup_attr_timesizes(a->src_inode);
13947 +               au_cpup_attr_nlink(dir, /*force*/1);
13948 +               if (a->dst_inode) {
13949 +                       clear_nlink(a->dst_inode);
13950 +                       au_cpup_attr_timesizes(a->dst_inode);
13951 +               }
13952 +       }
13953 +       if (au_ibstart(dir) == a->btgt)
13954 +               au_cpup_attr_timesizes(dir);
13955 +
13956 +       if (au_ftest_ren(a->flags, ISSAMEDIR))
13957 +               return;
13958 +
13959 +       dir = a->src_dir;
13960 +       dir->i_version++;
13961 +       if (au_ftest_ren(a->flags, ISDIR))
13962 +               au_cpup_attr_nlink(dir, /*force*/1);
13963 +       if (au_ibstart(dir) == a->btgt)
13964 +               au_cpup_attr_timesizes(dir);
13965 +}
13966 +
13967 +static void au_ren_refresh(struct au_ren_args *a)
13968 +{
13969 +       aufs_bindex_t bend, bindex;
13970 +       struct dentry *d, *h_d;
13971 +       struct inode *i, *h_i;
13972 +       struct super_block *sb;
13973 +
13974 +       d = a->src_dentry;
13975 +       au_set_dbwh(d, -1);
13976 +       bend = au_dbend(d);
13977 +       for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
13978 +               h_d = au_h_dptr(d, bindex);
13979 +               if (h_d)
13980 +                       au_set_h_dptr(d, bindex, NULL);
13981 +       }
13982 +       au_set_dbend(d, a->btgt);
13983 +
13984 +       sb = d->d_sb;
13985 +       i = a->src_inode;
13986 +       if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
13987 +               return; /* success */
13988 +
13989 +       bend = au_ibend(i);
13990 +       for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
13991 +               h_i = au_h_iptr(i, bindex);
13992 +               if (h_i) {
13993 +                       au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
13994 +                       /* ignore this error */
13995 +                       au_set_h_iptr(i, bindex, NULL, 0);
13996 +               }
13997 +       }
13998 +       au_set_ibend(i, a->btgt);
13999 +}
14000 +
14001 +/* ---------------------------------------------------------------------- */
14002 +
14003 +/* mainly for link(2) and rename(2) */
14004 +int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
14005 +{
14006 +       aufs_bindex_t bdiropq, bwh;
14007 +       struct dentry *parent;
14008 +       struct au_branch *br;
14009 +
14010 +       parent = dentry->d_parent;
14011 +       IMustLock(parent->d_inode); /* dir is locked */
14012 +
14013 +       bdiropq = au_dbdiropq(parent);
14014 +       bwh = au_dbwh(dentry);
14015 +       br = au_sbr(dentry->d_sb, btgt);
14016 +       if (au_br_rdonly(br)
14017 +           || (0 <= bdiropq && bdiropq < btgt)
14018 +           || (0 <= bwh && bwh < btgt))
14019 +               btgt = -1;
14020 +
14021 +       AuDbg("btgt %d\n", btgt);
14022 +       return btgt;
14023 +}
14024 +
14025 +/* sets src_bstart, dst_bstart and btgt */
14026 +static int au_ren_wbr(struct au_ren_args *a)
14027 +{
14028 +       int err;
14029 +       struct au_wr_dir_args wr_dir_args = {
14030 +               /* .force_btgt  = -1, */
14031 +               .flags          = AuWrDir_ADD_ENTRY
14032 +       };
14033 +
14034 +       a->src_bstart = au_dbstart(a->src_dentry);
14035 +       a->dst_bstart = au_dbstart(a->dst_dentry);
14036 +       if (au_ftest_ren(a->flags, ISDIR))
14037 +               au_fset_wrdir(wr_dir_args.flags, ISDIR);
14038 +       wr_dir_args.force_btgt = a->src_bstart;
14039 +       if (a->dst_inode && a->dst_bstart < a->src_bstart)
14040 +               wr_dir_args.force_btgt = a->dst_bstart;
14041 +       wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
14042 +       err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
14043 +       a->btgt = err;
14044 +
14045 +       return err;
14046 +}
14047 +
14048 +static void au_ren_dt(struct au_ren_args *a)
14049 +{
14050 +       a->h_path.dentry = a->src_h_parent;
14051 +       au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
14052 +       if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
14053 +               a->h_path.dentry = a->dst_h_parent;
14054 +               au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
14055 +       }
14056 +
14057 +       au_fclr_ren(a->flags, DT_DSTDIR);
14058 +       if (!au_ftest_ren(a->flags, ISDIR))
14059 +               return;
14060 +
14061 +       a->h_path.dentry = a->src_h_dentry;
14062 +       au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
14063 +       if (a->dst_h_dentry->d_inode) {
14064 +               au_fset_ren(a->flags, DT_DSTDIR);
14065 +               a->h_path.dentry = a->dst_h_dentry;
14066 +               au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
14067 +       }
14068 +}
14069 +
14070 +static void au_ren_rev_dt(int err, struct au_ren_args *a)
14071 +{
14072 +       struct dentry *h_d;
14073 +       struct mutex *h_mtx;
14074 +
14075 +       au_dtime_revert(a->src_dt + AuPARENT);
14076 +       if (!au_ftest_ren(a->flags, ISSAMEDIR))
14077 +               au_dtime_revert(a->dst_dt + AuPARENT);
14078 +
14079 +       if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
14080 +               h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
14081 +               h_mtx = &h_d->d_inode->i_mutex;
14082 +               mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14083 +               au_dtime_revert(a->src_dt + AuCHILD);
14084 +               mutex_unlock(h_mtx);
14085 +
14086 +               if (au_ftest_ren(a->flags, DT_DSTDIR)) {
14087 +                       h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
14088 +                       h_mtx = &h_d->d_inode->i_mutex;
14089 +                       mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14090 +                       au_dtime_revert(a->dst_dt + AuCHILD);
14091 +                       mutex_unlock(h_mtx);
14092 +               }
14093 +       }
14094 +}
14095 +
14096 +/* ---------------------------------------------------------------------- */
14097 +
14098 +int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
14099 +               struct inode *_dst_dir, struct dentry *_dst_dentry)
14100 +{
14101 +       int err;
14102 +       /* kept on the heap to reduce stack space */
14103 +       struct au_ren_args *a;
14104 +
14105 +       IMustLock(_src_dir);
14106 +       IMustLock(_dst_dir);
14107 +
14108 +       err = -ENOMEM;
14109 +       BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
14110 +       a = kzalloc(sizeof(*a), GFP_NOFS);
14111 +       if (unlikely(!a))
14112 +               goto out;
14113 +
14114 +       a->src_dir = _src_dir;
14115 +       a->src_dentry = _src_dentry;
14116 +       a->src_inode = a->src_dentry->d_inode;
14117 +       a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
14118 +       a->dst_dir = _dst_dir;
14119 +       a->dst_dentry = _dst_dentry;
14120 +       a->dst_inode = a->dst_dentry->d_inode;
14121 +       a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
14122 +       if (a->dst_inode) {
14123 +               IMustLock(a->dst_inode);
14124 +               au_igrab(a->dst_inode); /* iput() at out_free */
14125 +       }
14126 +
14127 +       err = -ENOTDIR; /* a dir cannot replace a non-dir */
14128 +       if (S_ISDIR(a->src_inode->i_mode)) {
14129 +               au_fset_ren(a->flags, ISDIR);
14130 +               if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
14131 +                       goto out_free;
14132 +               aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14133 +                                         AuLock_DIR | AuLock_FLUSH);
14134 +       } else
14135 +               aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
14136 +                                         AuLock_FLUSH);
14137 +
14138 +       au_fset_ren(a->flags, ISSAMEDIR); /* temporary; unset if dirs differ */
14139 +       di_write_lock_parent(a->dst_parent);
14140 +
14141 +       /* which branch we process */
14142 +       err = au_ren_wbr(a);
14143 +       if (unlikely(err < 0))
14144 +               goto out_unlock;
14145 +       a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
14146 +       a->h_path.mnt = a->br->br_mnt;
14147 +
14148 +       /* are they available to be renamed */
14149 +       err = au_ren_may_dir(a);
14150 +       if (unlikely(err))
14151 +               goto out_children;
14152 +
14153 +       /* prepare the writable parent dir on the same branch */
14154 +       if (a->dst_bstart == a->btgt) {
14155 +               au_fset_ren(a->flags, WHDST);
14156 +       } else {
14157 +               err = au_cpup_dirs(a->dst_dentry, a->btgt);
14158 +               if (unlikely(err))
14159 +                       goto out_children;
14160 +       }
14161 +
14162 +       if (a->src_dir != a->dst_dir) {
14163 +               /*
14164 +                * this temporary unlock is safe,
14165 +                * because both dir->i_mutex are locked.
14166 +                */
14167 +               di_write_unlock(a->dst_parent);
14168 +               di_write_lock_parent(a->src_parent);
14169 +               err = au_wr_dir_need_wh(a->src_dentry,
14170 +                                       au_ftest_ren(a->flags, ISDIR),
14171 +                                       &a->btgt);
14172 +               di_write_unlock(a->src_parent);
14173 +               di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
14174 +               au_fclr_ren(a->flags, ISSAMEDIR);
14175 +       } else
14176 +               err = au_wr_dir_need_wh(a->src_dentry,
14177 +                                       au_ftest_ren(a->flags, ISDIR),
14178 +                                       &a->btgt);
14179 +       if (unlikely(err < 0))
14180 +               goto out_children;
14181 +       if (err)
14182 +               au_fset_ren(a->flags, WHSRC); /* err > 0 here */
14183 +
14184 +       /* lock them all */
14185 +       err = au_ren_lock(a);
14186 +       if (unlikely(err))
14187 +               goto out_children;
14188 +
14189 +       if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) {
14190 +               err = au_may_ren(a);
14191 +               if (unlikely(err))
14192 +                       goto out_hdir;
14193 +       }
14194 +
14195 +       /* store timestamps to be revertible */
14196 +       au_ren_dt(a);
14197 +
14198 +       /* here we go */
14199 +       err = do_rename(a);
14200 +       if (unlikely(err))
14201 +               goto out_dt;
14202 +
14203 +       /* update dir attributes */
14204 +       au_ren_refresh_dir(a);
14205 +
14206 +       /* dput/iput all lower dentries */
14207 +       au_ren_refresh(a);
14208 +
14209 +       goto out_hdir; /* success, err == 0 */
14210 +
14211 + out_dt:
14212 +       au_ren_rev_dt(err, a);
14213 + out_hdir:
14214 +       au_ren_unlock(a);
14215 + out_children:
14216 +       au_nhash_wh_free(&a->whlist);
14217 + out_unlock:
14218 +       if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) {
14219 +               au_update_dbstart(a->dst_dentry);
14220 +               d_drop(a->dst_dentry);
14221 +       }
14222 +       if (!err)
14223 +               d_move(a->src_dentry, a->dst_dentry);
14224 +       if (au_ftest_ren(a->flags, ISSAMEDIR))
14225 +               di_write_unlock(a->dst_parent);
14226 +       else
14227 +               di_write_unlock2(a->src_parent, a->dst_parent);
14228 +       aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
14229 + out_free:
14230 +       iput(a->dst_inode);
14231 +       if (a->thargs)
14232 +               au_whtmp_rmdir_free(a->thargs);
14233 +       kfree(a);
14234 + out:
14235 +       return err;
14236 +}
14237 diff -uprN -x .git linux-2.6.31/fs/aufs/iinfo.c aufs2-2.6.git/fs/aufs/iinfo.c
14238 --- linux-2.6.31/fs/aufs/iinfo.c        1970-01-01 00:00:00.000000000 +0000
14239 +++ aufs2-2.6.git/fs/aufs/iinfo.c       2009-09-21 21:49:23.404940801 +0000
14240 @@ -0,0 +1,283 @@
14241 +/*
14242 + * Copyright (C) 2005-2009 Junjiro R. Okajima
14243 + *
14244 + * This program, aufs is free software; you can redistribute it and/or modify
14245 + * it under the terms of the GNU General Public License as published by
14246 + * the Free Software Foundation; either version 2 of the License, or
14247 + * (at your option) any later version.
14248 + *
14249 + * This program is distributed in the hope that it will be useful,
14250 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14251 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14252 + * GNU General Public License for more details.
14253 + *
14254 + * You should have received a copy of the GNU General Public License
14255 + * along with this program; if not, write to the Free Software
14256 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
14257 + */
14258 +
14259 +/*
14260 + * inode private data
14261 + */
14262 +
14263 +#include "aufs.h"
14264 +
14265 +struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
14266 +{
14267 +       struct inode *h_inode;
14268 +
14269 +       IiMustAnyLock(inode);
14270 +
14271 +       h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
14272 +       AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
14273 +       return h_inode;
14274 +}
14275 +
14276 +/* todo: hard/soft set? */
14277 +void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
14278 +{
14279 +       struct au_iinfo *iinfo = au_ii(inode);
14280 +       struct inode *h_inode;
14281 +
14282 +       IiMustWriteLock(inode);
14283 +
14284 +       iinfo->ii_bstart = bindex;
14285 +       h_inode = iinfo->ii_hinode[bindex + 0].hi_inode;
14286 +       if (h_inode)
14287 +               au_cpup_igen(inode, h_inode);
14288 +}
14289 +
14290 +void au_hiput(struct au_hinode *hinode)
14291 +{
14292 +       au_hin_free(hinode);
14293 +       dput(hinode->hi_whdentry);
14294 +       iput(hinode->hi_inode);
14295 +}
14296 +
14297 +unsigned int au_hi_flags(struct inode *inode, int isdir)
14298 +{
14299 +       unsigned int flags;
14300 +       const unsigned int mnt_flags = au_mntflags(inode->i_sb);
14301 +
14302 +       flags = 0;
14303 +       if (au_opt_test(mnt_flags, XINO))
14304 +               au_fset_hi(flags, XINO);
14305 +       if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY))
14306 +               au_fset_hi(flags, HINOTIFY);
14307 +       return flags;
14308 +}
14309 +
14310 +void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
14311 +                  struct inode *h_inode, unsigned int flags)
14312 +{
14313 +       struct au_hinode *hinode;
14314 +       struct inode *hi;
14315 +       struct au_iinfo *iinfo = au_ii(inode);
14316 +
14317 +       IiMustWriteLock(inode);
14318 +
14319 +       hinode = iinfo->ii_hinode + bindex;
14320 +       hi = hinode->hi_inode;
14321 +       AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
14322 +       AuDebugOn(h_inode && hi);
14323 +
14324 +       if (hi)
14325 +               au_hiput(hinode);
14326 +       hinode->hi_inode = h_inode;
14327 +       if (h_inode) {
14328 +               int err;
14329 +               struct super_block *sb = inode->i_sb;
14330 +               struct au_branch *br;
14331 +
14332 +               if (bindex == iinfo->ii_bstart)
14333 +                       au_cpup_igen(inode, h_inode);
14334 +               br = au_sbr(sb, bindex);
14335 +               hinode->hi_id = br->br_id;
14336 +               if (au_ftest_hi(flags, XINO)) {
14337 +                       err = au_xino_write(sb, bindex, h_inode->i_ino,
14338 +                                           inode->i_ino);
14339 +                       if (unlikely(err))
14340 +                               AuIOErr1("failed au_xino_write() %d\n", err);
14341 +               }
14342 +
14343 +               if (au_ftest_hi(flags, HINOTIFY)
14344 +                   && au_br_hinotifyable(br->br_perm)) {
14345 +                       err = au_hin_alloc(hinode, inode, h_inode);
14346 +                       if (unlikely(err))
14347 +                               AuIOErr1("au_hin_alloc() %d\n", err);
14348 +               }
14349 +       }
14350 +}
14351 +
14352 +void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
14353 +                 struct dentry *h_wh)
14354 +{
14355 +       struct au_hinode *hinode;
14356 +
14357 +       IiMustWriteLock(inode);
14358 +
14359 +       hinode = au_ii(inode)->ii_hinode + bindex;
14360 +       AuDebugOn(hinode->hi_whdentry);
14361 +       hinode->hi_whdentry = h_wh;
14362 +}
14363 +
14364 +void au_update_iigen(struct inode *inode)
14365 +{
14366 +       atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
14367 +       /* smp_mb(); */ /* atomic_set */
14368 +}
14369 +
14370 +/* it may be called at remount time, too */
14371 +void au_update_brange(struct inode *inode, int do_put_zero)
14372 +{
14373 +       struct au_iinfo *iinfo;
14374 +
14375 +       iinfo = au_ii(inode);
14376 +       if (!iinfo || iinfo->ii_bstart < 0)
14377 +               return;
14378 +
14379 +       IiMustWriteLock(inode);
14380 +
14381 +       if (do_put_zero) { /* drop the lower inodes whose i_nlink is 0 */
14382 +               aufs_bindex_t bindex;
14383 +
14384 +               for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
14385 +                    bindex++) {
14386 +                       struct inode *h_i;
14387 +
14388 +                       h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
14389 +                       if (h_i && !h_i->i_nlink)
14390 +                               au_set_h_iptr(inode, bindex, NULL, 0);
14391 +               }
14392 +       }
14393 +       /* ii_bstart: the lowest index, from 0, which holds a lower inode */
14394 +       iinfo->ii_bstart = -1;
14395 +       while (++iinfo->ii_bstart <= iinfo->ii_bend)
14396 +               if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode)
14397 +                       break;
14398 +       if (iinfo->ii_bstart > iinfo->ii_bend) {
14399 +               iinfo->ii_bstart = -1;
14400 +               iinfo->ii_bend = -1; /* no lower inode is left */
14401 +               return;
14402 +       }
14403 +       /* ii_bend: the highest index which holds one, scanning downward */
14404 +       iinfo->ii_bend++;
14405 +       while (0 <= --iinfo->ii_bend)
14406 +               if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode)
14407 +                       break;
14408 +       AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0);
14409 +}
14410 +
14411 +/* ---------------------------------------------------------------------- */
14412 +
14413 +int au_iinfo_init(struct inode *inode)
14414 +{
14415 +       struct au_iinfo *iinfo;
14416 +       struct super_block *sb;
14417 +       int nbr, i;
14418 +
14419 +       sb = inode->i_sb;
14420 +       iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
14421 +       nbr = au_sbend(sb) + 1;
14422 +       if (unlikely(nbr <= 0))
14423 +               nbr = 1;
14424 +       iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
14425 +       if (iinfo->ii_hinode) {
14426 +               for (i = 0; i < nbr; i++)
14427 +                       iinfo->ii_hinode[i].hi_id = -1;
14428 +
14429 +               atomic_set(&iinfo->ii_generation, au_sigen(sb));
14430 +               /* smp_mb(); */ /* atomic_set */
14431 +               au_rw_init(&iinfo->ii_rwsem);
14432 +               iinfo->ii_bstart = -1;
14433 +               iinfo->ii_bend = -1;
14434 +               iinfo->ii_vdir = NULL;
14435 +               return 0;
14436 +       }
14437 +       return -ENOMEM;
14438 +}
14439 +
14440 +int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
14441 +{
14442 +       int err, sz;
14443 +       struct au_hinode *hip;
14444 +
14445 +       AuRwMustWriteLock(&iinfo->ii_rwsem);
14446 +
14447 +       err = -ENOMEM;
14448 +       sz = sizeof(*hip) * (iinfo->ii_bend + 1);
14449 +       if (!sz)
14450 +               sz = sizeof(*hip);
14451 +       hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
14452 +       if (hip) {
14453 +               iinfo->ii_hinode = hip;
14454 +               err = 0;
14455 +       }
14456 +
14457 +       return err;
14458 +}
14459 +
14460 +static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode,
14461 +                          ino_t ino)
14462 +{
14463 +       int err;
14464 +       aufs_bindex_t bindex;
14465 +       unsigned char locked;
14466 +
14467 +       err = 0;
14468 +       locked = !!si_noflush_read_trylock(sb);
14469 +       bindex = au_br_index(sb, hinode->hi_id);
14470 +       if (bindex >= 0)
14471 +               err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino);
14472 +       /* error action? */
14473 +       if (locked)
14474 +               si_read_unlock(sb);
14475 +       return err;
14476 +}
14477 +
14478 +void au_iinfo_fin(struct inode *inode)
14479 +{
14480 +       ino_t ino;
14481 +       aufs_bindex_t bend;
14482 +       unsigned char unlinked = !inode->i_nlink;
14483 +       struct au_iinfo *iinfo;
14484 +       struct au_hinode *hi;
14485 +       struct super_block *sb;
14486 +
14487 +       if (unlinked) {
14488 +               int err = au_xigen_inc(inode);
14489 +               if (unlikely(err))
14490 +                       AuWarn1("failed resetting i_generation, %d\n", err);
14491 +       }
14492 +
14493 +       iinfo = au_ii(inode);
14494 +       /* bad_inode case */
14495 +       if (!iinfo)
14496 +               return;
14497 +
14498 +       if (iinfo->ii_vdir)
14499 +               au_vdir_free(iinfo->ii_vdir);
14500 +
14501 +       if (iinfo->ii_bstart >= 0) {
14502 +               sb = inode->i_sb;
14503 +               ino = 0;
14504 +               if (unlinked)
14505 +                       ino = inode->i_ino;
14506 +               hi = iinfo->ii_hinode + iinfo->ii_bstart;
14507 +               bend = iinfo->ii_bend;
14508 +               while (iinfo->ii_bstart++ <= bend) {
14509 +                       if (hi->hi_inode) {
14510 +                               if (unlinked || !hi->hi_inode->i_nlink) {
14511 +                                       au_iinfo_write0(sb, hi, ino);
14512 +                                       /* ignore this error */
14513 +                                       ino = 0;
14514 +                               }
14515 +                               au_hiput(hi);
14516 +                       }
14517 +                       hi++;
14518 +               }
14519 +       }
14520 +
14521 +       kfree(iinfo->ii_hinode);
14522 +       AuRwDestroy(&iinfo->ii_rwsem);
14523 +}
14524 diff -uprN -x .git linux-2.6.31/fs/aufs/inode.c aufs2-2.6.git/fs/aufs/inode.c
14525 --- linux-2.6.31/fs/aufs/inode.c        1970-01-01 00:00:00.000000000 +0000
14526 +++ aufs2-2.6.git/fs/aufs/inode.c       2009-09-21 21:49:23.404940801 +0000
14527 @@ -0,0 +1,414 @@
14528 +/*
14529 + * Copyright (C) 2005-2009 Junjiro R. Okajima
14530 + *
14531 + * This program, aufs is free software; you can redistribute it and/or modify
14532 + * it under the terms of the GNU General Public License as published by
14533 + * the Free Software Foundation; either version 2 of the License, or
14534 + * (at your option) any later version.
14535 + *
14536 + * This program is distributed in the hope that it will be useful,
14537 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14538 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14539 + * GNU General Public License for more details.
14540 + *
14541 + * You should have received a copy of the GNU General Public License
14542 + * along with this program; if not, write to the Free Software
14543 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
14544 + */
14545 +
14546 +/*
14547 + * inode functions
14548 + */
14549 +
14550 +#include "aufs.h"
14551 +
14552 +struct inode *au_igrab(struct inode *inode)
14553 +{
14554 +       if (inode) {
14555 +               AuDebugOn(!atomic_read(&inode->i_count));
14556 +               atomic_inc_return(&inode->i_count);
14557 +       }
14558 +       return inode;
14559 +}
14560 +
14561 +static void au_refresh_hinode_attr(struct inode *inode, int do_version)
14562 +{
14563 +       au_cpup_attr_all(inode, /*force*/0);
14564 +       au_update_iigen(inode);
14565 +       if (do_version)
14566 +               inode->i_version++;
14567 +}
14568 +
14569 +/* re-slot the lower inodes by the current index of their branches */
14570 +int au_refresh_hinode_self(struct inode *inode, int do_attr)
14571 +{
14572 +       int err;
14573 +       aufs_bindex_t bindex, new_bindex;
14574 +       unsigned char update;
14575 +       struct au_hinode *p, *q, tmp;
14576 +       struct super_block *sb;
14577 +       struct au_iinfo *iinfo;
14578 +
14579 +       IiMustWriteLock(inode);
14580 +
14581 +       update = 0;
14582 +       sb = inode->i_sb;
14583 +       iinfo = au_ii(inode);
14584 +       err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
14585 +       if (unlikely(err))
14586 +               goto out;
14587 +
14588 +       /* each slot is kept, put (its branch is gone) or swapped into place */
14589 +       p = iinfo->ii_hinode + iinfo->ii_bstart;
14590 +       err = 0;
14591 +       for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
14592 +            bindex++, p++) {
14593 +               if (!p->hi_inode)
14594 +                       continue;
14595 +
14596 +               new_bindex = au_br_index(sb, p->hi_id);
14597 +               if (new_bindex == bindex)
14598 +                       continue;
14599 +
14600 +               if (new_bindex < 0) {
14601 +                       update++;
14602 +                       au_hiput(p);
14603 +                       p->hi_inode = NULL;
14604 +                       continue;
14605 +               }
14606 +
14607 +               if (new_bindex < iinfo->ii_bstart)
14608 +                       iinfo->ii_bstart = new_bindex;
14609 +               if (iinfo->ii_bend < new_bindex)
14610 +                       iinfo->ii_bend = new_bindex;
14611 +               /* swap two lower inode, and loop again */
14612 +               q = iinfo->ii_hinode + new_bindex;
14613 +               tmp = *q;
14614 +               *q = *p;
14615 +               *p = tmp;
14616 +               if (tmp.hi_inode) {
14617 +                       bindex--;
14618 +                       p--;
14619 +               }
14620 +       }
14621 +       au_update_brange(inode, /*do_put_zero*/0);
14622 +       if (do_attr)
14623 +               au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
14624 +
14625 + out:
14626 +       return err;
14627 +}
14628 +
14629 +/*
14630 + * re-slot the lower inodes, then attach those which @dentry has but @inode
14631 + * lacks. -EIO when a slot already holds a different lower inode.
14632 + */
14633 +int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
14634 +{
14635 +       int err, update;
14636 +       unsigned int flags;
14637 +       aufs_bindex_t bindex, bend;
14638 +       unsigned char isdir;
14639 +       struct au_iinfo *iinfo;
14640 +
14641 +       err = au_refresh_hinode_self(inode, /*do_attr*/0);
14642 +       if (unlikely(err))
14643 +               goto out;
14644 +
14645 +       update = 0;
14646 +       iinfo = au_ii(inode);
14647 +       isdir = S_ISDIR(inode->i_mode);
14648 +       flags = au_hi_flags(inode, isdir);
14649 +       bend = au_dbend(dentry);
14650 +       for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
14651 +               struct inode *h_i;
14652 +               struct dentry *h_d;
14653 +
14654 +               h_d = au_h_dptr(dentry, bindex);
14655 +               if (!h_d || !h_d->d_inode)
14656 +                       continue;
14657 +
14658 +               if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
14659 +                       h_i = au_h_iptr(inode, bindex);
14660 +                       if (h_i) {
14661 +                               if (h_i == h_d->d_inode)
14662 +                                       continue;
14663 +                               err = -EIO;
14664 +                               break;
14665 +                       }
14666 +               }
14667 +               if (bindex < iinfo->ii_bstart)
14668 +                       iinfo->ii_bstart = bindex;
14669 +               if (iinfo->ii_bend < bindex)
14670 +                       iinfo->ii_bend = bindex;
14671 +               au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
14672 +               update = 1;
14673 +       }
14674 +       au_update_brange(inode, /*do_put_zero*/0);
14675 +
14676 +       if (unlikely(err))
14677 +               goto out;
14678 +
14679 +       au_refresh_hinode_attr(inode, update && isdir);
14680 +
14681 + out:
14682 +       AuTraceErr(err);
14683 +       return err;
14684 +}
14685 +
14686 +static int set_inode(struct inode *inode, struct dentry *dentry)
14687 +{
14688 +       int err;
14689 +       unsigned int flags;
14690 +       umode_t mode;
14691 +       aufs_bindex_t bindex, bstart, btail;
14692 +       unsigned char isdir;
14693 +       struct dentry *h_dentry;
14694 +       struct inode *h_inode;
14695 +       struct au_iinfo *iinfo;
14696 +
14697 +       IiMustWriteLock(inode);
14698 +
14699 +       err = 0;
14700 +       isdir = 0;
14701 +       bstart = au_dbstart(dentry);
14702 +       h_inode = au_h_dptr(dentry, bstart)->d_inode;
14703 +       mode = h_inode->i_mode;
14704 +       switch (mode & S_IFMT) {
14705 +       case S_IFREG:
14706 +               btail = au_dbtail(dentry);
14707 +               inode->i_op = &aufs_iop;
14708 +               inode->i_fop = &aufs_file_fop;
14709 +               inode->i_mapping->a_ops = &aufs_aop;
14710 +               break;
14711 +       case S_IFDIR:
14712 +               isdir = 1;
14713 +               btail = au_dbtaildir(dentry);
14714 +               inode->i_op = &aufs_dir_iop;
14715 +               inode->i_fop = &aufs_dir_fop;
14716 +               break;
14717 +       case S_IFLNK:
14718 +               btail = au_dbtail(dentry);
14719 +               inode->i_op = &aufs_symlink_iop;
14720 +               break;
14721 +       case S_IFBLK:
14722 +       case S_IFCHR:
14723 +       case S_IFIFO:
14724 +       case S_IFSOCK:
14725 +               btail = au_dbtail(dentry);
14726 +               inode->i_op = &aufs_iop;
14727 +               init_special_inode(inode, mode, h_inode->i_rdev);
14728 +               break;
14729 +       default:
14730 +               AuIOErr("Unknown file type 0%o\n", mode);
14731 +               err = -EIO;
14732 +               goto out;
14733 +       }
14734 +
14735 +       /* do not set inotify for whiteouted dirs (SHWH mode) */
14736 +       flags = au_hi_flags(inode, isdir);
14737 +       if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
14738 +           && au_ftest_hi(flags, HINOTIFY)
14739 +           && dentry->d_name.len > AUFS_WH_PFX_LEN
14740 +           && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
14741 +               au_fclr_hi(flags, HINOTIFY);
14742 +       iinfo = au_ii(inode);
14743 +       iinfo->ii_bstart = bstart;
14744 +       iinfo->ii_bend = btail;
14745 +       for (bindex = bstart; bindex <= btail; bindex++) {
14746 +               h_dentry = au_h_dptr(dentry, bindex);
14747 +               if (h_dentry)
14748 +                       au_set_h_iptr(inode, bindex,
14749 +                                     au_igrab(h_dentry->d_inode), flags);
14750 +       }
14751 +       au_cpup_attr_all(inode, /*force*/1);
14752 +
14753 + out:
14754 +       return err;
14755 +}
14756 +
14757 +/* returns 0 with iinfo write_locked; on error iinfo is left unlocked */
14758 +static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched)
14759 +{
14760 +       int err;
14761 +       aufs_bindex_t bindex, bend;
14762 +       struct inode *h_inode, *h_dinode;
14763 +
14764 +       *matched = 0;
14765 +
14766 +       /*
14767 +        * when this function is called, aufs may hold at most one iinfo
14768 +        * lock, the one of the parent dir. the inode number can be equal to
14769 +        * the parent's one by UDBA and the obsoleted inode number.
14770 +        */
14771 +       err = -EIO;
14772 +       if (unlikely(inode->i_ino == parent_ino(dentry)))
14773 +               goto out;
14774 +
14775 +       err = 0; /* NOTE(review): also returned, locked, when nothing matches */
14776 +       ii_write_lock_new_child(inode);
14777 +       h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
14778 +       bend = au_ibend(inode);
14779 +       for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14780 +               h_inode = au_h_iptr(inode, bindex);
14781 +               if (h_inode && h_inode == h_dinode) {
14782 +                       *matched = 1;
14783 +                       err = 0;
14784 +                       if (au_iigen(inode) != au_digen(dentry))
14785 +                               err = au_refresh_hinode(inode, dentry);
14786 +                       break;
14787 +               }
14788 +       }
14789 +
14790 +       if (unlikely(err))
14791 +               ii_write_unlock(inode);
14792 + out:
14793 +       return err;
14794 +}
14795 +
14796 +int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
14797 +          unsigned int d_type, ino_t *ino)
14798 +{
14799 +       int err;
14800 +       struct mutex *mtx;
14801 +       const int isdir = (d_type == DT_DIR);
14802 +
14803 +       /* prevent hardlinks from race condition */
14804 +       mtx = NULL;
14805 +       if (!isdir) {
14806 +               mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
14807 +               mutex_lock(mtx);
14808 +       }
14809 +       err = au_xino_read(sb, bindex, h_ino, ino);
14810 +       if (unlikely(err))
14811 +               goto out;
14812 +
14813 +       if (!*ino) {
14814 +               err = -EIO;
14815 +               *ino = au_xino_new_ino(sb);
14816 +               if (unlikely(!*ino))
14817 +                       goto out;
14818 +               err = au_xino_write(sb, bindex, h_ino, *ino);
14819 +               if (unlikely(err))
14820 +                       goto out;
14821 +       }
14822 +
14823 + out:
14824 +       if (!isdir)
14825 +               mutex_unlock(mtx);
14826 +       return err;
14827 +}
14828 +
14829 +/* successful returns with iinfo write_locked, otherwise an ERR_PTR */
14830 +/* todo: return with unlocked? */
14831 +struct inode *au_new_inode(struct dentry *dentry, int must_new)
14832 +{
14833 +       struct inode *inode;
14834 +       struct dentry *h_dentry;
14835 +       struct super_block *sb;
14836 +       ino_t h_ino, ino;
14837 +       int err, match;
14838 +       aufs_bindex_t bstart;
14839 +
14840 +       sb = dentry->d_sb;
14841 +       bstart = au_dbstart(dentry);
14842 +       h_dentry = au_h_dptr(dentry, bstart);
14843 +       h_ino = h_dentry->d_inode->i_ino;
14844 +       err = au_xino_read(sb, bstart, h_ino, &ino);
14845 +       inode = ERR_PTR(err);
14846 +       if (unlikely(err))
14847 +               goto out;
14848 + new_ino:
14849 +       if (!ino) {
14850 +               ino = au_xino_new_ino(sb);
14851 +               if (unlikely(!ino)) {
14852 +                       inode = ERR_PTR(-EIO);
14853 +                       goto out;
14854 +               }
14855 +       }
14856 +
14857 +       AuDbg("i%lu\n", (unsigned long)ino);
14858 +       inode = au_iget_locked(sb, ino);
14859 +       err = PTR_ERR(inode);
14860 +       if (IS_ERR(inode))
14861 +               goto out;
14862 +
14863 +       AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
14864 +       if (inode->i_state & I_NEW) {
14865 +               ii_write_lock_new_child(inode);
14866 +               err = set_inode(inode, dentry);
14867 +               if (!err) {
14868 +                       unlock_new_inode(inode);
14869 +                       goto out; /* success */
14870 +               }
14871 +               ii_write_unlock(inode);
14872 +               iget_failed(inode); /* clears I_NEW and puts the inode */
14873 +               goto out_err;
14874 +       } else if (!must_new) {
14875 +               err = reval_inode(inode, dentry, &match);
14876 +               if (!err)
14877 +                       goto out; /* success */
14878 +               else if (match)
14879 +                       goto out_iput;
14880 +       }
14881 +
14882 +       if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
14883 +               AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
14884 +                       " b%d, %s, %.*s, hi%lu, i%lu.\n",
14885 +                       bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
14886 +                       (unsigned long)h_ino, (unsigned long)ino);
14887 +       ino = 0;
14888 +       err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
14889 +       if (!err) {
14890 +               iput(inode);
14891 +               goto new_ino;
14892 +       }
14893 + out_iput:
14894 +       iput(inode);
14895 + out_err:
14896 +       inode = ERR_PTR(err);
14897 + out:
14898 +       return inode;
14899 +}
14900 +
14901 +/* ---------------------------------------------------------------------- */
14902 +
14903 +int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
14904 +              struct inode *inode)
14905 +{
14906 +       int err;
14907 +
14908 +       err = au_br_rdonly(au_sbr(sb, bindex));
14909 +
14910 +       /* pseudo-link after flushed may happen out of bounds */
14911 +       if (!err
14912 +           && inode
14913 +           && au_ibstart(inode) <= bindex
14914 +           && bindex <= au_ibend(inode)) {
14915 +               /*
14916 +                * permission check is unnecessary since vfsub routine
14917 +                * will be called later
14918 +                */
14919 +               struct inode *hi = au_h_iptr(inode, bindex);
14920 +               if (hi)
14921 +                       err = IS_IMMUTABLE(hi) ? -EROFS : 0;
14922 +       }
14923 +
14924 +       return err;
14925 +}
14926 +
14927 +int au_test_h_perm(struct inode *h_inode, int mask)
14928 +{
14929 +       if (!current_fsuid())
14930 +               return 0;
14931 +       return inode_permission(h_inode, mask);
14932 +}
14933 +
14934 +int au_test_h_perm_sio(struct inode *h_inode, int mask)
14935 +{
14936 +       if (au_test_nfs(h_inode->i_sb)
14937 +           && (mask & MAY_WRITE)
14938 +           && S_ISDIR(h_inode->i_mode))
14939 +               mask |= MAY_READ; /* force permission check */
14940 +       return au_test_h_perm(h_inode, mask);
14941 +}
14942 diff -uprN -x .git linux-2.6.31/fs/aufs/inode.h aufs2-2.6.git/fs/aufs/inode.h
14943 --- linux-2.6.31/fs/aufs/inode.h        1970-01-01 00:00:00.000000000 +0000
14944 +++ aufs2-2.6.git/fs/aufs/inode.h       2009-09-21 21:49:23.404940801 +0000
14945 @@ -0,0 +1,497 @@
14946 +/*
14947 + * Copyright (C) 2005-2009 Junjiro R. Okajima
14948 + *
14949 + * This program, aufs is free software; you can redistribute it and/or modify
14950 + * it under the terms of the GNU General Public License as published by
14951 + * the Free Software Foundation; either version 2 of the License, or
14952 + * (at your option) any later version.
14953 + *
14954 + * This program is distributed in the hope that it will be useful,
14955 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14956 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14957 + * GNU General Public License for more details.
14958 + *
14959 + * You should have received a copy of the GNU General Public License
14960 + * along with this program; if not, write to the Free Software
14961 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
14962 + */
14963 +
14964 +/*
14965 + * inode operations
14966 + */
14967 +
14968 +#ifndef __AUFS_INODE_H__
14969 +#define __AUFS_INODE_H__
14970 +
14971 +#ifdef __KERNEL__
14972 +
14973 +#include <linux/fs.h>
14974 +#include <linux/inotify.h>
14975 +#include <linux/aufs_type.h>
14976 +#include "rwsem.h"
14977 +
14978 +struct vfsmount;
14979 +
14980 +struct au_hinotify {
14981 +#ifdef CONFIG_AUFS_HINOTIFY
14982 +       struct inotify_watch    hin_watch;
14983 +       struct inode            *hin_aufs_inode;        /* no get/put */
14984 +#endif
14985 +};
14986 +
14987 +struct au_hinode {       /* one element of au_iinfo.ii_hinode[], indexed by bindex */
14988 +       struct inode            *hi_inode;
14989 +       aufs_bindex_t           hi_id;  /* returned by au_ii_br_id() */
14990 +#ifdef CONFIG_AUFS_HINOTIFY
14991 +       struct au_hinotify      *hi_notify;     /* set by au_hin_init() */
14992 +#endif
14993 +
14994 +       /* reference to the copied-up whiteout with get/put */
14995 +       struct dentry           *hi_whdentry;
14996 +};
14997 +
14998 +struct au_vdir;
14999 +struct au_iinfo {        /* aufs private part of an inode, see au_ii() */
15000 +       atomic_t                ii_generation;  /* read by au_iigen() */
15001 +       struct super_block      *ii_hsb1;       /* no get/put */
15002 +
15003 +       struct au_rwsem         ii_rwsem;       /* ii_{read,write}_lock_*() */
15004 +       aufs_bindex_t           ii_bstart, ii_bend;
15005 +       __u32                   ii_higen;       /* see au_test_higen() */
15006 +       struct au_hinode        *ii_hinode;     /* array indexed by bindex */
15007 +       struct au_vdir          *ii_vdir;
15008 +};
15009 +
15010 +struct au_icntnr {       /* a VFS inode together with its aufs info */
15011 +       struct au_iinfo iinfo;
15012 +       struct inode vfs_inode; /* au_ii() uses container_of() on this member */
15013 +};
15014 +
15015 +/* au_pin flags; au_fset_pin()/au_fclr_pin() expand to one statement each */
15016 +#define AuPin_DI_LOCKED                1
15017 +#define AuPin_MNT_WRITE                (1 << 1)
15018 +#define au_ftest_pin(flags, name)      ((flags) & AuPin_##name)
15019 +#define au_fset_pin(flags, name)       do { (flags) |= AuPin_##name; } while (0)
15020 +#define au_fclr_pin(flags, name)       do { (flags) &= ~AuPin_##name; } while (0)
15021 +
15022 +struct au_pin {
15023 +       /* input */
15024 +       struct dentry *dentry;
15025 +       unsigned int udba;
15026 +       unsigned char lsc_di, lsc_hi, flags;    /* flags: AuPin_* bits */
15027 +       aufs_bindex_t bindex;
15028 +
15029 +       /* output */
15030 +       struct dentry *parent;  /* replaced by au_pin_set_parent() */
15031 +       struct au_hinode *hdir; /* see au_pinned_hdir(), au_pinned_h_dir() */
15032 +       struct vfsmount *h_mnt;
15033 +};
15034 +
15035 +/* ---------------------------------------------------------------------- */
15036 +
15037 +/* aufs private info of @inode, or NULL when it has no ii_hinode array */
15038 +static inline struct au_iinfo *au_ii(struct inode *inode)
15039 +{
15040 +       struct au_icntnr *c;
15041 +
15042 +       c = container_of(inode, struct au_icntnr, vfs_inode);
15043 +       /* debugging bad_inode case */
15044 +       return c->iinfo.ii_hinode ? &c->iinfo : NULL;
15045 +}
15046 +
15047 +/* ---------------------------------------------------------------------- */
15048 +
15049 +/* inode.c */
15050 +struct inode *au_igrab(struct inode *inode);
15051 +int au_refresh_hinode_self(struct inode *inode, int do_attr);
15052 +int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
15053 +int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
15054 +          unsigned int d_type, ino_t *ino);
15055 +struct inode *au_new_inode(struct dentry *dentry, int must_new);
15056 +int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
15057 +              struct inode *inode);
15058 +int au_test_h_perm(struct inode *h_inode, int mask);
15059 +int au_test_h_perm_sio(struct inode *h_inode, int mask);
15060 +
15061 +static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
15062 +                           ino_t h_ino, unsigned int d_type, ino_t *ino)
15063 +{
15064 +#ifdef CONFIG_AUFS_SHWH
15065 +       return au_ino(sb, bindex, h_ino, d_type, ino); /* plain au_ino() */
15066 +#else
15067 +       return 0; /* *ino is left untouched without CONFIG_AUFS_SHWH */
15068 +#endif
15069 +}
15070 +
15071 +/* i_op.c */
15072 +extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
15073 +
15074 +/* au_wr_dir flags; the set/clear helpers expand to one statement each */
15075 +#define AuWrDir_ADD_ENTRY      1
15076 +#define AuWrDir_ISDIR          (1 << 1)
15077 +#define au_ftest_wrdir(flags, name)    ((flags) & AuWrDir_##name)
15078 +#define au_fset_wrdir(flags, name)     do { (flags) |= AuWrDir_##name; } while (0)
15079 +#define au_fclr_wrdir(flags, name)     do { (flags) &= ~AuWrDir_##name; } while (0)
15080 +
15081 +struct au_wr_dir_args {
15082 +       aufs_bindex_t force_btgt;
15083 +       unsigned char flags;
15084 +};
15085 +int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
15086 +             struct au_wr_dir_args *args);
15087 +
15088 +struct dentry *au_pinned_h_parent(struct au_pin *pin);
15089 +void au_pin_init(struct au_pin *pin, struct dentry *dentry,
15090 +                aufs_bindex_t bindex, int lsc_di, int lsc_hi,
15091 +                unsigned int udba, unsigned char flags);
15092 +int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
15093 +          unsigned int udba, unsigned char flags) __must_check;
15094 +int au_do_pin(struct au_pin *pin) __must_check;
15095 +void au_unpin(struct au_pin *pin);
15096 +
15097 +/* i_op_add.c */
15098 +int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
15099 +              struct dentry *h_parent, int isdir);
15100 +int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
15101 +int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
15102 +int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
15103 +               struct nameidata *nd);
15104 +int aufs_link(struct dentry *src_dentry, struct inode *dir,
15105 +             struct dentry *dentry);
15106 +int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
15107 +
15108 +/* i_op_del.c */
15109 +int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
15110 +int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
15111 +              struct dentry *h_parent, int isdir);
15112 +int aufs_unlink(struct inode *dir, struct dentry *dentry);
15113 +int aufs_rmdir(struct inode *dir, struct dentry *dentry);
15114 +
15115 +/* i_op_ren.c */
15116 +int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
15117 +int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
15118 +               struct inode *dir, struct dentry *dentry);
15119 +
15120 +/* iinfo.c */
15121 +struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
15122 +void au_hiput(struct au_hinode *hinode);
15123 +void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex);
15124 +void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15125 +                 struct dentry *h_wh);
15126 +unsigned int au_hi_flags(struct inode *inode, int isdir);
15127 +
15128 +/* hinode flags; the set/clear helpers expand to one statement each */
15129 +#define AuHi_XINO      1
15130 +#define AuHi_HINOTIFY  (1 << 1)
15131 +#define au_ftest_hi(flags, name)       ((flags) & AuHi_##name)
15132 +#define au_fset_hi(flags, name)                do { (flags) |= AuHi_##name; } while (0)
15133 +#define au_fclr_hi(flags, name)                do { (flags) &= ~AuHi_##name; } while (0)
15134 +
15135 +#ifndef CONFIG_AUFS_HINOTIFY
15136 +#undef AuHi_HINOTIFY
15137 +#define AuHi_HINOTIFY  0 /* the flag tests false and set/clear change nothing */
15138 +#endif
15139 +
15140 +void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15141 +                  struct inode *h_inode, unsigned int flags);
15142 +
15143 +void au_update_iigen(struct inode *inode);
15144 +void au_update_brange(struct inode *inode, int do_put_zero);
15145 +
15146 +int au_iinfo_init(struct inode *inode);
15147 +void au_iinfo_fin(struct inode *inode);
15148 +int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
15149 +
15150 +/* plink.c */
15151 +void au_plink_block_maintain(struct super_block *sb);
15152 +#ifdef CONFIG_AUFS_DEBUG
15153 +void au_plink_list(struct super_block *sb);
15154 +#else
15155 +static inline void au_plink_list(struct super_block *sb)
15156 +{
15157 +       /* nothing: the real one is declared for CONFIG_AUFS_DEBUG only */
15158 +}
15159 +#endif /* CONFIG_AUFS_DEBUG */
15160 +int au_plink_test(struct inode *inode);
15161 +struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
15162 +void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
15163 +                    struct dentry *h_dentry);
15164 +void au_plink_put(struct super_block *sb);
15165 +void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
15166 +long au_plink_ioctl(struct file *file, unsigned int cmd);
15167 +
15168 +/* ---------------------------------------------------------------------- */
15169 +
15170 +/* lock subclass for iinfo */
15171 +enum {
15172 +       AuLsc_II_CHILD,         /* child first */
15173 +       AuLsc_II_CHILD2,        /* rename(2), link(2), and cpup at hinotify */
15174 +       AuLsc_II_CHILD3,        /* copyup dirs */
15175 +       AuLsc_II_PARENT,        /* see AuLsc_I_PARENT in vfsub.h */
15176 +       AuLsc_II_PARENT2,
15177 +       AuLsc_II_PARENT3,       /* copyup dirs */
15178 +       AuLsc_II_NEW_CHILD
15179 +};
15180 +
15181 +/*
15182 + * ii_read_lock_child, ii_write_lock_child,
15183 + * ii_read_lock_child2, ii_write_lock_child2,
15184 + * ii_read_lock_child3, ii_write_lock_child3,
15185 + * ii_read_lock_parent, ii_write_lock_parent,
15186 + * ii_read_lock_parent2, ii_write_lock_parent2,
15187 + * ii_read_lock_parent3, ii_write_lock_parent3,
15188 + * ii_read_lock_new_child, ii_write_lock_new_child,
15189 + */
15190 +#define AuReadLockFunc(name, lsc) \
15191 +static inline void ii_read_lock_##name(struct inode *i) \
15192 +{ \
15193 +       au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
15194 +}
15195 +
15196 +#define AuWriteLockFunc(name, lsc) \
15197 +static inline void ii_write_lock_##name(struct inode *i) \
15198 +{ \
15199 +       au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
15200 +}
15201 +
15202 +#define AuRWLockFuncs(name, lsc) \
15203 +       AuReadLockFunc(name, lsc) \
15204 +       AuWriteLockFunc(name, lsc)
15205 +
15206 +AuRWLockFuncs(child, CHILD);
15207 +AuRWLockFuncs(child2, CHILD2);
15208 +AuRWLockFuncs(child3, CHILD3);
15209 +AuRWLockFuncs(parent, PARENT);
15210 +AuRWLockFuncs(parent2, PARENT2);
15211 +AuRWLockFuncs(parent3, PARENT3);
15212 +AuRWLockFuncs(new_child, NEW_CHILD);
15213 +
15214 +#undef AuReadLockFunc
15215 +#undef AuWriteLockFunc
15216 +#undef AuRWLockFuncs
15217 +
15218 +/*
15219 + * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
15220 + */
15221 +AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
15222 +
15223 +#define IiMustNoWaiters(i)     AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
15224 +#define IiMustAnyLock(i)       AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
15225 +#define IiMustWriteLock(i)     AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
15226 +
15227 +/* ---------------------------------------------------------------------- */
15228 +
15229 +static inline unsigned int au_iigen(struct inode *inode)
15230 +{
15231 +       return atomic_read(&au_ii(inode)->ii_generation);
15232 +}
15233 +
15234 +/* tiny test for inode number */
15235 +/* tmpfs generation is too rough */
15236 +static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
15237 +{
15238 +       struct au_iinfo *ii = au_ii(inode);
15239 +
15240 +       AuRwMustAnyLock(&ii->ii_rwsem);
15241 +       /* non-zero when the recorded sb or generation differs from h_inode */
15242 +       return ii->ii_hsb1 != h_inode->i_sb
15243 +               || ii->ii_higen != h_inode->i_generation;
15244 +}
15245 +
15246 +/* ---------------------------------------------------------------------- */
15247 +
15248 +static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
15249 +                                       aufs_bindex_t bindex)
15250 +{
15251 +       IiMustAnyLock(inode);
15252 +       return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
15253 +}
15254 +
15255 +static inline aufs_bindex_t au_ibstart(struct inode *inode)
15256 +{
15257 +       IiMustAnyLock(inode);
15258 +       return au_ii(inode)->ii_bstart;
15259 +}
15260 +
15261 +static inline aufs_bindex_t au_ibend(struct inode *inode)
15262 +{
15263 +       IiMustAnyLock(inode);
15264 +       return au_ii(inode)->ii_bend;
15265 +}
15266 +
15267 +static inline struct au_vdir *au_ivdir(struct inode *inode)
15268 +{
15269 +       IiMustAnyLock(inode);
15270 +       return au_ii(inode)->ii_vdir;
15271 +}
15272 +
15273 +static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
15274 +{
15275 +       IiMustAnyLock(inode);
15276 +       return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
15277 +}
15278 +
15279 +static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
15280 +{
15281 +       IiMustWriteLock(inode);
15282 +       au_ii(inode)->ii_bend = bindex;
15283 +}
15284 +
15285 +static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
15286 +{
15287 +       IiMustWriteLock(inode);
15288 +       au_ii(inode)->ii_vdir = vdir;
15289 +}
15290 +
15291 +static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
15292 +{
15293 +       IiMustAnyLock(inode);
15294 +       return &au_ii(inode)->ii_hinode[0 + bindex]; /* element for @bindex */
15295 +}
15296 +
15297 +/* ---------------------------------------------------------------------- */
15298 +
15299 +static inline struct dentry *au_pinned_parent(struct au_pin *pin)
15300 +{
15301 +       if (!pin)
15302 +               return NULL; /* no pin given */
15303 +       return pin->parent;
15304 +}
15305 +
15306 +static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
15307 +{
15308 +       if (!pin || !pin->hdir)
15309 +               return NULL; /* no pin given, or no hdir in it */
15310 +       return pin->hdir->hi_inode;
15311 +}
15312 +
15313 +static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
15314 +{
15315 +       if (!pin)
15316 +               return NULL; /* no pin given */
15317 +       return pin->hdir;
15318 +}
15319 +
15320 +static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
15321 +{
15322 +       if (pin) /* a NULL pin is silently ignored */
15323 +               pin->dentry = dentry; /* plain store, no dget() here */
15324 +}
15325 +
15326 +static inline void au_pin_set_parent_lflag(struct au_pin *pin,
15327 +                                          unsigned char lflag)
15328 +{
15329 +       if (!pin)
15330 +               return;
15331 +
15332 +       /* dirty macros require brackets */
15333 +       if (lflag) {
15334 +               au_fset_pin(pin->flags, DI_LOCKED);
15335 +       } else {
15336 +               au_fclr_pin(pin->flags, DI_LOCKED);
15337 +       }
15338 +
15339 +static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
15340 +{
15341 +       struct dentry *old = pin ? pin->parent : NULL;
15342 +       if (pin) /* get @parent before the old one is put: they may be the same */
15343 +               pin->parent = dget(parent);
15344 +       dput(old); /* old is NULL for a NULL pin; dput(NULL) does nothing */
15345 +}
15346 +
15347 +/* ---------------------------------------------------------------------- */
15348 +
15349 +#ifdef CONFIG_AUFS_HINOTIFY
15350 +/* hinotify.c */
15351 +int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
15352 +                struct inode *h_inode);
15353 +void au_hin_free(struct au_hinode *hinode);
15354 +void au_hin_ctl(struct au_hinode *hinode, int do_set);
15355 +void au_reset_hinotify(struct inode *inode, unsigned int flags);
15356 +
15357 +int __init au_hinotify_init(void);
15358 +void au_hinotify_fin(void);
15359 +
15360 +static inline
15361 +void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val)
15362 +{
15363 +       hinode->hi_notify = val;
15364 +}
15365 +
15366 +static inline void au_iigen_dec(struct inode *inode)
15367 +{
15368 +       atomic_dec_return(&au_ii(inode)->ii_generation); /* result is unused */
15369 +}
15370 +
15371 +#else
15372 +static inline
15373 +int au_hin_alloc(struct au_hinode *hinode __maybe_unused,
15374 +                struct inode *inode __maybe_unused,
15375 +                struct inode *h_inode __maybe_unused)
15376 +{
15377 +       return -EOPNOTSUPP; /* stub for !CONFIG_AUFS_HINOTIFY */
15378 +}
15379 +
15380 +static inline void au_hin_free(struct au_hinode *hinode __maybe_unused)
15381 +{
15382 +       /* nothing */
15383 +}
15384 +
15385 +static inline void au_hin_ctl(struct au_hinode *hinode __maybe_unused,
15386 +                             int do_set __maybe_unused)
15387 +{
15388 +       /* nothing */
15389 +}
15390 +
15391 +static inline void au_reset_hinotify(struct inode *inode __maybe_unused,
15392 +                                    unsigned int flags __maybe_unused)
15393 +{
15394 +       /* nothing */
15395 +}
15396 +
15397 +static inline int au_hinotify_init(void)
15398 +{
15399 +       return 0;
15400 +}
15401 +
15402 +#define au_hinotify_fin()      do {} while (0)
15403 +
15404 +static inline
15405 +void au_hin_init(struct au_hinode *hinode __maybe_unused,
15406 +                struct au_hinotify *val __maybe_unused)
15407 +{
15408 +       /* empty */
15409 +}
15410 +#endif /* CONFIG_AUFS_HINOTIFY */
15411 +
15412 +static inline void au_hin_suspend(struct au_hinode *hdir)
15413 +{
15414 +       au_hin_ctl(hdir, /*do_set*/0);
15415 +}
15416 +
15417 +static inline void au_hin_resume(struct au_hinode *hdir)
15418 +{
15419 +       au_hin_ctl(hdir, /*do_set*/1);
15420 +}
15421 +
15422 +static inline void au_hin_imtx_lock(struct au_hinode *hdir)
15423 +{
15424 +       mutex_lock(&hdir->hi_inode->i_mutex);
15425 +       au_hin_suspend(hdir); /* au_hin_ctl(hdir, 0), with i_mutex held */
15426 +}
15427 +
15428 +static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir,
15429 +                                          unsigned int sc __maybe_unused)
15430 +{
15431 +       mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); /* sc: lock subclass */
15432 +       au_hin_suspend(hdir); /* au_hin_ctl(hdir, 0), with i_mutex held */
15433 +}
15434 +
15435 +static inline void au_hin_imtx_unlock(struct au_hinode *hdir)
15436 +{
15437 +       au_hin_resume(hdir); /* au_hin_ctl(hdir, 1), still under i_mutex */
15438 +       mutex_unlock(&hdir->hi_inode->i_mutex);
15439 +}
15440 +
15441 +#endif /* __KERNEL__ */
15442 +#endif /* __AUFS_INODE_H__ */
15443 diff -uprN -x .git linux-2.6.31/fs/aufs/ioctl.c aufs2-2.6.git/fs/aufs/ioctl.c
15444 --- linux-2.6.31/fs/aufs/ioctl.c        1970-01-01 00:00:00.000000000 +0000
15445 +++ aufs2-2.6.git/fs/aufs/ioctl.c       2009-09-21 21:49:23.404940801 +0000
15446 @@ -0,0 +1,47 @@
15447 +/*
15448 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15449 + *
15450 + * This program, aufs is free software; you can redistribute it and/or modify
15451 + * it under the terms of the GNU General Public License as published by
15452 + * the Free Software Foundation; either version 2 of the License, or
15453 + * (at your option) any later version.
15454 + *
15455 + * This program is distributed in the hope that it will be useful,
15456 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15457 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15458 + * GNU General Public License for more details.
15459 + *
15460 + * You should have received a copy of the GNU General Public License
15461 + * along with this program; if not, write to the Free Software
15462 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15463 + */
15464 +
15465 +/*
15466 + * ioctl
15467 + * plink-management and readdir in userspace.
15468 + */
15469 +
15470 +#include "aufs.h"
15471 +
15472 +/*
15473 + * dispatch @cmd to its handler.
15474 + * AUFS_CTL_PLINK_MAINT and AUFS_CTL_PLINK_CLEAN go to au_plink_ioctl(),
15475 + * AUFS_CTL_RDU and AUFS_CTL_RDU_INO go to au_rdu_ioctl() together with @arg,
15476 + * and any other command ends in -EINVAL.
15477 + */
15478 +long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
15479 +{
15480 +       long err;
15481 +
15482 +       err = -EINVAL;
15483 +       if (cmd == AUFS_CTL_PLINK_MAINT || cmd == AUFS_CTL_PLINK_CLEAN) {
15484 +               /* plink-management */
15485 +               err = au_plink_ioctl(file, cmd);
15486 +       } else if (cmd == AUFS_CTL_RDU || cmd == AUFS_CTL_RDU_INO) {
15487 +               /* readdir in userspace */
15488 +               err = au_rdu_ioctl(file, cmd, arg);
15489 +       }
15490 +
15491 +       AuTraceErr(err);
15492 +       return err;
15493 +}
15494 diff -uprN -x .git linux-2.6.31/fs/aufs/loop.c aufs2-2.6.git/fs/aufs/loop.c
15495 --- linux-2.6.31/fs/aufs/loop.c 1970-01-01 00:00:00.000000000 +0000
15496 +++ aufs2-2.6.git/fs/aufs/loop.c        2009-09-21 21:49:23.404940801 +0000
15497 @@ -0,0 +1,55 @@
15498 +/*
15499 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15500 + *
15501 + * This program, aufs is free software; you can redistribute it and/or modify
15502 + * it under the terms of the GNU General Public License as published by
15503 + * the Free Software Foundation; either version 2 of the License, or
15504 + * (at your option) any later version.
15505 + *
15506 + * This program is distributed in the hope that it will be useful,
15507 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15508 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15509 + * GNU General Public License for more details.
15510 + *
15511 + * You should have received a copy of the GNU General Public License
15512 + * along with this program; if not, write to the Free Software
15513 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15514 + */
15515 +
15516 +/*
15517 + * support for loopback block device as a branch
15518 + */
15519 +
15520 +#include <linux/loop.h>
15521 +#include "aufs.h"
15522 +
15523 +/*
15524 + * test if two lower dentries have overlapping branches.
15525 + */
15526 +int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15527 +                            struct dentry *h_d2)
15528 +{
15529 +       struct super_block *h_sb;
15530 +       struct loop_device *lo;
15531 +
15532 +       h_sb = h_d1->d_inode->i_sb;
15533 +       if (MAJOR(h_sb->s_dev) != LOOP_MAJOR)
15534 +               return 0; /* h_d1 is not on a loop device */
15535 +
15536 +       lo = h_sb->s_bdev->bd_disk->private_data;
15537 +       h_d1 = lo->lo_backing_file->f_dentry;
15538 +       /* h_d1 can be local NFS. in this case aufs cannot detect the loop */
15539 +       if (unlikely(h_d1->d_sb == sb))
15540 +               return 1;
15541 +       return !!au_test_subdir(h_d1, h_d2);
15542 +}
15543 +
15544 +/* true if a kernel thread named 'loop[0-9].*' accesses a file */
15545 +int au_test_loopback_kthread(void)
15546 +{
15547 +       const char *name = current->comm;
15548 +
15549 +       /* no mm, and the name is "loop" followed by a digit */
15550 +       return !current->mm && '0' <= name[4] && name[4] <= '9'
15551 +              && !strncmp(name, "loop", 4);
15552 +}
15553 diff -uprN -x .git linux-2.6.31/fs/aufs/loop.h aufs2-2.6.git/fs/aufs/loop.h
15554 --- linux-2.6.31/fs/aufs/loop.h 1970-01-01 00:00:00.000000000 +0000
15555 +++ aufs2-2.6.git/fs/aufs/loop.h        2009-09-21 21:49:23.404940801 +0000
15556 @@ -0,0 +1,51 @@
15557 +/*
15558 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15559 + *
15560 + * This program, aufs is free software; you can redistribute it and/or modify
15561 + * it under the terms of the GNU General Public License as published by
15562 + * the Free Software Foundation; either version 2 of the License, or
15563 + * (at your option) any later version.
15564 + *
15565 + * This program is distributed in the hope that it will be useful,
15566 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15567 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15568 + * GNU General Public License for more details.
15569 + *
15570 + * You should have received a copy of the GNU General Public License
15571 + * along with this program; if not, write to the Free Software
15572 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15573 + */
15574 +
15575 +/*
15576 + * support for loopback mount as a branch
15577 + */
15578 +
15579 +#ifndef __AUFS_LOOP_H__
15580 +#define __AUFS_LOOP_H__
15581 +
15582 +#ifdef __KERNEL__
15583 +
15584 +struct dentry;
15585 +struct super_block;
15586 +
15587 +#ifdef CONFIG_AUFS_BDEV_LOOP
15588 +/* loop.c */
15589 +int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15590 +                            struct dentry *h_d2);
15591 +int au_test_loopback_kthread(void);
15592 +#else
15593 +static inline
15594 +int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_d1,
15595 +                            struct dentry *h_d2)
15596 +{
15597 +       return 0; /* stub: never reports an overlap */
15598 +}
15599 +
15600 +static inline int au_test_loopback_kthread(void)
15601 +{
15602 +       return 0; /* stub: never reports a loop kthread */
15603 +}
15604 +#endif /* CONFIG_AUFS_BDEV_LOOP */
15605 +
15606 +#endif /* __KERNEL__ */
15607 +#endif /* __AUFS_LOOP_H__ */
15608 diff -uprN -x .git linux-2.6.31/fs/aufs/magic.mk aufs2-2.6.git/fs/aufs/magic.mk
15609 --- linux-2.6.31/fs/aufs/magic.mk       1970-01-01 00:00:00.000000000 +0000
15610 +++ aufs2-2.6.git/fs/aufs/magic.mk      2009-09-21 21:49:23.404940801 +0000
15611 @@ -0,0 +1,52 @@
15612 +# each magic number below is handed to the compiler as a -D macro
15613 +# defined in ${srctree}/fs/fuse/inode.c
15614 +# tristate
15615 +ifdef CONFIG_FUSE_FS
15616 +ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
15617 +endif
15618 +
15619 +# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h
15620 +# tristate
15621 +ifdef CONFIG_OCFS2_FS
15622 +ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f
15623 +endif
15624 +
15625 +# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h
15626 +# tristate
15627 +ifdef CONFIG_OCFS2_FS_O2CB
15628 +ccflags-y += -DDLMFS_MAGIC=0x76a9f425
15629 +endif
15630 +
15631 +# defined in ${srctree}/fs/ramfs/inode.c
15632 +# always true
15633 +ccflags-y += -DRAMFS_MAGIC=0x858458f6
15634 +
15635 +# defined in ${srctree}/fs/cifs/cifsfs.c
15636 +# tristate
15637 +ifdef CONFIG_CIFS_FS
15638 +ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42
15639 +endif
15640 +
15641 +# defined in ${srctree}/fs/xfs/xfs_sb.h
15642 +# tristate
15643 +ifdef CONFIG_XFS_FS
15644 +ccflags-y += -DXFS_SB_MAGIC=0x58465342
15645 +endif
15646 +
15647 +# defined in ${srctree}/fs/configfs/mount.c
15648 +# tristate
15649 +ifdef CONFIG_CONFIGFS_FS
15650 +ccflags-y += -DCONFIGFS_MAGIC=0x62656570
15651 +endif
15652 +
15653 +# defined in ${srctree}/fs/9p/v9fs.h
15654 +# tristate
15655 +ifdef CONFIG_9P_FS
15656 +ccflags-y += -DV9FS_MAGIC=0x01021997
15657 +endif
15658 +
15659 +# defined in ${srctree}/fs/ubifs/ubifs.h
15660 +# tristate
15661 +ifdef CONFIG_UBIFS_FS
15662 +ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
15663 +endif
15664 diff -uprN -x .git linux-2.6.31/fs/aufs/module.c aufs2-2.6.git/fs/aufs/module.c
15665 --- linux-2.6.31/fs/aufs/module.c       1970-01-01 00:00:00.000000000 +0000
15666 +++ aufs2-2.6.git/fs/aufs/module.c      2009-09-21 21:49:23.404940801 +0000
15667 @@ -0,0 +1,173 @@
15668 +/*
15669 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15670 + *
15671 + * This program, aufs is free software; you can redistribute it and/or modify
15672 + * it under the terms of the GNU General Public License as published by
15673 + * the Free Software Foundation; either version 2 of the License, or
15674 + * (at your option) any later version.
15675 + *
15676 + * This program is distributed in the hope that it will be useful,
15677 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15678 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15679 + * GNU General Public License for more details.
15680 + *
15681 + * You should have received a copy of the GNU General Public License
15682 + * along with this program; if not, write to the Free Software
15683 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15684 + */
15685 +
15686 +/*
15687 + * module global variables and operations
15688 + */
15689 +
15690 +#include <linux/module.h>
15691 +#include <linux/seq_file.h>
15692 +#include "aufs.h"
15693 +
15694 +/* grow @p to @new_sz bytes and zero everything past its first @nused bytes */
15695 +void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
15696 +{
15697 +       if (nused < new_sz) {
15698 +               p = krealloc(p, new_sz, gfp);
15699 +               if (p)
15700 +                       memset(p + nused, 0, new_sz - nused);
15701 +       }
15702 +       return p;
15703 +}
15704 +
15705 +/* ---------------------------------------------------------------------- */
15706 +
15707 +/*
15708 + * aufs caches
15709 + */
15710 +struct kmem_cache *au_cachep[AuCache_Last];
15711 +static int __init au_cache_init(void)
15712 +{
15713 +       au_cachep[AuCache_DINFO] = AuCache(au_dinfo);
15714 +       if (au_cachep[AuCache_DINFO]) /* go on only while the previous one exists */
15715 +               au_cachep[AuCache_ICNTNR] = AuCache(au_icntnr);
15716 +       if (au_cachep[AuCache_ICNTNR])
15717 +               au_cachep[AuCache_FINFO] = AuCache(au_finfo);
15718 +       if (au_cachep[AuCache_FINFO])
15719 +               au_cachep[AuCache_VDIR] = AuCache(au_vdir);
15720 +       if (au_cachep[AuCache_VDIR])
15721 +               au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
15722 +       if (au_cachep[AuCache_DEHSTR])
15723 +               return 0; /* the last one exists, so all of them do */
15724 +
15725 +       return -ENOMEM; /* the caches created so far stay in au_cachep[] */
15726 +}
15727 +
15728 +static void au_cache_fin(void)
15729 +{
15730 +       struct kmem_cache **cp = au_cachep;
15731 +       for (; cp < au_cachep + AuCache_Last; cp++)
15732 +               if (*cp) { /* skip the slots that hold no cache */
15733 +                       kmem_cache_destroy(*cp);
15734 +                       *cp = NULL;
15735 +               }
15736 +}
15737 +
15738 +/* ---------------------------------------------------------------------- */
15739 +
15740 +int au_dir_roflags;
15741 +
15742 +/*
15743 + * functions for module interface.
15744 + */
15745 +MODULE_LICENSE("GPL");
15746 +/* MODULE_LICENSE("GPL v2"); */
15747 +MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
15748 +MODULE_DESCRIPTION(AUFS_NAME
15749 +       " -- Advanced multi layered unification filesystem");
15750 +MODULE_VERSION(AUFS_VERSION);
15751 +
15752 +/* it should be 'byte', but param_set_byte() prints it by "%c" */
15753 +short aufs_nwkq = AUFS_NWKQ_DEF;
15754 +MODULE_PARM_DESC(nwkq, "the number of workqueue thread, " AUFS_WKQ_NAME);
15755 +module_param_named(nwkq, aufs_nwkq, short, S_IRUGO);
15756 +
15757 +/* this module parameter has no meaning when SYSFS is disabled */
15758 +int sysaufs_brs = 1;
15759 +MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
15760 +module_param_named(brs, sysaufs_brs, int, S_IRUGO);
15761 +
15762 +/* ---------------------------------------------------------------------- */
15763 +
15764 +static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and the NUL */
15765 +
15766 +int au_seq_path(struct seq_file *seq, struct path *path)
15767 +{
15768 +       return seq_path(seq, path, au_esc_chars); /* filled in aufs_init() */
15769 +}
15770 +
15771 +/* ---------------------------------------------------------------------- */
15772 +
15773 +static int __init aufs_init(void)
15774 +{
15775 +       int err, i;
15776 +       char *p;
15777 +
15778 +       p = au_esc_chars; /* 0x01-0x20, backslash, del, and the terminator */
15779 +       for (i = 1; i <= ' '; i++)
15780 +               *p++ = i;
15781 +       *p++ = '\\';
15782 +       *p++ = '\x7f';
15783 +       *p = 0;
15784 +
15785 +       au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
15786 +
15787 +       sysaufs_brs_init();
15788 +       au_debug_init();
15789 +
15790 +       err = -EINVAL; /* the nwkq module parameter must be positive */
15791 +       if (unlikely(aufs_nwkq <= 0))
15792 +               goto out;
15793 +
15794 +       err = sysaufs_init();
15795 +       if (unlikely(err))
15796 +               goto out;
15797 +       err = au_wkq_init();
15798 +       if (unlikely(err))
15799 +               goto out_sysaufs;
15800 +       err = au_hinotify_init();
15801 +       if (unlikely(err))
15802 +               goto out_wkq;
15803 +       err = au_sysrq_init();
15804 +       if (unlikely(err))
15805 +               goto out_hin;
15806 +       err = au_cache_init();
15807 +       if (unlikely(err))
15808 +               goto out_cache; /* some of the caches may exist already */
15809 +       err = register_filesystem(&aufs_fs_type);
15810 +       if (unlikely(err))
15811 +               goto out_cache;
15812 +       pr_info(AUFS_NAME " " AUFS_VERSION "\n");
15813 +       goto out; /* success */
15814 +
15815 + out_cache:
15816 +       /* destroys only the caches that exist, so a partial set is fine */
15817 +       au_cache_fin();
15818 +       au_sysrq_fin();
15819 + out_hin:
15820 +       au_hinotify_fin();
15821 + out_wkq:
15822 +       au_wkq_fin();
15823 + out_sysaufs:
15824 +       sysaufs_fin();
15825 + out:
15826 +       return err;
15827 +}
15828 +
15829 +static void __exit aufs_exit(void) /* module exit: unregister the filesystem, then run the same teardown sequence as aufs_init()'s error path */
15830 +{
15831 +       unregister_filesystem(&aufs_fs_type);
15832 +       au_cache_fin();
15833 +       au_sysrq_fin();
15834 +       au_hinotify_fin();
15835 +       au_wkq_fin();
15836 +       sysaufs_fin();
15837 +}
15838 +
15839 +module_init(aufs_init);
15840 +module_exit(aufs_exit);
15841 diff -uprN -x .git linux-2.6.31/fs/aufs/module.h aufs2-2.6.git/fs/aufs/module.h
15842 --- linux-2.6.31/fs/aufs/module.h       1970-01-01 00:00:00.000000000 +0000
15843 +++ aufs2-2.6.git/fs/aufs/module.h      2009-09-21 21:49:23.404940801 +0000
15844 @@ -0,0 +1,78 @@
15845 +/*
15846 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15847 + *
15848 + * This program, aufs is free software; you can redistribute it and/or modify
15849 + * it under the terms of the GNU General Public License as published by
15850 + * the Free Software Foundation; either version 2 of the License, or
15851 + * (at your option) any later version.
15852 + *
15853 + * This program is distributed in the hope that it will be useful,
15854 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15855 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15856 + * GNU General Public License for more details.
15857 + *
15858 + * You should have received a copy of the GNU General Public License
15859 + * along with this program; if not, write to the Free Software
15860 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15861 + */
15862 +
15863 +/*
15864 + * module initialization and module-global
15865 + */
15866 +
15867 +#ifndef __AUFS_MODULE_H__
15868 +#define __AUFS_MODULE_H__
15869 +
15870 +#ifdef __KERNEL__
15871 +
15872 +#include <linux/slab.h>
15873 +
15874 +struct path;
15875 +struct seq_file;
15876 +
15877 +/* module parameters */
15878 +extern short aufs_nwkq;
15879 +extern int sysaufs_brs;
15880 +
15881 +/* ---------------------------------------------------------------------- */
15882 +
15883 +extern int au_dir_roflags;
15884 +
15885 +void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
15886 +int au_seq_path(struct seq_file *seq, struct path *path);
15887 +
15888 +/* ---------------------------------------------------------------------- */
15889 +
15890 +/* kmem cache: indices into au_cachep[] */
15891 +enum {
15892 +       AuCache_DINFO,
15893 +       AuCache_ICNTNR,
15894 +       AuCache_FINFO,
15895 +       AuCache_VDIR,
15896 +       AuCache_DEHSTR,
15897 +#ifdef CONFIG_AUFS_HINOTIFY
15898 +       AuCache_HINOTIFY,
15899 +#endif
15900 +       AuCache_Last /* number of indices defined above */
15901 +};
15902 +
15903 +#define AuCache(type)  KMEM_CACHE(type, SLAB_RECLAIM_ACCOUNT) /* KMEM_CACHE() wrapper that always passes SLAB_RECLAIM_ACCOUNT */
15904 +
15905 +extern struct kmem_cache *au_cachep[];
15906 +/* AuCacheFuncs(name, index) defines au_cache_alloc_<name>() (GFP_NOFS allocation) and au_cache_free_<name>() on au_cachep[AuCache_<index>] */
15907 +#define AuCacheFuncs(name, index) \
15908 +static inline void *au_cache_alloc_##name(void) \
15909 +{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
15910 +static inline void au_cache_free_##name(void *p) \
15911 +{ kmem_cache_free(au_cachep[AuCache_##index], p); }
15912 +
15913 +AuCacheFuncs(dinfo, DINFO);
15914 +AuCacheFuncs(icntnr, ICNTNR);
15915 +AuCacheFuncs(finfo, FINFO);
15916 +AuCacheFuncs(vdir, VDIR);
15917 +AuCacheFuncs(dehstr, DEHSTR);
15918 +
15919 +/*  ---------------------------------------------------------------------- */
15920 +
15921 +#endif /* __KERNEL__ */
15922 +#endif /* __AUFS_MODULE_H__ */
15923 diff -uprN -x .git linux-2.6.31/fs/aufs/opts.c aufs2-2.6.git/fs/aufs/opts.c
15924 --- linux-2.6.31/fs/aufs/opts.c 1970-01-01 00:00:00.000000000 +0000
15925 +++ aufs2-2.6.git/fs/aufs/opts.c        2009-09-21 21:49:23.404940801 +0000
15926 @@ -0,0 +1,1546 @@
15927 +/*
15928 + * Copyright (C) 2005-2009 Junjiro R. Okajima
15929 + *
15930 + * This program, aufs is free software; you can redistribute it and/or modify
15931 + * it under the terms of the GNU General Public License as published by
15932 + * the Free Software Foundation; either version 2 of the License, or
15933 + * (at your option) any later version.
15934 + *
15935 + * This program is distributed in the hope that it will be useful,
15936 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15937 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15938 + * GNU General Public License for more details.
15939 + *
15940 + * You should have received a copy of the GNU General Public License
15941 + * along with this program; if not, write to the Free Software
15942 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
15943 + */
15944 +
15945 +/*
15946 + * mount options/flags
15947 + */
15948 +
15949 +#include <linux/file.h>
15950 +#include <linux/namei.h>
15951 +#include <linux/types.h> /* a distribution requires */
15952 +#include <linux/parser.h>
15953 +#include "aufs.h"
15954 +
15955 +/* ---------------------------------------------------------------------- */
15956 +
15957 +enum { /* token ids for mount options; the 'options' table maps pattern strings onto them */
15958 +       Opt_br,
15959 +       Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend,
15960 +       Opt_idel, Opt_imod, Opt_ireorder,
15961 +       Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir,
15962 +       Opt_rdblk_def, Opt_rdhash_def,
15963 +       Opt_xino, Opt_zxino, Opt_noxino,
15964 +       Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
15965 +       Opt_trunc_xino_path, Opt_itrunc_xino,
15966 +       Opt_trunc_xib, Opt_notrunc_xib,
15967 +       Opt_shwh, Opt_noshwh,
15968 +       Opt_plink, Opt_noplink, Opt_list_plink,
15969 +       Opt_udba,
15970 +       /* Opt_lock, Opt_unlock, */
15971 +       Opt_cmd, Opt_cmd_args,
15972 +       Opt_diropq_a, Opt_diropq_w,
15973 +       Opt_warn_perm, Opt_nowarn_perm,
15974 +       Opt_wbr_copyup, Opt_wbr_create,
15975 +       Opt_refrof, Opt_norefrof,
15976 +       Opt_verbose, Opt_noverbose,
15977 +       Opt_sum, Opt_nosum, Opt_wsum,
15978 +       Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err /* Opt_tail terminates an au_opt array; Opt_err is the terminator token of 'options' */
15979 +};
15980 +
15981 +static match_table_t options = { /* mount option patterns -> Opt_* tokens; match_token() takes the first matching entry and falls back to the final Opt_err */
15982 +       {Opt_br, "br=%s"},
15983 +       {Opt_br, "br:%s"},
15984 +
15985 +       {Opt_add, "add=%d:%s"},
15986 +       {Opt_add, "add:%d:%s"},
15987 +       {Opt_add, "ins=%d:%s"},
15988 +       {Opt_add, "ins:%d:%s"},
15989 +       {Opt_append, "append=%s"},
15990 +       {Opt_append, "append:%s"},
15991 +       {Opt_prepend, "prepend=%s"},
15992 +       {Opt_prepend, "prepend:%s"},
15993 +
15994 +       {Opt_del, "del=%s"},
15995 +       {Opt_del, "del:%s"},
15996 +       /* {Opt_idel, "idel:%d"}, */
15997 +       {Opt_mod, "mod=%s"},
15998 +       {Opt_mod, "mod:%s"},
15999 +       /* {Opt_imod, "imod:%d:%s"}, */
16000 +
16001 +       {Opt_dirwh, "dirwh=%d"},
16002 +
16003 +       {Opt_xino, "xino=%s"},
16004 +       {Opt_noxino, "noxino"},
16005 +       {Opt_trunc_xino, "trunc_xino"},
16006 +       {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
16007 +       {Opt_notrunc_xino, "notrunc_xino"},
16008 +       {Opt_trunc_xino_path, "trunc_xino=%s"},
16009 +       {Opt_itrunc_xino, "itrunc_xino=%d"},
16010 +       /* {Opt_zxino, "zxino=%s"}, */
16011 +       {Opt_trunc_xib, "trunc_xib"},
16012 +       {Opt_notrunc_xib, "notrunc_xib"},
16013 +
16014 +       {Opt_plink, "plink"},
16015 +       {Opt_noplink, "noplink"},
16016 +#ifdef CONFIG_AUFS_DEBUG
16017 +       {Opt_list_plink, "list_plink"},
16018 +#endif
16019 +
16020 +       {Opt_udba, "udba=%s"},
16021 +
16022 +       {Opt_diropq_a, "diropq=always"},
16023 +       {Opt_diropq_a, "diropq=a"},
16024 +       {Opt_diropq_w, "diropq=whiteouted"},
16025 +       {Opt_diropq_w, "diropq=w"},
16026 +
16027 +       {Opt_warn_perm, "warn_perm"},
16028 +       {Opt_nowarn_perm, "nowarn_perm"},
16029 +
16030 +       /* kept temporarily; all of them map to Opt_ignore_silent */
16031 +       {Opt_ignore_silent, "coo=%s"},
16032 +       {Opt_ignore_silent, "nodlgt"},
16033 +       {Opt_ignore_silent, "nodirperm1"},
16034 +       {Opt_ignore_silent, "clean_plink"},
16035 +
16036 +#ifdef CONFIG_AUFS_SHWH
16037 +       {Opt_shwh, "shwh"},
16038 +#endif
16039 +       {Opt_noshwh, "noshwh"},
16040 +
16041 +       {Opt_rendir, "rendir=%d"},
16042 +
16043 +       {Opt_refrof, "refrof"},
16044 +       {Opt_norefrof, "norefrof"},
16045 +
16046 +       {Opt_verbose, "verbose"},
16047 +       {Opt_verbose, "v"},
16048 +       {Opt_noverbose, "noverbose"},
16049 +       {Opt_noverbose, "quiet"},
16050 +       {Opt_noverbose, "q"},
16051 +       {Opt_noverbose, "silent"},
16052 +
16053 +       {Opt_sum, "sum"},
16054 +       {Opt_nosum, "nosum"},
16055 +       {Opt_wsum, "wsum"},
16056 +
16057 +       {Opt_rdcache, "rdcache=%d"},
16058 +       {Opt_rdblk, "rdblk=%d"},
16059 +       {Opt_rdblk_def, "rdblk=def"},
16060 +       {Opt_rdhash, "rdhash=%d"},
16061 +       {Opt_rdhash_def, "rdhash=def"},
16062 +
16063 +       {Opt_wbr_create, "create=%s"},
16064 +       {Opt_wbr_create, "create_policy=%s"},
16065 +       {Opt_wbr_copyup, "cpup=%s"},
16066 +       {Opt_wbr_copyup, "copyup=%s"},
16067 +       {Opt_wbr_copyup, "copyup_policy=%s"},
16068 +
16069 +       /* internal use for the scripts */
16070 +       {Opt_ignore_silent, "si=%s"},
16071 +
16072 +       {Opt_br, "dirs=%s"},
16073 +       {Opt_ignore, "debug=%d"},
16074 +       {Opt_ignore, "delete=whiteout"},
16075 +       {Opt_ignore, "delete=all"},
16076 +       {Opt_ignore, "imap=%s"},
16077 +
16078 +       /* temporary workaround, due to old mount(8)? */
16079 +       {Opt_ignore_silent, "relatime"},
16080 +
16081 +       {Opt_err, NULL}
16082 +};
16083 +
16084 +/* ---------------------------------------------------------------------- */
16085 +
16086 +static const char *au_parser_pattern(int val, struct match_token *token) /* return the first pattern in the NULL-pattern-terminated table @token whose token id equals @val */
16087 +{
16088 +       while (token->pattern) {
16089 +               if (token->token == val)
16090 +                       return token->pattern;
16091 +               token++;
16092 +       }
16093 +       BUG(); /* @val has no entry in the table */
16094 +       return "??"; /* only reached if BUG() returns */
16095 +}
16096 +
16097 +/* ---------------------------------------------------------------------- */
16098 +
16099 +static match_table_t brperms = { /* branch permission strings -> AuBrPerm_* values */
16100 +       {AuBrPerm_RO, AUFS_BRPERM_RO},
16101 +       {AuBrPerm_RR, AUFS_BRPERM_RR},
16102 +       {AuBrPerm_RW, AUFS_BRPERM_RW},
16103 +
16104 +       {AuBrPerm_ROWH, AUFS_BRPERM_ROWH},
16105 +       {AuBrPerm_RRWH, AUFS_BRPERM_RRWH},
16106 +       {AuBrPerm_RWNoLinkWH, AUFS_BRPERM_RWNLWH},
16107 +
16108 +       {AuBrPerm_ROWH, "nfsro"}, /* "nfsro" is an alias for ROWH */
16109 +       {AuBrPerm_RO, NULL} /* terminator: match_token() returns this token, so an unmatched string yields AuBrPerm_RO */
16110 +};
16111 +
16112 +static int br_perm_val(char *perm) /* translate the permission string @perm into an AuBrPerm_* value by matching it against brperms */
16113 +{
16114 +       int val;
16115 +       substring_t args[MAX_OPT_ARGS]; /* required by match_token(); not read afterwards */
16116 +
16117 +       val = match_token(perm, brperms, args);
16118 +       return val;
16119 +}
16120 +
16121 +const char *au_optstr_br_perm(int brperm) /* string form of the branch permission @brperm, looked up in brperms via au_parser_pattern() */
16122 +{
16123 +       return au_parser_pattern(brperm, (void *)brperms);
16124 +}
16125 +
16126 +/* ---------------------------------------------------------------------- */
16127 +
16128 +static match_table_t udbalevel = { /* values accepted by the "udba=" mount option */
16129 +       {AuOpt_UDBA_REVAL, "reval"},
16130 +       {AuOpt_UDBA_NONE, "none"},
16131 +#ifdef CONFIG_AUFS_HINOTIFY
16132 +       {AuOpt_UDBA_HINOTIFY, "inotify"},
16133 +#endif
16134 +       {-1, NULL} /* terminator: an unmatched string yields -1 */
16135 +};
16136 +
16137 +static int udba_val(char *str) /* translate @str into an AuOpt_UDBA_* value by matching it against udbalevel; -1 when nothing matches */
16138 +{
16139 +       substring_t args[MAX_OPT_ARGS];
16140 +
16141 +       return match_token(str, udbalevel, args);
16142 +}
16143 +
16144 +const char *au_optstr_udba(int udba) /* string form of the udba level @udba, looked up in udbalevel via au_parser_pattern() */
16145 +{
16146 +       return au_parser_pattern(udba, (void *)udbalevel);
16147 +}
16148 +
16149 +/* ---------------------------------------------------------------------- */
16150 +
16151 +static match_table_t au_wbr_create_policy = { /* create-policy strings -> AuWbrCreate_* values; the %d arguments are parsed by au_wbr_create_val() */
16152 +       {AuWbrCreate_TDP, "tdp"},
16153 +       {AuWbrCreate_TDP, "top-down-parent"},
16154 +       {AuWbrCreate_RR, "rr"},
16155 +       {AuWbrCreate_RR, "round-robin"},
16156 +       {AuWbrCreate_MFS, "mfs"},
16157 +       {AuWbrCreate_MFS, "most-free-space"},
16158 +       {AuWbrCreate_MFSV, "mfs:%d"},
16159 +       {AuWbrCreate_MFSV, "most-free-space:%d"},
16160 +
16161 +       {AuWbrCreate_MFSRR, "mfsrr:%d"},
16162 +       {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
16163 +       {AuWbrCreate_PMFS, "pmfs"},
16164 +       {AuWbrCreate_PMFSV, "pmfs:%d"},
16165 +
16166 +       {-1, NULL} /* terminator: an unmatched string yields -1 */
16167 +};
16168 +
16169 +/*
16170 + * cf. linux/lib/parser.c and cmdline.c
16171 + * gave up calling memparse() since it uses simple_strtoull() instead of
16172 + * strict_...().
16173 + */
16174 +static int au_match_ull(substring_t *s, unsigned long long *result) /* convert substring @s to an unsigned long long in *@result; returns strict_strtoull()'s status, or -ERANGE when @s does not fit the local buffer */
16175 +{
16176 +       int err;
16177 +       unsigned int len;
16178 +       char a[32]; /* NUL-terminated copy of the substring */
16179 +
16180 +       err = -ERANGE;
16181 +       len = s->to - s->from;
16182 +       if (len + 1 <= sizeof(a)) { /* room for the text plus its terminator */
16183 +               memcpy(a, s->from, len);
16184 +               a[len] = '\0';
16185 +               err = strict_strtoull(a, 0, result);
16186 +       }
16187 +       return err;
16188 +}
16189 +
16190 +static int au_wbr_mfs_wmark(substring_t *arg, char *str,
16191 +                           struct au_opt_wbr_create *create) /* parse @arg into create->mfsrr_watermark; returns 0, or -EINVAL after logging the whole option @str */
16192 +{
16193 +       int err;
16194 +       unsigned long long ull;
16195 +
16196 +       err = 0;
16197 +       if (!au_match_ull(arg, &ull))
16198 +               create->mfsrr_watermark = ull;
16199 +       else {
16200 +               AuErr("bad integer in %s\n", str);
16201 +               err = -EINVAL;
16202 +       }
16203 +
16204 +       return err;
16205 +}
16206 +
16207 +static int au_wbr_mfs_sec(substring_t *arg, char *str,
16208 +                         struct au_opt_wbr_create *create) /* parse @arg as a non-negative int into create->mfs_second; returns 0, or -EINVAL after logging the whole option @str */
16209 +{
16210 +       int n, err;
16211 +
16212 +       err = 0;
16213 +       if (!match_int(arg, &n) && 0 <= n) /* negative values are rejected too */
16214 +               create->mfs_second = n;
16215 +       else {
16216 +               AuErr("bad integer in %s\n", str);
16217 +               err = -EINVAL;
16218 +       }
16219 +
16220 +       return err;
16221 +}
16222 +
16223 +static int au_wbr_create_val(char *str, struct au_opt_wbr_create *create) /* parse the create policy @str into @create; returns the matched AuWbrCreate_* value, -1 when @str matches nothing, or a negative errno from argument parsing */
16224 +{
16225 +       int err, e;
16226 +       substring_t args[MAX_OPT_ARGS];
16227 +
16228 +       err = match_token(str, au_wbr_create_policy, args);
16229 +       create->wbr_create = err; /* stored even when the argument parsing below fails */
16230 +       switch (err) {
16231 +       case AuWbrCreate_MFSRRV: /* "mfsrr:%d:%d": watermark, then seconds */
16232 +               e = au_wbr_mfs_wmark(&args[0], str, create);
16233 +               if (!e)
16234 +                       e = au_wbr_mfs_sec(&args[1], str, create);
16235 +               if (unlikely(e))
16236 +                       err = e;
16237 +               break;
16238 +       case AuWbrCreate_MFSRR: /* "mfsrr:%d": watermark only, default seconds */
16239 +               e = au_wbr_mfs_wmark(&args[0], str, create);
16240 +               if (unlikely(e)) {
16241 +                       err = e;
16242 +                       break;
16243 +               }
16244 +               /*FALLTHROUGH*/
16245 +       case AuWbrCreate_MFS:
16246 +       case AuWbrCreate_PMFS:
16247 +               create->mfs_second = AUFS_MFS_SECOND_DEF;
16248 +               break;
16249 +       case AuWbrCreate_MFSV: /* "...:%d": explicit seconds */
16250 +       case AuWbrCreate_PMFSV:
16251 +               e = au_wbr_mfs_sec(&args[0], str, create);
16252 +               if (unlikely(e))
16253 +                       err = e;
16254 +               break;
16255 +       }
16256 +
16257 +       return err;
16258 +}
16259 +
16260 +const char *au_optstr_wbr_create(int wbr_create) /* first pattern string listed for @wbr_create in au_wbr_create_policy, via au_parser_pattern() */
16261 +{
16262 +       return au_parser_pattern(wbr_create, (void *)au_wbr_create_policy);
16263 +}
16264 +
16265 +static match_table_t au_wbr_copyup_policy = { /* copyup-policy strings -> AuWbrCopyup_* values; each policy has a short and a long name */
16266 +       {AuWbrCopyup_TDP, "tdp"},
16267 +       {AuWbrCopyup_TDP, "top-down-parent"},
16268 +       {AuWbrCopyup_BUP, "bup"},
16269 +       {AuWbrCopyup_BUP, "bottom-up-parent"},
16270 +       {AuWbrCopyup_BU, "bu"},
16271 +       {AuWbrCopyup_BU, "bottom-up"},
16272 +       {-1, NULL} /* terminator: an unmatched string yields -1 */
16273 +};
16274 +
16275 +static int au_wbr_copyup_val(char *str) /* translate @str into an AuWbrCopyup_* value by matching it against au_wbr_copyup_policy; -1 when nothing matches */
16276 +{
16277 +       substring_t args[MAX_OPT_ARGS];
16278 +
16279 +       return match_token(str, au_wbr_copyup_policy, args);
16280 +}
16281 +
16282 +const char *au_optstr_wbr_copyup(int wbr_copyup) /* first pattern string listed for @wbr_copyup in au_wbr_copyup_policy, via au_parser_pattern() */
16283 +{
16284 +       return au_parser_pattern(wbr_copyup, (void *)au_wbr_copyup_policy);
16285 +}
16286 +
16287 +/* ---------------------------------------------------------------------- */
16288 +
16289 +static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* flags passed to vfsub_kern_path() by the option parsers below */
16290 +
16291 +static void dump_opts(struct au_opts *opts) /* debug aid: log every entry of opts->opt[] up to the Opt_tail terminator; an empty function unless CONFIG_AUFS_DEBUG is set */
16292 +{
16293 +#ifdef CONFIG_AUFS_DEBUG
16294 +       /* reduce stack space */
16295 +       union {
16296 +               struct au_opt_add *add;
16297 +               struct au_opt_del *del;
16298 +               struct au_opt_mod *mod;
16299 +               struct au_opt_xino *xino;
16300 +               struct au_opt_xino_itrunc *xino_itrunc;
16301 +               struct au_opt_wbr_create *create;
16302 +       } u;
16303 +       struct au_opt *opt;
16304 +
16305 +       opt = opts->opt;
16306 +       while (opt->type != Opt_tail) {
16307 +               switch (opt->type) {
16308 +               case Opt_add:
16309 +                       u.add = &opt->add;
16310 +                       AuDbg("add {b%d, %s, 0x%x, %p}\n",
16311 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16312 +                                 u.add->path.dentry);
16313 +                       break;
16314 +               case Opt_del:
16315 +               case Opt_idel:
16316 +                       u.del = &opt->del;
16317 +                       AuDbg("del {%s, %p}\n",
16318 +                             u.del->pathname, u.del->h_path.dentry);
16319 +                       break;
16320 +               case Opt_mod:
16321 +               case Opt_imod:
16322 +                       u.mod = &opt->mod;
16323 +                       AuDbg("mod {%s, 0x%x, %p}\n",
16324 +                                 u.mod->path, u.mod->perm, u.mod->h_root);
16325 +                       break;
16326 +               case Opt_append:
16327 +                       u.add = &opt->add;
16328 +                       AuDbg("append {b%d, %s, 0x%x, %p}\n",
16329 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16330 +                                 u.add->path.dentry);
16331 +                       break;
16332 +               case Opt_prepend:
16333 +                       u.add = &opt->add;
16334 +                       AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
16335 +                                 u.add->bindex, u.add->pathname, u.add->perm,
16336 +                                 u.add->path.dentry);
16337 +                       break;
16338 +               case Opt_dirwh:
16339 +                       AuDbg("dirwh %d\n", opt->dirwh);
16340 +                       break;
16341 +               case Opt_rdcache:
16342 +                       AuDbg("rdcache %d\n", opt->rdcache);
16343 +                       break;
16344 +               case Opt_rdblk:
16345 +                       AuDbg("rdblk %u\n", opt->rdblk);
16346 +                       break;
16347 +               case Opt_rdblk_def:
16348 +                       AuDbg("rdblk_def\n");
16349 +                       break;
16350 +               case Opt_rdhash:
16351 +                       AuDbg("rdhash %u\n", opt->rdhash);
16352 +                       break;
16353 +               case Opt_rdhash_def:
16354 +                       AuDbg("rdhash_def\n");
16355 +                       break;
16356 +               case Opt_xino:
16357 +                       u.xino = &opt->xino;
16358 +                       AuDbg("xino {%s %.*s}\n",
16359 +                                 u.xino->path,
16360 +                                 AuDLNPair(u.xino->file->f_dentry));
16361 +                       break;
16362 +               case Opt_trunc_xino:
16363 +                       AuLabel(trunc_xino);
16364 +                       break;
16365 +               case Opt_notrunc_xino:
16366 +                       AuLabel(notrunc_xino);
16367 +                       break;
16368 +               case Opt_trunc_xino_path:
16369 +               case Opt_itrunc_xino:
16370 +                       u.xino_itrunc = &opt->xino_itrunc;
16371 +                       AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
16372 +                       break;
16373 +
16374 +               case Opt_noxino:
16375 +                       AuLabel(noxino);
16376 +                       break;
16377 +               case Opt_trunc_xib:
16378 +                       AuLabel(trunc_xib);
16379 +                       break;
16380 +               case Opt_notrunc_xib:
16381 +                       AuLabel(notrunc_xib);
16382 +                       break;
16383 +               case Opt_shwh:
16384 +                       AuLabel(shwh);
16385 +                       break;
16386 +               case Opt_noshwh:
16387 +                       AuLabel(noshwh);
16388 +                       break;
16389 +               case Opt_plink:
16390 +                       AuLabel(plink);
16391 +                       break;
16392 +               case Opt_noplink:
16393 +                       AuLabel(noplink);
16394 +                       break;
16395 +               case Opt_list_plink:
16396 +                       AuLabel(list_plink);
16397 +                       break;
16398 +               case Opt_udba:
16399 +                       AuDbg("udba %d, %s\n",
16400 +                                 opt->udba, au_optstr_udba(opt->udba));
16401 +                       break;
16402 +               case Opt_diropq_a:
16403 +                       AuLabel(diropq_a);
16404 +                       break;
16405 +               case Opt_diropq_w:
16406 +                       AuLabel(diropq_w);
16407 +                       break;
16408 +               case Opt_warn_perm:
16409 +                       AuLabel(warn_perm);
16410 +                       break;
16411 +               case Opt_nowarn_perm:
16412 +                       AuLabel(nowarn_perm);
16413 +                       break;
16414 +               case Opt_refrof:
16415 +                       AuLabel(refrof);
16416 +                       break;
16417 +               case Opt_norefrof:
16418 +                       AuLabel(norefrof);
16419 +                       break;
16420 +               case Opt_verbose:
16421 +                       AuLabel(verbose);
16422 +                       break;
16423 +               case Opt_noverbose:
16424 +                       AuLabel(noverbose);
16425 +                       break;
16426 +               case Opt_sum:
16427 +                       AuLabel(sum);
16428 +                       break;
16429 +               case Opt_nosum:
16430 +                       AuLabel(nosum);
16431 +                       break;
16432 +               case Opt_wsum:
16433 +                       AuLabel(wsum);
16434 +                       break;
16435 +               case Opt_wbr_create:
16436 +                       u.create = &opt->wbr_create;
16437 +                       AuDbg("create %d, %s\n", u.create->wbr_create,
16438 +                                 au_optstr_wbr_create(u.create->wbr_create));
16439 +                       switch (u.create->wbr_create) { /* also log the policy's numeric arguments, if it has any */
16440 +                       case AuWbrCreate_MFSV:
16441 +                       case AuWbrCreate_PMFSV:
16442 +                               AuDbg("%d sec\n", u.create->mfs_second);
16443 +                               break;
16444 +                       case AuWbrCreate_MFSRR:
16445 +                               AuDbg("%llu watermark\n",
16446 +                                         u.create->mfsrr_watermark);
16447 +                               break;
16448 +                       case AuWbrCreate_MFSRRV:
16449 +                               AuDbg("%llu watermark, %d sec\n",
16450 +                                         u.create->mfsrr_watermark,
16451 +                                         u.create->mfs_second);
16452 +                               break;
16453 +                       }
16454 +                       break;
16455 +               case Opt_wbr_copyup:
16456 +                       AuDbg("copyup %d, %s\n", opt->wbr_copyup,
16457 +                                 au_optstr_wbr_copyup(opt->wbr_copyup));
16458 +                       break;
16459 +               default:
16460 +                       BUG(); /* option type without a dump case above */
16461 +               }
16462 +               opt++;
16463 +       }
16464 +#endif
16465 +}
16466 +
16467 +void au_opts_free(struct au_opts *opts) /* drop the path, dentry and file references held by the entries of opts->opt[], up to the Opt_tail terminator */
16468 +{
16469 +       struct au_opt *opt;
16470 +
16471 +       opt = opts->opt;
16472 +       while (opt->type != Opt_tail) {
16473 +               switch (opt->type) { /* option types not listed here release nothing */
16474 +               case Opt_add:
16475 +               case Opt_append:
16476 +               case Opt_prepend:
16477 +                       path_put(&opt->add.path);
16478 +                       break;
16479 +               case Opt_del:
16480 +               case Opt_idel:
16481 +                       path_put(&opt->del.h_path);
16482 +                       break;
16483 +               case Opt_mod:
16484 +               case Opt_imod:
16485 +                       dput(opt->mod.h_root);
16486 +                       break;
16487 +               case Opt_xino:
16488 +                       fput(opt->xino.file);
16489 +                       break;
16490 +               }
16491 +               opt++;
16492 +       }
16493 +}
16494 +
16495 +static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
16496 +                  aufs_bindex_t bindex) /* fill @opt as an Opt_add for the branch spec @opt_str ("path" or "path=perm") at index @bindex; returns 0, or -EINVAL when the path lookup fails */
16497 +{
16498 +       int err;
16499 +       struct au_opt_add *add = &opt->add;
16500 +       char *p;
16501 +
16502 +       add->bindex = bindex;
16503 +       add->perm = AuBrPerm_Last;
16504 +       add->pathname = opt_str;
16505 +       p = strchr(opt_str, '=');
16506 +       if (p) {
16507 +               *p++ = 0; /* split @opt_str in place into path and permission */
16508 +               if (*p) /* NOTE(review): "path=" with an empty perm leaves add->perm at AuBrPerm_Last - confirm the consumer handles it */
16509 +                       add->perm = br_perm_val(p);
16510 +       }
16511 +
16512 +       err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
16513 +       if (!err) {
16514 +               if (!p) { /* no '=' given: RR if au_test_fs_rr() says so, else RW for bindex 0 on a non-MS_RDONLY mount, else RO */
16515 +                       add->perm = AuBrPerm_RO;
16516 +                       if (au_test_fs_rr(add->path.dentry->d_sb))
16517 +                               add->perm = AuBrPerm_RR;
16518 +                       else if (!bindex && !(sb_flags & MS_RDONLY))
16519 +                               add->perm = AuBrPerm_RW;
16520 +               }
16521 +               opt->type = Opt_add;
16522 +               goto out;
16523 +       }
16524 +       AuErr("lookup failed %s (%d)\n", add->pathname, err);
16525 +       err = -EINVAL; /* the lookup error itself is only logged */
16526 +
16527 + out:
16528 +       return err;
16529 +}
16530 +
16531 +static int au_opts_parse_del(struct au_opt_del *del, substring_t args[]) /* look up the branch path given in args[0] into del->h_path; returns the lookup result unchanged (0 or a negative errno) */
16532 +{
16533 +       int err;
16534 +
16535 +       del->pathname = args[0].from;
16536 +       AuDbg("del path %s\n", del->pathname);
16537 +
16538 +       err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
16539 +       if (unlikely(err))
16540 +               AuErr("lookup failed %s (%d)\n", del->pathname, err);
16541 +
16542 +       return err;
16543 +}
16544 +
16545 +#if 0 /* reserved for future use */
16546 +static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
16547 +                             struct au_opt_del *del, substring_t args[]) /* by-index variant of au_opts_parse_del(): fills del->h_path from branch @bindex; -EINVAL when @bindex is out of range */
16548 +{
16549 +       int err;
16550 +       struct dentry *root;
16551 +
16552 +       err = -EINVAL;
16553 +       root = sb->s_root;
16554 +       aufs_read_lock(root, AuLock_FLUSH);
16555 +       if (bindex < 0 || au_sbend(sb) < bindex) {
16556 +               AuErr("out of bounds, %d\n", bindex);
16557 +               goto out;
16558 +       }
16559 +
16560 +       err = 0;
16561 +       del->h_path.dentry = dget(au_h_dptr(root, bindex)); /* take references on the branch's dentry and mount */
16562 +       del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
16563 +
16564 + out:
16565 +       aufs_read_unlock(root, !AuLock_IR);
16566 +       return err;
16567 +}
16568 +#endif
16569 +
16570 +static int au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
16571 +{
16572 +       int err;
16573 +       struct path path;
16574 +       char *p;
16575 +
16576 +       err = -EINVAL;
16577 +       mod->path = args[0].from;
16578 +       p = strchr(mod->path, '=');
16579 +       if (unlikely(!p)) {
16580 +               AuErr("no permssion %s\n", args[0].from);
16581 +               goto out;
16582 +       }
16583 +
16584 +       *p++ = 0;
16585 +       err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
16586 +       if (unlikely(err)) {
16587 +               AuErr("lookup failed %s (%d)\n", mod->path, err);
16588 +               goto out;
16589 +       }
16590 +
16591 +       mod->perm = br_perm_val(p);
16592 +       AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
16593 +       mod->h_root = dget(path.dentry);
16594 +       path_put(&path);
16595 +
16596 + out:
16597 +       return err;
16598 +}
16599 +
16600 +#if 0 /* reserved for future use */
16601 +static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
16602 +                             struct au_opt_mod *mod, substring_t args[]) /* by-index variant of au_opts_parse_mod(): perm comes from args[1], h_root from branch @bindex; -EINVAL when @bindex is out of range */
16603 +{
16604 +       int err;
16605 +       struct dentry *root;
16606 +
16607 +       err = -EINVAL;
16608 +       root = sb->s_root;
16609 +       aufs_read_lock(root, AuLock_FLUSH);
16610 +       if (bindex < 0 || au_sbend(sb) < bindex) {
16611 +               AuErr("out of bounds, %d\n", bindex);
16612 +               goto out;
16613 +       }
16614 +
16615 +       err = 0;
16616 +       mod->perm = br_perm_val(args[1].from);
16617 +       AuDbg("mod path %s, perm 0x%x, %s\n", /* NOTE(review): mod->path is logged here but never assigned in this function */
16618 +             mod->path, mod->perm, args[1].from);
16619 +       mod->h_root = dget(au_h_dptr(root, bindex));
16620 +
16621 + out:
16622 +       aufs_read_unlock(root, !AuLock_IR);
16623 +       return err;
16624 +}
16625 +#endif
16626 +
16627 +static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
16628 +                             substring_t args[]) /* create the xino file named by args[0] and record it in @xino; returns 0, the au_xino_create() error, or -EINVAL when the file is on @sb itself */
16629 +{
16630 +       int err;
16631 +       struct file *file;
16632 +
16633 +       file = au_xino_create(sb, args[0].from, /*silent*/0);
16634 +       err = PTR_ERR(file);
16635 +       if (IS_ERR(file))
16636 +               goto out;
16637 +
16638 +       err = -EINVAL;
16639 +       if (unlikely(file->f_dentry->d_sb == sb)) { /* the xino file must not live on the superblock being configured */
16640 +               fput(file);
16641 +               AuErr("%s must be outside\n", args[0].from);
16642 +               goto out;
16643 +       }
16644 +
16645 +       err = 0;
16646 +       xino->file = file; /* reference kept; au_opts_free() fput()s it */
16647 +       xino->path = args[0].from;
16648 +
16649 + out:
16650 +       return err;
16651 +}
16652 +
16653 +static
16654 +int au_opts_parse_xino_itrunc_path(struct super_block *sb,
16655 +                                  struct au_opt_xino_itrunc *xino_itrunc,
16656 +                                  substring_t args[]) /* store in xino_itrunc->bindex the index of the branch whose root dentry is the directory named by args[0]; returns 0, the lookup error, or -EINVAL when no branch matches */
16657 +{
16658 +       int err;
16659 +       aufs_bindex_t bend, bindex;
16660 +       struct path path;
16661 +       struct dentry *root;
16662 +
16663 +       err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
16664 +       if (unlikely(err)) {
16665 +               AuErr("lookup failed %s (%d)\n", args[0].from, err);
16666 +               goto out;
16667 +       }
16668 +
16669 +       xino_itrunc->bindex = -1; /* stays -1 if no branch matches */
16670 +       root = sb->s_root;
16671 +       aufs_read_lock(root, AuLock_FLUSH);
16672 +       bend = au_sbend(sb);
16673 +       for (bindex = 0; bindex <= bend; bindex++) { /* scan branches 0..bend for the looked-up dentry */
16674 +               if (au_h_dptr(root, bindex) == path.dentry) {
16675 +                       xino_itrunc->bindex = bindex;
16676 +                       break;
16677 +               }
16678 +       }
16679 +       aufs_read_unlock(root, !AuLock_IR);
16680 +       path_put(&path); /* the path was needed only for the comparison */
16681 +
16682 +       if (unlikely(xino_itrunc->bindex < 0)) {
16683 +               AuErr("no such branch %s\n", args[0].from);
16684 +               err = -EINVAL;
16685 +       }
16686 +
16687 + out:
16688 +       return err;
16689 +}
16690 +
16691 +/* called without aufs lock */
16692 +int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
16693 +{
16694 +       int err, n, token;
16695 +       aufs_bindex_t bindex;
16696 +       unsigned char skipped;
16697 +       struct dentry *root;
16698 +       struct au_opt *opt, *opt_tail;
16699 +       char *opt_str;
16700 +       /* reduce the stack space */
16701 +       union {
16702 +               struct au_opt_xino_itrunc *xino_itrunc;
16703 +               struct au_opt_wbr_create *create;
16704 +       } u;
16705 +       struct {
16706 +               substring_t args[MAX_OPT_ARGS];
16707 +       } *a;
16708 +
16709 +       err = -ENOMEM;
16710 +       a = kmalloc(sizeof(*a), GFP_NOFS);
16711 +       if (unlikely(!a))
16712 +               goto out;
16713 +
16714 +       root = sb->s_root;
16715 +       err = 0;
16716 +       bindex = 0;
16717 +       opt = opts->opt;
16718 +       opt_tail = opt + opts->max_opt - 1;
16719 +       opt->type = Opt_tail;
16720 +       while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
16721 +               err = -EINVAL;
16722 +               skipped = 0;
16723 +               token = match_token(opt_str, options, a->args);
16724 +               switch (token) {
16725 +               case Opt_br:
16726 +                       err = 0;
16727 +                       while (!err && (opt_str = strsep(&a->args[0].from, ":"))
16728 +                              && *opt_str) {
16729 +                               err = opt_add(opt, opt_str, opts->sb_flags,
16730 +                                             bindex++);
16731 +                               if (unlikely(!err && ++opt > opt_tail)) {
16732 +                                       err = -E2BIG;
16733 +                                       break;
16734 +                               }
16735 +                               opt->type = Opt_tail;
16736 +                               skipped = 1;
16737 +                       }
16738 +                       break;
16739 +               case Opt_add:
16740 +                       if (unlikely(match_int(&a->args[0], &n))) {
16741 +                               AuErr("bad integer in %s\n", opt_str);
16742 +                               break;
16743 +                       }
16744 +                       bindex = n;
16745 +                       err = opt_add(opt, a->args[1].from, opts->sb_flags,
16746 +                                     bindex);
16747 +                       if (!err)
16748 +                               opt->type = token;
16749 +                       break;
16750 +               case Opt_append:
16751 +                       err = opt_add(opt, a->args[0].from, opts->sb_flags,
16752 +                                     /*dummy bindex*/1);
16753 +                       if (!err)
16754 +                               opt->type = token;
16755 +                       break;
16756 +               case Opt_prepend:
16757 +                       err = opt_add(opt, a->args[0].from, opts->sb_flags,
16758 +                                     /*bindex*/0);
16759 +                       if (!err)
16760 +                               opt->type = token;
16761 +                       break;
16762 +               case Opt_del:
16763 +                       err = au_opts_parse_del(&opt->del, a->args);
16764 +                       if (!err)
16765 +                               opt->type = token;
16766 +                       break;
16767 +#if 0 /* reserved for future use */
16768 +               case Opt_idel:
16769 +                       del->pathname = "(indexed)";
16770 +                       if (unlikely(match_int(&args[0], &n))) {
16771 +                               AuErr("bad integer in %s\n", opt_str);
16772 +                               break;
16773 +                       }
16774 +                       err = au_opts_parse_idel(sb, n, &opt->del, a->args);
16775 +                       if (!err)
16776 +                               opt->type = token;
16777 +                       break;
16778 +#endif
16779 +               case Opt_mod:
16780 +                       err = au_opts_parse_mod(&opt->mod, a->args);
16781 +                       if (!err)
16782 +                               opt->type = token;
16783 +                       break;
16784 +#ifdef IMOD /* reserved for future use */
16785 +               case Opt_imod:
16786 +                       u.mod->path = "(indexed)";
16787 +                       if (unlikely(match_int(&a->args[0], &n))) {
16788 +                               AuErr("bad integer in %s\n", opt_str);
16789 +                               break;
16790 +                       }
16791 +                       err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
16792 +                       if (!err)
16793 +                               opt->type = token;
16794 +                       break;
16795 +#endif
16796 +               case Opt_xino:
16797 +                       err = au_opts_parse_xino(sb, &opt->xino, a->args);
16798 +                       if (!err)
16799 +                               opt->type = token;
16800 +                       break;
16801 +
16802 +               case Opt_trunc_xino_path:
16803 +                       err = au_opts_parse_xino_itrunc_path
16804 +                               (sb, &opt->xino_itrunc, a->args);
16805 +                       if (!err)
16806 +                               opt->type = token;
16807 +                       break;
16808 +
16809 +               case Opt_itrunc_xino:
16810 +                       u.xino_itrunc = &opt->xino_itrunc;
16811 +                       if (unlikely(match_int(&a->args[0], &n))) {
16812 +                               AuErr("bad integer in %s\n", opt_str);
16813 +                               break;
16814 +                       }
16815 +                       u.xino_itrunc->bindex = n;
16816 +                       aufs_read_lock(root, AuLock_FLUSH);
16817 +                       if (n < 0 || au_sbend(sb) < n) {
16818 +                               AuErr("out of bounds, %d\n", n);
16819 +                               aufs_read_unlock(root, !AuLock_IR);
16820 +                               break;
16821 +                       }
16822 +                       aufs_read_unlock(root, !AuLock_IR);
16823 +                       err = 0;
16824 +                       opt->type = token;
16825 +                       break;
16826 +
16827 +               case Opt_dirwh:
16828 +                       if (unlikely(match_int(&a->args[0], &opt->dirwh)))
16829 +                               break;
16830 +                       err = 0;
16831 +                       opt->type = token;
16832 +                       break;
16833 +
16834 +               case Opt_rdcache:
16835 +                       if (unlikely(match_int(&a->args[0], &opt->rdcache)))
16836 +                               break;
16837 +                       err = 0;
16838 +                       opt->type = token;
16839 +                       break;
16840 +               case Opt_rdblk:
16841 +                       if (unlikely(match_int(&a->args[0], &n)
16842 +                                    || n < 0
16843 +                                    || n > KMALLOC_MAX_SIZE)) {
16844 +                               AuErr("bad integer in %s\n", opt_str);
16845 +                               break;
16846 +                       }
16847 +                       if (unlikely(n && n < NAME_MAX)) {
16848 +                               AuErr("rdblk must be larger than %d\n",
16849 +                                     NAME_MAX);
16850 +                               break;
16851 +                       }
16852 +                       opt->rdblk = n;
16853 +                       err = 0;
16854 +                       opt->type = token;
16855 +                       break;
16856 +               case Opt_rdhash:
16857 +                       if (unlikely(match_int(&a->args[0], &n)
16858 +                                    || n < 0
16859 +                                    || n * sizeof(struct hlist_head)
16860 +                                    > KMALLOC_MAX_SIZE)) {
16861 +                               AuErr("bad integer in %s\n", opt_str);
16862 +                               break;
16863 +                       }
16864 +                       opt->rdhash = n;
16865 +                       err = 0;
16866 +                       opt->type = token;
16867 +                       break;
16868 +
16869 +               case Opt_trunc_xino:
16870 +               case Opt_notrunc_xino:
16871 +               case Opt_noxino:
16872 +               case Opt_trunc_xib:
16873 +               case Opt_notrunc_xib:
16874 +               case Opt_shwh:
16875 +               case Opt_noshwh:
16876 +               case Opt_plink:
16877 +               case Opt_noplink:
16878 +               case Opt_list_plink:
16879 +               case Opt_diropq_a:
16880 +               case Opt_diropq_w:
16881 +               case Opt_warn_perm:
16882 +               case Opt_nowarn_perm:
16883 +               case Opt_refrof:
16884 +               case Opt_norefrof:
16885 +               case Opt_verbose:
16886 +               case Opt_noverbose:
16887 +               case Opt_sum:
16888 +               case Opt_nosum:
16889 +               case Opt_wsum:
16890 +               case Opt_rdblk_def:
16891 +               case Opt_rdhash_def:
16892 +                       err = 0;
16893 +                       opt->type = token;
16894 +                       break;
16895 +
16896 +               case Opt_udba:
16897 +                       opt->udba = udba_val(a->args[0].from);
16898 +                       if (opt->udba >= 0) {
16899 +                               err = 0;
16900 +                               opt->type = token;
16901 +                       } else
16902 +                               AuErr("wrong value, %s\n", opt_str);
16903 +                       break;
16904 +
16905 +               case Opt_wbr_create:
16906 +                       u.create = &opt->wbr_create;
16907 +                       u.create->wbr_create
16908 +                               = au_wbr_create_val(a->args[0].from, u.create);
16909 +                       if (u.create->wbr_create >= 0) {
16910 +                               err = 0;
16911 +                               opt->type = token;
16912 +                       } else
16913 +                               AuErr("wrong value, %s\n", opt_str);
16914 +                       break;
16915 +               case Opt_wbr_copyup:
16916 +                       opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
16917 +                       if (opt->wbr_copyup >= 0) {
16918 +                               err = 0;
16919 +                               opt->type = token;
16920 +                       } else
16921 +                               AuErr("wrong value, %s\n", opt_str);
16922 +                       break;
16923 +
16924 +               case Opt_ignore:
16925 +                       AuWarn("ignored %s\n", opt_str);
16926 +                       /*FALLTHROUGH*/
16927 +               case Opt_ignore_silent:
16928 +                       skipped = 1;
16929 +                       err = 0;
16930 +                       break;
16931 +               case Opt_err:
16932 +                       AuErr("unknown option %s\n", opt_str);
16933 +                       break;
16934 +               }
16935 +
16936 +               if (!err && !skipped) {
16937 +                       if (unlikely(++opt > opt_tail)) {
16938 +                               err = -E2BIG;
16939 +                               opt--;
16940 +                               opt->type = Opt_tail;
16941 +                               break;
16942 +                       }
16943 +                       opt->type = Opt_tail;
16944 +               }
16945 +       }
16946 +
16947 +       kfree(a);
16948 +       dump_opts(opts);
16949 +       if (unlikely(err))
16950 +               au_opts_free(opts);
16951 +
16952 + out:
16953 +       return err;
16954 +}
16955 +/* finish the current wbr_create policy and switch to the requested one */
16956 +static int au_opt_wbr_create(struct super_block *sb,
16957 +                            struct au_opt_wbr_create *create)
16958 +{
16959 +       int ret;
16960 +       struct au_sbinfo *sbi;
16961 +
16962 +       SiMustWriteLock(sb);
16963 +
16964 +       sbi = au_sbi(sb);
16965 +       ret = 0;
16966 +       if (sbi->si_wbr_create_ops->fin)
16967 +               ret = sbi->si_wbr_create_ops->fin(sb);
16968 +       if (!ret)
16969 +               ret = 1; /* handled */
16970 +
16971 +       sbi->si_wbr_create = create->wbr_create; /* even if ->fin() failed */
16972 +       sbi->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
16973 +       switch (create->wbr_create) {
16974 +       case AuWbrCreate_MFS:
16975 +       case AuWbrCreate_MFSV:
16976 +       case AuWbrCreate_PMFS:
16977 +       case AuWbrCreate_PMFSV:
16978 +               sbi->si_wbr_mfs.mfs_expire = create->mfs_second * HZ;
16979 +               break;
16980 +       case AuWbrCreate_MFSRR:
16981 +       case AuWbrCreate_MFSRRV:
16982 +               sbi->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
16983 +               sbi->si_wbr_mfs.mfs_expire = create->mfs_second * HZ;
16984 +               break;
16985 +       }
16986 +
16987 +       if (sbi->si_wbr_create_ops->init)
16988 +               sbi->si_wbr_create_ops->init(sb); /* ignore */
16989 +
16990 +       return ret;
16991 +}
16992 +/*
16993 + * apply a single simple option to the superblock info or to 'opts'.
16994 + * returns tri-state.
16995 + * plus: processed without an error, zero: unprocessed,
16996 + * minus: error, as reported by au_opt_wbr_create() or au_xino_trunc()
16997 + */
16998 +static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
16999 +                        struct au_opts *opts)
17000 +{
17001 +       int err;
17002 +       struct au_sbinfo *sbinfo;
17003 +
17004 +       SiMustWriteLock(sb);
17005 +
17006 +       err = 1; /* handled */
17007 +       sbinfo = au_sbi(sb);
17008 +       switch (opt->type) {
17009 +       case Opt_udba:
17010 +               sbinfo->si_mntflags &= ~AuOptMask_UDBA;
17011 +               sbinfo->si_mntflags |= opt->udba;
17012 +               opts->given_udba |= opt->udba; /* tested by au_opts_remount() */
17013 +               break;
17014 +
17015 +       case Opt_plink:
17016 +               au_opt_set(sbinfo->si_mntflags, PLINK);
17017 +               break;
17018 +       case Opt_noplink:
17019 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
17020 +                       au_plink_put(sb);
17021 +               au_opt_clr(sbinfo->si_mntflags, PLINK);
17022 +               break;
17023 +       case Opt_list_plink:
17024 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
17025 +                       au_plink_list(sb);
17026 +               break;
17027 +
17028 +       case Opt_diropq_a:
17029 +               au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17030 +               break;
17031 +       case Opt_diropq_w:
17032 +               au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
17033 +               break;
17034 +
17035 +       case Opt_warn_perm:
17036 +               au_opt_set(sbinfo->si_mntflags, WARN_PERM);
17037 +               break;
17038 +       case Opt_nowarn_perm:
17039 +               au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
17040 +               break;
17041 +
17042 +       case Opt_refrof:
17043 +               au_opt_set(sbinfo->si_mntflags, REFROF);
17044 +               break;
17045 +       case Opt_norefrof:
17046 +               au_opt_clr(sbinfo->si_mntflags, REFROF);
17047 +               break;
17048 +
17049 +       case Opt_verbose:
17050 +               au_opt_set(sbinfo->si_mntflags, VERBOSE);
17051 +               break;
17052 +       case Opt_noverbose:
17053 +               au_opt_clr(sbinfo->si_mntflags, VERBOSE);
17054 +               break;
17055 +
17056 +       case Opt_sum:
17057 +               au_opt_set(sbinfo->si_mntflags, SUM);
17058 +               break;
17059 +       case Opt_wsum:
17060 +               au_opt_clr(sbinfo->si_mntflags, SUM);
17061 +               au_opt_set(sbinfo->si_mntflags, SUM_W);
17062 +               break; /* do not fall into nosum, it clears SUM_W again */
17063 +       case Opt_nosum:
17064 +               au_opt_clr(sbinfo->si_mntflags, SUM);
17065 +               au_opt_clr(sbinfo->si_mntflags, SUM_W);
17066 +               break;
17067 +       case Opt_wbr_create:
17068 +               err = au_opt_wbr_create(sb, &opt->wbr_create);
17069 +               break;
17070 +       case Opt_wbr_copyup:
17071 +               sbinfo->si_wbr_copyup = opt->wbr_copyup;
17072 +               sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
17073 +               break;
17074 +
17075 +       case Opt_dirwh:
17076 +               sbinfo->si_dirwh = opt->dirwh;
17077 +               break;
17078 +
17079 +       case Opt_rdcache:
17080 +               sbinfo->si_rdcache = opt->rdcache * HZ;
17081 +               break;
17082 +       case Opt_rdblk:
17083 +               sbinfo->si_rdblk = opt->rdblk;
17084 +               break;
17085 +       case Opt_rdblk_def:
17086 +               sbinfo->si_rdblk = AUFS_RDBLK_DEF;
17087 +               break;
17088 +       case Opt_rdhash:
17089 +               sbinfo->si_rdhash = opt->rdhash;
17090 +               break;
17091 +       case Opt_rdhash_def:
17092 +               sbinfo->si_rdhash = AUFS_RDHASH_DEF;
17093 +               break;
17094 +
17095 +       case Opt_shwh:
17096 +               au_opt_set(sbinfo->si_mntflags, SHWH);
17097 +               break;
17098 +       case Opt_noshwh:
17099 +               au_opt_clr(sbinfo->si_mntflags, SHWH);
17100 +               break;
17101 +
17102 +       case Opt_trunc_xino:
17103 +               au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
17104 +               break;
17105 +       case Opt_notrunc_xino:
17106 +               au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
17107 +               break;
17108 +
17109 +       case Opt_trunc_xino_path:
17110 +       case Opt_itrunc_xino:
17111 +               err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
17112 +               if (!err)
17113 +                       err = 1;
17114 +               break;
17115 +
17116 +       case Opt_trunc_xib:
17117 +               au_fset_opts(opts->flags, TRUNC_XIB); /* see au_opts_remount() */
17118 +               break;
17119 +       case Opt_notrunc_xib:
17120 +               au_fclr_opts(opts->flags, TRUNC_XIB);
17121 +               break;
17122 +
17123 +       default:
17124 +               err = 0; /* not a simple option, left to the other handlers */
17125 +               break;
17126 +       }
17127 +
17128 +       return err;
17129 +}
17130 +
17131 +/*
17132 + * handle the branch options (add, del and mod). returns tri-state.
17133 + * plus: processed without an error, zero: unprocessed, minus: error
17134 + */
17135 +static int au_opt_br(struct super_block *sb, struct au_opt *opt,
17136 +                    struct au_opts *opts)
17137 +{
17138 +       int ret, need_refresh;
17139 +
17140 +       ret = 0;
17141 +       switch (opt->type) {
17142 +       case Opt_append:
17143 +       case Opt_prepend:
17144 +       case Opt_add:
17145 +               /* Opt_add brings its own index, the others get it here */
17146 +               if (opt->type == Opt_prepend) {
17147 +                       opt->add.bindex = 0;
17148 +               } else if (opt->type == Opt_append) {
17149 +                       opt->add.bindex = au_sbend(sb) + 1;
17150 +                       if (opt->add.bindex < 0)
17151 +                               opt->add.bindex = 0;
17152 +               }
17153 +               ret = au_br_add(sb, &opt->add,
17154 +                               au_ftest_opts(opts->flags, REMOUNT));
17155 +               if (ret)
17156 +                       break;
17157 +               ret = 1;
17158 +               au_fset_opts(opts->flags, REFRESH_DIR);
17159 +               if (au_br_whable(opt->add.perm))
17160 +                       au_fset_opts(opts->flags, REFRESH_NONDIR);
17161 +               break;
17162 +
17163 +       case Opt_del:
17164 +       case Opt_idel:
17165 +               ret = au_br_del(sb, &opt->del,
17166 +                               au_ftest_opts(opts->flags, REMOUNT));
17167 +               if (ret)
17168 +                       break;
17169 +               ret = 1;
17170 +               au_fset_opts(opts->flags, TRUNC_XIB);
17171 +               au_fset_opts(opts->flags, REFRESH_DIR);
17172 +               au_fset_opts(opts->flags, REFRESH_NONDIR);
17173 +               break;
17174 +
17175 +       case Opt_mod:
17176 +       case Opt_imod:
17177 +               ret = au_br_mod(sb, &opt->mod,
17178 +                               au_ftest_opts(opts->flags, REMOUNT),
17179 +                               &need_refresh);
17180 +               if (ret)
17181 +                       break;
17182 +               ret = 1;
17183 +               if (need_refresh) {
17184 +                       au_fset_opts(opts->flags, REFRESH_DIR);
17185 +                       au_fset_opts(opts->flags, REFRESH_NONDIR);
17186 +               }
17187 +               break;
17188 +       }
17189 +
17190 +       return ret;
17191 +}
17192 +/* apply the xino or noxino option, any other option type is left alone */
17193 +static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
17194 +                      struct au_opt_xino **opt_xino,
17195 +                      struct au_opts *opts)
17196 +{
17197 +       int err;
17198 +       aufs_bindex_t last, idx;
17199 +       struct dentry *root, *h_parent;
17200 +
17201 +       if (opt->type == Opt_noxino) {
17202 +               au_xino_clr(sb);
17203 +               au_xino_brid_set(sb, -1);
17204 +               /* a non-NULL marker for the callers which test !opt_xino */
17205 +               *opt_xino = (void *)-1;
17206 +               return 0;
17207 +       }
17208 +       if (opt->type != Opt_xino)
17209 +               return 0; /* not mine */
17210 +
17211 +       err = au_xino_set(sb, &opt->xino,
17212 +                         !!au_ftest_opts(opts->flags, REMOUNT));
17213 +       if (unlikely(err))
17214 +               return err;
17215 +
17216 +       /* forget the xino branch id, it may be set again just below */
17217 +       *opt_xino = &opt->xino;
17218 +       au_xino_brid_set(sb, -1);
17219 +
17220 +       /* safe d_parent access */
17221 +       h_parent = opt->xino.file->f_dentry->d_parent;
17222 +       root = sb->s_root;
17223 +       last = au_sbend(sb);
17224 +       for (idx = 0; idx <= last; idx++) {
17225 +               if (au_h_dptr(root, idx) != h_parent)
17226 +                       continue;
17227 +               /* the parent of the xino file is the root of this branch */
17228 +               au_xino_brid_set(sb, au_sbr_id(sb, idx));
17229 +               break;
17230 +       }
17231 +
17232 +       return 0;
17233 +}
17234 +/* warn about doubtful options and call au_wh_init() on the branches in need */
17235 +int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
17236 +                  unsigned int pending)
17237 +{
17238 +       int err;
17239 +       aufs_bindex_t bindex, bend;
17240 +       unsigned char do_plink, skip, do_free;
17241 +       struct au_branch *br;
17242 +       struct au_wbr *wbr;
17243 +       struct dentry *root;
17244 +       struct inode *dir, *h_dir;
17245 +       struct au_sbinfo *sbinfo;
17246 +       struct au_hinode *hdir;
17247 +
17248 +       SiMustAnyLock(sb);
17249 +
17250 +       sbinfo = au_sbi(sb);
17251 +       AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
17252 +       /* the option combinations below are warned only, never refused */
17253 +       if (!(sb_flags & MS_RDONLY)) {
17254 +               if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
17255 +                       AuWarn("first branch should be rw\n");
17256 +               if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
17257 +                       AuWarn("shwh should be used with ro\n");
17258 +       }
17259 +
17260 +       if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HINOTIFY)
17261 +           && !au_opt_test(sbinfo->si_mntflags, XINO))
17262 +               AuWarn("udba=inotify requires xino\n");
17263 +       /* visit every branch, the loop stops at the first au_wh_init() error */
17264 +       err = 0;
17265 +       root = sb->s_root;
17266 +       dir = sb->s_root->d_inode;
17267 +       do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
17268 +       bend = au_sbend(sb);
17269 +       for (bindex = 0; !err && bindex <= bend; bindex++) {
17270 +               skip = 0;
17271 +               h_dir = au_h_iptr(dir, bindex); /* NOTE(review): not used below */
17272 +               br = au_sbr(sb, bindex);
17273 +               do_free = 0;
17274 +
17275 +               wbr = br->br_wbr;
17276 +               if (wbr)
17277 +                       wbr_wh_read_lock(wbr); /* wbr members are tested under it */
17278 +
17279 +               switch (br->br_perm) {
17280 +               case AuBrPerm_RO:
17281 +               case AuBrPerm_ROWH:
17282 +               case AuBrPerm_RR:
17283 +               case AuBrPerm_RRWH:
17284 +                       do_free = !!wbr; /* freed below when au_wh_init() succeeds */
17285 +                       skip = (!wbr
17286 +                               || (!wbr->wbr_whbase
17287 +                                   && !wbr->wbr_plink
17288 +                                   && !wbr->wbr_orph));
17289 +                       break;
17290 +
17291 +               case AuBrPerm_RWNoLinkWH:
17292 +                       /* skip = (!br->br_whbase && !br->br_orph); */
17293 +                       skip = (!wbr || !wbr->wbr_whbase);
17294 +                       if (skip && wbr) {
17295 +                               if (do_plink)
17296 +                                       skip = !!wbr->wbr_plink;
17297 +                               else
17298 +                                       skip = !wbr->wbr_plink;
17299 +                       }
17300 +                       break;
17301 +
17302 +               case AuBrPerm_RW:
17303 +                       /* skip = (br->br_whbase && br->br_orph); */
17304 +                       skip = (wbr && wbr->wbr_whbase);
17305 +                       if (skip) {
17306 +                               if (do_plink)
17307 +                                       skip = !!wbr->wbr_plink;
17308 +                               else
17309 +                                       skip = !wbr->wbr_plink;
17310 +                       }
17311 +                       break;
17312 +
17313 +               default:
17314 +                       BUG(); /* every known br_perm value is listed above */
17315 +               }
17316 +               if (wbr)
17317 +                       wbr_wh_read_unlock(wbr);
17318 +
17319 +               if (skip)
17320 +                       continue; /* nothing to (re)initialize on this branch */
17321 +               /* hdir is locked first, the wbr write lock is taken inside */
17322 +               hdir = au_hi(dir, bindex);
17323 +               au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
17324 +               if (wbr)
17325 +                       wbr_wh_write_lock(wbr);
17326 +               err = au_wh_init(au_h_dptr(root, bindex), br, sb);
17327 +               if (wbr)
17328 +                       wbr_wh_write_unlock(wbr);
17329 +               au_hin_imtx_unlock(hdir);
17330 +
17331 +               if (!err && do_free) {
17332 +                       kfree(wbr); /* do_free is set for the RO/RR perms only */
17333 +                       br->br_wbr = NULL;
17334 +               }
17335 +       }
17336 +
17337 +       return err;
17338 +}
17339 +/* apply the parsed options at mount time: simple ones, branches, then xino */
17340 +int au_opts_mount(struct super_block *sb, struct au_opts *opts)
17341 +{
17342 +       int err;
17343 +       unsigned int tmp;
17344 +       aufs_bindex_t bend;
17345 +       struct au_opt *opt;
17346 +       struct au_opt_xino *opt_xino, xino;
17347 +       struct au_sbinfo *sbinfo;
17348 +
17349 +       SiMustWriteLock(sb);
17350 +       /* the 1st pass, simple options */
17351 +       err = 0;
17352 +       opt_xino = NULL; /* set by au_opt_xino() when xino/noxino is given */
17353 +       opt = opts->opt;
17354 +       while (err >= 0 && opt->type != Opt_tail)
17355 +               err = au_opt_simple(sb, opt++, opts);
17356 +       if (err > 0)
17357 +               err = 0;
17358 +       else if (unlikely(err < 0))
17359 +               goto out;
17360 +
17361 +       /* disable xino and udba temporarily */
17362 +       sbinfo = au_sbi(sb);
17363 +       tmp = sbinfo->si_mntflags; /* the original flags, used to restore */
17364 +       au_opt_clr(sbinfo->si_mntflags, XINO);
17365 +       au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
17366 +       /* the 2nd pass, branches */
17367 +       opt = opts->opt;
17368 +       while (err >= 0 && opt->type != Opt_tail)
17369 +               err = au_opt_br(sb, opt++, opts);
17370 +       if (err > 0)
17371 +               err = 0;
17372 +       else if (unlikely(err < 0))
17373 +               goto out; /* NOTE(review): si_mntflags stays modified here */
17374 +
17375 +       bend = au_sbend(sb);
17376 +       if (unlikely(bend < 0)) {
17377 +               err = -EINVAL;
17378 +               AuErr("no branches\n");
17379 +               goto out;
17380 +       }
17381 +       /* the 3rd pass, xino/noxino; the XINO flag is put back first */
17382 +       if (au_opt_test(tmp, XINO))
17383 +               au_opt_set(sbinfo->si_mntflags, XINO);
17384 +       opt = opts->opt;
17385 +       while (!err && opt->type != Opt_tail)
17386 +               err = au_opt_xino(sb, opt++, &opt_xino, opts);
17387 +       if (unlikely(err))
17388 +               goto out;
17389 +
17390 +       err = au_opts_verify(sb, sb->s_flags, tmp);
17391 +       if (unlikely(err))
17392 +               goto out;
17393 +
17394 +       /* restore xino: neither xino nor noxino was given, use au_xino_def() */
17395 +       if (au_opt_test(tmp, XINO) && !opt_xino) {
17396 +               xino.file = au_xino_def(sb);
17397 +               err = PTR_ERR(xino.file);
17398 +               if (IS_ERR(xino.file))
17399 +                       goto out;
17400 +
17401 +               err = au_xino_set(sb, &xino, /*remount*/0); /* xino.path is unset */
17402 +               fput(xino.file); /* dropped whether au_xino_set() failed or not */
17403 +               if (unlikely(err))
17404 +                       goto out;
17405 +       }
17406 +
17407 +       /* restore udba */
17408 +       sbinfo->si_mntflags &= ~AuOptMask_UDBA;
17409 +       sbinfo->si_mntflags |= (tmp & AuOptMask_UDBA);
17410 +       if (au_opt_test(tmp, UDBA_HINOTIFY)) {
17411 +               struct inode *dir = sb->s_root->d_inode;
17412 +               au_reset_hinotify(dir,
17413 +                                 au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
17414 +       }
17415 +
17416 + out:
17417 +       return err;
17418 +}
17419 +/* apply the parsed options at remount time, trying each handler per option */
17420 +int au_opts_remount(struct super_block *sb, struct au_opts *opts)
17421 +{
17422 +       int err, rerr;
17423 +       struct inode *dir;
17424 +       struct au_opt_xino *opt_xino;
17425 +       struct au_opt *opt;
17426 +       struct au_sbinfo *sbinfo;
17427 +
17428 +       SiMustWriteLock(sb);
17429 +
17430 +       dir = sb->s_root->d_inode; /* NOTE(review): not used below */
17431 +       sbinfo = au_sbi(sb);
17432 +       err = 0;
17433 +       opt_xino = NULL;
17434 +       opt = opts->opt;
17435 +       while (err >= 0 && opt->type != Opt_tail) {
17436 +               err = au_opt_simple(sb, opt, opts); /* zero means "not mine" */
17437 +               if (!err)
17438 +                       err = au_opt_br(sb, opt, opts);
17439 +               if (!err)
17440 +                       err = au_opt_xino(sb, opt, &opt_xino, opts);
17441 +               opt++;
17442 +       }
17443 +       if (err > 0)
17444 +               err = 0; /* plus only says that the last option was processed */
17445 +       AuTraceErr(err);
17446 +       /* go on even err */
17447 +
17448 +       rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
17449 +       if (unlikely(rerr && !err))
17450 +               err = rerr; /* the first error is the one reported */
17451 +
17452 +       if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
17453 +               rerr = au_xib_trunc(sb);
17454 +               if (unlikely(rerr && !err))
17455 +                       err = rerr;
17456 +       }
17457 +
17458 +       /* will be handled by the caller */
17459 +       if (!au_ftest_opts(opts->flags, REFRESH_DIR)
17460 +           && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO)))
17461 +               au_fset_opts(opts->flags, REFRESH_DIR);
17462 +
17463 +       AuDbg("status 0x%x\n", opts->flags);
17464 +       return err;
17465 +}
17466 +
17467 +/* ---------------------------------------------------------------------- */
17468 +/* pick the UDBA (users direct branch access) bits out of the mount flags */
17469 +unsigned int au_opt_udba(struct super_block *sb)
17470 +{
17471 +       return AuOptMask_UDBA & au_mntflags(sb);
17472 +}
17473 diff -uprN -x .git linux-2.6.31/fs/aufs/opts.h aufs2-2.6.git/fs/aufs/opts.h
17474 --- linux-2.6.31/fs/aufs/opts.h 1970-01-01 00:00:00.000000000 +0000
17475 +++ aufs2-2.6.git/fs/aufs/opts.h        2009-09-21 21:49:23.408274204 +0000
17476 @@ -0,0 +1,196 @@
17477 +/*
17478 + * Copyright (C) 2005-2009 Junjiro R. Okajima
17479 + *
17480 + * This program, aufs is free software; you can redistribute it and/or modify
17481 + * it under the terms of the GNU General Public License as published by
17482 + * the Free Software Foundation; either version 2 of the License, or
17483 + * (at your option) any later version.
17484 + *
17485 + * This program is distributed in the hope that it will be useful,
17486 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
17487 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17488 + * GNU General Public License for more details.
17489 + *
17490 + * You should have received a copy of the GNU General Public License
17491 + * along with this program; if not, write to the Free Software
17492 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17493 + */
17494 +
17495 +/*
17496 + * mount options/flags
17497 + */
17498 +
17499 +#ifndef __AUFS_OPTS_H__
17500 +#define __AUFS_OPTS_H__
17501 +
17502 +#ifdef __KERNEL__
17503 +
17504 +#include <linux/path.h>
17505 +#include <linux/aufs_type.h>
17506 +
17507 +struct file;
17508 +struct super_block;
17509 +
17510 +/* ---------------------------------------------------------------------- */
17511 +
17512 +/* mount flags: bits of au_sbinfo.si_mntflags, see au_opt_test() */
17513 +#define AuOpt_XINO             1               /* external inode number bitmap
17514 +                                                  and translation table */
17515 +#define AuOpt_TRUNC_XINO       (1 << 1)        /* truncate xino files */
17516 +#define AuOpt_UDBA_NONE                (1 << 2)        /* user's direct branch access */
17517 +#define AuOpt_UDBA_REVAL       (1 << 3)
17518 +#define AuOpt_UDBA_HINOTIFY    (1 << 4)
17519 +#define AuOpt_SHWH             (1 << 5)        /* show whiteout */
17520 +#define AuOpt_PLINK            (1 << 6)        /* pseudo-link */
17521 +#define AuOpt_DIRPERM1         (1 << 7)        /* unimplemented */
17522 +#define AuOpt_REFROF           (1 << 8)        /* unimplemented */
17523 +#define AuOpt_ALWAYS_DIROPQ    (1 << 9)        /* policy for creating diropq */
17524 +#define AuOpt_SUM              (1 << 10)       /* summation for statfs(2) */
17525 +#define AuOpt_SUM_W            (1 << 11)       /* unimplemented */
17526 +#define AuOpt_WARN_PERM                (1 << 12)       /* warn when add-branch */
17527 +#define AuOpt_VERBOSE          (1 << 13)       /* busy inode when del-branch */
17528 +/* a feature which is configured out gets a zero flag, so its test is false */
17529 +#ifndef CONFIG_AUFS_HINOTIFY
17530 +#undef AuOpt_UDBA_HINOTIFY
17531 +#define AuOpt_UDBA_HINOTIFY    0
17532 +#endif
17533 +#ifndef CONFIG_AUFS_SHWH
17534 +#undef AuOpt_SHWH
17535 +#define AuOpt_SHWH             0
17536 +#endif
17537 +/* the default mount flags, and the mask covering every UDBA flag */
17538 +#define AuOpt_Def      (AuOpt_XINO \
17539 +                        | AuOpt_UDBA_REVAL \
17540 +                        | AuOpt_PLINK \
17541 +                        /* | AuOpt_DIRPERM1 */ \
17542 +                        | AuOpt_WARN_PERM)
17543 +#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
17544 +                        | AuOpt_UDBA_REVAL \
17545 +                        | AuOpt_UDBA_HINOTIFY)
17546 +/* test/set/clear AuOpt_xxx bits; UDBA bits must go through au_opt_set_udba() */
17547 +#define au_opt_test(flags, name)       ((flags) & AuOpt_##name)
17548 +#define au_opt_set(flags, name) do { \
17549 +       BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
17550 +       ((flags) |= AuOpt_##name); \
17551 +} while (0)
17552 +#define au_opt_set_udba(flags, name) do { \
17553 +       (flags) &= ~AuOptMask_UDBA; \
17554 +       ((flags) |= AuOpt_##name); \
17555 +} while (0)
17556 +#define au_opt_clr(flags, name)        do { (flags) &= ~AuOpt_##name; } while (0)
17557 +
17558 +/* ---------------------------------------------------------------------- */
17559 +
17560 +/* policies to select one among multiple writable branches (AuWbrCreate_xxx) */
17561 +enum {
17562 +       AuWbrCreate_TDP,        /* top down parent */
17563 +       AuWbrCreate_RR,         /* round robin */
17564 +       AuWbrCreate_MFS,        /* most free space */
17565 +       AuWbrCreate_MFSV,       /* mfs with seconds */
17566 +       AuWbrCreate_MFSRR,      /* mfs then rr */
17567 +       AuWbrCreate_MFSRRV,     /* mfs then rr with seconds */
17568 +       AuWbrCreate_PMFS,       /* parent and mfs */
17569 +       AuWbrCreate_PMFSV,      /* parent and mfs with seconds */
17570 +
17571 +       AuWbrCreate_Def = AuWbrCreate_TDP       /* the default */
17572 +};
17573 +/* AuWbrCopyup_xxx: presumably the same kind of policy for copy-up -- confirm */
17574 +enum {
17575 +       AuWbrCopyup_TDP,        /* top down parent */
17576 +       AuWbrCopyup_BUP,        /* bottom up parent */
17577 +       AuWbrCopyup_BU,         /* bottom up */
17578 +
17579 +       AuWbrCopyup_Def = AuWbrCopyup_TDP       /* the default */
17580 +};
17581 +
17582 +/* ---------------------------------------------------------------------- */
17583 +/* per-option arguments, each one is a member of the union in struct au_opt */
17584 +struct au_opt_add {
17585 +       aufs_bindex_t   bindex;
17586 +       char            *pathname;
17587 +       int             perm;
17588 +       struct path     path;
17589 +};
17590 +
17591 +struct au_opt_del {
17592 +       char            *pathname;
17593 +       struct path     h_path;
17594 +};
17595 +
17596 +struct au_opt_mod {
17597 +       char            *path;
17598 +       int             perm;
17599 +       struct dentry   *h_root;
17600 +};
17601 +
17602 +struct au_opt_xino {
17603 +       char            *path;
17604 +       struct file     *file;
17605 +};
17606 +
17607 +struct au_opt_xino_itrunc {
17608 +       aufs_bindex_t   bindex;
17609 +};
17610 +
17611 +struct au_opt_wbr_create {
17612 +       int                     wbr_create;
17613 +       int                     mfs_second;
17614 +       unsigned long long      mfsrr_watermark;
17615 +};
17616 +
17617 +struct au_opt {
17618 +       int type; /* Opt_xxx; an Opt_tail entry ends the array */
17619 +       union { /* NOTE(review): presumably the member is chosen by type */
17620 +               struct au_opt_xino      xino;
17621 +               struct au_opt_xino_itrunc xino_itrunc;
17622 +               struct au_opt_add       add;
17623 +               struct au_opt_del       del;
17624 +               struct au_opt_mod       mod;
17625 +               int                     dirwh;
17626 +               int                     rdcache;
17627 +               unsigned int            rdblk;
17628 +               unsigned int            rdhash;
17629 +               int                     udba;
17630 +               struct au_opt_wbr_create wbr_create;
17631 +               int                     wbr_copyup;
17632 +       };
17633 +};
17634 +
17635 +/* opts flags */
17636 +#define AuOpts_REMOUNT         1
17637 +#define AuOpts_REFRESH_DIR     (1 << 1)
17638 +#define AuOpts_REFRESH_NONDIR  (1 << 2)
17639 +#define AuOpts_TRUNC_XIB       (1 << 3)
17640 +#define au_ftest_opts(flags, name)     ((flags) & AuOpts_##name)
17641 +#define au_fset_opts(flags, name)      { (flags) |= AuOpts_##name; }
17642 +#define au_fclr_opts(flags, name)      { (flags) &= ~AuOpts_##name; }
17643 +
17644 +struct au_opts {
17645 +       struct au_opt   *opt; /* array, ended by an Opt_tail entry */
17646 +       int             max_opt;
17647 +
17648 +       unsigned int    given_udba; /* non-zero makes remount set REFRESH_DIR */
17649 +       unsigned int    flags; /* AuOpts_xxx */
17650 +       unsigned long   sb_flags; /* handed to au_opts_verify() */
17651 +};
17652 +
17653 +/* ---------------------------------------------------------------------- */
17654 +
17655 +const char *au_optstr_br_perm(int brperm);
17656 +const char *au_optstr_udba(int udba);
17657 +const char *au_optstr_wbr_copyup(int wbr_copyup);
17658 +const char *au_optstr_wbr_create(int wbr_create);
17659 +
17660 +void au_opts_free(struct au_opts *opts);
17661 +int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
17662 +int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
17663 +                  unsigned int pending);
17664 +int au_opts_mount(struct super_block *sb, struct au_opts *opts);
17665 +int au_opts_remount(struct super_block *sb, struct au_opts *opts);
17666 +
17667 +unsigned int au_opt_udba(struct super_block *sb);
17668 +
17669 +/* ---------------------------------------------------------------------- */
17670 +
17671 +#endif /* __KERNEL__ */
17672 +#endif /* __AUFS_OPTS_H__ */
17673 diff -uprN -x .git linux-2.6.31/fs/aufs/plink.c aufs2-2.6.git/fs/aufs/plink.c
17674 --- linux-2.6.31/fs/aufs/plink.c        1970-01-01 00:00:00.000000000 +0000
17675 +++ aufs2-2.6.git/fs/aufs/plink.c       2009-09-21 21:49:23.408274204 +0000
17676 @@ -0,0 +1,396 @@
17677 +/*
17678 + * Copyright (C) 2005-2009 Junjiro R. Okajima
17679 + *
17680 + * This program, aufs is free software; you can redistribute it and/or modify
17681 + * it under the terms of the GNU General Public License as published by
17682 + * the Free Software Foundation; either version 2 of the License, or
17683 + * (at your option) any later version.
17684 + *
17685 + * This program is distributed in the hope that it will be useful,
17686 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
17687 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17688 + * GNU General Public License for more details.
17689 + *
17690 + * You should have received a copy of the GNU General Public License
17691 + * along with this program; if not, write to the Free Software
17692 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17693 + */
17694 +
17695 +/*
17696 + * pseudo-link
17697 + */
17698 +
17699 +#include "aufs.h"
17700 +
17701 +/*
17702 + * while a user process is maintaining the pseudo-links,
17703 + * make those who add a new plink or manipulate branches wait.
17704 + */
17705 +void au_plink_block_maintain(struct super_block *sb)
17706 +{
17707 +       struct au_sbinfo *sbi = au_sbi(sb);
17708 +
17709 +       SiMustAnyLock(sb);
17710 +
17711 +       /* gave up wake_up_bit() */
17712 +       wait_event(sbi->si_plink_wq, !au_ftest_si(sbi, MAINTAIN_PLINK));
17713 +}
17714 +
17715 +/* ---------------------------------------------------------------------- */
17716 +
17717 +struct pseudo_link {
17718 +       struct list_head list; /* on au_sbinfo.si_plink.head */
17719 +       struct inode *inode; /* aufs inode; au_igrab-ed on append, iput on free */
17720 +};
17721 +
17722 +#ifdef CONFIG_AUFS_DEBUG
17723 +void au_plink_list(struct super_block *sb)
17724 +{
17725 +       struct au_sbinfo *sbinfo;
17726 +       struct list_head *plink_list;
17727 +       struct pseudo_link *plink;
17728 +
17729 +       SiMustAnyLock(sb);
17730 +       /* debugging aid: AuDbg() the inode number of every plink */
17731 +       sbinfo = au_sbi(sb);
17732 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17733 +
17734 +       plink_list = &sbinfo->si_plink.head;
17735 +       spin_lock(&sbinfo->si_plink.spin);
17736 +       list_for_each_entry(plink, plink_list, list)
17737 +               AuDbg("%lu\n", plink->inode->i_ino);
17738 +       spin_unlock(&sbinfo->si_plink.spin);
17739 +}
17740 +#endif
17741 +
17742 +/* is @inode pseudo-linked? returns 1 if it is on the plink list, else 0 */
17743 +int au_plink_test(struct inode *inode)
17744 +{
17745 +       int found;
17746 +       struct au_sbinfo *sbinfo;
17747 +       struct list_head *plink_list;
17748 +       struct pseudo_link *plink;
17749 +
17750 +       sbinfo = au_sbi(inode->i_sb);
17751 +       AuRwMustAnyLock(&sbinfo->si_rwsem);
17752 +       AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
17753 +
17754 +       found = 0;
17755 +       plink_list = &sbinfo->si_plink.head;
17756 +       spin_lock(&sbinfo->si_plink.spin);
17757 +       list_for_each_entry(plink, plink_list, list)
17758 +               if (plink->inode == inode) {
17759 +                       found = 1;
17760 +                       break;
17761 +               }
17762 +       spin_unlock(&sbinfo->si_plink.spin);
17763 +       return found;
17764 +}
17765 +
17766 +/* ---------------------------------------------------------------------- */
17767 +
17768 +/*
17769 + * generate a name for plink, "<inode->i_ino>.<h_inode->i_ino>".
17770 + * the file will be stored under AUFS_WH_PLINKDIR.
17771 + */
17772 +/* 20 is the max number of decimal digits of a 64bit ulong */
17773 +#define PLINK_NAME_LEN ((20 + 1) * 2)
17774 +
17775 +static int plink_name(char *name, int len, struct inode *inode,
17776 +                     aufs_bindex_t bindex)
17777 +{
17778 +       int rlen;
17779 +       struct inode *h_inode;
17780 +
17781 +       h_inode = au_h_iptr(inode, bindex);
17782 +       rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
17783 +       return rlen; /* what snprintf() returned */
17784 +}
17785 +
17786 +/* lookup the plink-ed @inode in the plink dir (wbr_plink) of branch @bindex */
17787 +struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
17788 +{
17789 +       struct dentry *h_dentry, *h_parent;
17790 +       struct au_branch *br;
17791 +       struct inode *h_dir;
17792 +       char a[PLINK_NAME_LEN];
17793 +       struct qstr tgtname = {
17794 +               .name   = a
17795 +       };
17796 +
17797 +       br = au_sbr(inode->i_sb, bindex);
17798 +       h_parent = br->br_wbr->wbr_plink;
17799 +       h_dir = h_parent->d_inode;
17800 +       tgtname.len = plink_name(a, sizeof(a), inode, bindex);
17801 +
17802 +       /* always superio. */
17803 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
17804 +       h_dentry = au_sio_lkup_one(&tgtname, h_parent, br);
17805 +       mutex_unlock(&h_dir->i_mutex);
17806 +       return h_dentry; /* what au_sio_lkup_one() returned */
17807 +}
17808 +
17809 +/* create a pseudo-link @tgt under @h_parent, replacing a stale entry */
17810 +static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
17811 +                     struct dentry *h_dentry, struct au_branch *br)
17812 +{
17813 +       int err;
17814 +       struct path h_path = {
17815 +               .mnt = br->br_mnt
17816 +       };
17817 +       struct inode *h_dir;
17818 +
17819 +       h_dir = h_parent->d_inode;
17820 + again:
17821 +       h_path.dentry = au_lkup_one(tgt, h_parent, br, /*nd*/NULL);
17822 +       err = PTR_ERR(h_path.dentry);
17823 +       if (IS_ERR(h_path.dentry))
17824 +               goto out;
17825 +
17826 +       err = 0;
17827 +       /* wh.plink dir is not monitored */
17828 +       if (h_path.dentry->d_inode
17829 +           && h_path.dentry->d_inode != h_dentry->d_inode) {
17830 +               err = vfsub_unlink(h_dir, &h_path, /*force*/0);
17831 +               dput(h_path.dentry);
17832 +               h_path.dentry = NULL; /* NOTE(review): dput() below gets NULL if unlink failed */
17833 +               if (!err)
17834 +                       goto again; /* look the name up afresh */
17835 +       }
17836 +       if (!err && !h_path.dentry->d_inode)
17837 +               err = vfsub_link(h_dentry, h_dir, &h_path);
17838 +       dput(h_path.dentry);
17839 +
17840 + out:
17841 +       return err;
17842 +}
17843 +
17844 +struct do_whplink_args {
17845 +       int *errp; /* out: what do_whplink() returned */
17846 +       struct qstr *tgt;
17847 +       struct dentry *h_parent;
17848 +       struct dentry *h_dentry;
17849 +       struct au_branch *br;
17850 +};
17851 +/* void (*)(void *) wrapper of do_whplink(), handed to au_wkq_wait() */
17852 +static void call_do_whplink(void *args)
17853 +{
17854 +       struct do_whplink_args *a = args;
17855 +       *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
17856 +}
17857 +
17858 +static int whplink(struct dentry *h_dentry, struct inode *inode,
17859 +                  aufs_bindex_t bindex, struct au_branch *br)
17860 +{ /* runs do_whplink() with the i_mutex of wbr_plink's inode held */
17861 +       int err, wkq_err;
17862 +       struct au_wbr *wbr;
17863 +       struct dentry *h_parent;
17864 +       struct inode *h_dir;
17865 +       char a[PLINK_NAME_LEN];
17866 +       struct qstr tgtname = {
17867 +               .name = a
17868 +       };
17869 +
17870 +       wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
17871 +       h_parent = wbr->wbr_plink;
17872 +       h_dir = h_parent->d_inode;
17873 +       tgtname.len = plink_name(a, sizeof(a), inode, bindex);
17874 +
17875 +       /* always superio: via au_wkq_wait() unless au_test_wkq(current) is true */
17876 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
17877 +       if (!au_test_wkq(current)) {
17878 +               struct do_whplink_args args = {
17879 +                       .errp           = &err,
17880 +                       .tgt            = &tgtname,
17881 +                       .h_parent       = h_parent,
17882 +                       .h_dentry       = h_dentry,
17883 +                       .br             = br
17884 +               };
17885 +               wkq_err = au_wkq_wait(call_do_whplink, &args);
17886 +               if (unlikely(wkq_err))
17887 +                       err = wkq_err; /* au_wkq_wait() itself failed */
17888 +       } else
17889 +               err = do_whplink(&tgtname, h_parent, h_dentry, br);
17890 +       mutex_unlock(&h_dir->i_mutex);
17891 +
17892 +       return err;
17893 +}
17894 +
17895 +/* free a single plink; @do_del also unlinks it from the plink list */
17896 +static void do_put_plink(struct pseudo_link *plink, int do_del)
17897 +{
17898 +       iput(plink->inode);
17899 +       if (do_del)
17900 +               list_del(&plink->list);
17901 +       kfree(plink);
17902 +}
17903 +
17904 +/*
17905 + * create a new pseudo-link for @h_dentry on @bindex.
17906 + * the linked inode is held in aufs @inode.
17907 + * nothing is done when @inode is already on the plink list. a failure is
17908 + * only logged, after the new list entry has been taken back.
17909 + */
17910 +void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
17911 +                    struct dentry *h_dentry)
17912 +{
17913 +       struct super_block *sb;
17914 +       struct au_sbinfo *sbinfo;
17915 +       struct list_head *plink_list;
17916 +       struct pseudo_link *plink;
17917 +       int err, cnt;
17918 +
17919 +       sb = inode->i_sb;
17920 +       sbinfo = au_sbi(sb);
17921 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17922 +
17923 +       err = 0;
17924 +       cnt = 0;
17925 +       plink_list = &sbinfo->si_plink.head;
17926 +       spin_lock(&sbinfo->si_plink.spin);
17927 +       list_for_each_entry(plink, plink_list, list) {
17928 +               cnt++;
17929 +               if (plink->inode == inode) {
17930 +                       spin_unlock(&sbinfo->si_plink.spin);
17931 +                       return;
17932 +               }
17933 +       }
17934 +
17935 +       /* GFP_ATOMIC since the spinlock is held */
17936 +       plink = kmalloc(sizeof(*plink), GFP_ATOMIC);
17937 +       if (plink) {
17938 +               plink->inode = au_igrab(inode);
17939 +               list_add(&plink->list, plink_list);
17940 +               cnt++;
17941 +       } else
17942 +               err = -ENOMEM;
17943 +       spin_unlock(&sbinfo->si_plink.spin);
17944 +
17945 +       if (!err) {
17946 +               au_plink_block_maintain(sb);
17947 +               err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
17948 +       }
17949 +
17950 +       if (unlikely(cnt > AUFS_PLINK_WARN))
17951 +               AuWarn1("unexpectedly many pseudo links, %d\n", cnt);
17952 +       if (unlikely(err)) {
17953 +               AuWarn("err %d, damaged pseudo link.\n", err);
17954 +               if (plink) {
17955 +                       /* unlink under the spinlock, but iput() outside of it */
17956 +                       spin_lock(&sbinfo->si_plink.spin);
17957 +                       list_del(&plink->list);
17958 +                       spin_unlock(&sbinfo->si_plink.spin);
17959 +                       do_put_plink(plink, /*do_del*/0);
17960 +               }
17961 +       }
17962 +}
17963 +
17964 +/* free all plinks and leave the plink list empty */
17965 +void au_plink_put(struct super_block *sb)
17966 +{
17967 +       struct au_sbinfo *sbinfo;
17968 +       struct list_head *plink_list;
17969 +       struct pseudo_link *plink, *tmp;
17970 +
17971 +       SiMustWriteLock(sb);
17972 +
17973 +       sbinfo = au_sbi(sb);
17974 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17975 +
17976 +       plink_list = &sbinfo->si_plink.head;
17977 +       /* no spin_lock since sbinfo is write-locked */
17978 +       list_for_each_entry_safe(plink, tmp, plink_list, list)
17979 +               do_put_plink(plink, 0); /* no list_del(), the head is reset below */
17980 +       INIT_LIST_HEAD(plink_list);
17981 +}
17982 +
17983 +/* free the plinks on a branch specified by @br_id */
17984 +void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
17985 +{
17986 +       struct au_sbinfo *sbinfo;
17987 +       struct list_head *plink_list;
17988 +       struct pseudo_link *plink, *tmp;
17989 +       struct inode *inode;
17990 +       aufs_bindex_t bstart, bend, bindex;
17991 +       unsigned char do_put;
17992 +
17993 +       SiMustWriteLock(sb);
17994 +
17995 +       sbinfo = au_sbi(sb);
17996 +       AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
17997 +
17998 +       plink_list = &sbinfo->si_plink.head;
17999 +       /* no spin_lock since sbinfo is write-locked */
18000 +       list_for_each_entry_safe(plink, tmp, plink_list, list) {
18001 +               do_put = 0;
18002 +               inode = au_igrab(plink->inode); /* own ref, dropped by iput() below */
18003 +               ii_write_lock_child(inode);
18004 +               bstart = au_ibstart(inode);
18005 +               bend = au_ibend(inode);
18006 +               if (bstart >= 0) {
18007 +                       for (bindex = bstart; bindex <= bend; bindex++) {
18008 +                               if (!au_h_iptr(inode, bindex)
18009 +                                   || au_ii_br_id(inode, bindex) != br_id)
18010 +                                       continue;
18011 +                               au_set_h_iptr(inode, bindex, NULL, 0);
18012 +                               do_put = 1;
18013 +                               break;
18014 +                       }
18015 +               } else
18016 +                       do_put_plink(plink, 1); /* bstart < 0: drop the plink */
18017 +
18018 +               if (do_put) { /* drop the plink unless another h_iptr remains */
18019 +                       for (bindex = bstart; bindex <= bend; bindex++)
18020 +                               if (au_h_iptr(inode, bindex)) {
18021 +                                       do_put = 0;
18022 +                                       break;
18023 +                               }
18024 +                       if (do_put)
18025 +                               do_put_plink(plink, 1);
18026 +               }
18027 +               ii_write_unlock(inode);
18028 +               iput(inode);
18029 +       }
18030 +}
18031 +
18032 +/* ---------------------------------------------------------------------- */
18033 +
18034 +long au_plink_ioctl(struct file *file, unsigned int cmd)
18035 +{
18036 +       long err;
18037 +       struct super_block *sb;
18038 +       struct au_sbinfo *sbinfo;
18039 +
18040 +       err = -EACCES; /* CAP_SYS_ADMIN is required for every cmd */
18041 +       if (!capable(CAP_SYS_ADMIN))
18042 +               goto out;
18043 +
18044 +       err = 0;
18045 +       sb = file->f_dentry->d_sb;
18046 +       sbinfo = au_sbi(sb);
18047 +       switch (cmd) {
18048 +       case AUFS_CTL_PLINK_MAINT:
18049 +               /*
18050 +                * pseudo-link maintenance mode,
18051 +                * cleared by aufs_release_dir()
18052 +                */
18053 +               si_write_lock(sb);
18054 +               if (!au_ftest_si(sbinfo, MAINTAIN_PLINK)) {
18055 +                       au_fset_si(sbinfo, MAINTAIN_PLINK);
18056 +                       au_fi(file)->fi_maintain_plink = 1;
18057 +               } else
18058 +                       err = -EBUSY; /* MAINTAIN_PLINK is already set */
18059 +               si_write_unlock(sb);
18060 +               break;
18061 +       case AUFS_CTL_PLINK_CLEAN:
18062 +               aufs_write_lock(sb->s_root);
18063 +               if (au_opt_test(sbinfo->si_mntflags, PLINK))
18064 +                       au_plink_put(sb);
18065 +               aufs_write_unlock(sb->s_root);
18066 +               break;
18067 +       default:
18068 +               err = -EINVAL; /* unknown cmd */
18069 +       }
18070 + out:
18071 +       return err;
18072 +}
18073 diff -uprN -x .git linux-2.6.31/fs/aufs/poll.c aufs2-2.6.git/fs/aufs/poll.c
18074 --- linux-2.6.31/fs/aufs/poll.c 1970-01-01 00:00:00.000000000 +0000
18075 +++ aufs2-2.6.git/fs/aufs/poll.c        2009-09-21 21:49:23.408274204 +0000
18076 @@ -0,0 +1,56 @@
18077 +/*
18078 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18079 + *
18080 + * This program, aufs is free software; you can redistribute it and/or modify
18081 + * it under the terms of the GNU General Public License as published by
18082 + * the Free Software Foundation; either version 2 of the License, or
18083 + * (at your option) any later version.
18084 + *
18085 + * This program is distributed in the hope that it will be useful,
18086 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18087 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18088 + * GNU General Public License for more details.
18089 + *
18090 + * You should have received a copy of the GNU General Public License
18091 + * along with this program; if not, write to the Free Software
18092 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18093 + */
18094 +
18095 +/*
18096 + * poll operation
18097 + * There is only one filesystem which implements ->poll operation, currently.
18098 + */
18099 +
18100 +#include "aufs.h"
18101 +
18102 +unsigned int aufs_poll(struct file *file, poll_table *wait)
18103 +{
18104 +       unsigned int mask;
18105 +       int err;
18106 +       struct file *h_file;
18107 +       struct dentry *dentry;
18108 +       struct super_block *sb;
18109 +
18110 +       /* We should pretend an error happened: POLLERR if the reval below fails. */
18111 +       mask = POLLERR /* | POLLIN | POLLOUT */;
18112 +       dentry = file->f_dentry;
18113 +       sb = dentry->d_sb;
18114 +       si_read_lock(sb, AuLock_FLUSH);
18115 +       err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
18116 +       if (unlikely(err))
18117 +               goto out;
18118 +
18119 +       /* it is not an error if h_file has no operation, DEFAULT_POLLMASK then */
18120 +       mask = DEFAULT_POLLMASK;
18121 +       h_file = au_h_fptr(file, au_fbstart(file));
18122 +       if (h_file->f_op && h_file->f_op->poll)
18123 +               mask = h_file->f_op->poll(h_file, wait);
18124 +
18125 +       di_read_unlock(dentry, AuLock_IR);
18126 +       fi_read_unlock(file);
18127 +
18128 + out:
18129 +       si_read_unlock(sb);
18130 +       AuTraceErr((int)mask);
18131 +       return mask;
18132 +}
18133 diff -uprN -x .git linux-2.6.31/fs/aufs/rdu.c aufs2-2.6.git/fs/aufs/rdu.c
18134 --- linux-2.6.31/fs/aufs/rdu.c  1970-01-01 00:00:00.000000000 +0000
18135 +++ aufs2-2.6.git/fs/aufs/rdu.c 2009-09-21 21:49:23.408274204 +0000
18136 @@ -0,0 +1,331 @@
18137 +/*
18138 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18139 + *
18140 + * This program, aufs is free software; you can redistribute it and/or modify
18141 + * it under the terms of the GNU General Public License as published by
18142 + * the Free Software Foundation; either version 2 of the License, or
18143 + * (at your option) any later version.
18144 + *
18145 + * This program is distributed in the hope that it will be useful,
18146 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18147 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18148 + * GNU General Public License for more details.
18149 + *
18150 + * You should have received a copy of the GNU General Public License
18151 + * along with this program; if not, write to the Free Software
18152 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18153 + */
18154 +
18155 +/*
18156 + * readdir in userspace.
18157 + */
18158 +
18159 +#include <linux/security.h>
18160 +#include <linux/uaccess.h>
18161 +#include <linux/aufs_type.h>
18162 +#include "aufs.h"
18163 +
18164 +/* bits for struct aufs_rdu.flags */
18165 +#define        AuRdu_CALLED    1
18166 +#define        AuRdu_CONT      (1 << 1)
18167 +#define        AuRdu_FULL      (1 << 2)
18168 +#define au_ftest_rdu(flags, name)      ((flags) & AuRdu_##name)
18169 +#define au_fset_rdu(flags, name)       { (flags) |= AuRdu_##name; }
18170 +#define au_fclr_rdu(flags, name)       { (flags) &= ~AuRdu_##name; }
18171 +
18172 +struct au_rdu_arg {
18173 +       struct aufs_rdu                 *rdu;
18174 +       union au_rdu_ent_ul             ent; /* where the next entry goes */
18175 +       unsigned long                   end; /* rdu->ent.ul + rdu->sz */
18176 +
18177 +       struct super_block              *sb;
18178 +       int                             err;
18179 +};
18180 +
18181 +static int au_rdu_fill(void *__arg, const char *name, int nlen,
18182 +                      loff_t offset, u64 h_ino, unsigned int d_type)
18183 +{ /* handed to vfsub_readdir(): copies one entry to the user buffer */
18184 +       int err, len;
18185 +       struct au_rdu_arg *arg = __arg;
18186 +       struct aufs_rdu *rdu = arg->rdu;
18187 +       struct au_rdu_ent ent;
18188 +
18189 +       err = 0;
18190 +       arg->err = 0;
18191 +       au_fset_rdu(rdu->cookie.flags, CALLED); /* tested by au_rdu_do() */
18192 +       len = au_rdu_len(nlen);
18193 +       if (arg->ent.ul + len  < arg->end) {
18194 +               ent.ino = h_ino;
18195 +               ent.bindex = rdu->cookie.bindex;
18196 +               ent.type = d_type;
18197 +               ent.nlen = nlen;
18198 +
18199 +               err = -EFAULT;
18200 +               if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
18201 +                       goto out;
18202 +               if (copy_to_user(arg->ent.e->name, name, nlen))
18203 +                       goto out;
18204 +               /* the terminating NUL */
18205 +               if (__put_user(0, arg->ent.e->name + nlen))
18206 +                       goto out;
18207 +               err = 0;
18208 +               /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
18209 +               arg->ent.ul += len;
18210 +               rdu->rent++;
18211 +       } else {
18212 +               err = -EFAULT; /* the entry does not fit before arg->end */
18213 +               au_fset_rdu(rdu->cookie.flags, FULL);
18214 +               rdu->full = 1;
18215 +               rdu->tail = arg->ent;
18216 +       }
18217 +
18218 + out:
18219 +       /* AuTraceErr(err); */
18220 +       return err;
18221 +}
18222 +
18223 +static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
18224 +{
18225 +       int err;
18226 +       loff_t offset;
18227 +       struct au_rdu_cookie *cookie = &arg->rdu->cookie;
18228 +
18229 +       offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
18230 +       err = offset;
18231 +       if (unlikely(offset != cookie->h_pos))
18232 +               goto out;
18233 +       /* repeat readdir while au_rdu_fill() gets called and there is room */
18234 +       err = 0;
18235 +       do {
18236 +               arg->err = 0;
18237 +               au_fclr_rdu(cookie->flags, CALLED);
18238 +               /* smp_mb(); */
18239 +               err = vfsub_readdir(h_file, au_rdu_fill, arg);
18240 +               if (err >= 0)
18241 +                       err = arg->err;
18242 +       } while (!err
18243 +                && au_ftest_rdu(cookie->flags, CALLED)
18244 +                && !au_ftest_rdu(cookie->flags, FULL));
18245 +       cookie->h_pos = h_file->f_pos; /* the next call seeks to here */
18246 +
18247 + out:
18248 +       AuTraceErr(err);
18249 +       return err;
18250 +}
18251 +
18252 +static int au_rdu(struct file *file, struct aufs_rdu *rdu)
18253 +{
18254 +       int err;
18255 +       aufs_bindex_t bend;
18256 +       struct au_rdu_arg arg;
18257 +       struct dentry *dentry;
18258 +       struct inode *inode;
18259 +       struct file *h_file;
18260 +       struct au_rdu_cookie *cookie = &rdu->cookie;
18261 +
18262 +       err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
18263 +       if (unlikely(err)) {
18264 +               err = -EFAULT;
18265 +               AuTraceErr(err);
18266 +               goto out;
18267 +       }
18268 +       rdu->rent = 0;
18269 +       rdu->tail = rdu->ent;
18270 +       rdu->full = 0;
18271 +       arg.rdu = rdu;
18272 +       arg.ent = rdu->ent;
18273 +       arg.end = arg.ent.ul;
18274 +       arg.end += rdu->sz; /* the end of the buffer checked above */
18275 +
18276 +       err = -ENOTDIR;
18277 +       if (unlikely(!file->f_op || !file->f_op->readdir))
18278 +               goto out;
18279 +
18280 +       err = security_file_permission(file, MAY_READ);
18281 +       AuTraceErr(err);
18282 +       if (unlikely(err))
18283 +               goto out;
18284 +
18285 +       dentry = file->f_dentry;
18286 +       inode = dentry->d_inode;
18287 +#if 1
18288 +       mutex_lock(&inode->i_mutex);
18289 +#else
18290 +       err = mutex_lock_killable(&inode->i_mutex);
18291 +       AuTraceErr(err);
18292 +       if (unlikely(err))
18293 +               goto out;
18294 +#endif
18295 +       err = -ENOENT;
18296 +       if (unlikely(IS_DEADDIR(inode)))
18297 +               goto out_mtx;
18298 +
18299 +       arg.sb = inode->i_sb;
18300 +       si_read_lock(arg.sb, AuLock_FLUSH);
18301 +       fi_read_lock(file);
18302 +       /* a continued (CONT) call is refused once au_figen(file) has changed */
18303 +       err = -EAGAIN;
18304 +       if (unlikely(au_ftest_rdu(cookie->flags, CONT)
18305 +                    && cookie->generation != au_figen(file)))
18306 +               goto out_unlock;
18307 +
18308 +       err = 0;
18309 +       if (!rdu->blk) {
18310 +               rdu->blk = au_sbi(arg.sb)->si_rdblk;
18311 +               if (!rdu->blk)
18312 +                       rdu->blk = au_dir_size(file, /*dentry*/NULL);
18313 +       }
18314 +       bend = au_fbstart(file); /* bend holds the start index for a moment */
18315 +       if (cookie->bindex < bend)
18316 +               cookie->bindex = bend;
18317 +       bend = au_fbend(file);
18318 +       /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
18319 +       for (; !err && cookie->bindex <= bend;
18320 +            cookie->bindex++, cookie->h_pos = 0) {
18321 +               h_file = au_h_fptr(file, cookie->bindex);
18322 +               if (!h_file)
18323 +                       continue;
18324 +
18325 +               au_fclr_rdu(cookie->flags, FULL);
18326 +               err = au_rdu_do(h_file, &arg);
18327 +               AuTraceErr(err);
18328 +               if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
18329 +                       break;
18330 +       }
18331 +       AuDbg("rent %llu\n", rdu->rent);
18332 +       /* the first successful call sets CONT and records the generation */
18333 +       if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
18334 +               rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
18335 +               au_fset_rdu(cookie->flags, CONT);
18336 +               cookie->generation = au_figen(file);
18337 +       }
18338 +
18339 +       ii_read_lock_child(inode);
18340 +       fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
18341 +       ii_read_unlock(inode);
18342 +
18343 + out_unlock:
18344 +       fi_read_unlock(file);
18345 +       si_read_unlock(arg.sb);
18346 + out_mtx:
18347 +       mutex_unlock(&inode->i_mutex);
18348 + out:
18349 +       AuTraceErr(err);
18350 +       return err;
18351 +}
18352 +/* rewrite ->ino of rdu->nent user entries with au_ino() or au_wh_ino() */
18353 +static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
18354 +{
18355 +       int err;
18356 +       ino_t ino;
18357 +       unsigned long long nent;
18358 +       union au_rdu_ent_ul *u;
18359 +       struct au_rdu_ent ent;
18360 +       struct super_block *sb;
18361 +
18362 +       err = 0;
18363 +       nent = rdu->nent;
18364 +       u = &rdu->ent;
18365 +       sb = file->f_dentry->d_sb;
18366 +       si_read_lock(sb, AuLock_FLUSH);
18367 +       while (nent-- > 0) {
18368 +               err = !access_ok(VERIFY_WRITE, u->e, sizeof(ent));
18369 +               if (unlikely(err)) {
18370 +                       err = -EFAULT;
18371 +                       AuTraceErr(err);
18372 +                       break;
18373 +               }
18374 +
18375 +               err = copy_from_user(&ent, u->e, sizeof(ent));
18376 +               if (unlikely(err)) {
18377 +                       err = -EFAULT;
18378 +                       AuTraceErr(err);
18379 +                       break;
18380 +               }
18381 +
18382 +               /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
18383 +               if (!ent.wh)
18384 +                       err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
18385 +               else
18386 +                       err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
18387 +                                       &ino);
18388 +               if (unlikely(err)) {
18389 +                       AuTraceErr(err);
18390 +                       break;
18391 +               }
18392 +               /* only the ino member is written back to the user entry */
18393 +               err = __put_user(ino, &u->e->ino);
18394 +               if (unlikely(err)) {
18395 +                       err = -EFAULT;
18396 +                       AuTraceErr(err);
18397 +                       break;
18398 +               }
18399 +               u->ul += au_rdu_len(ent.nlen);
18400 +       }
18401 +       si_read_unlock(sb);
18402 +
18403 +       return err;
18404 +}
18405 +
18406 +/* ---------------------------------------------------------------------- */
18407 +/* 0 when rdu->verify[] equals the struct and pointer sizes, else -EINVAL */
18408 +static int au_rdu_verify(struct aufs_rdu *rdu)
18409 +{
18410 +       AuDbg("rdu{%llu, %p, (%u, %u) | %u | %llu, %u, %u | "
18411 +             "%llu, b%d, 0x%x, g%u}\n",
18412 +             rdu->sz, rdu->ent.e, rdu->verify[0], rdu->verify[1],
18413 +             rdu->blk,
18414 +             rdu->rent, rdu->shwh, rdu->full,
18415 +             rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
18416 +             rdu->cookie.generation);
18417 +
18418 +       if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)
18419 +           && rdu->verify[AufsCtlRduV_SZ_PTR] == sizeof(rdu))
18420 +               return 0;
18421 +
18422 +       AuDbg("%u:%u, %u:%u\n",
18423 +             rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu),
18424 +             rdu->verify[AufsCtlRduV_SZ_PTR], (unsigned int)sizeof(rdu));
18425 +       return -EINVAL;
18426 +}
18427 +/* copy struct aufs_rdu from user, verify it, then dispatch on @cmd */
18428 +long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
18429 +{
18430 +       long err, e;
18431 +       struct aufs_rdu rdu;
18432 +       void __user *p = (void __user *)arg;
18433 +
18434 +       err = copy_from_user(&rdu, p, sizeof(rdu));
18435 +       if (unlikely(err)) {
18436 +               err = -EFAULT;
18437 +               AuTraceErr(err);
18438 +               goto out;
18439 +       }
18440 +       err = au_rdu_verify(&rdu);
18441 +       if (unlikely(err))
18442 +               goto out;
18443 +
18444 +       switch (cmd) {
18445 +       case AUFS_CTL_RDU:
18446 +               err = au_rdu(file, &rdu);
18447 +               if (unlikely(err))
18448 +                       break;
18449 +               /* the updated rdu is copied back only when au_rdu() succeeds */
18450 +               e = copy_to_user(p, &rdu, sizeof(rdu));
18451 +               if (unlikely(e)) {
18452 +                       err = -EFAULT;
18453 +                       AuTraceErr(err);
18454 +               }
18455 +               break;
18456 +       case AUFS_CTL_RDU_INO:
18457 +               err = au_rdu_ino(file, &rdu);
18458 +               break;
18459 +
18460 +       default:
18461 +               err = -EINVAL;
18462 +       }
18463 +
18464 + out:
18465 +       AuTraceErr(err);
18466 +       return err;
18467 +}
18468 diff -uprN -x .git linux-2.6.31/fs/aufs/rwsem.h aufs2-2.6.git/fs/aufs/rwsem.h
18469 --- linux-2.6.31/fs/aufs/rwsem.h        1970-01-01 00:00:00.000000000 +0000
18470 +++ aufs2-2.6.git/fs/aufs/rwsem.h       2009-09-21 21:49:23.408274204 +0000
18471 @@ -0,0 +1,186 @@
18472 +/*
18473 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18474 + *
18475 + * This program, aufs is free software; you can redistribute it and/or modify
18476 + * it under the terms of the GNU General Public License as published by
18477 + * the Free Software Foundation; either version 2 of the License, or
18478 + * (at your option) any later version.
18479 + *
18480 + * This program is distributed in the hope that it will be useful,
18481 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18482 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18483 + * GNU General Public License for more details.
18484 + *
18485 + * You should have received a copy of the GNU General Public License
18486 + * along with this program; if not, write to the Free Software
18487 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18488 + */
18489 +
18490 +/*
18491 + * simple read-write semaphore wrappers
18492 + */
18493 +
18494 +#ifndef __AUFS_RWSEM_H__
18495 +#define __AUFS_RWSEM_H__
18496 +
18497 +#ifdef __KERNEL__
18498 +
18499 +#include <linux/rwsem.h>
18500 +/* rw_semaphore wrapper which carries debug-only lock counters */
18501 +struct au_rwsem {
18502 +       struct rw_semaphore     rwsem;
18503 +#ifdef CONFIG_AUFS_DEBUG
18504 +       /* just for debugging, not almighty counter */
18505 +       atomic_t                rcnt, wcnt;
18506 +#endif
18507 +};
18508 +/* helpers for rcnt/wcnt; they expand to nothing without CONFIG_AUFS_DEBUG */
18509 +#ifdef CONFIG_AUFS_DEBUG
18510 +#define AuDbgCntInit(rw) do { \
18511 +       atomic_set(&(rw)->rcnt, 0); \
18512 +       atomic_set(&(rw)->wcnt, 0); \
18513 +       smp_mb(); /* atomic set */ \
18514 +} while (0)
18515 +
18516 +#define AuDbgRcntInc(rw)       atomic_inc_return(&(rw)->rcnt)
18517 +#define AuDbgRcntDec(rw)       WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
18518 +#define AuDbgWcntInc(rw)       WARN_ON(atomic_inc_return(&(rw)->wcnt) > 1)
18519 +#define AuDbgWcntDec(rw)       WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
18520 +#else
18521 +#define AuDbgCntInit(rw)       do {} while (0)
18522 +#define AuDbgRcntInc(rw)       do {} while (0)
18523 +#define AuDbgRcntDec(rw)       do {} while (0)
18524 +#define AuDbgWcntInc(rw)       do {} while (0)
18525 +#define AuDbgWcntDec(rw)       do {} while (0)
18526 +#endif /* CONFIG_AUFS_DEBUG */
18527 +
18528 +/* kept as macros instead of inline functions in order to debug easier */
18529 +#define AuRwMustNoWaiters(rw)  AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
18530 +/* rwsem_is_locked() is unusable */
18531 +#define AuRwMustReadLock(rw)   AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
18532 +#define AuRwMustWriteLock(rw)  AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
18533 +#define AuRwMustAnyLock(rw)    AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
18534 +                                       && atomic_read(&(rw)->wcnt) <= 0)
18535 +#define AuRwDestroy(rw)                AuDebugOn(atomic_read(&(rw)->rcnt) \
18536 +                                       || atomic_read(&(rw)->wcnt))
18537 +/* reset the debug counters and initialize the semaphore, unlocked */
18538 +static inline void au_rw_init(struct au_rwsem *rw)
18539 +{
18540 +       AuDbgCntInit(rw);
18541 +       init_rwsem(&rw->rwsem);
18542 +}
18543 +/* initialize @rw and return with it write-locked */
18544 +static inline void au_rw_init_wlock(struct au_rwsem *rw)
18545 +{
18546 +       au_rw_init(rw);
18547 +       down_write(&rw->rwsem);
18548 +       AuDbgWcntInc(rw);
18549 +}
18550 +/* same as au_rw_init_wlock(), but @lsc is passed to down_write_nested() */
18551 +static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
18552 +                                          unsigned int lsc)
18553 +{
18554 +       au_rw_init(rw);
18555 +       down_write_nested(&rw->rwsem, lsc);
18556 +       AuDbgWcntInc(rw);
18557 +}
18558 +/* take the read lock, then count it in rcnt (debug only) */
18559 +static inline void au_rw_read_lock(struct au_rwsem *rw)
18560 +{
18561 +       down_read(&rw->rwsem);
18562 +       AuDbgRcntInc(rw);
18563 +}
18564 +/* same as au_rw_read_lock(), but @lsc is passed to down_read_nested() */
18565 +static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
18566 +{
18567 +       down_read_nested(&rw->rwsem, lsc);
18568 +       AuDbgRcntInc(rw);
18569 +}
18570 +/* check rcnt, uncount the reader and release the read lock */
18571 +static inline void au_rw_read_unlock(struct au_rwsem *rw)
18572 +{
18573 +       AuRwMustReadLock(rw);
18574 +       AuDbgRcntDec(rw);
18575 +       up_read(&rw->rwsem);
18576 +}
18577 +/* downgrade a held write lock to a read lock; the debug count moves too */
18578 +static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
18579 +{
18580 +       AuRwMustWriteLock(rw);
18581 +       AuDbgRcntInc(rw);
18582 +       AuDbgWcntDec(rw);
18583 +       downgrade_write(&rw->rwsem);
18584 +}
18585 +/* take the write lock, then count it in wcnt (debug only) */
18586 +static inline void au_rw_write_lock(struct au_rwsem *rw)
18587 +{
18588 +       down_write(&rw->rwsem);
18589 +       AuDbgWcntInc(rw);
18590 +}
18591 +/* same as au_rw_write_lock(), but @lsc is passed to down_write_nested() */
18592 +static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
18593 +                                          unsigned int lsc)
18594 +{
18595 +       down_write_nested(&rw->rwsem, lsc);
18596 +       AuDbgWcntInc(rw);
18597 +}
18598 +/* check wcnt, uncount the writer and release the write lock */
18599 +static inline void au_rw_write_unlock(struct au_rwsem *rw)
18600 +{
18601 +       AuRwMustWriteLock(rw);
18602 +       AuDbgWcntDec(rw);
18603 +       up_write(&rw->rwsem);
18604 +}
18605 +
18606 +/* read trylock, non-zero on success. why is no _nested version defined? */
18607 +static inline int au_rw_read_trylock(struct au_rwsem *rw)
18608 +{
18609 +       int ret = down_read_trylock(&rw->rwsem);
18610 +       if (ret)
18611 +               AuDbgRcntInc(rw);
18612 +       return ret;
18613 +}
18614 +/* write trylock; wcnt is incremented only when the lock was acquired */
18615 +static inline int au_rw_write_trylock(struct au_rwsem *rw)
18616 +{
18617 +       int ret = down_write_trylock(&rw->rwsem);
18618 +       if (ret)
18619 +               AuDbgWcntInc(rw);
18620 +       return ret;
18621 +}
18622 +
18623 +#undef AuDbgCntInit
18624 +#undef AuDbgRcntInc
18625 +#undef AuDbgRcntDec
18626 +#undef AuDbgWcntInc
18627 +#undef AuDbgWcntDec
18628 +/* define prefix##_{read,write}_{lock,trylock}(param) operating on rwsem */
18629 +#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
18630 +static inline void prefix##_read_lock(param) \
18631 +{ au_rw_read_lock(rwsem); } \
18632 +static inline void prefix##_write_lock(param) \
18633 +{ au_rw_write_lock(rwsem); } \
18634 +static inline int prefix##_read_trylock(param) \
18635 +{ return au_rw_read_trylock(rwsem); } \
18636 +static inline int prefix##_write_trylock(param) \
18637 +{ return au_rw_write_trylock(rwsem); }
18638 +/* why is no _nested version of trylock defined? */
18639 +/* static inline void prefix##_read_trylock_nested(param, lsc)
18640 +{ au_rw_read_trylock_nested(rwsem, lsc)); }
18641 +static inline void prefix##_write_trylock_nestd(param, lsc)
18642 +{ au_rw_write_trylock_nested(rwsem, lsc); } */
18643 +/* define prefix##_{read,write}_unlock(param) and prefix##_downgrade_lock() */
18644 +#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
18645 +static inline void prefix##_read_unlock(param) \
18646 +{ au_rw_read_unlock(rwsem); } \
18647 +static inline void prefix##_write_unlock(param) \
18648 +{ au_rw_write_unlock(rwsem); } \
18649 +static inline void prefix##_downgrade_lock(param) \
18650 +{ au_rw_dgrade_lock(rwsem); }
18651 +/* define both the lock and the unlock wrappers at once */
18652 +#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
18653 +       AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
18654 +       AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
18655 +
18656 +#endif /* __KERNEL__ */
18657 +#endif /* __AUFS_RWSEM_H__ */
18658 diff -uprN -x .git linux-2.6.31/fs/aufs/sbinfo.c aufs2-2.6.git/fs/aufs/sbinfo.c
18659 --- linux-2.6.31/fs/aufs/sbinfo.c       1970-01-01 00:00:00.000000000 +0000
18660 +++ aufs2-2.6.git/fs/aufs/sbinfo.c      2009-09-21 21:49:23.408274204 +0000
18661 @@ -0,0 +1,208 @@
18662 +/*
18663 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18664 + *
18665 + * This program, aufs is free software; you can redistribute it and/or modify
18666 + * it under the terms of the GNU General Public License as published by
18667 + * the Free Software Foundation; either version 2 of the License, or
18668 + * (at your option) any later version.
18669 + *
18670 + * This program is distributed in the hope that it will be useful,
18671 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18672 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18673 + * GNU General Public License for more details.
18674 + *
18675 + * You should have received a copy of the GNU General Public License
18676 + * along with this program; if not, write to the Free Software
18677 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18678 + */
18679 +
18680 +/*
18681 + * superblock private data
18682 + */
18683 +
18684 +#include "aufs.h"
18685 +
18686 +/*
18687 + * they are necessary regardless of whether sysfs is enabled or not.
18688 + */
18689 +void au_si_free(struct kobject *kobj)
18690 +{
18691 +       struct au_sbinfo *sbinfo;
18692 +       struct super_block *sb;
18693 +
18694 +       sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
18695 +       AuDebugOn(!list_empty(&sbinfo->si_plink.head));
18696 +
18697 +       sb = sbinfo->si_sb;
18698 +       si_write_lock(sb);
18699 +       au_xino_clr(sb);
18700 +       au_br_free(sbinfo);
18701 +       kfree(sbinfo->si_branch);
18702 +       mutex_destroy(&sbinfo->si_xib_mtx);
18703 +       si_write_unlock(sb);
18704 +       AuRwDestroy(&sbinfo->si_rwsem);
18705 +
18706 +       kfree(sbinfo);
18707 +}
18708 +/* allocate sbinfo for @sb with defaults; returns with si_rwsem write-locked */
18709 +int au_si_alloc(struct super_block *sb)
18710 +{
18711 +       int err;
18712 +       struct au_sbinfo *sbinfo;
18713 +
18714 +       err = -ENOMEM;
18715 +       sbinfo = kmalloc(sizeof(*sbinfo), GFP_NOFS);
18716 +       if (unlikely(!sbinfo))
18717 +               goto out;
18718 +
18719 +       /* will be reallocated separately */
18720 +       sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
18721 +       if (unlikely(!sbinfo->si_branch))
18722 +               goto out_sbinfo;
18723 +
18724 +       memset(&sbinfo->si_kobj, 0, sizeof(sbinfo->si_kobj));
18725 +       err = sysaufs_si_init(sbinfo);
18726 +       if (unlikely(err))
18727 +               goto out_br;
18728 +
18729 +       au_nwt_init(&sbinfo->si_nowait);
18730 +       au_rw_init_wlock(&sbinfo->si_rwsem);
18731 +       sbinfo->si_generation = 0;
18732 +       sbinfo->au_si_status = 0;
18733 +       sbinfo->si_bend = -1;
18734 +       sbinfo->si_last_br_id = 0;
18735 +
18736 +       sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
18737 +       sbinfo->si_wbr_create = AuWbrCreate_Def;
18738 +       sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + AuWbrCopyup_Def;
18739 +       sbinfo->si_wbr_create_ops = au_wbr_create_ops + AuWbrCreate_Def;
18740 +
18741 +       sbinfo->si_mntflags = AuOpt_Def;
18742 +
18743 +       sbinfo->si_xread = NULL;
18744 +       sbinfo->si_xwrite = NULL;
18745 +       sbinfo->si_xib = NULL;
18746 +       mutex_init(&sbinfo->si_xib_mtx);
18747 +       sbinfo->si_xib_buf = NULL;
18748 +       sbinfo->si_xino_brid = -1;
18749 +       /* leave si_xib_last_pindex and si_xib_next_bit */
18750 +
18751 +       sbinfo->si_rdcache = AUFS_RDCACHE_DEF * HZ;
18752 +       sbinfo->si_rdblk = AUFS_RDBLK_DEF;
18753 +       sbinfo->si_rdhash = AUFS_RDHASH_DEF;
18754 +       sbinfo->si_dirwh = AUFS_DIRWH_DEF;
18755 +
18756 +       au_spl_init(&sbinfo->si_plink);
18757 +       init_waitqueue_head(&sbinfo->si_plink_wq);
18758 +
18759 +       /* leave other members for sysaufs and si_mnt. */
18760 +       sbinfo->si_sb = sb;
18761 +       sb->s_fs_info = sbinfo;
18762 +       au_debug_sbinfo_init(sbinfo);
18763 +       return 0; /* success */
18764 +
18765 + out_br:
18766 +       kfree(sbinfo->si_branch);
18767 + out_sbinfo:
18768 +       kfree(sbinfo);
18769 + out:
18770 +       return err;
18771 +}
18772 +/* resize si_branch to @nbr pointers by au_kzrealloc(); 0 or -ENOMEM */
18773 +int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
18774 +{
18775 +       int err, sz;
18776 +       struct au_branch **brp;
18777 +
18778 +       AuRwMustWriteLock(&sbinfo->si_rwsem);
18779 +
18780 +       err = -ENOMEM;
18781 +       sz = sizeof(*brp) * (sbinfo->si_bend + 1);
18782 +       if (unlikely(!sz)) /* si_bend is -1, use the size of one pointer */
18783 +               sz = sizeof(*brp);
18784 +       brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
18785 +       if (brp) {
18786 +               sbinfo->si_branch = brp;
18787 +               err = 0;
18788 +       }
18789 +
18790 +       return err;
18791 +}
18792 +
18793 +/* ---------------------------------------------------------------------- */
18794 +/* bump si_generation, then update digen/iigen and i_version of the root */
18795 +unsigned int au_sigen_inc(struct super_block *sb)
18796 +{
18797 +       unsigned int gen;
18798 +
18799 +       SiMustWriteLock(sb);
18800 +
18801 +       gen = ++au_sbi(sb)->si_generation;
18802 +       au_update_digen(sb->s_root);
18803 +       au_update_iigen(sb->s_root->d_inode);
18804 +       sb->s_root->d_inode->i_version++;
18805 +       return gen;
18806 +}
18807 +/* pick a non-zero id for which au_br_index() is negative; -1 if none found */
18808 +aufs_bindex_t au_new_br_id(struct super_block *sb)
18809 +{
18810 +       aufs_bindex_t br_id;
18811 +       int i;
18812 +       struct au_sbinfo *sbinfo;
18813 +
18814 +       SiMustWriteLock(sb);
18815 +
18816 +       sbinfo = au_sbi(sb);
18817 +       for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
18818 +               br_id = ++sbinfo->si_last_br_id;
18819 +               if (br_id && au_br_index(sb, br_id) < 0)
18820 +                       return br_id;
18821 +       }
18822 +
18823 +       return -1;
18824 +}
18825 +
18826 +/* ---------------------------------------------------------------------- */
18827 +
18828 +/* si read lock, then di lock (write when DW is set). call at entry point */
18829 +void aufs_read_lock(struct dentry *dentry, int flags)
18830 +{
18831 +       si_read_lock(dentry->d_sb, flags);
18832 +       if (au_ftest_lock(flags, DW))
18833 +               di_write_lock_child(dentry);
18834 +       else
18835 +               di_read_lock_child(dentry, flags);
18836 +}
18837 +/* undo aufs_read_lock() in reverse order; pass the same @flags */
18838 +void aufs_read_unlock(struct dentry *dentry, int flags)
18839 +{
18840 +       if (au_ftest_lock(flags, DW))
18841 +               di_write_unlock(dentry);
18842 +       else
18843 +               di_read_unlock(dentry, flags);
18844 +       si_read_unlock(dentry->d_sb);
18845 +}
18846 +/* write-lock the super_block first, then the dentry */
18847 +void aufs_write_lock(struct dentry *dentry)
18848 +{
18849 +       si_write_lock(dentry->d_sb);
18850 +       di_write_lock_child(dentry);
18851 +}
18852 +/* undo aufs_write_lock() in reverse order */
18853 +void aufs_write_unlock(struct dentry *dentry)
18854 +{
18855 +       di_write_unlock(dentry);
18856 +       si_write_unlock(dentry->d_sb);
18857 +}
18858 +/* read-lock the super_block of @d1, then write-lock both dentries */
18859 +void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
18860 +{
18861 +       si_read_lock(d1->d_sb, flags);
18862 +       di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR));
18863 +}
18864 +/* undo aufs_read_and_write_lock2() in reverse order */
18865 +void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
18866 +{
18867 +       di_write_unlock2(d1, d2);
18868 +       si_read_unlock(d1->d_sb);
18869 +}
18870 diff -uprN -x .git linux-2.6.31/fs/aufs/spl.h aufs2-2.6.git/fs/aufs/spl.h
18871 --- linux-2.6.31/fs/aufs/spl.h  1970-01-01 00:00:00.000000000 +0000
18872 +++ aufs2-2.6.git/fs/aufs/spl.h 2009-09-21 21:49:23.408274204 +0000
18873 @@ -0,0 +1,57 @@
18874 +/*
18875 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18876 + *
18877 + * This program, aufs is free software; you can redistribute it and/or modify
18878 + * it under the terms of the GNU General Public License as published by
18879 + * the Free Software Foundation; either version 2 of the License, or
18880 + * (at your option) any later version.
18881 + *
18882 + * This program is distributed in the hope that it will be useful,
18883 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18884 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18885 + * GNU General Public License for more details.
18886 + *
18887 + * You should have received a copy of the GNU General Public License
18888 + * along with this program; if not, write to the Free Software
18889 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18890 + */
18891 +
18892 +/*
18893 + * simple list protected by a spinlock
18894 + */
18895 +
18896 +#ifndef __AUFS_SPL_H__
18897 +#define __AUFS_SPL_H__
18898 +
18899 +#ifdef __KERNEL__
18900 +
18901 +#include <linux/spinlock.h>
18902 +#include <linux/list.h>
18903 +/* a list head together with the spinlock taken by au_spl_add/del() */
18904 +struct au_splhead {
18905 +       spinlock_t              spin;
18906 +       struct list_head        head;
18907 +};
18908 +/* initialize the spinlock and make the list empty */
18909 +static inline void au_spl_init(struct au_splhead *spl)
18910 +{
18911 +       spin_lock_init(&spl->spin);
18912 +       INIT_LIST_HEAD(&spl->head);
18913 +}
18914 +/* add @list right after spl->head while holding spl->spin */
18915 +static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
18916 +{
18917 +       spin_lock(&spl->spin);
18918 +       list_add(list, &spl->head);
18919 +       spin_unlock(&spl->spin);
18920 +}
18921 +/* unlink @list from whatever list it is on while holding spl->spin */
18922 +static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
18923 +{
18924 +       spin_lock(&spl->spin);
18925 +       list_del(list);
18926 +       spin_unlock(&spl->spin);
18927 +}
18928 +
18929 +#endif /* __KERNEL__ */
18930 +#endif /* __AUFS_SPL_H__ */
18931 diff -uprN -x .git linux-2.6.31/fs/aufs/super.c aufs2-2.6.git/fs/aufs/super.c
18932 --- linux-2.6.31/fs/aufs/super.c        1970-01-01 00:00:00.000000000 +0000
18933 +++ aufs2-2.6.git/fs/aufs/super.c       2009-09-21 21:49:23.408274204 +0000
18934 @@ -0,0 +1,874 @@
18935 +/*
18936 + * Copyright (C) 2005-2009 Junjiro R. Okajima
18937 + *
18938 + * This program, aufs is free software; you can redistribute it and/or modify
18939 + * it under the terms of the GNU General Public License as published by
18940 + * the Free Software Foundation; either version 2 of the License, or
18941 + * (at your option) any later version.
18942 + *
18943 + * This program is distributed in the hope that it will be useful,
18944 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18945 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18946 + * GNU General Public License for more details.
18947 + *
18948 + * You should have received a copy of the GNU General Public License
18949 + * along with this program; if not, write to the Free Software
18950 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18951 + */
18952 +
18953 +/*
18954 + * mount and super_block operations
18955 + */
18956 +
18957 +#include <linux/buffer_head.h>
18958 +#include <linux/module.h>
18959 +#include <linux/seq_file.h>
18960 +#include <linux/statfs.h>
18961 +#include "aufs.h"
18962 +
18963 +/*
18964 + * super_operations
18965 + */
18966 +static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
18967 +{
18968 +       struct au_icntnr *c;
18969 +
18970 +       c = au_cache_alloc_icntnr(); /* the inode is a member of au_icntnr */
18971 +       if (c) {
18972 +               inode_init_once(&c->vfs_inode);
18973 +               c->vfs_inode.i_version = 1; /* sigen(sb); */
18974 +               c->iinfo.ii_hinode = NULL;
18975 +               return &c->vfs_inode;
18976 +       }
18977 +       return NULL; /* the allocation failed */
18978 +}
18979 +/* finalize iinfo, then free the au_icntnr which contains @inode */
18980 +static void aufs_destroy_inode(struct inode *inode)
18981 +{
18982 +       au_iinfo_fin(inode);
18983 +       au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
18984 +}
18985 +/* iget_locked() plus aufs init of a new inode; ERR_PTR on error, not NULL */
18986 +struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
18987 +{
18988 +       struct inode *inode;
18989 +       int err;
18990 +
18991 +       inode = iget_locked(sb, ino);
18992 +       if (unlikely(!inode)) {
18993 +               inode = ERR_PTR(-ENOMEM);
18994 +               goto out;
18995 +       }
18996 +       if (!(inode->i_state & I_NEW)) /* an existing inode is returned as is */
18997 +               goto out;
18998 +
18999 +       err = au_xigen_new(inode);
19000 +       if (!err)
19001 +               err = au_iinfo_init(inode);
19002 +       if (!err)
19003 +               inode->i_version++;
19004 +       else {
19005 +               iget_failed(inode);
19006 +               inode = ERR_PTR(err);
19007 +       }
19008 +
19009 + out:
19010 +       /* never return NULL */
19011 +       AuDebugOn(!inode);
19012 +       AuTraceErrPtr(inode);
19013 +       return inode;
19014 +}
19015 +
19016 +/* print "<path>=<perm>" per branch, ':'-separated. lock free root dinfo */
19017 +static int au_show_brs(struct seq_file *seq, struct super_block *sb)
19018 +{
19019 +       int err;
19020 +       aufs_bindex_t bindex, bend;
19021 +       struct path path;
19022 +       struct au_hdentry *hd;
19023 +       struct au_branch *br;
19024 +
19025 +       err = 0;
19026 +       bend = au_sbend(sb);
19027 +       hd = au_di(sb->s_root)->di_hdentry;
19028 +       for (bindex = 0; !err && bindex <= bend; bindex++) {
19029 +               br = au_sbr(sb, bindex);
19030 +               path.mnt = br->br_mnt;
19031 +               path.dentry = hd[bindex].hd_dentry;
19032 +               err = au_seq_path(seq, &path);
19033 +               if (err > 0)
19034 +                       err = seq_printf(seq, "=%s",
19035 +                                        au_optstr_br_perm(br->br_perm));
19036 +               if (!err && bindex != bend) /* no ':' after the last branch */
19037 +                       err = seq_putc(seq, ':');
19038 +       }
19039 +
19040 +       return err;
19041 +}
19042 +/* print ",create=<policy>" and its parameters, if any, for the policy @v */
19043 +static void au_show_wbr_create(struct seq_file *m, int v,
19044 +                              struct au_sbinfo *sbinfo)
19045 +{
19046 +       const char *pat;
19047 +
19048 +       AuRwMustAnyLock(&sbinfo->si_rwsem);
19049 +
19050 +       seq_printf(m, ",create=");
19051 +       pat = au_optstr_wbr_create(v);
19052 +       switch (v) {
19053 +       case AuWbrCreate_TDP:
19054 +       case AuWbrCreate_RR:
19055 +       case AuWbrCreate_MFS:
19056 +       case AuWbrCreate_PMFS:
19057 +               seq_puts(m, pat); /* pat is data, never use it as a format */
19058 +               break;
19059 +       case AuWbrCreate_MFSV:
19060 +               seq_printf(m, /*pat*/"mfs:%lu",
19061 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ);
19062 +               break;
19063 +       case AuWbrCreate_PMFSV:
19064 +               seq_printf(m, /*pat*/"pmfs:%lu",
19065 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ);
19066 +               break;
19067 +       case AuWbrCreate_MFSRR:
19068 +               seq_printf(m, /*pat*/"mfsrr:%llu",
19069 +                          sbinfo->si_wbr_mfs.mfsrr_watermark);
19070 +               break;
19071 +       case AuWbrCreate_MFSRRV:
19072 +               seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
19073 +                          sbinfo->si_wbr_mfs.mfsrr_watermark,
19074 +                          sbinfo->si_wbr_mfs.mfs_expire / HZ);
19075 +               break;
19076 +       }
19077 +}
19078 +/* print ",xino=<path>" unless it is the default; no-op under CONFIG_SYSFS */
19079 +static int au_show_xino(struct seq_file *seq, struct vfsmount *mnt)
19080 +{
19081 +#ifdef CONFIG_SYSFS
19082 +       return 0;
19083 +#else
19084 +       int err;
19085 +       const int len = sizeof(AUFS_XINO_FNAME) - 1;
19086 +       aufs_bindex_t bindex, brid;
19087 +       struct super_block *sb;
19088 +       struct qstr *name;
19089 +       struct file *f;
19090 +       struct dentry *d, *h_root;
19091 +
19092 +       AuRwMustAnyLock(&au_sbi(mnt->mnt_sb)->si_rwsem);
19093 +
19094 +       err = 0;
19095 +       sb = mnt->mnt_sb;
19096 +       f = au_sbi(sb)->si_xib;
19097 +       if (!f)
19098 +               goto out;
19099 +
19100 +       /* stop printing the default xino path on the first writable branch */
19101 +       h_root = NULL;
19102 +       brid = au_xino_brid(sb);
19103 +       if (brid >= 0) {
19104 +               bindex = au_br_index(sb, brid);
19105 +               h_root = au_di(sb->s_root)->di_hdentry[0 + bindex].hd_dentry;
19106 +       }
19107 +       d = f->f_dentry;
19108 +       name = &d->d_name;
19109 +       /* safe ->d_parent because the file is unlinked */
19110 +       if (d->d_parent == h_root
19111 +           && name->len == len
19112 +           && !memcmp(name->name, AUFS_XINO_FNAME, len))
19113 +               goto out;
19114 +
19115 +       seq_puts(seq, ",xino=");
19116 +       err = au_xino_path(seq, f);
19117 +
19118 + out:
19119 +       return err;
19120 +#endif
19121 +}
19122 +
19123 +/* show mount options. seq_file will re-call me in case of too long string */
19124 +static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt)
19125 +{
19126 +       int err, n;
19127 +       unsigned int mnt_flags, v;
19128 +       struct super_block *sb;
19129 +       struct au_sbinfo *sbinfo;
19130 +       /* each helper prints an option only when it differs from the default */
19131 +#define AuBool(name, str) do { \
19132 +       v = au_opt_test(mnt_flags, name); \
19133 +       if (v != au_opt_test(AuOpt_Def, name)) \
19134 +               seq_printf(m, ",%s" #str, v ? "" : "no"); \
19135 +} while (0)
19136 +
19137 +#define AuStr(name, str) do { \
19138 +       v = mnt_flags & AuOptMask_##name; \
19139 +       if (v != (AuOpt_Def & AuOptMask_##name)) \
19140 +               seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
19141 +} while (0)
19142 +
19143 +#define AuUInt(name, str, val) do { \
19144 +       if (val != AUFS_##name##_DEF) \
19145 +               seq_printf(m, "," #str "=%u", val); \
19146 +} while (0)
19147 +
19148 +       /* lock free root dinfo */
19149 +       sb = mnt->mnt_sb;
19150 +       si_noflush_read_lock(sb);
19151 +       sbinfo = au_sbi(sb);
19152 +       seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
19153 +
19154 +       mnt_flags = au_mntflags(sb);
19155 +       if (au_opt_test(mnt_flags, XINO)) {
19156 +               err = au_show_xino(m, mnt);
19157 +               if (unlikely(err))
19158 +                       goto out;
19159 +       } else
19160 +               seq_puts(m, ",noxino");
19161 +
19162 +       AuBool(TRUNC_XINO, trunc_xino);
19163 +       AuStr(UDBA, udba);
19164 +       AuBool(SHWH, shwh);
19165 +       AuBool(PLINK, plink);
19166 +       /* AuBool(DIRPERM1, dirperm1); */
19167 +       /* AuBool(REFROF, refrof); */
19168 +
19169 +       v = sbinfo->si_wbr_create;
19170 +       if (v != AuWbrCreate_Def)
19171 +               au_show_wbr_create(m, v, sbinfo);
19172 +
19173 +       v = sbinfo->si_wbr_copyup;
19174 +       if (v != AuWbrCopyup_Def)
19175 +               seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
19176 +
19177 +       v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
19178 +       if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
19179 +               seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
19180 +
19181 +       AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
19182 +
19183 +       n = sbinfo->si_rdcache / HZ;
19184 +       AuUInt(RDCACHE, rdcache, n);
19185 +
19186 +       AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
19187 +       AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
19188 +
19189 +       AuBool(SUM, sum);
19190 +       /* AuBool(SUM_W, wsum); */
19191 +       AuBool(WARN_PERM, warn_perm);
19192 +       AuBool(VERBOSE, verbose);
19193 +
19194 + out:
19195 +       /* be sure to print "br:" last */
19196 +       if (!sysaufs_brs) {
19197 +               seq_puts(m, ",br:");
19198 +               au_show_brs(m, sb);
19199 +       }
19200 +       si_read_unlock(sb);
19201 +       return 0;
19202 +
19203 +#undef AuUInt
19204 +#undef AuBool
19205 +#undef AuStr
19206 +}
19207 +
19208 +/* ---------------------------------------------------------------------- */
19209 +
19210 +/* sum mode which returns the summation for statfs(2) */
19211 +
19212 +static u64 au_add_till_max(u64 a, u64 b)
19213 +{
19214 +       u64 old;
19215 +       /* saturating add: returns a + b, or ULLONG_MAX when the sum wraps */
19216 +       old = a;
19217 +       a += b;
19218 +       if (old <= a) /* "<=" so that b == 0 is not mistaken for a wrap */
19219 +               return a;
19220 +       return ULLONG_MAX;
19221 +}
19222 +
19223 +static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
19224 +{
19225 +       int err;
19226 +       u64 blocks, bfree, bavail, files, ffree;
19227 +       aufs_bindex_t bend, bindex, i;
19228 +       unsigned char shared;
19229 +       struct vfsmount *h_mnt;
19230 +       struct super_block *h_sb;
19231 +
19232 +       blocks = 0;
19233 +       bfree = 0;
19234 +       bavail = 0;
19235 +       files = 0;
19236 +       ffree = 0;
19237 +       /* visit the branches from the last index (bend) down to 0 */
19238 +       err = 0;
19239 +       bend = au_sbend(sb);
19240 +       for (bindex = bend; bindex >= 0; bindex--) {
19241 +               h_mnt = au_sbr_mnt(sb, bindex);
19242 +               h_sb = h_mnt->mnt_sb;
19243 +               shared = 0; /* does a higher-indexed branch use the same h_sb? */
19244 +               for (i = bindex + 1; !shared && i <= bend; i++)
19245 +                       shared = (au_sbr_sb(sb, i) == h_sb);
19246 +               if (shared)
19247 +                       continue; /* that lower fs was already visited */
19248 +
19249 +               /* sb->s_root for NFS is unreliable */
19250 +               err = vfs_statfs(h_mnt->mnt_root, buf);
19251 +               if (unlikely(err))
19252 +                       goto out;
19253 +               /* accumulate with saturation, see au_add_till_max() */
19254 +               blocks = au_add_till_max(blocks, buf->f_blocks);
19255 +               bfree = au_add_till_max(bfree, buf->f_bfree);
19256 +               bavail = au_add_till_max(bavail, buf->f_bavail);
19257 +               files = au_add_till_max(files, buf->f_files);
19258 +               ffree = au_add_till_max(ffree, buf->f_ffree);
19259 +       }
19260 +       /* NOTE(review): counts are summed as-is; f_bsize stays from the last vfs_statfs() */
19261 +       buf->f_blocks = blocks;
19262 +       buf->f_bfree = bfree;
19263 +       buf->f_bavail = bavail;
19264 +       buf->f_files = files;
19265 +       buf->f_ffree = ffree;
19266 +
19267 + out:
19268 +       return err;
19269 +}
19270 +
19271 +static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
19272 +{
19273 +       int err;
19274 +       struct super_block *sb;
19275 +
19276 +       /* lock free root dinfo */
19277 +       sb = dentry->d_sb;
19278 +       si_noflush_read_lock(sb);
19279 +       if (!au_opt_test(au_mntflags(sb), SUM))
19280 +               /* without SUM, report branch 0 only. sb->s_root for NFS is unreliable */
19281 +               err = vfs_statfs(au_sbr_mnt(sb, 0)->mnt_root, buf);
19282 +       else
19283 +               err = au_statfs_sum(sb, buf);
19284 +       si_read_unlock(sb);
19285 +       /* on success, overwrite the identity fields reported by the branch */
19286 +       if (!err) {
19287 +               buf->f_type = AUFS_SUPER_MAGIC;
19288 +               buf->f_namelen -= AUFS_WH_PFX_LEN; /* presumably room for the whiteout prefix */
19289 +               memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
19290 +       }
19291 +       /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
19292 +
19293 +       return err;
19294 +}
19295 +
19296 +/* ---------------------------------------------------------------------- */
19297 +
19298 +/* try flushing the lower fs at aufs remount/unmount time */
19299 +
19300 +static void au_fsync_br(struct super_block *sb)
19301 +{
19302 +       aufs_bindex_t bend, bindex;
19303 +       int brperm;
19304 +       struct au_branch *br;
19305 +       struct super_block *h_sb;
19306 +
19307 +       bend = au_sbend(sb); /* index of the last branch, inclusive */
19308 +       for (bindex = 0; bindex <= bend; bindex++) {
19309 +               br = au_sbr(sb, bindex);
19310 +               brperm = br->br_perm;
19311 +               if (brperm == AuBrPerm_RR || brperm == AuBrPerm_RRWH)
19312 +                       continue; /* skip branches whose permission is RR or RRWH */
19313 +               h_sb = br->br_mnt->mnt_sb;
19314 +               if (bdev_read_only(h_sb->s_bdev))
19315 +                       continue; /* skip a lower fs on a read-only block device */
19316 +               /* shrink the dcache and sync the lower fs while holding its s_umount */
19317 +               lockdep_off();
19318 +               down_write(&h_sb->s_umount);
19319 +               shrink_dcache_sb(h_sb);
19320 +               sync_filesystem(h_sb);
19321 +               up_write(&h_sb->s_umount);
19322 +               lockdep_on();
19323 +       }
19324 +}
19325 +
19326 +/*
19327 + * this IS NOT for super_operations, it is called by aufs_put_super().
19328 + * I guess it will be reverted someday.
19329 + */
19330 +static void aufs_umount_begin(struct super_block *sb)
19331 +{
19332 +       struct au_sbinfo *sbinfo;
19333 +
19334 +       sbinfo = au_sbi(sb);
19335 +       if (!sbinfo)
19336 +               return;
19337 +       /* everything below runs under the si write lock */
19338 +       si_write_lock(sb);
19339 +       au_fsync_br(sb);
19340 +       if (au_opt_test(au_mntflags(sb), PLINK))
19341 +               au_plink_put(sb);
19342 +       if (sbinfo->si_wbr_create_ops->fin) /* ->fin is an optional hook */
19343 +               sbinfo->si_wbr_create_ops->fin(sb);
19344 +       si_write_unlock(sb);
19345 +}
19346 +
19347 +/* final actions when unmounting a file system (super_operations.put_super) */
19348 +static void aufs_put_super(struct super_block *sb)
19349 +{
19350 +       struct au_sbinfo *sbinfo;
19351 +
19352 +       sbinfo = au_sbi(sb);
19353 +       if (!sbinfo)
19354 +               return; /* s_fs_info is NULL, e.g. cleared by a failed aufs_fill_super() */
19355 +
19356 +       aufs_umount_begin(sb);
19357 +       dbgaufs_si_fin(sbinfo);
19358 +       kobject_put(&sbinfo->si_kobj); /* presumably frees sbinfo on the last reference */
19359 +}
19360 +
19361 +/* ---------------------------------------------------------------------- */
19362 +
19363 +/*
19364 + * refresh dentry and inode at remount time; returns 0 or a negative errno.
19365 + */
19366 +static int do_refresh(struct dentry *dentry, mode_t type,
19367 +                     unsigned int dir_flags)
19368 +{
19369 +       int err;
19370 +       struct dentry *parent;
19371 +
19372 +       di_write_lock_child(dentry);
19373 +       parent = dget_parent(dentry);
19374 +       di_read_lock_parent(parent, AuLock_IR);
19375 +
19376 +       /* returns the number of positive dentries, or a negative error */
19377 +       err = au_refresh_hdentry(dentry, type);
19378 +       if (err >= 0) {
19379 +               struct inode *inode = dentry->d_inode;
19380 +               err = au_refresh_hinode(inode, dentry); /* the count above is dropped */
19381 +               if (!err && type == S_IFDIR)
19382 +                       au_reset_hinotify(inode, dir_flags);
19383 +       }
19384 +       if (unlikely(err))
19385 +               AuErr("unrecoverable error %d, %.*s\n", err, AuDLNPair(dentry));
19386 +       /* release in the reverse order of acquisition */
19387 +       di_read_unlock(parent, AuLock_IR);
19388 +       dput(parent);
19389 +       di_write_unlock(dentry);
19390 +
19391 +       return err;
19392 +}
19393 +
19394 +static int test_dir(struct dentry *dentry, void *arg __maybe_unused)
19395 +{
19396 +       return S_ISDIR(dentry->d_inode->i_mode); /* d_inode is not NULL-checked here */
19397 +}
19398 +
19399 +/* refresh stale directories; gave up consolidating with refresh_nondir() */
19400 +static int refresh_dir(struct dentry *root, unsigned int sigen)
19401 +{
19402 +       int err, i, j, ndentry, e;
19403 +       struct au_dcsub_pages dpages;
19404 +       struct au_dpage *dpage;
19405 +       struct dentry **dentries;
19406 +       struct inode *inode;
19407 +       const unsigned int flags = au_hi_flags(root->d_inode, /*isdir*/1);
19408 +
19409 +       err = 0; /* err keeps the first failure, e is the latest result */
19410 +       list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
19411 +               if (S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) {
19412 +                       ii_write_lock_child(inode);
19413 +                       e = au_refresh_hinode_self(inode, /*do_attr*/1);
19414 +                       ii_write_unlock(inode);
19415 +                       if (unlikely(e)) {
19416 +                               AuDbg("e %d, i%lu\n", e, inode->i_ino);
19417 +                               if (!err)
19418 +                                       err = e;
19419 +                               /* go on even if err */
19420 +                       }
19421 +               }
19422 +       /* second pass: the directory dentries collected under root */
19423 +       e = au_dpages_init(&dpages, GFP_NOFS);
19424 +       if (unlikely(e)) {
19425 +               if (!err)
19426 +                       err = e;
19427 +               goto out;
19428 +       }
19429 +       e = au_dcsub_pages(&dpages, root, test_dir, NULL);
19430 +       if (unlikely(e)) {
19431 +               if (!err)
19432 +                       err = e;
19433 +               goto out_dpages;
19434 +       }
19435 +       /* "!e" in both loop conditions stops at the first do_refresh() failure */
19436 +       for (i = 0; !e && i < dpages.ndpage; i++) {
19437 +               dpage = dpages.dpages + i;
19438 +               dentries = dpage->dentries;
19439 +               ndentry = dpage->ndentry;
19440 +               for (j = 0; !e && j < ndentry; j++) {
19441 +                       struct dentry *d;
19442 +
19443 +                       d = dentries[j];
19444 +                       au_dbg_verify_dir_parent(d, sigen);
19445 +                       if (au_digen(d) != sigen) {
19446 +                               e = do_refresh(d, S_IFDIR, flags);
19447 +                               if (unlikely(e && !err))
19448 +                                       err = e;
19449 +                               /* break on err */
19450 +                       }
19451 +               }
19452 +       }
19453 +
19454 + out_dpages:
19455 +       au_dpages_free(&dpages);
19456 + out:
19457 +       return err;
19458 +}
19459 +
19460 +static int test_nondir(struct dentry *dentry, void *arg __maybe_unused)
19461 +{
19462 +       return !S_ISDIR(dentry->d_inode->i_mode); /* d_inode is not NULL-checked here */
19463 +}
19464 +
19465 +static int refresh_nondir(struct dentry *root, unsigned int sigen,
19466 +                         int do_dentry)
19467 +{
19468 +       int err, i, j, ndentry, e;
19469 +       struct au_dcsub_pages dpages;
19470 +       struct au_dpage *dpage;
19471 +       struct dentry **dentries;
19472 +       struct inode *inode;
19473 +
19474 +       err = 0; /* err keeps the first failure, e is the latest result */
19475 +       list_for_each_entry(inode, &root->d_sb->s_inodes, i_sb_list)
19476 +               if (!S_ISDIR(inode->i_mode) && au_iigen(inode) != sigen) {
19477 +                       ii_write_lock_child(inode);
19478 +                       e = au_refresh_hinode_self(inode, /*do_attr*/1);
19479 +                       ii_write_unlock(inode);
19480 +                       if (unlikely(e)) {
19481 +                               AuDbg("e %d, i%lu\n", e, inode->i_ino);
19482 +                               if (!err)
19483 +                                       err = e;
19484 +                               /* go on even if err */
19485 +                       }
19486 +               }
19487 +
19488 +       if (!do_dentry) /* the caller asked for the inode pass only */
19489 +               goto out;
19490 +
19491 +       e = au_dpages_init(&dpages, GFP_NOFS);
19492 +       if (unlikely(e)) {
19493 +               if (!err)
19494 +                       err = e;
19495 +               goto out;
19496 +       }
19497 +       e = au_dcsub_pages(&dpages, root, test_nondir, NULL);
19498 +       if (unlikely(e)) {
19499 +               if (!err)
19500 +                       err = e;
19501 +               goto out_dpages;
19502 +       }
19503 +       /* unlike refresh_dir(), these loops continue after a failure */
19504 +       for (i = 0; i < dpages.ndpage; i++) {
19505 +               dpage = dpages.dpages + i;
19506 +               dentries = dpage->dentries;
19507 +               ndentry = dpage->ndentry;
19508 +               for (j = 0; j < ndentry; j++) {
19509 +                       struct dentry *d;
19510 +
19511 +                       d = dentries[j];
19512 +                       au_dbg_verify_nondir_parent(d, sigen);
19513 +                       inode = d->d_inode;
19514 +                       if (inode && au_digen(d) != sigen) {
19515 +                               e = do_refresh(d, inode->i_mode & S_IFMT,
19516 +                                              /*dir_flags*/0);
19517 +                               if (unlikely(e && !err))
19518 +                                       err = e;
19519 +                               /* go on even if err */
19520 +                       }
19521 +               }
19522 +       }
19523 +
19524 + out_dpages:
19525 +       au_dpages_free(&dpages);
19526 + out:
19527 +       return err;
19528 +}
19529 +
19530 +static void au_remount_refresh(struct super_block *sb, unsigned int flags)
19531 +{
19532 +       int err;
19533 +       unsigned int sigen;
19534 +       struct au_sbinfo *sbinfo;
19535 +       struct dentry *root;
19536 +       struct inode *inode;
19537 +       /* bump the sb generation, then refresh whatever still has the old one */
19538 +       au_sigen_inc(sb);
19539 +       sigen = au_sigen(sb);
19540 +       sbinfo = au_sbi(sb);
19541 +       au_fclr_si(sbinfo, FAILED_REFRESH_DIRS);
19542 +       /* the root dinfo lock is dropped for the walk and re-taken at the end */
19543 +       root = sb->s_root;
19544 +       DiMustNoWaiters(root);
19545 +       inode = root->d_inode;
19546 +       IiMustNoWaiters(inode);
19547 +       au_reset_hinotify(inode, au_hi_flags(inode, /*isdir*/1));
19548 +       di_write_unlock(root);
19549 +
19550 +       err = refresh_dir(root, sigen);
19551 +       if (unlikely(err)) {
19552 +               au_fset_si(sbinfo, FAILED_REFRESH_DIRS);
19553 +               AuWarn("Refreshing directories failed, ignored (%d)\n", err);
19554 +       }
19555 +
19556 +       if (au_ftest_opts(flags, REFRESH_NONDIR)) {
19557 +               err = refresh_nondir(root, sigen, !err); /* dentry pass only if dirs succeeded */
19558 +               if (unlikely(err))
19559 +                       AuWarn("Refreshing non-directories failed, ignored"
19560 +                              " (%d)\n", err);
19561 +       }
19562 +
19563 +       /* aufs_write_lock() calls ..._child() */
19564 +       di_write_lock_child(root);
19565 +       au_cpup_attr_all(root->d_inode, /*force*/1);
19566 +}
19567 +
19568 +/* stop extra interpretation of errno in mount(8), and strange error messages */
19569 +static int cvt_err(int err)
19570 +{
19571 +       AuTraceErr(err);
19572 +
19573 +       switch (err) {
19574 +       case -ENOENT:
19575 +       case -ENOTDIR:
19576 +       case -EEXIST:
19577 +       case -EIO:
19578 +               err = -EINVAL; /* these four are all reported as EINVAL */
19579 +       }
19580 +       return err; /* any other value passes through unchanged */
19581 +}
19582 +
19583 +static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
19584 +{
19585 +       int err;
19586 +       struct au_opts opts;
19587 +       struct dentry *root;
19588 +       struct inode *inode;
19589 +       struct au_sbinfo *sbinfo;
19590 +
19591 +       err = 0;
19592 +       root = sb->s_root;
19593 +       if (!data || !*data) { /* no option string: only verify and flush */
19594 +               aufs_write_lock(root);
19595 +               err = au_opts_verify(sb, *flags, /*pending*/0);
19596 +               if (!err)
19597 +                       au_fsync_br(sb);
19598 +               aufs_write_unlock(root);
19599 +               goto out;
19600 +       }
19601 +
19602 +       err = -ENOMEM;
19603 +       memset(&opts, 0, sizeof(opts));
19604 +       opts.opt = (void *)__get_free_page(GFP_NOFS);
19605 +       if (unlikely(!opts.opt))
19606 +               goto out;
19607 +       opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); /* entries that fit in the page */
19608 +       opts.flags = AuOpts_REMOUNT;
19609 +       opts.sb_flags = *flags;
19610 +
19611 +       /* parse it before aufs lock */
19612 +       err = au_opts_parse(sb, data, &opts);
19613 +       if (unlikely(err))
19614 +               goto out_opts;
19615 +
19616 +       sbinfo = au_sbi(sb);
19617 +       inode = root->d_inode;
19618 +       mutex_lock(&inode->i_mutex); /* i_mutex first, then the aufs lock */
19619 +       aufs_write_lock(root);
19620 +       au_fsync_br(sb);
19621 +
19622 +       /* au_opts_remount() may return an error */
19623 +       err = au_opts_remount(sb, &opts);
19624 +       au_opts_free(&opts);
19625 +       /* the refresh below runs even when au_opts_remount() failed */
19626 +       if (au_ftest_opts(opts.flags, REFRESH_DIR)
19627 +           || au_ftest_opts(opts.flags, REFRESH_NONDIR))
19628 +               au_remount_refresh(sb, opts.flags);
19629 +
19630 +       aufs_write_unlock(root);
19631 +       mutex_unlock(&inode->i_mutex);
19632 +
19633 + out_opts:
19634 +       free_page((unsigned long)opts.opt);
19635 + out:
19636 +       err = cvt_err(err); /* see cvt_err(): some errnos become EINVAL */
19637 +       AuTraceErr(err);
19638 +       return err;
19639 +}
19640 +
19641 +static struct super_operations aufs_sop = { /* installed by aufs_fill_super() */
19642 +       .alloc_inode    = aufs_alloc_inode,
19643 +       .destroy_inode  = aufs_destroy_inode,
19644 +       .drop_inode     = generic_delete_inode,
19645 +       .show_options   = aufs_show_options,
19646 +       .statfs         = aufs_statfs,
19647 +       .put_super      = aufs_put_super,
19648 +       .remount_fs     = aufs_remount_fs
19649 +};
19650 +
19651 +/* ---------------------------------------------------------------------- */
19652 +
19653 +static int alloc_root(struct super_block *sb)
19654 +{
19655 +       int err;
19656 +       struct inode *inode;
19657 +       struct dentry *root;
19658 +
19659 +       /* create the root inode and dentry; sets sb->s_root, returns 0 or -errno */
19660 +       inode = au_iget_locked(sb, AUFS_ROOT_INO);
19661 +       err = PTR_ERR(inode);
19662 +       if (IS_ERR(inode))
19663 +               goto out;
19664 +
19665 +       inode->i_op = &aufs_dir_iop;
19666 +       inode->i_fop = &aufs_dir_fop;
19667 +       inode->i_mode = S_IFDIR;
19668 +       inode->i_nlink = 2;
19669 +       unlock_new_inode(inode);
19670 +       err = -ENOMEM; /* err held PTR_ERR() of a valid pointer until here */
19671 +       root = d_alloc_root(inode);
19672 +       if (unlikely(!root))
19673 +               goto out_iput;
19674 +       err = PTR_ERR(root);
19675 +       if (IS_ERR(root))
19676 +               goto out_iput;
19677 +
19678 +       err = au_alloc_dinfo(root);
19679 +       if (!err) {
19680 +               sb->s_root = root;
19681 +               return 0; /* success */
19682 +       }
19683 +       dput(root);
19684 +       goto out; /* do not iput */
19685 +
19686 + out_iput:
19687 +       /* inode is already unlocked; iget_failed() here would iput() it twice */
19688 +       iput(inode);
19689 + out:
19690 +       return err;
19691 +
19692 +}
19693 +
19694 +static int aufs_fill_super(struct super_block *sb, void *raw_data,
19695 +                          int silent __maybe_unused)
19696 +{
19697 +       int err;
19698 +       struct au_opts opts;
19699 +       struct dentry *root;
19700 +       struct inode *inode;
19701 +       char *arg = raw_data;
19702 +
19703 +       if (unlikely(!arg || !*arg)) { /* the mount data string is mandatory */
19704 +               err = -EINVAL;
19705 +               AuErr("no arg\n");
19706 +               goto out;
19707 +       }
19708 +
19709 +       err = -ENOMEM;
19710 +       memset(&opts, 0, sizeof(opts));
19711 +       opts.opt = (void *)__get_free_page(GFP_NOFS);
19712 +       if (unlikely(!opts.opt))
19713 +               goto out;
19714 +       opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); /* entries that fit in the page */
19715 +       opts.sb_flags = sb->s_flags;
19716 +
19717 +       err = au_si_alloc(sb);
19718 +       if (unlikely(err))
19719 +               goto out_opts;
19720 +
19721 +       /* all timestamps always follow the ones on the branch */
19722 +       sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
19723 +       sb->s_op = &aufs_sop;
19724 +       sb->s_magic = AUFS_SUPER_MAGIC;
19725 +       sb->s_maxbytes = 0;
19726 +       au_export_init(sb);
19727 +
19728 +       err = alloc_root(sb);
19729 +       if (unlikely(err)) {
19730 +               si_write_unlock(sb); /* presumably au_si_alloc() returned write-locked */
19731 +               goto out_info;
19732 +       }
19733 +       root = sb->s_root;
19734 +       inode = root->d_inode;
19735 +
19736 +       /*
19737 +        * actually we can parse options regardless aufs lock here.
19738 +        * but at remount time, parsing must be done before aufs lock.
19739 +        * so we follow the same rule.
19740 +        */
19741 +       ii_write_lock_parent(inode);
19742 +       aufs_write_unlock(root);
19743 +       err = au_opts_parse(sb, arg, &opts);
19744 +       if (unlikely(err))
19745 +               goto out_root;
19746 +
19747 +       /* lock vfs_inode first, then aufs. */
19748 +       mutex_lock(&inode->i_mutex);
19749 +       inode->i_op = &aufs_dir_iop; /* same value alloc_root() already set */
19750 +       inode->i_fop = &aufs_dir_fop; /* same value alloc_root() already set */
19751 +       aufs_write_lock(root);
19752 +       err = au_opts_mount(sb, &opts);
19753 +       au_opts_free(&opts);
19754 +       if (unlikely(err))
19755 +               goto out_unlock;
19756 +       aufs_write_unlock(root);
19757 +       mutex_unlock(&inode->i_mutex);
19758 +       goto out_opts; /* success */
19759 +
19760 + out_unlock:
19761 +       aufs_write_unlock(root);
19762 +       mutex_unlock(&inode->i_mutex);
19763 + out_root:
19764 +       dput(root);
19765 +       sb->s_root = NULL;
19766 + out_info:
19767 +       kobject_put(&au_sbi(sb)->si_kobj);
19768 +       sb->s_fs_info = NULL; /* aufs_put_super() returns early on NULL */
19769 + out_opts:
19770 +       free_page((unsigned long)opts.opt);
19771 + out:
19772 +       AuTraceErr(err);
19773 +       err = cvt_err(err); /* see cvt_err(): some errnos become EINVAL */
19774 +       AuTraceErr(err);
19775 +       return err;
19776 +}
19777 +
19778 +/* ---------------------------------------------------------------------- */
19779 +
19780 +static int aufs_get_sb(struct file_system_type *fs_type, int flags,
19781 +                      const char *dev_name __maybe_unused, void *raw_data,
19782 +                      struct vfsmount *mnt)
19783 +{
19784 +       int err;
19785 +       struct super_block *sb;
19786 +
19787 +       /* all timestamps always follow the ones on the branch */
19788 +       /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
19789 +       err = get_sb_nodev(fs_type, flags, raw_data, aufs_fill_super, mnt);
19790 +       if (!err) { /* mounted: call sysaufs_brs_add() under the si write lock */
19791 +               sb = mnt->mnt_sb;
19792 +               si_write_lock(sb);
19793 +               sysaufs_brs_add(sb, 0);
19794 +               si_write_unlock(sb);
19795 +       }
19796 +       return err;
19797 +}
19798 +
19799 +struct file_system_type aufs_fs_type = {
19800 +       .name           = AUFS_FSTYPE,
19801 +       .fs_flags       =
19802 +               FS_RENAME_DOES_D_MOVE   /* a race between rename and others */
19803 +               | FS_REVAL_DOT,         /* for NFS branch and udba */
19804 +       .get_sb         = aufs_get_sb, /* get_sb_nodev() with aufs_fill_super() */
19805 +       .kill_sb        = generic_shutdown_super,
19806 +       /* no need to __module_get() and module_put(). */
19807 +       .owner          = THIS_MODULE,
19808 +};
19809 diff -uprN -x .git linux-2.6.31/fs/aufs/super.h aufs2-2.6.git/fs/aufs/super.h
19810 --- linux-2.6.31/fs/aufs/super.h        1970-01-01 00:00:00.000000000 +0000
19811 +++ aufs2-2.6.git/fs/aufs/super.h       2009-09-21 21:49:23.411607814 +0000
19812 @@ -0,0 +1,384 @@
19813 +/*
19814 + * Copyright (C) 2005-2009 Junjiro R. Okajima
19815 + *
19816 + * This program, aufs is free software; you can redistribute it and/or modify
19817 + * it under the terms of the GNU General Public License as published by
19818 + * the Free Software Foundation; either version 2 of the License, or
19819 + * (at your option) any later version.
19820 + *
19821 + * This program is distributed in the hope that it will be useful,
19822 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19823 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19824 + * GNU General Public License for more details.
19825 + *
19826 + * You should have received a copy of the GNU General Public License
19827 + * along with this program; if not, write to the Free Software
19828 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19829 + */
19830 +
19831 +/*
19832 + * super_block operations
19833 + */
19834 +
19835 +#ifndef __AUFS_SUPER_H__
19836 +#define __AUFS_SUPER_H__
19837 +
19838 +#ifdef __KERNEL__
19839 +
19840 +#include <linux/fs.h>
19841 +#include <linux/aufs_type.h>
19842 +#include "rwsem.h"
19843 +#include "spl.h"
19844 +#include "wkq.h"
19845 +
19846 +typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
19847 +typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
19848 +                              loff_t *);
19849 +
19850 +/* policies to select one among multiple writable branches (copy-up side) */
19851 +struct au_wbr_copyup_operations {
19852 +       int (*copyup)(struct dentry *dentry); /* invoked through AuWbrCopyup() */
19853 +};
19854 +
19855 +struct au_wbr_create_operations {
19856 +       int (*create)(struct dentry *dentry, int isdir); /* invoked through AuWbrCreate() */
19857 +       int (*init)(struct super_block *sb);
19858 +       int (*fin)(struct super_block *sb); /* may be NULL, callers test it first */
19859 +};
19860 +
19861 +struct au_wbr_mfs { /* embedded in au_sbinfo as si_wbr_mfs ("most free space") */
19862 +       struct mutex    mfs_lock; /* protect this structure */
19863 +       unsigned long   mfs_jiffy;
19864 +       unsigned long   mfs_expire;
19865 +       aufs_bindex_t   mfs_bindex;
19866 +
19867 +       unsigned long long      mfsrr_bytes;
19868 +       unsigned long long      mfsrr_watermark;
19869 +};
19870 +
19871 +struct au_branch;
19872 +struct au_sbinfo {
19873 +       /* nowait tasks in the system-wide workqueue */
19874 +       struct au_nowait_tasks  si_nowait;
19875 +
19876 +       struct au_rwsem         si_rwsem;
19877 +
19878 +       /* branch management */
19879 +       unsigned int            si_generation;
19880 +
19881 +       /* AuSi_ status flags, defined after this struct */
19882 +       unsigned char           au_si_status;
19883 +
19884 +       aufs_bindex_t           si_bend;
19885 +       aufs_bindex_t           si_last_br_id;
19886 +       struct au_branch        **si_branch;
19887 +
19888 +       /* policy to select a writable branch */
19889 +       unsigned char           si_wbr_copyup;
19890 +       unsigned char           si_wbr_create;
19891 +       struct au_wbr_copyup_operations *si_wbr_copyup_ops;
19892 +       struct au_wbr_create_operations *si_wbr_create_ops;
19893 +
19894 +       /* round robin */
19895 +       atomic_t                si_wbr_rr_next;
19896 +
19897 +       /* most free space */
19898 +       struct au_wbr_mfs       si_wbr_mfs;
19899 +
19900 +       /* mount flags */
19901 +       /* include/asm-ia64/siginfo.h defines a macro named si_flags */
19902 +       unsigned int            si_mntflags;
19903 +
19904 +       /* external inode number (bitmap and translation table) */
19905 +       au_readf_t              si_xread;
19906 +       au_writef_t             si_xwrite;
19907 +       struct file             *si_xib;
19908 +       struct mutex            si_xib_mtx; /* protect xib members */
19909 +       unsigned long           *si_xib_buf;
19910 +       unsigned long           si_xib_last_pindex;
19911 +       int                     si_xib_next_bit;
19912 +       aufs_bindex_t           si_xino_brid;
19913 +       /* reserved for future use */
19914 +       /* unsigned long long   si_xib_limit; */        /* Max xib file size */
19915 +
19916 +#ifdef CONFIG_AUFS_EXPORT
19917 +       /* i_generation */
19918 +       struct file             *si_xigen;
19919 +       atomic_t                si_xigen_next;
19920 +#endif
19921 +
19922 +       /* vdir parameters */
19923 +       unsigned long           si_rdcache;     /* max cache time in jiffies */
19924 +       unsigned int            si_rdblk;       /* deblk size */
19925 +       unsigned int            si_rdhash;      /* hash size */
19926 +
19927 +       /*
19928 +        * If the number of whiteouts is larger than si_dirwh, leave all of
19929 +        * them after au_whtmp_ren to reduce the cost of rmdir(2).
19930 +        * future fsck.aufs or kernel thread will remove them later.
19931 +        * Otherwise, remove all whiteouts and the dir in rmdir(2).
19932 +        */
19933 +       unsigned int            si_dirwh;
19934 +
19935 +       /*
19936 +        * rename(2) a directory with all children.
19937 +        */
19938 +       /* reserved for future use */
19939 +       /* int                  si_rendir; */
19940 +
19941 +       /* pseudo_link list */
19942 +       struct au_splhead       si_plink;
19943 +       wait_queue_head_t       si_plink_wq;
19944 +
19945 +       /*
19946 +        * sysfs and lifetime management.
19947 +        * this is not a small structure and it may be a waste of memory when
19948 +        * sysfs is disabled, particularly when many aufs-es are mounted.
19949 +        * but using sysfs is the majority case.
19950 +        */
19951 +       struct kobject          si_kobj;
19952 +#ifdef CONFIG_DEBUG_FS
19953 +       struct dentry            *si_dbgaufs, *si_dbgaufs_xib;
19954 +#ifdef CONFIG_AUFS_EXPORT
19955 +       struct dentry            *si_dbgaufs_xigen;
19956 +#endif
19957 +#endif
19958 +
19959 +       /* dirty, necessary for unmounting, sysfs and sysrq */
19960 +       struct super_block      *si_sb;
19961 +};
19962 +
19963 +/* sbinfo status flags */
19964 +/*
19965 + * set true when refresh_dir() failed at remount time.
19966 + * then try refreshing dirs at access time again.
19967 + * if it is false, refreshing dirs at access time is unnecessary.
19968 + */
19969 +#define AuSi_FAILED_REFRESH_DIRS       1
19970 +#define AuSi_MAINTAIN_PLINK            (1 << 1)        /* ioctl */
19971 +static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
19972 +                                          unsigned int flag)
19973 +{
19974 +       AuRwMustAnyLock(&sbi->si_rwsem); /* presumably asserts si_rwsem is held */
19975 +       return sbi->au_si_status & flag; /* non-zero when the flag bit is set */
19976 +}
19977 +#define au_ftest_si(sbinfo, name)      au_do_ftest_si(sbinfo, AuSi_##name)
19978 +#define au_fset_si(sbinfo, name) do { \
19979 +       AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
19980 +       (sbinfo)->au_si_status |= AuSi_##name; \
19981 +} while (0)
19982 +#define au_fclr_si(sbinfo, name) do { \
19983 +       AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
19984 +       (sbinfo)->au_si_status &= ~AuSi_##name; \
19985 +} while (0)
19986 +
19987 +/* ---------------------------------------------------------------------- */
19988 +
19989 +/* policy to select one among writable branches */
19990 +#define AuWbrCopyup(sbinfo, args...) \
19991 +       ((sbinfo)->si_wbr_copyup_ops->copyup(args))
19992 +#define AuWbrCreate(sbinfo, args...) \
19993 +       ((sbinfo)->si_wbr_create_ops->create(args))
19994 +
19995 +/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
19996 +#define AuLock_DW              1               /* write-lock dentry */
19997 +#define AuLock_IR              (1 << 1)        /* read-lock inode */
19998 +#define AuLock_IW              (1 << 2)        /* write-lock inode */
19999 +#define AuLock_FLUSH           (1 << 3)        /* wait for 'nowait' tasks */
20000 +#define AuLock_DIR             (1 << 4)        /* target is a dir */
20001 +#define au_ftest_lock(flags, name)     ((flags) & AuLock_##name)
20002 +#define au_fset_lock(flags, name)      { (flags) |= AuLock_##name; }
20003 +#define au_fclr_lock(flags, name)      { (flags) &= ~AuLock_##name; }
20004 +
20005 +/* ---------------------------------------------------------------------- */
20006 +
20007 +/* super.c */
20008 +extern struct file_system_type aufs_fs_type;
20009 +struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
20010 +
20011 +/* sbinfo.c */
20012 +void au_si_free(struct kobject *kobj);
20013 +int au_si_alloc(struct super_block *sb);
20014 +int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
20015 +
20016 +unsigned int au_sigen_inc(struct super_block *sb);
20017 +aufs_bindex_t au_new_br_id(struct super_block *sb);
20018 +
20019 +void aufs_read_lock(struct dentry *dentry, int flags);
20020 +void aufs_read_unlock(struct dentry *dentry, int flags);
20021 +void aufs_write_lock(struct dentry *dentry);
20022 +void aufs_write_unlock(struct dentry *dentry);
20023 +void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int isdir);
20024 +void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
20025 +
20026 +/* wbr_policy.c */
20027 +extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
20028 +extern struct au_wbr_create_operations au_wbr_create_ops[];
20029 +int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
20030 +
20031 +/* ---------------------------------------------------------------------- */
20032 +
20033 +static inline struct au_sbinfo *au_sbi(struct super_block *sb)
20034 +{
20035 +       return sb->s_fs_info;
20036 +}
20037 +
20038 +/* ---------------------------------------------------------------------- */
20039 +
20040 +#ifdef CONFIG_AUFS_EXPORT
20041 +void au_export_init(struct super_block *sb);
20042 +
20043 +static inline int au_test_nfsd(struct task_struct *tsk)
20044 +{
20045 +       return !tsk->mm && !strcmp(tsk->comm, "nfsd");
20046 +}
20047 +
20048 +int au_xigen_inc(struct inode *inode);
20049 +int au_xigen_new(struct inode *inode);
20050 +int au_xigen_set(struct super_block *sb, struct file *base);
20051 +void au_xigen_clr(struct super_block *sb);
20052 +
20053 +static inline int au_busy_or_stale(void)
20054 +{
20055 +       if (au_test_nfsd(current))
20056 +               return -ESTALE;
20057 +       return -EBUSY;
20058 +}
20059 +#else
20060 +static inline void au_export_init(struct super_block *sb)
20061 +{
20062 +       /* nothing */
20063 +}
20064 +
20065 +static inline int au_test_nfsd(struct task_struct *tsk)
20066 +{
20067 +       return 0;
20068 +}
20069 +
20070 +static inline int au_xigen_inc(struct inode *inode)
20071 +{
20072 +       return 0;
20073 +}
20074 +
20075 +static inline int au_xigen_new(struct inode *inode)
20076 +{
20077 +       return 0;
20078 +}
20079 +
20080 +static inline int au_xigen_set(struct super_block *sb, struct file *base)
20081 +{
20082 +       return 0;
20083 +}
20084 +
20085 +static inline void au_xigen_clr(struct super_block *sb)
20086 +{
20087 +       /* empty */
20088 +}
20089 +
20090 +static inline int au_busy_or_stale(void)
20091 +{
20092 +       return -EBUSY;
20093 +}
20094 +#endif /* CONFIG_AUFS_EXPORT */
20095 +
20096 +/* ---------------------------------------------------------------------- */
20097 +
20098 +static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
20099 +{
20100 +       /*
20101 +        * This function is actually a dynamic '__init' function,
20102 +        * so the tiny check for si_rwsem is unnecessary.
20103 +        */
20104 +       /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
20105 +#ifdef CONFIG_DEBUG_FS
20106 +       sbinfo->si_dbgaufs = NULL;
20107 +       sbinfo->si_dbgaufs_xib = NULL;
20108 +#ifdef CONFIG_AUFS_EXPORT
20109 +       sbinfo->si_dbgaufs_xigen = NULL;
20110 +#endif
20111 +#endif
20112 +}
20113 +
20114 +/* ---------------------------------------------------------------------- */
20115 +
20116 +/* lock superblock. mainly for entry point functions */
20117 +/*
20118 + * si_noflush_read_lock, si_noflush_write_lock,
20119 + * si_read_unlock, si_write_unlock, si_downgrade_lock
20120 + */
20121 +AuSimpleLockRwsemFuncs(si_noflush, struct super_block *sb,
20122 +                      &au_sbi(sb)->si_rwsem);
20123 +AuSimpleUnlockRwsemFuncs(si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
20124 +
20125 +#define SiMustNoWaiters(sb)    AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
20126 +#define SiMustAnyLock(sb)      AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
20127 +#define SiMustWriteLock(sb)    AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
20128 +
20129 +static inline void si_read_lock(struct super_block *sb, int flags)
20130 +{
20131 +       if (au_ftest_lock(flags, FLUSH))
20132 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20133 +       si_noflush_read_lock(sb);
20134 +}
20135 +
20136 +static inline void si_write_lock(struct super_block *sb)
20137 +{
20138 +       au_nwt_flush(&au_sbi(sb)->si_nowait);
20139 +       si_noflush_write_lock(sb);
20140 +}
20141 +
20142 +static inline int si_read_trylock(struct super_block *sb, int flags)
20143 +{
20144 +       if (au_ftest_lock(flags, FLUSH))
20145 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20146 +       return si_noflush_read_trylock(sb);
20147 +}
20148 +
20149 +static inline int si_write_trylock(struct super_block *sb, int flags)
20150 +{
20151 +       if (au_ftest_lock(flags, FLUSH))
20152 +               au_nwt_flush(&au_sbi(sb)->si_nowait);
20153 +       return si_noflush_write_trylock(sb);
20154 +}
20155 +
20156 +/* ---------------------------------------------------------------------- */
20157 +
20158 +static inline aufs_bindex_t au_sbend(struct super_block *sb)
20159 +{
20160 +       SiMustAnyLock(sb);
20161 +       return au_sbi(sb)->si_bend;
20162 +}
20163 +
20164 +static inline unsigned int au_mntflags(struct super_block *sb)
20165 +{
20166 +       SiMustAnyLock(sb);
20167 +       return au_sbi(sb)->si_mntflags;
20168 +}
20169 +
20170 +static inline unsigned int au_sigen(struct super_block *sb)
20171 +{
20172 +       SiMustAnyLock(sb);
20173 +       return au_sbi(sb)->si_generation;
20174 +}
20175 +
20176 +static inline struct au_branch *au_sbr(struct super_block *sb,
20177 +                                      aufs_bindex_t bindex)
20178 +{
20179 +       SiMustAnyLock(sb);
20180 +       return au_sbi(sb)->si_branch[0 + bindex];
20181 +}
20182 +
20183 +static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
20184 +{
20185 +       SiMustWriteLock(sb);
20186 +       au_sbi(sb)->si_xino_brid = brid;
20187 +}
20188 +
20189 +static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
20190 +{
20191 +       SiMustAnyLock(sb);
20192 +       return au_sbi(sb)->si_xino_brid;
20193 +}
20194 +
20195 +#endif /* __KERNEL__ */
20196 +#endif /* __AUFS_SUPER_H__ */
20197 diff -uprN -x .git linux-2.6.31/fs/aufs/sysaufs.c aufs2-2.6.git/fs/aufs/sysaufs.c
20198 --- linux-2.6.31/fs/aufs/sysaufs.c      1970-01-01 00:00:00.000000000 +0000
20199 +++ aufs2-2.6.git/fs/aufs/sysaufs.c     2009-09-21 21:49:23.411607814 +0000
20200 @@ -0,0 +1,104 @@
20201 +/*
20202 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20203 + *
20204 + * This program, aufs is free software; you can redistribute it and/or modify
20205 + * it under the terms of the GNU General Public License as published by
20206 + * the Free Software Foundation; either version 2 of the License, or
20207 + * (at your option) any later version.
20208 + *
20209 + * This program is distributed in the hope that it will be useful,
20210 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20211 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20212 + * GNU General Public License for more details.
20213 + *
20214 + * You should have received a copy of the GNU General Public License
20215 + * along with this program; if not, write to the Free Software
20216 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20217 + */
20218 +
20219 +/*
20220 + * sysfs interface and lifetime management
20221 + * they are necessary regardless of whether sysfs is enabled.
20222 + */
20223 +
20224 +#include <linux/fs.h>
20225 +#include <linux/random.h>
20226 +#include <linux/sysfs.h>
20227 +#include "aufs.h"
20228 +
20229 +unsigned long sysaufs_si_mask;
20230 +struct kset *sysaufs_ket;
20231 +
20232 +#define AuSiAttr(_name) { \
20233 +       .attr   = { .name = __stringify(_name), .mode = 0444 }, \
20234 +       .show   = sysaufs_si_##_name,                           \
20235 +}
20236 +
20237 +static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
20238 +struct attribute *sysaufs_si_attrs[] = {
20239 +       &sysaufs_si_attr_xi_path.attr,
20240 +       NULL,
20241 +};
20242 +
20243 +static struct sysfs_ops au_sbi_ops = {
20244 +       .show   = sysaufs_si_show
20245 +};
20246 +
20247 +static struct kobj_type au_sbi_ktype = {
20248 +       .release        = au_si_free,
20249 +       .sysfs_ops      = &au_sbi_ops,
20250 +       .default_attrs  = sysaufs_si_attrs
20251 +};
20252 +
20253 +/* ---------------------------------------------------------------------- */
20254 +
20255 +int sysaufs_si_init(struct au_sbinfo *sbinfo)
20256 +{
20257 +       int err;
20258 +
20259 +       sbinfo->si_kobj.kset = sysaufs_ket;
20260 +       /* cf. sysaufs_name() */
20261 +       err = kobject_init_and_add
20262 +               (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_ket->kobj*/NULL,
20263 +                SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
20264 +
20265 +       dbgaufs_si_null(sbinfo);
20266 +       if (!err) {
20267 +               err = dbgaufs_si_init(sbinfo);
20268 +               if (unlikely(err))
20269 +                       kobject_put(&sbinfo->si_kobj);
20270 +       }
20271 +       return err;
20272 +}
20273 +
20274 +void sysaufs_fin(void)
20275 +{
20276 +       dbgaufs_fin();
20277 +       sysfs_remove_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20278 +       kset_unregister(sysaufs_ket);
20279 +}
20280 +
20281 +int __init sysaufs_init(void)
20282 +{
20283 +       int err;
20284 +
20285 +       do {
20286 +               get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
20287 +       } while (!sysaufs_si_mask);
20288 +
20289 +       sysaufs_ket = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
20290 +       err = PTR_ERR(sysaufs_ket);
20291 +       if (IS_ERR(sysaufs_ket))
20292 +               goto out;
20293 +       err = sysfs_create_group(&sysaufs_ket->kobj, sysaufs_attr_group);
20294 +       if (unlikely(err)) {
20295 +               kset_unregister(sysaufs_ket);
20296 +               goto out;
20297 +       }
20298 +
20299 +       err = dbgaufs_init();
20300 +       if (unlikely(err))
20301 +               sysaufs_fin();
20302 + out:
20303 +       return err;
20304 +}
20305 diff -uprN -x .git linux-2.6.31/fs/aufs/sysaufs.h aufs2-2.6.git/fs/aufs/sysaufs.h
20306 --- linux-2.6.31/fs/aufs/sysaufs.h      1970-01-01 00:00:00.000000000 +0000
20307 +++ aufs2-2.6.git/fs/aufs/sysaufs.h     2009-09-21 21:49:23.411607814 +0000
20308 @@ -0,0 +1,120 @@
20309 +/*
20310 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20311 + *
20312 + * This program, aufs is free software; you can redistribute it and/or modify
20313 + * it under the terms of the GNU General Public License as published by
20314 + * the Free Software Foundation; either version 2 of the License, or
20315 + * (at your option) any later version.
20316 + *
20317 + * This program is distributed in the hope that it will be useful,
20318 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20319 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20320 + * GNU General Public License for more details.
20321 + *
20322 + * You should have received a copy of the GNU General Public License
20323 + * along with this program; if not, write to the Free Software
20324 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20325 + */
20326 +
20327 +/*
20328 + * sysfs interface and mount lifetime management
20329 + */
20330 +
20331 +#ifndef __SYSAUFS_H__
20332 +#define __SYSAUFS_H__
20333 +
20334 +#ifdef __KERNEL__
20335 +
20336 +#include <linux/sysfs.h>
20337 +#include <linux/aufs_type.h>
20338 +#include "module.h"
20339 +
20340 +struct super_block;
20341 +struct au_sbinfo;
20342 +
20343 +struct sysaufs_si_attr {
20344 +       struct attribute attr;
20345 +       int (*show)(struct seq_file *seq, struct super_block *sb);
20346 +};
20347 +
20348 +/* ---------------------------------------------------------------------- */
20349 +
20350 +/* sysaufs.c */
20351 +extern unsigned long sysaufs_si_mask;
20352 +extern struct kset *sysaufs_ket;
20353 +extern struct attribute *sysaufs_si_attrs[];
20354 +int sysaufs_si_init(struct au_sbinfo *sbinfo);
20355 +int __init sysaufs_init(void);
20356 +void sysaufs_fin(void);
20357 +
20358 +/* ---------------------------------------------------------------------- */
20359 +
20360 +/* some people don't like to show a kernel pointer */
20361 +static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
20362 +{
20363 +       return sysaufs_si_mask ^ (unsigned long)sbinfo;
20364 +}
20365 +
20366 +#define SysaufsSiNamePrefix    "si_"
20367 +#define SysaufsSiNameLen       (sizeof(SysaufsSiNamePrefix) + 16)
20368 +static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
20369 +{
20370 +       snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
20371 +                sysaufs_si_id(sbinfo));
20372 +}
20373 +
20374 +struct au_branch;
20375 +#ifdef CONFIG_SYSFS
20376 +/* sysfs.c */
20377 +extern struct attribute_group *sysaufs_attr_group;
20378 +
20379 +int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
20380 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20381 +                        char *buf);
20382 +
20383 +void sysaufs_br_init(struct au_branch *br);
20384 +void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
20385 +void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
20386 +
20387 +#define sysaufs_brs_init()     do {} while (0)
20388 +
20389 +#else
20390 +#define sysaufs_attr_group     NULL
20391 +
20392 +static inline
20393 +int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
20394 +{
20395 +       return 0;
20396 +}
20397 +
20398 +static inline
20399 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20400 +                        char *buf)
20401 +{
20402 +       return 0;
20403 +}
20404 +
20405 +static inline void sysaufs_br_init(struct au_branch *br)
20406 +{
20407 +       /* empty */
20408 +}
20409 +
20410 +static inline void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
20411 +{
20412 +       /* nothing */
20413 +}
20414 +
20415 +static inline void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
20416 +{
20417 +       /* nothing */
20418 +}
20419 +
20420 +static inline void sysaufs_brs_init(void)
20421 +{
20422 +       sysaufs_brs = 0;
20423 +}
20424 +
20425 +#endif /* CONFIG_SYSFS */
20426 +
20427 +#endif /* __KERNEL__ */
20428 +#endif /* __SYSAUFS_H__ */
20429 diff -uprN -x .git linux-2.6.31/fs/aufs/sysfs.c aufs2-2.6.git/fs/aufs/sysfs.c
20430 --- linux-2.6.31/fs/aufs/sysfs.c        1970-01-01 00:00:00.000000000 +0000
20431 +++ aufs2-2.6.git/fs/aufs/sysfs.c       2009-09-21 21:49:23.411607814 +0000
20432 @@ -0,0 +1,224 @@
20433 +/*
20434 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20435 + *
20436 + * This program, aufs is free software; you can redistribute it and/or modify
20437 + * it under the terms of the GNU General Public License as published by
20438 + * the Free Software Foundation; either version 2 of the License, or
20439 + * (at your option) any later version.
20440 + *
20441 + * This program is distributed in the hope that it will be useful,
20442 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20443 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20444 + * GNU General Public License for more details.
20445 + *
20446 + * You should have received a copy of the GNU General Public License
20447 + * along with this program; if not, write to the Free Software
20448 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20449 + */
20450 +
20451 +/*
20452 + * sysfs interface
20453 + */
20454 +
20455 +#include <linux/fs.h>
20456 +#include <linux/module.h>
20457 +#include <linux/seq_file.h>
20458 +#include <linux/sysfs.h>
20459 +#include "aufs.h"
20460 +
20461 +static struct attribute *au_attr[] = {
20462 +       NULL,   /* need to NULL terminate the list of attributes */
20463 +};
20464 +
20465 +static struct attribute_group sysaufs_attr_group_body = {
20466 +       .attrs = au_attr
20467 +};
20468 +
20469 +struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
20470 +
20471 +/* ---------------------------------------------------------------------- */
20472 +
20473 +int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
20474 +{
20475 +       int err;
20476 +
20477 +       SiMustAnyLock(sb);
20478 +
20479 +       err = 0;
20480 +       if (au_opt_test(au_mntflags(sb), XINO)) {
20481 +               err = au_xino_path(seq, au_sbi(sb)->si_xib);
20482 +               seq_putc(seq, '\n');
20483 +       }
20484 +       return err;
20485 +}
20486 +
20487 +/*
20488 + * the lifetime of a branch is independent of its entry under sysfs.
20489 + * sysfs handles the lifetime of the entry, and never calls ->show() after it
20490 + * is unlinked.
20491 + */
20492 +static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
20493 +                        aufs_bindex_t bindex)
20494 +{
20495 +       struct path path;
20496 +       struct dentry *root;
20497 +       struct au_branch *br;
20498 +
20499 +       AuDbg("b%d\n", bindex);
20500 +
20501 +       root = sb->s_root;
20502 +       di_read_lock_parent(root, !AuLock_IR);
20503 +       br = au_sbr(sb, bindex);
20504 +       path.mnt = br->br_mnt;
20505 +       path.dentry = au_h_dptr(root, bindex);
20506 +       au_seq_path(seq, &path);
20507 +       di_read_unlock(root, !AuLock_IR);
20508 +       seq_printf(seq, "=%s\n", au_optstr_br_perm(br->br_perm));
20509 +       return 0;
20510 +}
20511 +
20512 +/* ---------------------------------------------------------------------- */
20513 +
20514 +static struct seq_file *au_seq(char *p, ssize_t len)
20515 +{
20516 +       struct seq_file *seq;
20517 +
20518 +       seq = kzalloc(sizeof(*seq), GFP_NOFS);
20519 +       if (seq) {
20520 +               /* mutex_init(&seq.lock); */
20521 +               seq->buf = p;
20522 +               seq->size = len;
20523 +               return seq; /* success */
20524 +       }
20525 +
20526 +       seq = ERR_PTR(-ENOMEM);
20527 +       return seq;
20528 +}
20529 +
20530 +#define SysaufsBr_PREFIX "br"
20531 +
20532 +/* todo: file size may exceed PAGE_SIZE */
20533 +ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
20534 +                       char *buf)
20535 +{
20536 +       ssize_t err;
20537 +       long l;
20538 +       aufs_bindex_t bend;
20539 +       struct au_sbinfo *sbinfo;
20540 +       struct super_block *sb;
20541 +       struct seq_file *seq;
20542 +       char *name;
20543 +       struct attribute **cattr;
20544 +
20545 +       sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
20546 +       sb = sbinfo->si_sb;
20547 +
20548 +       /*
20549 +        * prevent a race condition between sysfs and aufs.
20550 +        * for instance, sysfs_file_read() calls sysfs_get_active_two() which
20551 +        * prohibits maintaining the sysfs entries.
20552 +        * here we acquire read lock after sysfs_get_active_two().
20553 +        * on the other hand, the remount process may maintain the sysfs/aufs
20554 +        * entries after acquiring write lock.
20555 +        * it can cause a deadlock.
20556 +        * so we simply give up processing the read here.
20557 +        */
20558 +       err = -EBUSY;
20559 +       if (unlikely(!si_noflush_read_trylock(sb)))
20560 +               goto out;
20561 +
20562 +       seq = au_seq(buf, PAGE_SIZE);
20563 +       err = PTR_ERR(seq);
20564 +       if (IS_ERR(seq))
20565 +               goto out_unlock;
20566 +
20567 +       name = (void *)attr->name;
20568 +       cattr = sysaufs_si_attrs;
20569 +       while (*cattr) {
20570 +               if (!strcmp(name, (*cattr)->name)) {
20571 +                       err = container_of(*cattr, struct sysaufs_si_attr, attr)
20572 +                               ->show(seq, sb);
20573 +                       goto out_seq;
20574 +               }
20575 +               cattr++;
20576 +       }
20577 +
20578 +       bend = au_sbend(sb);
20579 +       if (!strncmp(name, SysaufsBr_PREFIX, sizeof(SysaufsBr_PREFIX) - 1)) {
20580 +               name += sizeof(SysaufsBr_PREFIX) - 1;
20581 +               err = strict_strtol(name, 10, &l);
20582 +               if (!err) {
20583 +                       if (0 <= l && l <= bend) /* reject negative index */
20584 +                               err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l);
20585 +                       else
20586 +                               err = -ENOENT;
20587 +               }
20588 +               goto out_seq;
20589 +       }
20590 +       BUG();
20591 +
20592 + out_seq:
20593 +       if (!err) {
20594 +               err = seq->count;
20595 +               /* sysfs limit */
20596 +               if (unlikely(err == PAGE_SIZE))
20597 +                       err = -EFBIG;
20598 +       }
20599 +       kfree(seq);
20600 + out_unlock:
20601 +       si_read_unlock(sb);
20602 + out:
20603 +       return err;
20604 +}
20605 +
20606 +/* ---------------------------------------------------------------------- */
20607 +
20608 +void sysaufs_br_init(struct au_branch *br)
20609 +{
20610 +       br->br_attr.name = br->br_name;
20611 +       br->br_attr.mode = S_IRUGO;
20612 +       br->br_attr.owner = THIS_MODULE;
20613 +}
20614 +
20615 +void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
20616 +{
20617 +       struct au_branch *br;
20618 +       struct kobject *kobj;
20619 +       aufs_bindex_t bend;
20620 +
20621 +       dbgaufs_brs_del(sb, bindex);
20622 +
20623 +       if (!sysaufs_brs)
20624 +               return;
20625 +
20626 +       kobj = &au_sbi(sb)->si_kobj;
20627 +       bend = au_sbend(sb);
20628 +       for (; bindex <= bend; bindex++) {
20629 +               br = au_sbr(sb, bindex);
20630 +               sysfs_remove_file(kobj, &br->br_attr);
20631 +       }
20632 +}
20633 +
20634 +void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
20635 +{
20636 +       int err;
20637 +       aufs_bindex_t bend;
20638 +       struct kobject *kobj;
20639 +       struct au_branch *br;
20640 +
20641 +       dbgaufs_brs_add(sb, bindex);
20642 +
20643 +       if (!sysaufs_brs)
20644 +               return;
20645 +
20646 +       kobj = &au_sbi(sb)->si_kobj;
20647 +       bend = au_sbend(sb);
20648 +       for (; bindex <= bend; bindex++) {
20649 +               br = au_sbr(sb, bindex);
20650 +               snprintf(br->br_name, sizeof(br->br_name), SysaufsBr_PREFIX
20651 +                        "%d", bindex);
20652 +               err = sysfs_create_file(kobj, &br->br_attr);
20653 +               if (unlikely(err))
20654 +                       AuWarn("failed %s under sysfs(%d)\n", br->br_name, err);
20655 +       }
20656 +}
20657 diff -uprN -x .git linux-2.6.31/fs/aufs/sysrq.c aufs2-2.6.git/fs/aufs/sysrq.c
20658 --- linux-2.6.31/fs/aufs/sysrq.c        1970-01-01 00:00:00.000000000 +0000
20659 +++ aufs2-2.6.git/fs/aufs/sysrq.c       2009-09-21 21:49:23.411607814 +0000
20660 @@ -0,0 +1,115 @@
20661 +/*
20662 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20663 + *
20664 + * This program, aufs is free software; you can redistribute it and/or modify
20665 + * it under the terms of the GNU General Public License as published by
20666 + * the Free Software Foundation; either version 2 of the License, or
20667 + * (at your option) any later version.
20668 + *
20669 + * This program is distributed in the hope that it will be useful,
20670 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20671 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20672 + * GNU General Public License for more details.
20673 + *
20674 + * You should have received a copy of the GNU General Public License
20675 + * along with this program; if not, write to the Free Software
20676 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20677 + */
20678 +
20679 +/*
20680 + * magic sysrq handler
20681 + */
20682 +
20683 +#include <linux/fs.h>
20684 +#include <linux/module.h>
20685 +#include <linux/moduleparam.h>
20686 +/* #include <linux/sysrq.h> */
20687 +#include "aufs.h"
20688 +
20689 +/* ---------------------------------------------------------------------- */
20690 +
20691 +static void sysrq_sb(struct super_block *sb)
20692 +{
20693 +       char *plevel;
20694 +       struct au_sbinfo *sbinfo;
20695 +       struct file *file;
20696 +
20697 +       plevel = au_plevel;
20698 +       au_plevel = KERN_WARNING;
20699 +       au_debug(1);
20700 +
20701 +       sbinfo = au_sbi(sb);
20702 +       pr_warning("si=%lx\n", sysaufs_si_id(sbinfo));
20703 +       pr_warning(AUFS_NAME ": superblock\n");
20704 +       au_dpri_sb(sb);
20705 +       pr_warning(AUFS_NAME ": root dentry\n");
20706 +       au_dpri_dentry(sb->s_root);
20707 +       pr_warning(AUFS_NAME ": root inode\n");
20708 +       au_dpri_inode(sb->s_root->d_inode);
20709 +#if 0
20710 +       struct inode *i;
20711 +       pr_warning(AUFS_NAME ": isolated inode\n");
20712 +       list_for_each_entry(i, &sb->s_inodes, i_sb_list)
20713 +               if (list_empty(&i->i_dentry))
20714 +                       au_dpri_inode(i);
20715 +#endif
20716 +       pr_warning(AUFS_NAME ": files\n");
20717 +       list_for_each_entry(file, &sb->s_files, f_u.fu_list)
20718 +               if (!special_file(file->f_dentry->d_inode->i_mode))
20719 +                       au_dpri_file(file);
20720 +
20721 +       au_plevel = plevel;
20722 +       au_debug(0);
20723 +}
20724 +
20725 +/* ---------------------------------------------------------------------- */
20726 +
20727 +/* module parameter */
20728 +static char *aufs_sysrq_key = "a";
20729 +module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
20730 +MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
20731 +
20732 +static void au_sysrq(int key __maybe_unused,
20733 +                    struct tty_struct *tty __maybe_unused)
20734 +{
20735 +       struct kobject *kobj;
20736 +       struct au_sbinfo *sbinfo;
20737 +
20738 +       /* spin_lock(&sysaufs_ket->list_lock); */
20739 +       list_for_each_entry(kobj, &sysaufs_ket->list, entry) {
20740 +               sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
20741 +               sysrq_sb(sbinfo->si_sb);
20742 +       }
20743 +       /* spin_unlock(&sysaufs_ket->list_lock); */
20744 +}
20745 +
20746 +static struct sysrq_key_op au_sysrq_op = {
20747 +       .handler        = au_sysrq,
20748 +       .help_msg       = "Aufs",
20749 +       .action_msg     = "Aufs",
20750 +       .enable_mask    = SYSRQ_ENABLE_DUMP
20751 +};
20752 +
20753 +/* ---------------------------------------------------------------------- */
20754 +
20755 +int __init au_sysrq_init(void)
20756 +{
20757 +       int err;
20758 +       char key;
20759 +
20760 +       err = -1;
20761 +       key = *aufs_sysrq_key;
20762 +       if ('a' <= key && key <= 'z')
20763 +               err = register_sysrq_key(key, &au_sysrq_op);
20764 +       if (unlikely(err))
20765 +               AuErr("err %d, sysrq=%c\n", err, key);
20766 +       return err;
20767 +}
20768 +
20769 +void au_sysrq_fin(void)
20770 +{
20771 +       int err;
20772 +       err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
20773 +       if (unlikely(err))
20774 +               AuErr("err %d (ignored)\n", err);
20775 +}
20776 diff -uprN -x .git linux-2.6.31/fs/aufs/vdir.c aufs2-2.6.git/fs/aufs/vdir.c
20777 --- linux-2.6.31/fs/aufs/vdir.c 1970-01-01 00:00:00.000000000 +0000
20778 +++ aufs2-2.6.git/fs/aufs/vdir.c        2009-09-21 21:49:23.411607814 +0000
20779 @@ -0,0 +1,879 @@
20780 +/*
20781 + * Copyright (C) 2005-2009 Junjiro R. Okajima
20782 + *
20783 + * This program, aufs is free software; you can redistribute it and/or modify
20784 + * it under the terms of the GNU General Public License as published by
20785 + * the Free Software Foundation; either version 2 of the License, or
20786 + * (at your option) any later version.
20787 + *
20788 + * This program is distributed in the hope that it will be useful,
20789 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20790 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20791 + * GNU General Public License for more details.
20792 + *
20793 + * You should have received a copy of the GNU General Public License
20794 + * along with this program; if not, write to the Free Software
20795 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20796 + */
20797 +
20798 +/*
20799 + * virtual or vertical directory
20800 + */
20801 +
20802 +#include <linux/hash.h>
20803 +#include "aufs.h"
20804 +
20805 +static unsigned int calc_size(int nlen)
20806 +{
20807 +       BUILD_BUG_ON(sizeof(ino_t) != sizeof(long));
20808 +       return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
20809 +}
20810 +
20811 +static int set_deblk_end(union au_vdir_deblk_p *p,
20812 +                        union au_vdir_deblk_p *deblk_end)
20813 +{
20814 +       if (calc_size(0) <= deblk_end->deblk - p->deblk) {
20815 +               p->de->de_str.len = 0;
20816 +               /* smp_mb(); */
20817 +               return 0;
20818 +       }
20819 +       return -1; /* error */
20820 +}
20821 +
20822 +/* returns true or false */
20823 +static int is_deblk_end(union au_vdir_deblk_p *p,
20824 +                       union au_vdir_deblk_p *deblk_end)
20825 +{
20826 +       if (calc_size(0) <= deblk_end->deblk - p->deblk)
20827 +               return !p->de->de_str.len;
20828 +       return 1;
20829 +}
20830 +
20831 +static unsigned char *last_deblk(struct au_vdir *vdir)
20832 +{
20833 +       return vdir->vd_deblk[vdir->vd_nblk - 1];
20834 +}
20835 +
20836 +/* ---------------------------------------------------------------------- */
20837 +
20838 +/* estimate the appropriate size for name hash table */
20839 +unsigned int au_rdhash_est(loff_t sz)
20840 +{
20841 +       unsigned int est;
20842 +
20843 +       sz >>= 10;
20844 +       if (sz < AUFS_RDHASH_DEF)
20845 +               est = AUFS_RDHASH_DEF;
20846 +       else if (sz < UINT_MAX)
20847 +               est = sz;
20848 +       else
20849 +               est = UINT_MAX;
20850 +       return est;
20851 +}
20852 +
20853 +/*
20854 + * the allocated memory has to be freed by
20855 + * au_nhash_wh_free() or au_nhash_de_free().
20856 + */
20857 +int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
20858 +{
20859 +       struct hlist_head *head;
20860 +       unsigned int u;
20861 +
20862 +       head = kcalloc(num_hash, sizeof(*head), gfp); /* overflow-checked */
20863 +       if (head) {
20864 +               nhash->nh_num = num_hash;
20865 +               nhash->nh_head = head;
20866 +               for (u = 0; u < num_hash; u++)
20867 +                       INIT_HLIST_HEAD(head++);
20868 +               return 0; /* success */
20869 +       }
20870 +
20871 +       return -ENOMEM;
20872 +}
20873 +
20874 +static void nhash_count(struct hlist_head *head)
20875 +{
20876 +#if 0
20877 +       unsigned long n;
20878 +       struct hlist_node *pos;
20879 +
20880 +       n = 0;
20881 +       hlist_for_each(pos, head)
20882 +               n++;
20883 +       AuInfo("%lu\n", n);
20884 +#endif
20885 +}
20886 +
20887 +static void au_nhash_wh_do_free(struct hlist_head *head)
20888 +{
20889 +       struct au_vdir_wh *tpos;
20890 +       struct hlist_node *pos, *node;
20891 +
20892 +       hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) {
20893 +               /* hlist_del(pos); */
20894 +               kfree(tpos);
20895 +       }
20896 +}
20897 +
20898 +static void au_nhash_de_do_free(struct hlist_head *head)
20899 +{
20900 +       struct au_vdir_dehstr *tpos;
20901 +       struct hlist_node *pos, *node;
20902 +
20903 +       hlist_for_each_entry_safe(tpos, pos, node, head, hash) {
20904 +               /* hlist_del(pos); */
20905 +               au_cache_free_dehstr(tpos);
20906 +       }
20907 +}
20908 +
20909 +static void au_nhash_do_free(struct au_nhash *nhash,
20910 +                            void (*free)(struct hlist_head *head))
20911 +{ /* run free() on each of the nh_num buckets, then kfree() the bucket array */
20912 +       unsigned int n;
20913 +       struct hlist_head *head;
20914 +
20915 +       n = nhash->nh_num;
20916 +       if (!n)
20917 +               return; /* nothing is freed when nh_num is zero */
20918 +
20919 +       head = nhash->nh_head;
20920 +       while (n-- > 0) {
20921 +               nhash_count(head);
20922 +               free(head++);
20923 +       }
20924 +       kfree(nhash->nh_head);
20925 +}
20926 +
20927 +void au_nhash_wh_free(struct au_nhash *whlist)
20928 +{ /* free a whiteout hash: entries via au_nhash_wh_do_free(), then the buckets */
20929 +       au_nhash_do_free(whlist, au_nhash_wh_do_free);
20930 +}
20931 +
20932 +static void au_nhash_de_free(struct au_nhash *delist)
20933 +{ /* free an entry hash: entries via au_nhash_de_do_free(), then the buckets */
20934 +       au_nhash_do_free(delist, au_nhash_de_do_free);
20935 +}
20936 +
20937 +/* ---------------------------------------------------------------------- */
20938 +
20939 +int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
20940 +                           int limit)
20941 +{ /* returns 1 if more than limit entries have wh_bindex == btgt, otherwise 0 */
20942 +       int num;
20943 +       unsigned int u, n;
20944 +       struct hlist_head *head;
20945 +       struct au_vdir_wh *tpos;
20946 +       struct hlist_node *pos;
20947 +
20948 +       num = 0;
20949 +       n = whlist->nh_num;
20950 +       head = whlist->nh_head;
20951 +       for (u = 0; u < n; u++, head++) /* visit every bucket */
20952 +               hlist_for_each_entry(tpos, pos, head, wh_hash)
20953 +                       if (tpos->wh_bindex == btgt && ++num > limit)
20954 +                               return 1; /* stop as soon as the limit is exceeded */
20955 +       return 0;
20956 +}
20957 +
20958 +static struct hlist_head *au_name_hash(struct au_nhash *nhash,
20959 +                                      unsigned char *name,
20960 +                                      unsigned int len)
20961 +{ /* pick the bucket for name: sum of its len bytes, modulo nh_num */
20962 +       unsigned int v;
20963 +       /* const unsigned int magic_bit = 12; */
20964 +
20965 +       AuDebugOn(!nhash->nh_num || !nhash->nh_head);
20966 +
20967 +       v = 0;
20968 +       while (len--)
20969 +               v += *name++;
20970 +       /* v = hash_long(v, magic_bit); */
20971 +       v %= nhash->nh_num; /* nh_num must be non-zero here, see the check above */
20972 +       return nhash->nh_head + v;
20973 +}
20974 +
20975 +static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
20976 +                             int nlen)
20977 +{ /* non-zero when str has length nlen and the same nlen bytes as name */
20978 +       return str->len == nlen && !memcmp(str->name, name, nlen);
20979 +}
20980 +
20981 +/* returns 1 when name (nlen bytes) is already recorded in whlist, else 0 */
20982 +int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
20983 +{
20984 +       struct hlist_head *head;
20985 +       struct au_vdir_wh *tpos;
20986 +       struct hlist_node *pos;
20987 +       struct au_vdir_destr *str;
20988 +
20989 +       head = au_name_hash(whlist, name, nlen); /* only this bucket is searched */
20990 +       hlist_for_each_entry(tpos, pos, head, wh_hash) {
20991 +               str = &tpos->wh_str;
20992 +               AuDbg("%.*s\n", str->len, str->name);
20993 +               if (au_nhash_test_name(str, name, nlen))
20994 +                       return 1;
20995 +       }
20996 +       return 0;
20997 +}
20998 +
20999 +/* returns 1 when name (nlen bytes) is already recorded in delist, else 0 */
21000 +static int test_known(struct au_nhash *delist, char *name, int nlen)
21001 +{
21002 +       struct hlist_head *head;
21003 +       struct au_vdir_dehstr *tpos;
21004 +       struct hlist_node *pos;
21005 +       struct au_vdir_destr *str;
21006 +
21007 +       head = au_name_hash(delist, name, nlen); /* only this bucket is searched */
21008 +       hlist_for_each_entry(tpos, pos, head, hash) {
21009 +               str = tpos->str; /* the hash node points at the stored name */
21010 +               AuDbg("%.*s\n", str->len, str->name);
21011 +               if (au_nhash_test_name(str, name, nlen))
21012 +                       return 1;
21013 +       }
21014 +       return 0;
21015 +}
21016 +
21017 +static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
21018 +                           unsigned char d_type)
21019 +{ /* record ino and d_type in wh; a no-op without CONFIG_AUFS_SHWH */
21020 +#ifdef CONFIG_AUFS_SHWH
21021 +       wh->wh_ino = ino;
21022 +       wh->wh_type = d_type;
21023 +#endif
21024 +}
21025 +
21026 +/* ---------------------------------------------------------------------- */
21027 +
21028 +int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
21029 +                      unsigned int d_type, aufs_bindex_t bindex,
21030 +                      unsigned char shwh)
21031 +{ /* add a whiteout record for name to whlist; returns 0 or -ENOMEM */
21032 +       int err;
21033 +       struct au_vdir_destr *str;
21034 +       struct au_vdir_wh *wh;
21035 +
21036 +       AuDbg("%.*s\n", nlen, name);
21037 +       AuDebugOn(!whlist->nh_num || !whlist->nh_head);
21038 +
21039 +       err = -ENOMEM;
21040 +       wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); /* record plus nlen name bytes */
21041 +       if (unlikely(!wh))
21042 +               goto out;
21043 +
21044 +       err = 0;
21045 +       wh->wh_bindex = bindex;
21046 +       if (shwh) /* ino and d_type are only stored when shwh is set */
21047 +               au_shwh_init_wh(wh, ino, d_type);
21048 +       str = &wh->wh_str;
21049 +       str->len = nlen;
21050 +       memcpy(str->name, name, nlen); /* copied without a terminating NUL */
21051 +       hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
21052 +       /* smp_mb(); */
21053 +
21054 + out:
21055 +       return err;
21056 +}
21057 +
21058 +static int append_deblk(struct au_vdir *vdir)
21059 +{ /* add one block of vd_deblk_sz bytes to vdir and make it the current one */
21060 +       int err;
21061 +       unsigned long ul;
21062 +       const unsigned int deblk_sz = vdir->vd_deblk_sz;
21063 +       union au_vdir_deblk_p p, deblk_end;
21064 +       unsigned char **o;
21065 +
21066 +       err = -ENOMEM;
21067 +       o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
21068 +                    GFP_NOFS); /* room for one more block pointer */
21069 +       if (unlikely(!o))
21070 +               goto out;
21071 +
21072 +       vdir->vd_deblk = o;
21073 +       p.deblk = kmalloc(deblk_sz, GFP_NOFS);
21074 +       if (p.deblk) { /* on failure err stays -ENOMEM and vd_nblk is unchanged */
21075 +               ul = vdir->vd_nblk++;
21076 +               vdir->vd_deblk[ul] = p.deblk;
21077 +               vdir->vd_last.ul = ul;
21078 +               vdir->vd_last.p.deblk = p.deblk;
21079 +               deblk_end.deblk = p.deblk + deblk_sz;
21080 +               err = set_deblk_end(&p, &deblk_end); /* mark the new block as empty */
21081 +       }
21082 +
21083 + out:
21084 +       return err;
21085 +}
21086 +
21087 +static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
21088 +                    unsigned int d_type, struct au_nhash *delist)
21089 +{ /* store one entry at vdir's current position and hash its name in delist */
21090 +       int err;
21091 +       unsigned int sz;
21092 +       const unsigned int deblk_sz = vdir->vd_deblk_sz;
21093 +       union au_vdir_deblk_p p, *room, deblk_end;
21094 +       struct au_vdir_dehstr *dehstr;
21095 +
21096 +       p.deblk = last_deblk(vdir);
21097 +       deblk_end.deblk = p.deblk + deblk_sz;
21098 +       room = &vdir->vd_last.p; /* aliases the vdir field, so it follows new blocks */
21099 +       AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
21100 +                 || !is_deblk_end(room, &deblk_end));
21101 +
21102 +       sz = calc_size(nlen);
21103 +       if (unlikely(sz > deblk_end.deblk - room->deblk)) { /* does not fit: new block */
21104 +               err = append_deblk(vdir);
21105 +               if (unlikely(err))
21106 +                       goto out;
21107 +
21108 +               p.deblk = last_deblk(vdir);
21109 +               deblk_end.deblk = p.deblk + deblk_sz;
21110 +               /* smp_mb(); */
21111 +               AuDebugOn(room->deblk != p.deblk);
21112 +       }
21113 +
21114 +       err = -ENOMEM;
21115 +       dehstr = au_cache_alloc_dehstr();
21116 +       if (unlikely(!dehstr))
21117 +               goto out;
21118 +
21119 +       dehstr->str = &room->de->de_str; /* hash node refers to the name kept in the block */
21120 +       hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
21121 +       room->de->de_ino = ino;
21122 +       room->de->de_type = d_type;
21123 +       room->de->de_str.len = nlen;
21124 +       memcpy(room->de->de_str.name, name, nlen);
21125 +
21126 +       err = 0;
21127 +       room->deblk += sz; /* advance past the entry just written */
21128 +       if (unlikely(set_deblk_end(room, &deblk_end)))
21129 +               err = append_deblk(vdir); /* end marker could not be set: add a block */
21130 +       /* smp_mb(); */
21131 +
21132 + out:
21133 +       return err;
21134 +}
21135 +
21136 +/* ---------------------------------------------------------------------- */
21137 +
21138 +void au_vdir_free(struct au_vdir *vdir)
21139 +{ /* free all vd_nblk blocks, the block pointer array, then vdir itself */
21140 +       unsigned char **deblk;
21141 +
21142 +       deblk = vdir->vd_deblk;
21143 +       while (vdir->vd_nblk--)
21144 +               kfree(*deblk++);
21145 +       kfree(vdir->vd_deblk);
21146 +       au_cache_free_vdir(vdir);
21147 +}
21148 +
21149 +static struct au_vdir *alloc_vdir(struct file *file)
21150 +{ /* returns a new vdir holding one empty block, or ERR_PTR(err) on failure */
21151 +       struct au_vdir *vdir;
21152 +       struct super_block *sb;
21153 +       int err;
21154 +
21155 +       sb = file->f_dentry->d_sb;
21156 +       SiMustAnyLock(sb);
21157 +
21158 +       err = -ENOMEM;
21159 +       vdir = au_cache_alloc_vdir();
21160 +       if (unlikely(!vdir))
21161 +               goto out;
21162 +
21163 +       vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
21164 +       if (unlikely(!vdir->vd_deblk))
21165 +               goto out_free;
21166 +
21167 +       vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
21168 +       if (!vdir->vd_deblk_sz) {
21169 +               /* estimate the appropriate size for deblk */
21170 +               vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
21171 +               /* AuInfo("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
21172 +       }
21173 +       vdir->vd_nblk = 0;
21174 +       vdir->vd_version = 0;
21175 +       vdir->vd_jiffy = 0;
21176 +       err = append_deblk(vdir); /* allocates the first block */
21177 +       if (!err)
21178 +               return vdir; /* success */
21179 +
21180 +       kfree(vdir->vd_deblk);
21181 +
21182 + out_free:
21183 +       au_cache_free_vdir(vdir);
21184 + out:
21185 +       vdir = ERR_PTR(err);
21186 +       return vdir;
21187 +}
21188 +
21189 +static int reinit_vdir(struct au_vdir *vdir)
21190 +{ /* shrink vdir to its first block, mark it empty and reset position and stamps */
21191 +       int err;
21192 +       union au_vdir_deblk_p p, deblk_end;
21193 +
21194 +       while (vdir->vd_nblk > 1) { /* free every block except block 0 */
21195 +               kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
21196 +               /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
21197 +               vdir->vd_nblk--;
21198 +       }
21199 +       p.deblk = vdir->vd_deblk[0];
21200 +       deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
21201 +       err = set_deblk_end(&p, &deblk_end);
21202 +       /* keep vd_deblk_sz */
21203 +       vdir->vd_last.ul = 0;
21204 +       vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21205 +       vdir->vd_version = 0;
21206 +       vdir->vd_jiffy = 0;
21207 +       /* smp_mb(); */
21208 +       return err;
21209 +}
21210 +
21211 +/* ---------------------------------------------------------------------- */
21212 +
21213 +#define AuFillVdir_CALLED      1 /* set by fillvdir() on every invocation */
21214 +#define AuFillVdir_WHABLE      (1 << 1) /* whiteout names are to be recorded */
21215 +#define AuFillVdir_SHWH                (1 << 2) /* whiteouts also carry ino and type */
21216 +#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
21217 +#define au_fset_fillvdir(flags, name)  { (flags) |= AuFillVdir_##name; }
21218 +#define au_fclr_fillvdir(flags, name)  { (flags) &= ~AuFillVdir_##name; }
21219 +
21220 +#ifndef CONFIG_AUFS_SHWH /* without SHWH support the flag tests as always false */
21221 +#undef AuFillVdir_SHWH
21222 +#define AuFillVdir_SHWH                0
21223 +#endif
21224 +
21225 +struct fillvdir_arg { /* state handed to the fillvdir() callback */
21226 +       struct file             *file; /* file being read */
21227 +       struct au_vdir          *vdir; /* destination of the collected entries */
21228 +       struct au_nhash         delist; /* names already stored in vdir */
21229 +       struct au_nhash         whlist; /* whiteout names seen so far */
21230 +       aufs_bindex_t           bindex; /* index currently being read */
21231 +       unsigned int            flags; /* AuFillVdir_* bits */
21232 +       int                     err; /* result of the latest fillvdir() call */
21233 +};
21234 +
21235 +static int fillvdir(void *__arg, const char *__name, int nlen,
21236 +                   loff_t offset __maybe_unused, u64 h_ino,
21237 +                   unsigned int d_type)
21238 +{ /* callback given to vfsub_readdir(): files one name into delist or whlist */
21239 +       struct fillvdir_arg *arg = __arg;
21240 +       char *name = (void *)__name;
21241 +       struct super_block *sb;
21242 +       ino_t ino; /* NOTE(review): passed on unset below when a whiteout is seen and !shwh */
21243 +       const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
21244 +
21245 +       arg->err = 0;
21246 +       sb = arg->file->f_dentry->d_sb;
21247 +       au_fset_fillvdir(arg->flags, CALLED);
21248 +       /* smp_mb(); */
21249 +       if (nlen <= AUFS_WH_PFX_LEN
21250 +           || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { /* name lacks the prefix */
21251 +               if (test_known(&arg->delist, name, nlen)
21252 +                   || au_nhash_test_known_wh(&arg->whlist, name, nlen))
21253 +                       goto out; /* already exists or whiteouted */
21254 +
21255 +               sb = arg->file->f_dentry->d_sb;
21256 +               arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
21257 +               if (!arg->err)
21258 +                       arg->err = append_de(arg->vdir, name, nlen, ino,
21259 +                                            d_type, &arg->delist);
21260 +       } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
21261 +               name += AUFS_WH_PFX_LEN; /* strip the prefix before recording */
21262 +               nlen -= AUFS_WH_PFX_LEN;
21263 +               if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
21264 +                       goto out; /* already whiteouted */
21265 +
21266 +               if (shwh)
21267 +                       arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
21268 +                                            &ino);
21269 +               if (!arg->err)
21270 +                       arg->err = au_nhash_append_wh
21271 +                               (&arg->whlist, name, nlen, ino, d_type,
21272 +                                arg->bindex, shwh);
21273 +       }
21274 +
21275 + out:
21276 +       if (!arg->err)
21277 +               arg->vdir->vd_jiffy = jiffies; /* stamp the time of the last good fill */
21278 +       /* smp_mb(); */
21279 +       AuTraceErr(arg->err);
21280 +       return arg->err;
21281 +}
21282 +
21283 +static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
21284 +                         struct au_nhash *whlist, struct au_nhash *delist)
21285 +{ /* with CONFIG_AUFS_SHWH: append each whlist name, prefixed, to vdir; else 0 */
21286 +#ifdef CONFIG_AUFS_SHWH
21287 +       int err;
21288 +       unsigned int nh, u;
21289 +       struct hlist_head *head;
21290 +       struct au_vdir_wh *tpos;
21291 +       struct hlist_node *pos, *n;
21292 +       char *p, *o;
21293 +       struct au_vdir_destr *destr;
21294 +
21295 +       AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
21296 +
21297 +       err = -ENOMEM;
21298 +       o = p = __getname(); /* scratch buffer: AUFS_WH_PFX followed by the name */
21299 +       if (unlikely(!p))
21300 +               goto out;
21301 +
21302 +       err = 0;
21303 +       nh = whlist->nh_num;
21304 +       memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
21305 +       p += AUFS_WH_PFX_LEN; /* p now points just past the prefix */
21306 +       for (u = 0; !err && u < nh; u++) { /* an error must also end the bucket walk */
21307 +               head = whlist->nh_head + u;
21308 +               hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) {
21309 +                       destr = &tpos->wh_str;
21310 +                       memcpy(p, destr->name, destr->len);
21311 +                       err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
21312 +                                       tpos->wh_ino, tpos->wh_type, delist);
21313 +                       if (unlikely(err))
21314 +                               break; /* leaves this bucket only; see the outer test */
21315 +               }
21316 +       }
21317 +
21318 +       __putname(o);
21319 +
21320 + out:
21321 +       AuTraceErr(err);
21322 +       return err;
21323 +#else
21324 +       return 0;
21325 +#endif
21326 +}
21327 +
21328 +static int au_do_read_vdir(struct fillvdir_arg *arg)
21329 +{ /* read every lower file of arg->file through fillvdir() into arg->vdir */
21330 +       int err;
21331 +       unsigned int rdhash;
21332 +       loff_t offset;
21333 +       aufs_bindex_t bend, bindex, bstart;
21334 +       unsigned char shwh;
21335 +       struct file *hf, *file;
21336 +       struct super_block *sb;
21337 +
21338 +       file = arg->file;
21339 +       sb = file->f_dentry->d_sb;
21340 +       SiMustAnyLock(sb);
21341 +
21342 +       rdhash = au_sbi(sb)->si_rdhash;
21343 +       if (!rdhash) /* no configured value: derive one from the directory size */
21344 +               rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
21345 +       err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
21346 +       if (unlikely(err))
21347 +               goto out;
21348 +       err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
21349 +       if (unlikely(err))
21350 +               goto out_delist;
21351 +
21352 +       err = 0;
21353 +       arg->flags = 0;
21354 +       shwh = 0;
21355 +       if (au_opt_test(au_mntflags(sb), SHWH)) {
21356 +               shwh = 1;
21357 +               au_fset_fillvdir(arg->flags, SHWH);
21358 +       }
21359 +       bstart = au_fbstart(file);
21360 +       bend = au_fbend(file);
21361 +       for (bindex = bstart; !err && bindex <= bend; bindex++) {
21362 +               hf = au_h_fptr(file, bindex);
21363 +               if (!hf)
21364 +                       continue; /* nothing opened at this index */
21365 +
21366 +               offset = vfsub_llseek(hf, 0, SEEK_SET);
21367 +               err = offset;
21368 +               if (unlikely(offset)) /* any non-zero result ends the loop as an error */
21369 +                       break;
21370 +
21371 +               arg->bindex = bindex;
21372 +               au_fclr_fillvdir(arg->flags, WHABLE);
21373 +               if (shwh
21374 +                   || (bindex != bend
21375 +                       && au_br_whable(au_sbr_perm(sb, bindex))))
21376 +                       au_fset_fillvdir(arg->flags, WHABLE);
21377 +               do { /* repeat until a pass ends without fillvdir() being called */
21378 +                       arg->err = 0;
21379 +                       au_fclr_fillvdir(arg->flags, CALLED);
21380 +                       /* smp_mb(); */
21381 +                       err = vfsub_readdir(hf, fillvdir, arg);
21382 +                       if (err >= 0)
21383 +                               err = arg->err; /* prefer the callback's own result */
21384 +               } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
21385 +       }
21386 +
21387 +       if (!err && shwh)
21388 +               err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
21389 +
21390 +       au_nhash_wh_free(&arg->whlist);
21391 +
21392 + out_delist:
21393 +       au_nhash_de_free(&arg->delist);
21394 + out:
21395 +       return err;
21396 +}
21397 +
21398 +static int read_vdir(struct file *file, int may_read)
21399 +{ /* make sure the inode has a filled vdir, (re)reading it when needed */
21400 +       int err;
21401 +       unsigned long expire;
21402 +       unsigned char do_read;
21403 +       struct fillvdir_arg arg;
21404 +       struct inode *inode;
21405 +       struct au_vdir *vdir, *allocated;
21406 +
21407 +       err = 0;
21408 +       inode = file->f_dentry->d_inode;
21409 +       IMustLock(inode);
21410 +       SiMustAnyLock(inode->i_sb);
21411 +
21412 +       allocated = NULL;
21413 +       do_read = 0;
21414 +       expire = au_sbi(inode->i_sb)->si_rdcache;
21415 +       vdir = au_ivdir(inode);
21416 +       if (!vdir) { /* the inode has no vdir yet: create one and fill it */
21417 +               do_read = 1;
21418 +               vdir = alloc_vdir(file);
21419 +               err = PTR_ERR(vdir);
21420 +               if (IS_ERR(vdir))
21421 +                       goto out;
21422 +               err = 0;
21423 +               allocated = vdir;
21424 +       } else if (may_read
21425 +                  && (inode->i_version != vdir->vd_version
21426 +                      || time_after(jiffies, vdir->vd_jiffy + expire))) {
21427 +               do_read = 1; /* version changed or the expiry time has passed */
21428 +               err = reinit_vdir(vdir);
21429 +               if (unlikely(err))
21430 +                       goto out;
21431 +       }
21432 +
21433 +       if (!do_read)
21434 +               return 0; /* success */
21435 +
21436 +       arg.file = file;
21437 +       arg.vdir = vdir;
21438 +       err = au_do_read_vdir(&arg);
21439 +       if (!err) {
21440 +               /* file->f_pos = 0; */
21441 +               vdir->vd_version = inode->i_version;
21442 +               vdir->vd_last.ul = 0; /* rewind the position to the start of block 0 */
21443 +               vdir->vd_last.p.deblk = vdir->vd_deblk[0];
21444 +               if (allocated)
21445 +                       au_set_ivdir(inode, allocated);
21446 +       } else if (allocated)
21447 +               au_vdir_free(allocated); /* only a vdir created here is freed on error */
21448 +
21449 + out:
21450 +       return err;
21451 +}
21452 +
21453 +static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
21454 +{ /* deep-copy src's blocks, stamps and position into tgt; 0 or -ENOMEM */
21455 +       int err, rerr;
21456 +       unsigned long ul, n;
21457 +       const unsigned int deblk_sz = src->vd_deblk_sz;
21458 +
21459 +       AuDebugOn(tgt->vd_nblk != 1);
21460 +
21461 +       err = -ENOMEM;
21462 +       if (tgt->vd_nblk < src->vd_nblk) { /* grow the block pointer array first */
21463 +               unsigned char **p;
21464 +
21465 +               p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
21466 +                            GFP_NOFS);
21467 +               if (unlikely(!p))
21468 +                       goto out;
21469 +               tgt->vd_deblk = p;
21470 +       }
21471 +
21472 +       if (tgt->vd_deblk_sz != deblk_sz) {
21473 +               unsigned char *p;
21474 +
21475 +               p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
21476 +               if (unlikely(!p))
21477 +                       goto out; /* size field still matches the unchanged block 0 */
21478 +               tgt->vd_deblk[0] = p;
21479 +               tgt->vd_deblk_sz = deblk_sz; /* record the size only once it is real */
21480 +       }
21481 +       memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
21482 +       tgt->vd_version = src->vd_version;
21483 +       tgt->vd_jiffy = src->vd_jiffy;
21484 +
21485 +       n = src->vd_nblk;
21486 +       for (ul = 1; ul < n; ul++) {
21487 +               tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
21488 +                                           GFP_NOFS);
21489 +               if (unlikely(!tgt->vd_deblk[ul]))
21490 +                       goto out;
21491 +               tgt->vd_nblk++; /* keeps the count exact for reinit_vdir() on failure */
21492 +       }
21493 +       tgt->vd_nblk = n;
21494 +       tgt->vd_last.ul = src->vd_last.ul; /* was a self-assignment of tgt's field */
21495 +       tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
21496 +       tgt->vd_last.p.deblk += src->vd_last.p.deblk
21497 +               - src->vd_deblk[src->vd_last.ul]; /* same byte offset as in src */
21498 +       /* smp_mb(); */
21499 +       return 0; /* success */
21500 +
21501 + out:
21502 +       rerr = reinit_vdir(tgt); /* drop partial copies, back to one empty block */
21503 +       BUG_ON(rerr);
21504 +       return err;
21505 +}
21506 +
21507 +int au_vdir_init(struct file *file)
21508 +{ /* refresh the inode's vdir if needed, then sync the file's own copy of it */
21509 +       int err;
21510 +       struct inode *inode;
21511 +       struct au_vdir *vdir_cache, *allocated;
21512 +
21513 +       err = read_vdir(file, !file->f_pos); /* re-reading is allowed only at f_pos 0 */
21514 +       if (unlikely(err))
21515 +               goto out;
21516 +
21517 +       allocated = NULL;
21518 +       vdir_cache = au_fvdir_cache(file);
21519 +       if (!vdir_cache) { /* the file has no copy yet */
21520 +               vdir_cache = alloc_vdir(file);
21521 +               err = PTR_ERR(vdir_cache);
21522 +               if (IS_ERR(vdir_cache))
21523 +                       goto out;
21524 +               allocated = vdir_cache;
21525 +       } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
21526 +               err = reinit_vdir(vdir_cache); /* stale copy: empty it before copying */
21527 +               if (unlikely(err))
21528 +                       goto out;
21529 +       } else
21530 +               return 0; /* success */
21531 +
21532 +       inode = file->f_dentry->d_inode;
21533 +       err = copy_vdir(vdir_cache, au_ivdir(inode));
21534 +       if (!err) {
21535 +               file->f_version = inode->i_version;
21536 +               if (allocated)
21537 +                       au_set_fvdir_cache(file, allocated);
21538 +       } else if (allocated)
21539 +               au_vdir_free(allocated); /* only a copy created here is freed on error */
21540 +
21541 + out:
21542 +       return err;
21543 +}
21544 +
21545 +static loff_t calc_offset(struct au_vdir *vdir)
21546 +{ /* offset of vd_last: block index * vd_deblk_sz + bytes into that block */
21547 +       loff_t offset;
21548 +       union au_vdir_deblk_p p;
21549 +
21550 +       p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
21551 +       offset = vdir->vd_last.p.deblk - p.deblk;
21552 +       offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
21553 +       return offset;
21554 +}
21555 +
21556 +/* point vd_last at the entry for file->f_pos; returns 1 if found, else 0 */
21557 +static int seek_vdir(struct file *file)
21558 +{
21559 +       int valid;
21560 +       unsigned int deblk_sz;
21561 +       unsigned long ul, n;
21562 +       loff_t offset;
21563 +       union au_vdir_deblk_p p, deblk_end;
21564 +       struct au_vdir *vdir_cache;
21565 +
21566 +       valid = 1;
21567 +       vdir_cache = au_fvdir_cache(file);
21568 +       offset = calc_offset(vdir_cache);
21569 +       AuDbg("offset %lld\n", offset);
21570 +       if (file->f_pos == offset) /* already positioned where f_pos points */
21571 +               goto out;
21572 +
21573 +       vdir_cache->vd_last.ul = 0;
21574 +       vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
21575 +       if (!file->f_pos) /* f_pos 0 is the start of block 0 */
21576 +               goto out;
21577 +
21578 +       valid = 0;
21579 +       deblk_sz = vdir_cache->vd_deblk_sz;
21580 +       ul = div64_u64(file->f_pos, deblk_sz); /* index of the block containing f_pos */
21581 +       AuDbg("ul %lu\n", ul);
21582 +       if (ul >= vdir_cache->vd_nblk)
21583 +               goto out;
21584 +
21585 +       n = vdir_cache->vd_nblk;
21586 +       for (; ul < n; ul++) {
21587 +               p.deblk = vdir_cache->vd_deblk[ul];
21588 +               deblk_end.deblk = p.deblk + deblk_sz;
21589 +               offset = ul;
21590 +               offset *= deblk_sz;
21591 +               while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) {
21592 +                       unsigned int l;
21593 +
21594 +                       l = calc_size(p.de->de_str.len); /* step over one whole entry */
21595 +                       offset += l;
21596 +                       p.deblk += l;
21597 +               }
21598 +               if (!is_deblk_end(&p, &deblk_end)) { /* stopped on an entry, not the end */
21599 +                       valid = 1;
21600 +                       vdir_cache->vd_last.ul = ul;
21601 +                       vdir_cache->vd_last.p = p;
21602 +                       break;
21603 +               }
21604 +       }
21605 +
21606 + out:
21607 +       /* smp_mb(); */
21608 +       AuTraceErr(!valid);
21609 +       return valid;
21610 +}
21611 +
21612 +int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir)
21613 +{ /* feed entries from the file's vdir copy to filldir, starting at f_pos */
21614 +       int err;
21615 +       unsigned int l, deblk_sz;
21616 +       union au_vdir_deblk_p deblk_end;
21617 +       struct au_vdir *vdir_cache;
21618 +       struct au_vdir_de *de;
21619 +
21620 +       vdir_cache = au_fvdir_cache(file);
21621 +       if (!seek_vdir(file))
21622 +               return 0; /* no entry at f_pos: nothing is emitted */
21623 +
21624 +       deblk_sz = vdir_cache->vd_deblk_sz;
21625 +       while (1) {
21626 +               deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
21627 +               deblk_end.deblk += deblk_sz;
21628 +               while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
21629 +                       de = vdir_cache->vd_last.p.de;
21630 +                       AuDbg("%.*s, off%lld, i%lu, dt%d\n",
21631 +                             de->de_str.len, de->de_str.name, file->f_pos,
21632 +                             (unsigned long)de->de_ino, de->de_type);
21633 +                       err = filldir(dirent, de->de_str.name, de->de_str.len,
21634 +                                     file->f_pos, de->de_ino, de->de_type);
21635 +                       if (unlikely(err)) {
21636 +                               AuTraceErr(err);
21637 +                               /* todo: ignore the error caused by udba? */
21638 +                               /* return err; */
21639 +                               return 0; /* the filldir error is traced but not returned */
21640 +                       }
21641 +
21642 +                       l = calc_size(de->de_str.len);
21643 +                       vdir_cache->vd_last.p.deblk += l;
21644 +                       file->f_pos += l; /* f_pos advances by the stored entry size */
21645 +               }
21646 +               if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
21647 +                       vdir_cache->vd_last.ul++; /* move on to the next block */
21648 +                       vdir_cache->vd_last.p.deblk
21649 +                               = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
21650 +                       file->f_pos = deblk_sz * vdir_cache->vd_last.ul;
21651 +                       continue;
21652 +               }
21653 +               break;
21654 +       }
21655 +
21656 +       /* smp_mb(); */
21657 +       return 0;
21658 +}
21659 diff -uprN -x .git linux-2.6.31/fs/aufs/vfsub.c aufs2-2.6.git/fs/aufs/vfsub.c
21660 --- linux-2.6.31/fs/aufs/vfsub.c        1970-01-01 00:00:00.000000000 +0000
21661 +++ aufs2-2.6.git/fs/aufs/vfsub.c       2009-09-21 21:49:23.411607814 +0000
21662 @@ -0,0 +1,751 @@
21663 +/*
21664 + * Copyright (C) 2005-2009 Junjiro R. Okajima
21665 + *
21666 + * This program, aufs is free software; you can redistribute it and/or modify
21667 + * it under the terms of the GNU General Public License as published by
21668 + * the Free Software Foundation; either version 2 of the License, or
21669 + * (at your option) any later version.
21670 + *
21671 + * This program is distributed in the hope that it will be useful,
21672 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21673 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21674 + * GNU General Public License for more details.
21675 + *
21676 + * You should have received a copy of the GNU General Public License
21677 + * along with this program; if not, write to the Free Software
21678 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21679 + */
21680 +
21681 +/*
21682 + * sub-routines for VFS
21683 + */
21684 +
21685 +#include <linux/ima.h>
21686 +#include <linux/namei.h>
21687 +#include <linux/security.h>
21688 +#include <linux/splice.h>
21689 +#include <linux/uaccess.h>
21690 +#include "aufs.h"
21691 +
21692 +int vfsub_update_h_iattr(struct path *h_path, int *did)
21693 +{ /* run vfs_getattr() on h_path if its fs qualifies; *did tells whether it ran */
21694 +       int err;
21695 +       struct kstat st;
21696 +       struct super_block *h_sb;
21697 +
21698 +       /* for remote fs, leave work for its getattr or d_revalidate */
21699 +       /* for bad i_attr fs, handle them in aufs_getattr() */
21700 +       /* still some fs may acquire i_mutex. we need to skip them */
21701 +       err = 0;
21702 +       if (!did)
21703 +               did = &err; /* did may be NULL: use err as scratch storage */
21704 +       h_sb = h_path->dentry->d_sb;
21705 +       *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
21706 +       if (*did)
21707 +               err = vfs_getattr(h_path->mnt, h_path->dentry, &st);
21708 +
21709 +       return err; /* 0 when skipped, else the vfs_getattr() result */
21710 +}
21711 +
21712 +/* ---------------------------------------------------------------------- */
21713 +
21714 +struct file *vfsub_dentry_open(struct path *path, int flags,
21715 +                              const struct cred *cred)
21716 +{ /* dentry_open() on path, then ima_counts_get() on the opened file */
21717 +       struct file *file;
21718 +
21719 +       file = dentry_open(path->dentry, path->mnt, flags, cred);
21720 +       if (IS_ERR(file))
21721 +               return file; /* the error pointer is returned unchanged */
21722 +       /* as NFSD does, just call ima_..._get() simply after dentry_open */
21723 +       ima_counts_get(file);
21724 +       return file;
21725 +}
21726 +
21727 +struct file *vfsub_filp_open(const char *path, int oflags, int mode)
21728 +{ /* filp_open() with lockdep switched off around the call */
21729 +       struct file *file;
21730 +
21731 +       lockdep_off();
21732 +       file = filp_open(path, oflags, mode);
21733 +       lockdep_on();
21734 +       if (IS_ERR(file))
21735 +               goto out;
21736 +       vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
21737 +
21738 + out:
21739 +       return file; /* may be an error pointer from filp_open() */
21740 +}
21741 +
21742 +int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
21743 +{ /* kern_path(), then an attribute refresh when the result has an inode */
21744 +       int err;
21745 +
21746 +       /* lockdep_off(); */
21747 +       err = kern_path(name, flags, path);
21748 +       /* lockdep_on(); */
21749 +       if (!err && path->dentry->d_inode)
21750 +               vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
21751 +       return err; /* the kern_path() result; the refresh result is dropped */
21752 +}
21753 +
21754 +struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
21755 +                                   int len)
21756 +{ /* lookup_one_len(), then an attribute refresh when the result has an inode */
21757 +       struct path path = {
21758 +               .mnt = NULL
21759 +       };
21760 +
21761 +       /* VFS checks it too, but by WARN_ON_ONCE() */
21762 +       IMustLock(parent->d_inode);
21763 +
21764 +       path.dentry = lookup_one_len(name, parent, len);
21765 +       if (IS_ERR(path.dentry))
21766 +               goto out;
21767 +       if (path.dentry->d_inode)
21768 +               vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
21769 +
21770 + out:
21771 +       return path.dentry; /* may be an error pointer from lookup_one_len() */
21772 +}
21773 +
21774 +struct dentry *vfsub_lookup_hash(struct nameidata *nd)
21775 +{ /* lookup_hash(nd), then an attribute refresh when the result has an inode */
21776 +       struct path path = {
21777 +               .mnt = nd->path.mnt
21778 +       };
21779 +
21780 +       IMustLock(nd->path.dentry->d_inode);
21781 +
21782 +       path.dentry = lookup_hash(nd);
21783 +       if (!IS_ERR(path.dentry) && path.dentry->d_inode)
21784 +               vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
21785 +
21786 +       return path.dentry; /* may be an error pointer from lookup_hash() */
21787 +}
21788 +
21789 +/* ---------------------------------------------------------------------- */
21790 +
21791 +struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
21792 +                                struct dentry *d2, struct au_hinode *hdir2)
21793 +{
21794 +       struct dentry *locked;
21795 +
21796 +       lockdep_off();
21797 +       locked = lock_rename(d1, d2);
21798 +       lockdep_on();
21799 +       au_hin_suspend(hdir1);
21800 +       if (hdir2 != hdir1)     /* do not suspend the same hinode twice */
21801 +               au_hin_suspend(hdir2);
21802 +
21803 +       return locked;  /* whatever lock_rename() handed back */
21804 +}
21805 +
21806 +void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
21807 +                        struct dentry *d2, struct au_hinode *hdir2)
21808 +{
21809 +       au_hin_resume(hdir1);   /* resume first, then drop the locks */
21810 +       if (hdir2 != hdir1)     /* do not resume the same hinode twice */
21811 +               au_hin_resume(hdir2);
21812 +       lockdep_off();
21813 +       unlock_rename(d1, d2);
21814 +       lockdep_on();
21815 +}
21816 +
21817 +/* ---------------------------------------------------------------------- */
21818 +
21819 +int vfsub_create(struct inode *dir, struct path *path, int mode)
21820 +{
21821 +       int err;
21822 +       struct dentry *d;
21823 +
21824 +       IMustLock(dir);
21825 +       /* create @path->dentry in @dir; the LSM sees parent path + child @d */
21826 +       d = path->dentry;
21827 +       path->dentry = d->d_parent;
21828 +       err = security_path_mknod(path, d, mode, 0);
21829 +       path->dentry = d;       /* undo the temporary switch to the parent */
21830 +       if (unlikely(err))
21831 +               goto out;
21832 +
21833 +       if (au_test_fs_null_nd(dir->i_sb))
21834 +               err = vfs_create(dir, path->dentry, mode, NULL);
21835 +       else {  /* otherwise hand vfs_create() a hand-built create intent */
21836 +               struct nameidata h_nd;
21837 +
21838 +               memset(&h_nd, 0, sizeof(h_nd));
21839 +               h_nd.flags = LOOKUP_CREATE;
21840 +               h_nd.intent.open.flags = O_CREAT
21841 +                       | vfsub_fmode_to_uint(FMODE_READ);
21842 +               h_nd.intent.open.create_mode = mode;
21843 +               h_nd.path.dentry = path->dentry->d_parent;
21844 +               h_nd.path.mnt = path->mnt;
21845 +               path_get(&h_nd.path);
21846 +               err = vfs_create(dir, path->dentry, mode, &h_nd);
21847 +               path_put(&h_nd.path);
21848 +       }
21849 +
21850 +       if (!err) {
21851 +               struct path tmp = *path;
21852 +               int did;
21853 +
21854 +               vfsub_update_h_iattr(&tmp, &did);
21855 +               if (did) {      /* then pass the parent dir too */
21856 +                       tmp.dentry = path->dentry->d_parent;
21857 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
21858 +               }
21859 +               /* return values of vfsub_update_h_iattr() are ignored */
21860 +       }
21861 +
21862 + out:
21863 +       return err;
21864 +}
21865 +
21866 +int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
21867 +{
21868 +       int err;
21869 +       struct dentry *d;
21870 +
21871 +       IMustLock(dir);
21872 +       /* symlink @path->dentry to @symname; LSM sees parent path + child @d */
21873 +       d = path->dentry;
21874 +       path->dentry = d->d_parent;
21875 +       err = security_path_symlink(path, d, symname);
21876 +       path->dentry = d;       /* undo the temporary switch to the parent */
21877 +       if (unlikely(err))
21878 +               goto out;
21879 +
21880 +       err = vfs_symlink(dir, path->dentry, symname);
21881 +       if (!err) {
21882 +               struct path tmp = *path;
21883 +               int did;
21884 +
21885 +               vfsub_update_h_iattr(&tmp, &did);
21886 +               if (did) {      /* then pass the parent dir too */
21887 +                       tmp.dentry = path->dentry->d_parent;
21888 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
21889 +               }
21890 +               /* return values of vfsub_update_h_iattr() are ignored */
21891 +       }
21892 +
21893 + out:
21894 +       return err;
21895 +}
21896 +
21897 +int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
21898 +{
21899 +       int err;
21900 +       struct dentry *d;
21901 +
21902 +       IMustLock(dir);
21903 +       /* make node @path->dentry in @dir; LSM sees parent path + child @d */
21904 +       d = path->dentry;
21905 +       path->dentry = d->d_parent;
21906 +       err = security_path_mknod(path, d, mode, dev);
21907 +       path->dentry = d;       /* undo the temporary switch to the parent */
21908 +       if (unlikely(err))
21909 +               goto out;
21910 +
21911 +       err = vfs_mknod(dir, path->dentry, mode, dev);
21912 +       if (!err) {
21913 +               struct path tmp = *path;
21914 +               int did;
21915 +
21916 +               vfsub_update_h_iattr(&tmp, &did);
21917 +               if (did) {      /* then pass the parent dir too */
21918 +                       tmp.dentry = path->dentry->d_parent;
21919 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
21920 +               }
21921 +               /* return values of vfsub_update_h_iattr() are ignored */
21922 +       }
21923 +
21924 + out:
21925 +       return err;
21926 +}
21927 +
21928 +static int au_test_nlink(struct inode *inode)
21929 +{
21930 +       const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
21931 +
21932 +       if (au_test_fs_no_limit_nlink(inode->i_sb)
21933 +           && inode->i_nlink >= link_max)
21934 +               return -EMLINK; /* only when both conditions hold */
21935 +       return 0;
21936 +}
21937 +
21938 +int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path)
21939 +{
21940 +       int err;
21941 +       struct dentry *d;
21942 +
21943 +       IMustLock(dir);
21944 +       /* au_test_nlink() may refuse with -EMLINK before anything is touched */
21945 +       err = au_test_nlink(src_dentry->d_inode);
21946 +       if (unlikely(err))
21947 +               return err;
21948 +       /* the LSM sees the parent as a path and the new link as @d */
21949 +       d = path->dentry;
21950 +       path->dentry = d->d_parent;
21951 +       err = security_path_link(src_dentry, path, d);
21952 +       path->dentry = d;       /* undo the temporary switch to the parent */
21953 +       if (unlikely(err))
21954 +               goto out;
21955 +
21956 +       lockdep_off();
21957 +       err = vfs_link(src_dentry, dir, path->dentry);
21958 +       lockdep_on();
21959 +       if (!err) {
21960 +               struct path tmp = *path;
21961 +               int did;
21962 +
21963 +               /* fuse has different memory inode for the same inumber */
21964 +               vfsub_update_h_iattr(&tmp, &did);
21965 +               if (did) {      /* then the parent dir and the source too */
21966 +                       tmp.dentry = path->dentry->d_parent;
21967 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
21968 +                       tmp.dentry = src_dentry;
21969 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
21970 +               }
21971 +               /* return values of vfsub_update_h_iattr() are ignored */
21972 +       }
21973 +
21974 + out:
21975 +       return err;
21976 +}
21977 +
21978 +int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
21979 +                struct inode *dir, struct path *path)
21980 +{
21981 +       int err;
21982 +       struct path tmp = {
21983 +               .mnt    = path->mnt
21984 +       };
21985 +       struct dentry *d;
21986 +
21987 +       IMustLock(dir);
21988 +       IMustLock(src_dir);
21989 +       /* LSM: (old parent path, old dentry, new parent path, new dentry @d) */
21990 +       d = path->dentry;
21991 +       path->dentry = d->d_parent;
21992 +       tmp.dentry = src_dentry->d_parent;
21993 +       err = security_path_rename(&tmp, src_dentry, path, d);
21994 +       path->dentry = d;       /* undo the temporary switch to the parent */
21995 +       if (unlikely(err))
21996 +               goto out;
21997 +
21998 +       lockdep_off();
21999 +       err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
22000 +       lockdep_on();
22001 +       if (!err) {
22002 +               int did;
22003 +
22004 +               tmp.dentry = d->d_parent;       /* target parent first */
22005 +               vfsub_update_h_iattr(&tmp, &did);
22006 +               if (did) {      /* then the source and its parent */
22007 +                       tmp.dentry = src_dentry;
22008 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22009 +                       tmp.dentry = src_dentry->d_parent;
22010 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22011 +               }
22012 +               /* return values of vfsub_update_h_iattr() are ignored */
22013 +       }
22014 +
22015 + out:
22016 +       return err;
22017 +}
22018 +
22019 +int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
22020 +{
22021 +       int err;
22022 +       struct dentry *d;
22023 +
22024 +       IMustLock(dir);
22025 +       /* mkdir @path->dentry in @dir; the LSM sees parent path + child @d */
22026 +       d = path->dentry;
22027 +       path->dentry = d->d_parent;
22028 +       err = security_path_mkdir(path, d, mode);
22029 +       path->dentry = d;       /* undo the temporary switch to the parent */
22030 +       if (unlikely(err))
22031 +               goto out;
22032 +
22033 +       err = vfs_mkdir(dir, path->dentry, mode);
22034 +       if (!err) {
22035 +               struct path tmp = *path;
22036 +               int did;
22037 +
22038 +               vfsub_update_h_iattr(&tmp, &did);
22039 +               if (did) {      /* then pass the parent dir too */
22040 +                       tmp.dentry = path->dentry->d_parent;
22041 +                       vfsub_update_h_iattr(&tmp, /*did*/NULL);
22042 +               }
22043 +               /* return values of vfsub_update_h_iattr() are ignored */
22044 +       }
22045 +
22046 + out:
22047 +       return err;
22048 +}
22049 +
22050 +int vfsub_rmdir(struct inode *dir, struct path *path)
22051 +{
22052 +       int err;
22053 +       struct dentry *d;
22054 +
22055 +       IMustLock(dir);
22056 +       /* rmdir @path->dentry from @dir; the LSM sees parent path + child @d */
22057 +       d = path->dentry;
22058 +       path->dentry = d->d_parent;
22059 +       err = security_path_rmdir(path, d);
22060 +       path->dentry = d;       /* undo the temporary switch to the parent */
22061 +       if (unlikely(err))
22062 +               goto out;
22063 +
22064 +       lockdep_off();
22065 +       err = vfs_rmdir(dir, path->dentry);
22066 +       lockdep_on();
22067 +       if (!err) {     /* only the parent is passed on after a removal */
22068 +               struct path tmp = {
22069 +                       .dentry = path->dentry->d_parent,
22070 +                       .mnt    = path->mnt
22071 +               };
22072 +
22073 +               vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22074 +       }
22075 +
22076 + out:
22077 +       return err;
22078 +}
22079 +
22080 +/* ---------------------------------------------------------------------- */
22081 +
22082 +ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22083 +                    loff_t *ppos)
22084 +{
22085 +       ssize_t nread;
22086 +
22087 +       nread = vfs_read(file, ubuf, count, ppos);
22088 +       if (!(nread < 0))       /* skipped when vfs_read() failed */
22089 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22090 +       return nread;
22091 +}
22092 +
22093 +/* todo: kernel_read()? */
22094 +ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22095 +                    loff_t *ppos)
22096 +{
22097 +       ssize_t nread;
22098 +       mm_segment_t saved_fs = get_fs();
22099 +
22100 +       /* run the __user variant on a kernel buffer under KERNEL_DS */
22101 +       set_fs(KERNEL_DS);
22102 +       nread = vfsub_read_u(file, (char __user *)kbuf, count, ppos);
22103 +       set_fs(saved_fs);
22104 +       return nread;
22105 +}
22106 +
22107 +ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22108 +                     loff_t *ppos)
22109 +{
22110 +       ssize_t nwritten;
22111 +
22112 +       lockdep_off();
22113 +       nwritten = vfs_write(file, ubuf, count, ppos);
22114 +       lockdep_on();
22115 +       if (!(nwritten < 0))    /* skipped when vfs_write() failed */
22116 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22117 +       return nwritten;
22118 +}
22119 +
22120 +ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
22121 +{
22122 +       ssize_t nwritten;
22123 +       mm_segment_t saved_fs = get_fs();
22124 +
22125 +       /* run the __user variant on a kernel buffer under KERNEL_DS */
22126 +       set_fs(KERNEL_DS);
22127 +       nwritten = vfsub_write_u(file, (const char __user *)kbuf, count, ppos);
22128 +       set_fs(saved_fs);
22129 +       return nwritten;
22130 +}
22131 +
22132 +int vfsub_readdir(struct file *file, filldir_t filldir, void *arg)
22133 +{
22134 +       int rc;
22135 +
22136 +       lockdep_off();
22137 +       rc = vfs_readdir(file, filldir, arg);
22138 +       lockdep_on();
22139 +       if (!(rc < 0))          /* skipped when vfs_readdir() failed */
22140 +               vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
22141 +       return rc;
22142 +}
22143 +
22144 +long vfsub_splice_to(struct file *in, loff_t *ppos,
22145 +                    struct pipe_inode_info *pipe, size_t len,
22146 +                    unsigned int flags)
22147 +{
22148 +       long moved;
22149 +
22150 +       lockdep_off();
22151 +       moved = do_splice_to(in, ppos, pipe, len, flags);
22152 +       lockdep_on();
22153 +       if (!(moved < 0))       /* skipped when do_splice_to() failed */
22154 +               vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
22155 +       return moved;
22156 +}
22157 +
22158 +long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22159 +                      loff_t *ppos, size_t len, unsigned int flags)
22160 +{
22161 +       long moved;
22162 +
22163 +       lockdep_off();
22164 +       moved = do_splice_from(pipe, out, ppos, len, flags);
22165 +       lockdep_on();
22166 +       if (!(moved < 0))       /* skipped when do_splice_from() failed */
22167 +               vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
22168 +       return moved;
22169 +}
22170 +
22171 +/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
22172 +int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22173 +               struct file *h_file)
22174 +{
22175 +       int err;
22176 +       struct inode *h_inode;
22177 +
22178 +       h_inode = h_path->dentry->d_inode;
22179 +       if (!h_file) {  /* no open file given: acquire write access here */
22180 +               err = mnt_want_write(h_path->mnt);
22181 +               if (err)
22182 +                       goto out;
22183 +               err = inode_permission(h_inode, MAY_WRITE);
22184 +               if (err)
22185 +                       goto out_mnt;
22186 +               err = get_write_access(h_inode);
22187 +               if (err)
22188 +                       goto out_mnt;
22189 +               err = break_lease(h_inode, vfsub_fmode_to_uint(FMODE_WRITE));
22190 +               if (err)
22191 +                       goto out_inode;
22192 +       }
22193 +
22194 +       err = locks_verify_truncate(h_inode, h_file, length);
22195 +       if (!err)
22196 +               err = security_path_truncate(h_path, length, attr);
22197 +       if (!err) {     /* all checks passed: truncate with lockdep off */
22198 +               lockdep_off();
22199 +               err = do_truncate(h_path->dentry, length, attr, h_file);
22200 +               lockdep_on();
22201 +       }
22202 +
22203 + out_inode:
22204 +       if (!h_file)    /* pairs with get_write_access() above */
22205 +               put_write_access(h_inode);
22206 + out_mnt:
22207 +       if (!h_file)    /* pairs with mnt_want_write() above */
22208 +               mnt_drop_write(h_path->mnt);
22209 + out:
22210 +       return err;
22211 +}
22212 +
22213 +/* ---------------------------------------------------------------------- */
22214 +
22215 +struct au_vfsub_mkdir_args {    /* arguments for au_call_vfsub_mkdir() */
22216 +       int *errp;              /* out: receives the vfsub_mkdir() result */
22217 +       struct inode *dir;      /* passed through to vfsub_mkdir() */
22218 +       struct path *path;      /* passed through to vfsub_mkdir() */
22219 +       int mode;               /* passed through to vfsub_mkdir() */
22220 +};
22221 +
22222 +static void au_call_vfsub_mkdir(void *args)
22223 +{
22224 +       struct au_vfsub_mkdir_args *mk = args;  /* unpack, call, store */
22225 +       *mk->errp = vfsub_mkdir(mk->dir, mk->path, mk->mode);
22226 +}
22227 +
22228 +int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
22229 +{
22230 +       int err, wkq_err;
22231 +       struct au_vfsub_mkdir_args args = {
22232 +               .errp   = &err,
22233 +               .dir    = dir,
22234 +               .path   = path,
22235 +               .mode   = mode
22236 +       };
22237 +
22238 +       /* call directly unless au_test_h_perm_sio() asks for the workqueue */
22239 +       if (!au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE))
22240 +               return vfsub_mkdir(dir, path, mode);
22241 +
22242 +       /* the callback stores into err; a failure of the wait overrides it */
22243 +       wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
22244 +       if (unlikely(wkq_err))
22245 +               err = wkq_err;
22246 +
22247 +       return err;
22248 +}
22249 +
22250 +struct au_vfsub_rmdir_args {    /* arguments for au_call_vfsub_rmdir() */
22251 +       int *errp;              /* out: receives the vfsub_rmdir() result */
22252 +       struct inode *dir;      /* passed through to vfsub_rmdir() */
22253 +       struct path *path;      /* passed through to vfsub_rmdir() */
22254 +};
22255 +
22256 +static void au_call_vfsub_rmdir(void *args)
22257 +{
22258 +       struct au_vfsub_rmdir_args *rm = args;  /* unpack, call, store */
22259 +       *rm->errp = vfsub_rmdir(rm->dir, rm->path);
22260 +}
22261 +
22262 +int vfsub_sio_rmdir(struct inode *dir, struct path *path)
22263 +{
22264 +       int err, wkq_err;
22265 +       struct au_vfsub_rmdir_args args = {
22266 +               .errp   = &err,
22267 +               .dir    = dir,
22268 +               .path   = path
22269 +       };
22270 +
22271 +       /* call directly unless au_test_h_perm_sio() asks for the workqueue */
22272 +       if (!au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE))
22273 +               return vfsub_rmdir(dir, path);
22274 +
22275 +       /* the callback stores into err; a failure of the wait overrides it */
22276 +       wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
22277 +       if (unlikely(wkq_err))
22278 +               err = wkq_err;
22279 +
22280 +       return err;
22281 +}
22282 +
22283 +/* ---------------------------------------------------------------------- */
22284 +
22285 +struct notify_change_args {     /* arguments for call_notify_change() */
22286 +       int *errp;              /* out: -EPERM or the notify_change() result */
22287 +       struct path *path;      /* its dentry is the one being changed */
22288 +       struct iattr *ia;       /* handed to notify_change() */
22289 +};
22290 +
22291 +static void call_notify_change(void *args)
22292 +{
22293 +       struct notify_change_args *a = args;
22294 +       struct inode *h_inode = a->path->dentry->d_inode;
22295 +       int err = -EPERM;       /* result for immutable/append-only inodes */
22296 +
22297 +       IMustLock(h_inode);
22298 +       if (!(IS_IMMUTABLE(h_inode) || IS_APPEND(h_inode))) {
22299 +               lockdep_off();
22300 +               err = notify_change(a->path->dentry, a->ia);
22301 +               lockdep_on();
22302 +               if (!err)
22303 +                       vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
22304 +       }
22305 +
22306 +       *a->errp = err;
22307 +       AuTraceErr(*a->errp);
22308 +}
22309 +
22310 +int vfsub_notify_change(struct path *path, struct iattr *ia)
22311 +{
22312 +       int err;
22313 +       struct notify_change_args nc = {
22314 +               .errp   = &err,
22315 +               .path   = path,
22316 +               .ia     = ia
22317 +       };
22318 +
22319 +       /* direct call; vfsub_sio_notify_change() goes through au_wkq_wait() */
22320 +       call_notify_change(&nc);
22321 +       return err;
22322 +}
22323 +
22324 +int vfsub_sio_notify_change(struct path *path, struct iattr *ia)
22325 +{
22326 +       int err, wkq_err;
22327 +       struct notify_change_args nc = {
22328 +               .errp   = &err,
22329 +               .path   = path,
22330 +               .ia     = ia
22331 +       };
22332 +
22333 +       /* the callback stores into err; a failure of the wait overrides it */
22334 +       wkq_err = au_wkq_wait(call_notify_change, &nc);
22335 +       if (unlikely(wkq_err))
22336 +               return wkq_err;
22337 +       return err;
22338 +}
22339 +
22340 +/* ---------------------------------------------------------------------- */
22341 +
22342 +struct unlink_args {            /* arguments for call_unlink() */
22343 +       int *errp;              /* out: LSM or vfs_unlink() result */
22344 +       struct inode *dir;      /* must be locked; passed to vfs_unlink() */
22345 +       struct path *path;      /* its dentry is the entry to unlink */
22346 +};
22347 +
22348 +static void call_unlink(void *args)
22349 +{
22350 +       struct unlink_args *a = args;
22351 +       struct dentry *d = a->path->dentry;
22352 +       struct inode *h_inode;
22353 +       const int stop_sillyrename = (au_test_nfs(d->d_sb)
22354 +                                     && atomic_read(&d->d_count) == 1);
22355 +
22356 +       IMustLock(a->dir);
22357 +       /* the LSM hook gets the parent as a path and @d as the victim */
22358 +       a->path->dentry = d->d_parent;
22359 +       *a->errp = security_path_unlink(a->path, d);
22360 +       a->path->dentry = d;    /* undo the temporary switch to the parent */
22361 +       if (unlikely(*a->errp))
22362 +               return;
22363 +
22364 +       if (!stop_sillyrename)  /* extra dentry ref, except nfs with count 1 */
22365 +               dget(d);
22366 +       h_inode = d->d_inode;
22367 +       if (h_inode)            /* pin the inode across vfs_unlink() */
22368 +               atomic_inc(&h_inode->i_count);
22369 +
22370 +       lockdep_off();
22371 +       *a->errp = vfs_unlink(a->dir, d);
22372 +       lockdep_on();
22373 +       if (!*a->errp) {        /* only the parent is passed on afterwards */
22374 +               struct path tmp = {
22375 +                       .dentry = d->d_parent,
22376 +                       .mnt    = a->path->mnt
22377 +               };
22378 +               vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
22379 +       }
22380 +
22381 +       if (!stop_sillyrename)  /* pairs with the dget() above */
22382 +               dput(d);
22383 +       if (h_inode)            /* pairs with the i_count increment above */
22384 +               iput(h_inode);
22385 +
22386 +       AuTraceErr(*a->errp);
22387 +}
22388 +
22389 +/*
22390 + * @dir: must be locked.
22391 + * @path: the entry to unlink. @force: run call_unlink() via au_wkq_wait().
22392 + */
22393 +int vfsub_unlink(struct inode *dir, struct path *path, int force)
22394 +{
22395 +       int err, wkq_err;
22396 +       struct unlink_args args = {
22397 +               .errp   = &err,
22398 +               .dir    = dir,
22399 +               .path   = path
22400 +       };
22401 +
22402 +       if (!force) {
22403 +               call_unlink(&args);
22404 +               return err;
22405 +       }
22406 +
22407 +       /* the callback stores into err; a failure of the wait overrides it */
22408 +       wkq_err = au_wkq_wait(call_unlink, &args);
22409 +       if (unlikely(wkq_err))
22410 +               err = wkq_err;
22411 +
22412 +       return err;
22413 +}
22414 diff -uprN -x .git linux-2.6.31/fs/aufs/vfsub.h aufs2-2.6.git/fs/aufs/vfsub.h
22415 --- linux-2.6.31/fs/aufs/vfsub.h        1970-01-01 00:00:00.000000000 +0000
22416 +++ aufs2-2.6.git/fs/aufs/vfsub.h       2009-09-21 21:49:23.411607814 +0000
22417 @@ -0,0 +1,172 @@
22418 +/*
22419 + * Copyright (C) 2005-2009 Junjiro R. Okajima
22420 + *
22421 + * This program, aufs is free software; you can redistribute it and/or modify
22422 + * it under the terms of the GNU General Public License as published by
22423 + * the Free Software Foundation; either version 2 of the License, or
22424 + * (at your option) any later version.
22425 + *
22426 + * This program is distributed in the hope that it will be useful,
22427 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22428 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22429 + * GNU General Public License for more details.
22430 + *
22431 + * You should have received a copy of the GNU General Public License
22432 + * along with this program; if not, write to the Free Software
22433 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22434 + */
22435 +
22436 +/*
22437 + * sub-routines for VFS
22438 + */
22439 +
22440 +#ifndef __AUFS_VFSUB_H__
22441 +#define __AUFS_VFSUB_H__
22442 +
22443 +#ifdef __KERNEL__
22444 +
22445 +#include <linux/fs.h>
22446 +#include <linux/fs_stack.h>
22447 +
22448 +/* ---------------------------------------------------------------------- */
22449 +
22450 +/* lock subclass for lower inode */
22451 +/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
22452 +/* reduce? gave up. */
22453 +enum {
22454 +       AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */
22455 +       AuLsc_I_PARENT,         /* lower inode, parent first */
22456 +       AuLsc_I_PARENT2,        /* copyup dirs */
22457 +       AuLsc_I_PARENT3,        /* copyup wh */
22458 +       AuLsc_I_CHILD,          /* 8 when AuLsc_I_Begin is 4 */
22459 +       AuLsc_I_CHILD2,         /* 9 when AuLsc_I_Begin is 4 */
22460 +       AuLsc_I_End             /* last enumerator, follows AuLsc_I_CHILD2 */
22461 +};
22462 +
22463 +/* kept as macros, not inline functions, to make debugging easier */
22464 +#define MtxMustLock(mtx)       AuDebugOn(!mutex_is_locked(mtx))
22465 +#define IMustLock(i)           MtxMustLock(&(i)->i_mutex)
22466 +
22467 +/* ---------------------------------------------------------------------- */
22468 +
22469 +static inline void vfsub_copy_inode_size(struct inode *inode,
22470 +                                        struct inode *h_inode)
22471 +{
22472 +       spin_lock(&inode->i_lock);      /* copy under @inode's i_lock */
22473 +       fsstack_copy_inode_size(inode, h_inode);  /* @inode <- @h_inode */
22474 +       spin_unlock(&inode->i_lock);
22475 +}
22476 +
22477 +int vfsub_update_h_iattr(struct path *h_path, int *did);
22478 +struct file *vfsub_filp_open(const char *path, int oflags, int mode);
22479 +struct file *vfsub_dentry_open(struct path *path, int flags,
22480 +                              const struct cred *cred);
22481 +int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
22482 +struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
22483 +                                   int len);
22484 +struct dentry *vfsub_lookup_hash(struct nameidata *nd);
22485 +
22486 +/* ---------------------------------------------------------------------- */
22487 +
22488 +struct au_hinode;
22489 +struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
22490 +                                struct dentry *d2, struct au_hinode *hdir2);
22491 +void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
22492 +                        struct dentry *d2, struct au_hinode *hdir2);
22493 +
22494 +int vfsub_create(struct inode *dir, struct path *path, int mode);
22495 +int vfsub_symlink(struct inode *dir, struct path *path,
22496 +                 const char *symname);
22497 +int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
22498 +int vfsub_link(struct dentry *src_dentry, struct inode *dir,
22499 +              struct path *path);
22500 +int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
22501 +                struct inode *hdir, struct path *path);
22502 +int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
22503 +int vfsub_rmdir(struct inode *dir, struct path *path);
22504 +
22505 +int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
22506 +int vfsub_sio_rmdir(struct inode *dir, struct path *path);
22507 +int vfsub_sio_notify_change(struct path *path, struct iattr *ia);
22508 +int vfsub_notify_change(struct path *path, struct iattr *ia);
22509 +int vfsub_unlink(struct inode *dir, struct path *path, int force);
22510 +
22511 +/* ---------------------------------------------------------------------- */
22512 +
22513 +ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
22514 +                    loff_t *ppos);
22515 +ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
22516 +                       loff_t *ppos);
22517 +ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
22518 +                     loff_t *ppos);
22519 +ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
22520 +                     loff_t *ppos);
22521 +int vfsub_readdir(struct file *file, filldir_t filldir, void *arg);
22522 +
22523 +long vfsub_splice_to(struct file *in, loff_t *ppos,
22524 +                    struct pipe_inode_info *pipe, size_t len,
22525 +                    unsigned int flags);
22526 +long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
22527 +                      loff_t *ppos, size_t len, unsigned int flags);
22528 +int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
22529 +               struct file *h_file);
22530 +
22531 +static inline void vfsub_file_accessed(struct file *h_file)
22532 +{
22533 +       file_accessed(h_file);  /* then pass the file's path on, see below */
22534 +       vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
22535 +}
22536 +
22537 +static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
22538 +                                    struct dentry *h_dentry)
22539 +{
22540 +       struct path h_path;
22541 +
22542 +       touch_atime(h_mnt, h_dentry);
22543 +       h_path.mnt = h_mnt;     /* same pair, now packed as a struct path */
22544 +       h_path.dentry = h_dentry;
22545 +       vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
22546 +}
22547 +
22548 +/* ---------------------------------------------------------------------- */
22549 +
22550 +static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
22551 +{
22552 +       loff_t pos;
22553 +
22554 +       lockdep_off();          /* lockdep is off only around vfs_llseek() */
22555 +       pos = vfs_llseek(file, offset, origin);
22556 +       lockdep_on();
22557 +       return pos;
22558 +}
22559 +
22560 +/* ---------------------------------------------------------------------- */
22561 +
22562 +/* dirty workaround for strict type of fmode_t: convert through a union */
22563 +union vfsub_fmu {
22564 +       fmode_t fm;             /* the value seen as fmode_t */
22565 +       unsigned int ui;        /* the same storage seen as unsigned int */
22566 +};
22567 +
22568 +static inline unsigned int vfsub_fmode_to_uint(fmode_t fm)
22569 +{
22570 +       union vfsub_fmu conv;
22571 +
22572 +       /* the conversion relies on both members having the same size */
22573 +       BUILD_BUG_ON(sizeof(conv.fm) != sizeof(conv.ui));
22574 +
22575 +       conv.fm = fm;
22576 +       return conv.ui;
22577 +}
22578 +
22579 +static inline fmode_t vfsub_uint_to_fmode(unsigned int ui)
22580 +{
22581 +       union vfsub_fmu conv;
22582 +
22583 +       /* store as unsigned int, read back as fmode_t */
22584 +       conv.ui = ui;
22585 +       return conv.fm;
22586 +}
22587 +
22588 +#endif /* __KERNEL__ */
22589 +#endif /* __AUFS_VFSUB_H__ */
22590 diff -uprN -x .git linux-2.6.31/fs/aufs/wbr_policy.c aufs2-2.6.git/fs/aufs/wbr_policy.c
22591 --- linux-2.6.31/fs/aufs/wbr_policy.c   1970-01-01 00:00:00.000000000 +0000
22592 +++ aufs2-2.6.git/fs/aufs/wbr_policy.c  2009-09-21 21:49:23.411607814 +0000
22593 @@ -0,0 +1,641 @@
22594 +/*
22595 + * Copyright (C) 2005-2009 Junjiro R. Okajima
22596 + *
22597 + * This program, aufs is free software; you can redistribute it and/or modify
22598 + * it under the terms of the GNU General Public License as published by
22599 + * the Free Software Foundation; either version 2 of the License, or
22600 + * (at your option) any later version.
22601 + *
22602 + * This program is distributed in the hope that it will be useful,
22603 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22604 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22605 + * GNU General Public License for more details.
22606 + *
22607 + * You should have received a copy of the GNU General Public License
22608 + * along with this program; if not, write to the Free Software
22609 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
22610 + */
22611 +
22612 +/*
22613 + * policies for selecting one among multiple writable branches
22614 + */
22615 +
22616 +#include <linux/statfs.h>
22617 +#include "aufs.h"
22618 +
22619 +/* subset of cpup_attr() */
22620 +static noinline_for_stack
22621 +int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
22622 +{
22623 +       int err, has_sbits;
22624 +       struct iattr ia;
22625 +       struct inode *h_isrc = h_src->d_inode;
22626 +
22627 +       ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
22628 +       ia.ia_mode = h_isrc->i_mode;
22629 +       ia.ia_uid = h_isrc->i_uid;
22630 +       ia.ia_gid = h_isrc->i_gid;
22631 +       has_sbits = (ia.ia_mode & (S_ISUID | S_ISGID)) != 0;
22632 +       au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc);
22633 +       err = vfsub_sio_notify_change(h_path, &ia);
22634 +       if (err || !has_sbits)
22635 +               return err;     /* failed, or no setuid/setgid bit to redo */
22636 +
22637 +       /* is this nfs only? */
22638 +       if (au_test_nfs(h_path->dentry->d_sb)) {
22639 +               ia.ia_valid = ATTR_FORCE | ATTR_MODE;
22640 +               ia.ia_mode = h_isrc->i_mode;
22641 +               err = vfsub_sio_notify_change(h_path, &ia);
22642 +       }
22643 +       return err;
22644 +}
22645 +
22646 +#define AuCpdown_PARENT_OPQ    1
22647 +#define AuCpdown_WHED          (1 << 1)
22648 +#define AuCpdown_MADE_DIR      (1 << 2)
22649 +#define AuCpdown_DIROPQ                (1 << 3)
22650 +#define au_ftest_cpdown(flags, name)   ((flags) & AuCpdown_##name)
22651 +#define au_fset_cpdown(flags, name)    { (flags) |= AuCpdown_##name; }
22652 +#define au_fclr_cpdown(flags, name)    { (flags) &= ~AuCpdown_##name; }
22653 +
22654 +struct au_cpdown_dir_args {
22655 +       struct dentry *parent;
22656 +       unsigned int flags;     /* AuCpdown_* bits, see au_fset_cpdown() */
22657 +};
22658 +
22659 +static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
22660 +                            struct au_cpdown_dir_args *a)
22661 +{
22662 +       struct dentry *opq_dentry;
22663 +
22664 +       /* returns 0 and sets DIROPQ in @a->flags, or a negative errno */
22665 +       opq_dentry = au_diropq_create(dentry, bdst);
22666 +       if (IS_ERR(opq_dentry))
22667 +               return PTR_ERR(opq_dentry);
22668 +
22669 +       /* the returned dentry is not kept: drop it and record the flag */
22670 +       dput(opq_dentry);
22671 +       au_fset_cpdown(a->flags, DIROPQ);
22672 +
22673 +       return 0;       /* PTR_ERR() of a valid pointer is not a status */
22674 +}
22675 +
22676 +static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
22677 +                           struct inode *dir, aufs_bindex_t bdst)
22678 +{
22679 +       int err;
22680 +       struct path h_path;
22681 +       struct au_branch *br;
22682 +
22683 +       br = au_sbr(dentry->d_sb, bdst);
22684 +       /* au_wh_lkup() is given @dentry's name, under @h_parent */
22685 +       h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
22686 +       if (IS_ERR(h_path.dentry))
22687 +               return PTR_ERR(h_path.dentry);
22688 +
22689 +       /* a positive result is removed; a negative one is simply dropped */
22690 +       err = 0;
22691 +       if (h_path.dentry->d_inode) {
22692 +               h_path.mnt = br->br_mnt;
22693 +               err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
22694 +                                         dentry);
22695 +       }
22696 +       dput(h_path.dentry);
22697 +
22698 +       return err;
22699 +}
22700 +/* au_cp_dirs() callback: mkdir @dentry on branch @bdst under @h_parent; @arg is a struct au_cpdown_dir_args */
22701 +static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
22702 +                        struct dentry *h_parent, void *arg)
22703 +{
22704 +       int err, rerr; /* rerr: result of a revert step */
22705 +       aufs_bindex_t bend, bopq, bstart;
22706 +       unsigned char parent_opq;
22707 +       struct path h_path;
22708 +       struct dentry *parent;
22709 +       struct inode *h_dir, *h_inode, *inode, *dir;
22710 +       struct au_cpdown_dir_args *args = arg;
22711 +
22712 +       bstart = au_dbstart(dentry); /* the attributes are copied from this branch below */
22713 +       /* dentry is di-locked */
22714 +       parent = dget_parent(dentry);
22715 +       dir = parent->d_inode;
22716 +       h_dir = h_parent->d_inode;
22717 +       AuDebugOn(h_dir != au_h_iptr(dir, bdst));
22718 +       IMustLock(h_dir);
22719 +
22720 +       err = au_lkup_neg(dentry, bdst);
22721 +       if (unlikely(err < 0))
22722 +               goto out;
22723 +       h_path.dentry = au_h_dptr(dentry, bdst);
22724 +       h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
22725 +       err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
22726 +                             S_IRWXU | S_IRUGO | S_IXUGO);
22727 +       if (unlikely(err))
22728 +               goto out_put;
22729 +       au_fset_cpdown(args->flags, MADE_DIR); /* tells the revert path to rmdir */
22730 +
22731 +       bend = au_dbend(dentry);
22732 +       bopq = au_dbdiropq(dentry);
22733 +       au_fclr_cpdown(args->flags, WHED); /* WHED and DIROPQ are recomputed on every call */
22734 +       au_fclr_cpdown(args->flags, DIROPQ);
22735 +       if (au_dbwh(dentry) == bdst)
22736 +               au_fset_cpdown(args->flags, WHED);
22737 +       if (!au_ftest_cpdown(args->flags, PARENT_OPQ) && bopq <= bdst)
22738 +               au_fset_cpdown(args->flags, PARENT_OPQ); /* never cleared here, so it persists across calls */
22739 +       parent_opq = (au_ftest_cpdown(args->flags, PARENT_OPQ)
22740 +                     && args->parent == dentry); /* NOTE(review): assigned but not read in this function */
22741 +       h_inode = h_path.dentry->d_inode;
22742 +       mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
22743 +       if (au_ftest_cpdown(args->flags, WHED)) {
22744 +               err = au_cpdown_dir_opq(dentry, bdst, args);
22745 +               if (unlikely(err)) {
22746 +                       mutex_unlock(&h_inode->i_mutex);
22747 +                       goto out_dir;
22748 +               }
22749 +       }
22750 +
22751 +       err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
22752 +       mutex_unlock(&h_inode->i_mutex);
22753 +       if (unlikely(err))
22754 +               goto out_opq;
22755 +
22756 +       if (au_ftest_cpdown(args->flags, WHED)) {
22757 +               err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
22758 +               if (unlikely(err))
22759 +                       goto out_opq;
22760 +       }
22761 +
22762 +       inode = dentry->d_inode;
22763 +       if (au_ibend(inode) < bdst)
22764 +               au_set_ibend(inode, bdst);
22765 +       au_set_h_iptr(inode, bdst, au_igrab(h_inode),
22766 +                     au_hi_flags(inode, /*isdir*/1));
22767 +       goto out; /* success */
22768 +
22769 +       /* revert, in the reverse order of the steps above */
22770 + out_opq:
22771 +       if (au_ftest_cpdown(args->flags, DIROPQ)) {
22772 +               mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
22773 +               rerr = au_diropq_remove(dentry, bdst);
22774 +               mutex_unlock(&h_inode->i_mutex);
22775 +               if (unlikely(rerr)) {
22776 +                       AuIOErr("failed removing diropq for %.*s b%d (%d)\n",
22777 +                               AuDLNPair(dentry), bdst, rerr);
22778 +                       err = -EIO;
22779 +                       goto out; /* the new dir and its h_dptr are left in place */
22780 +               }
22781 +       }
22782 + out_dir:
22783 +       if (au_ftest_cpdown(args->flags, MADE_DIR)) {
22784 +               rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
22785 +               if (unlikely(rerr)) {
22786 +                       AuIOErr("failed removing %.*s b%d (%d)\n",
22787 +                               AuDLNPair(dentry), bdst, rerr);
22788 +                       err = -EIO;
22789 +               }
22790 +       }
22791 + out_put:
22792 +       au_set_h_dptr(dentry, bdst, NULL); /* forget the dentry au_lkup_neg() installed */
22793 +       if (au_dbend(dentry) == bdst)
22794 +               au_update_dbend(dentry);
22795 + out:
22796 +       dput(parent);
22797 +       return err;
22798 +}
22799 +/* run au_cp_dirs() for @dentry and branch @bdst with au_cpdown_dir() as the per-directory callback */
22800 +int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
22801 +{
22802 +       int err;
22803 +       struct au_cpdown_dir_args args = {
22804 +               .parent = dget_parent(dentry), /* reference dropped below */
22805 +               .flags  = 0
22806 +       };
22807 +
22808 +       err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &args);
22809 +       dput(args.parent);
22810 +
22811 +       return err;
22812 +}
22813 +
22814 +/* ---------------------------------------------------------------------- */
22815 +
22816 +/* policies for create */
22817 +/* bottom up: first branch at or before @bindex (counting down to 0) that is not readonly, else -EROFS */
22818 +static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
22819 +{
22820 +       for (; bindex >= 0; bindex--)
22821 +               if (!au_br_rdonly(au_sbr(sb, bindex)))
22822 +                       return bindex;
22823 +       return -EROFS; /* also reached at once when @bindex is negative */
22824 +}
22825 +
22826 +/* top down parent: returns the index of the branch to create @dentry on, or a negative errno */
22827 +static int au_wbr_create_tdp(struct dentry *dentry, int isdir __maybe_unused)
22828 +{
22829 +       int err;
22830 +       aufs_bindex_t bstart, bindex;
22831 +       struct super_block *sb;
22832 +       struct dentry *parent, *h_parent;
22833 +
22834 +       sb = dentry->d_sb;
22835 +       bstart = au_dbstart(dentry);
22836 +       err = bstart;
22837 +       if (!au_br_rdonly(au_sbr(sb, bstart)))
22838 +               goto out; /* the dentry's own start branch is not readonly */
22839 +
22840 +       err = -EROFS;
22841 +       parent = dget_parent(dentry);
22842 +       for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
22843 +               h_parent = au_h_dptr(parent, bindex);
22844 +               if (!h_parent || !h_parent->d_inode)
22845 +                       continue; /* the parent has no positive dentry on this branch */
22846 +
22847 +               if (!au_br_rdonly(au_sbr(sb, bindex))) {
22848 +                       err = bindex;
22849 +                       break;
22850 +               }
22851 +       }
22852 +       dput(parent);
22853 +
22854 +       /* bottom up here */
22855 +       if (unlikely(err < 0))
22856 +               err = au_wbr_bu(sb, bstart - 1);
22857 +
22858 + out:
22859 +       AuDbg("b%d\n", err);
22860 +       return err;
22861 +}
22862 +
22863 +/* ---------------------------------------------------------------------- */
22864 +
22865 +/* an exception for the policy other than tdp: returns a branch index, or -1 when no exception applies */
22866 +static int au_wbr_create_exp(struct dentry *dentry)
22867 +{
22868 +       int err;
22869 +       aufs_bindex_t bwh, bdiropq;
22870 +       struct dentry *parent;
22871 +
22872 +       err = -1;
22873 +       bwh = au_dbwh(dentry);
22874 +       parent = dget_parent(dentry);
22875 +       bdiropq = au_dbdiropq(parent);
22876 +       if (bwh >= 0) {
22877 +               if (bdiropq >= 0)
22878 +                       err = min(bdiropq, bwh); /* both are set: take the smaller index */
22879 +               else
22880 +                       err = bwh;
22881 +               AuDbg("%d\n", err);
22882 +       } else if (bdiropq >= 0) {
22883 +               err = bdiropq;
22884 +               AuDbg("%d\n", err);
22885 +       }
22886 +       dput(parent);
22887 +
22888 +       if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
22889 +               err = -1; /* a readonly branch cannot be chosen */
22890 +
22891 +       AuDbg("%d\n", err);
22892 +       return err;
22893 +}
22894 +
22895 +/* ---------------------------------------------------------------------- */
22896 +
22897 +/* round robin: seed si_wbr_rr_next from au_wbr_bu() over all branches; returns that au_wbr_bu() result */
22898 +static int au_wbr_create_init_rr(struct super_block *sb)
22899 +{
22900 +       int err;
22901 +
22902 +       err = au_wbr_bu(sb, au_sbend(sb));
22903 +       atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
22904 +       /* smp_mb(); */
22905 +
22906 +       AuDbg("b%d\n", err);
22907 +       return err;
22908 +}
22909 +/* round robin create policy: returns a branch index, or -EROFS when every probed branch is readonly */
22910 +static int au_wbr_create_rr(struct dentry *dentry, int isdir)
22911 +{
22912 +       int err, nbr;
22913 +       unsigned int u;
22914 +       aufs_bindex_t bindex, bend;
22915 +       struct super_block *sb;
22916 +       atomic_t *next;
22917 +
22918 +       err = au_wbr_create_exp(dentry);
22919 +       if (err >= 0)
22920 +               goto out; /* the exception rule already picked a branch */
22921 +
22922 +       sb = dentry->d_sb;
22923 +       next = &au_sbi(sb)->si_wbr_rr_next;
22924 +       bend = au_sbend(sb);
22925 +       nbr = bend + 1;
22926 +       for (bindex = 0; bindex <= bend; bindex++) { /* bindex only bounds the number of tries */
22927 +               if (!isdir) {
22928 +                       err = atomic_dec_return(next) + 1;
22929 +                       /* modulo for 0 is meaningless */
22930 +                       if (unlikely(!err))
22931 +                               err = atomic_dec_return(next) + 1;
22932 +               } else
22933 +                       err = atomic_read(next); /* a directory does not advance the counter */
22934 +               AuDbg("%d\n", err);
22935 +               u = err; /* convert to unsigned before taking the modulo */
22936 +               err = u % nbr;
22937 +               AuDbg("%d\n", err);
22938 +               if (!au_br_rdonly(au_sbr(sb, err)))
22939 +                       break;
22940 +               err = -EROFS;
22941 +       }
22942 +
22943 + out:
22944 +       AuDbg("%d\n", err);
22945 +       return err;
22946 +}
22947 +
22948 +/* ---------------------------------------------------------------------- */
22949 +
22950 +/* most free space: statfs every non-readonly branch and store the winner in si_wbr_mfs; mfs_lock must be held */
22951 +static void au_mfs(struct dentry *dentry)
22952 +{
22953 +       struct super_block *sb;
22954 +       struct au_branch *br;
22955 +       struct au_wbr_mfs *mfs;
22956 +       aufs_bindex_t bindex, bend;
22957 +       int err;
22958 +       unsigned long long b, bavail;
22959 +       /* reduce the stack usage */
22960 +       struct kstatfs *st;
22961 +
22962 +       st = kmalloc(sizeof(*st), GFP_NOFS);
22963 +       if (unlikely(!st)) {
22964 +               AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
22965 +               return; /* the previous mfs values are left untouched */
22966 +       }
22967 +
22968 +       bavail = 0;
22969 +       sb = dentry->d_sb;
22970 +       mfs = &au_sbi(sb)->si_wbr_mfs;
22971 +       MtxMustLock(&mfs->mfs_lock);
22972 +       mfs->mfs_bindex = -EROFS; /* stays so when no branch qualifies */
22973 +       mfs->mfsrr_bytes = 0;
22974 +       bend = au_sbend(sb);
22975 +       for (bindex = 0; bindex <= bend; bindex++) {
22976 +               br = au_sbr(sb, bindex);
22977 +               if (au_br_rdonly(br))
22978 +                       continue;
22979 +
22980 +               /* sb->s_root for NFS is unreliable */
22981 +               err = vfs_statfs(br->br_mnt->mnt_root, st);
22982 +               if (unlikely(err)) {
22983 +                       AuWarn1("failed statfs, b%d, %d\n", bindex, err);
22984 +                       continue;
22985 +               }
22986 +
22987 +               /* when the available size is equal, select the lower one */
22988 +               BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
22989 +                            || sizeof(b) < sizeof(st->f_bsize));
22990 +               b = st->f_bavail * st->f_bsize; /* available blocks times block size */
22991 +               br->br_wbr->wbr_bytes = b; /* stored per branch; au_wbr_create_pmfs() reads it */
22992 +               if (b >= bavail) { /* '>=': on a tie the later bindex replaces the earlier one */
22993 +                       bavail = b;
22994 +                       mfs->mfs_bindex = bindex;
22995 +                       mfs->mfs_jiffy = jiffies;
22996 +               }
22997 +       }
22998 +
22999 +       mfs->mfsrr_bytes = bavail;
23000 +       AuDbg("b%d\n", mfs->mfs_bindex);
23001 +       kfree(st);
23002 +}
23003 +/* most-free-space create policy: returns mfs_bindex (refreshed by au_mfs() when stale), possibly negative */
23004 +static int au_wbr_create_mfs(struct dentry *dentry, int isdir __maybe_unused)
23005 +{
23006 +       int err;
23007 +       struct super_block *sb;
23008 +       struct au_wbr_mfs *mfs;
23009 +
23010 +       err = au_wbr_create_exp(dentry);
23011 +       if (err >= 0)
23012 +               goto out; /* the exception rule already picked a branch */
23013 +
23014 +       sb = dentry->d_sb;
23015 +       mfs = &au_sbi(sb)->si_wbr_mfs;
23016 +       mutex_lock(&mfs->mfs_lock);
23017 +       if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
23018 +           || mfs->mfs_bindex < 0
23019 +           || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
23020 +               au_mfs(dentry); /* expired, not yet chosen, or the chosen branch became readonly */
23021 +       mutex_unlock(&mfs->mfs_lock);
23022 +       err = mfs->mfs_bindex; /* NOTE(review): read after mfs_lock has been released */
23023 +
23024 + out:
23025 +       AuDbg("b%d\n", err);
23026 +       return err;
23027 +}
23028 +/* init for the mfs policies: set up mfs_lock and mark that no branch has been chosen; always returns 0 */
23029 +static int au_wbr_create_init_mfs(struct super_block *sb)
23030 +{
23031 +       struct au_wbr_mfs *mfs;
23032 +
23033 +       mfs = &au_sbi(sb)->si_wbr_mfs;
23034 +       mutex_init(&mfs->mfs_lock);
23035 +       mfs->mfs_jiffy = 0;
23036 +       mfs->mfs_bindex = -EROFS; /* negative: au_wbr_create_mfs() will call au_mfs() */
23037 +
23038 +       return 0;
23039 +}
23040 +/* fin for the mfs policies: destroy the mutex set up by au_wbr_create_init_mfs(); always returns 0 */
23041 +static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
23042 +{
23043 +       mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
23044 +       return 0;
23045 +}
23046 +
23047 +/* ---------------------------------------------------------------------- */
23048 +
23049 +/* most free space and then round robin: use rr once mfsrr_bytes drops below mfsrr_watermark */
23050 +static int au_wbr_create_mfsrr(struct dentry *dentry, int isdir)
23051 +{
23052 +       int err;
23053 +       struct au_wbr_mfs *mfs;
23054 +
23055 +       err = au_wbr_create_mfs(dentry, isdir);
23056 +       if (err >= 0) {
23057 +               mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
23058 +               mutex_lock(&mfs->mfs_lock); /* held across the comparison and the rr call */
23059 +               if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
23060 +                       err = au_wbr_create_rr(dentry, isdir);
23061 +               mutex_unlock(&mfs->mfs_lock);
23062 +       }
23063 +
23064 +       AuDbg("b%d\n", err);
23065 +       return err;
23066 +}
23067 +/* init for mfsrr: run the mfs init, then the rr init; returns the result of au_wbr_create_init_rr() */
23068 +static int au_wbr_create_init_mfsrr(struct super_block *sb)
23069 +{
23070 +       int err;
23071 +
23072 +       au_wbr_create_init_mfs(sb); /* ignore */
23073 +       err = au_wbr_create_init_rr(sb);
23074 +
23075 +       return err;
23076 +}
23077 +
23078 +/* ---------------------------------------------------------------------- */
23079 +
23080 +/* top down parent and most free space: start from the tdp choice, then prefer a parent branch with more wbr_bytes */
23081 +static int au_wbr_create_pmfs(struct dentry *dentry, int isdir)
23082 +{
23083 +       int err, e2;
23084 +       unsigned long long b;
23085 +       aufs_bindex_t bindex, bstart, bend;
23086 +       struct super_block *sb;
23087 +       struct dentry *parent, *h_parent;
23088 +       struct au_branch *br;
23089 +
23090 +       err = au_wbr_create_tdp(dentry, isdir);
23091 +       if (unlikely(err < 0))
23092 +               goto out;
23093 +       parent = dget_parent(dentry);
23094 +       bstart = au_dbstart(parent);
23095 +       bend = au_dbtaildir(parent);
23096 +       if (bstart == bend)
23097 +               goto out_parent; /* success */
23098 +
23099 +       e2 = au_wbr_create_mfs(dentry, isdir); /* only its sign is used; presumably called to refresh wbr_bytes */
23100 +       if (e2 < 0)
23101 +               goto out_parent; /* success */
23102 +
23103 +       /* when the available size is equal, select upper one */
23104 +       sb = dentry->d_sb;
23105 +       br = au_sbr(sb, err);
23106 +       b = br->br_wbr->wbr_bytes;
23107 +       AuDbg("b%d, %llu\n", err, b);
23108 +
23109 +       for (bindex = bstart; bindex <= bend; bindex++) {
23110 +               h_parent = au_h_dptr(parent, bindex);
23111 +               if (!h_parent || !h_parent->d_inode)
23112 +                       continue; /* the parent has no positive dentry on this branch */
23113 +
23114 +               br = au_sbr(sb, bindex);
23115 +               if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { /* strict '>' keeps the earlier pick on a tie */
23116 +                       b = br->br_wbr->wbr_bytes;
23117 +                       err = bindex;
23118 +                       AuDbg("b%d, %llu\n", err, b);
23119 +               }
23120 +       }
23121 +
23122 + out_parent:
23123 +       dput(parent);
23124 + out:
23125 +       AuDbg("b%d\n", err);
23126 +       return err;
23127 +}
23128 +
23129 +/* ---------------------------------------------------------------------- */
23130 +
23131 +/* policies for copyup */
23132 +
23133 +/* top down parent: the copyup policy simply reuses au_wbr_create_tdp() */
23134 +static int au_wbr_copyup_tdp(struct dentry *dentry)
23135 +{
23136 +       return au_wbr_create_tdp(dentry, /*isdir, anything is ok*/0);
23137 +}
23138 +
23139 +/* bottom up parent: scan from the dentry's start branch down to the parent's; returns a branch index or a negative errno */
23140 +static int au_wbr_copyup_bup(struct dentry *dentry)
23141 +{
23142 +       int err;
23143 +       aufs_bindex_t bindex, bstart;
23144 +       struct dentry *parent, *h_parent;
23145 +       struct super_block *sb;
23146 +
23147 +       err = -EROFS;
23148 +       sb = dentry->d_sb;
23149 +       parent = dget_parent(dentry);
23150 +       bstart = au_dbstart(parent); /* note: the parent's start branch, not the dentry's */
23151 +       for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
23152 +               h_parent = au_h_dptr(parent, bindex);
23153 +               if (!h_parent || !h_parent->d_inode)
23154 +                       continue; /* the parent has no positive dentry on this branch */
23155 +
23156 +               if (!au_br_rdonly(au_sbr(sb, bindex))) {
23157 +                       err = bindex;
23158 +                       break;
23159 +               }
23160 +       }
23161 +       dput(parent);
23162 +
23163 +       /* bottom up here */
23164 +       if (unlikely(err < 0))
23165 +               err = au_wbr_bu(sb, bstart - 1);
23166 +
23167 +       AuDbg("b%d\n", err);
23168 +       return err;
23169 +}
23170 +
23171 +/* bottom up: au_wbr_bu() starting from the dentry's start branch */
23172 +static int au_wbr_copyup_bu(struct dentry *dentry)
23173 +{
23174 +       int err;
23175 +
23176 +       err = au_wbr_bu(dentry->d_sb, au_dbstart(dentry));
23177 +
23178 +       AuDbg("b%d\n", err);
23179 +       return err;
23180 +}
23181 +
23182 +/* ---------------------------------------------------------------------- */
23183 +/* copyup policy table, indexed by AuWbrCopyup_* */
23184 +struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
23185 +       [AuWbrCopyup_TDP] = {
23186 +               .copyup = au_wbr_copyup_tdp
23187 +       },
23188 +       [AuWbrCopyup_BUP] = {
23189 +               .copyup = au_wbr_copyup_bup
23190 +       },
23191 +       [AuWbrCopyup_BU] = {
23192 +               .copyup = au_wbr_copyup_bu
23193 +       }
23194 +};
23195 +/* create policy table, indexed by AuWbrCreate_*; each *V entry repeats the handlers of its non-V twin */
23196 +struct au_wbr_create_operations au_wbr_create_ops[] = {
23197 +       [AuWbrCreate_TDP] = {
23198 +               .create = au_wbr_create_tdp
23199 +       },
23200 +       [AuWbrCreate_RR] = {
23201 +               .create = au_wbr_create_rr,
23202 +               .init   = au_wbr_create_init_rr
23203 +       },
23204 +       [AuWbrCreate_MFS] = {
23205 +               .create = au_wbr_create_mfs,
23206 +               .init   = au_wbr_create_init_mfs,
23207 +               .fin    = au_wbr_create_fin_mfs
23208 +       },
23209 +       [AuWbrCreate_MFSV] = {
23210 +               .create = au_wbr_create_mfs,
23211 +               .init   = au_wbr_create_init_mfs,
23212 +               .fin    = au_wbr_create_fin_mfs
23213 +       },
23214 +       [AuWbrCreate_MFSRR] = {
23215 +               .create = au_wbr_create_mfsrr,
23216 +               .init   = au_wbr_create_init_mfsrr,
23217 +               .fin    = au_wbr_create_fin_mfs
23218 +       },
23219 +       [AuWbrCreate_MFSRRV] = {
23220 +               .create = au_wbr_create_mfsrr,
23221 +               .init   = au_wbr_create_init_mfsrr,
23222 +               .fin    = au_wbr_create_fin_mfs
23223 +       },
23224 +       [AuWbrCreate_PMFS] = {
23225 +               .create = au_wbr_create_pmfs,
23226 +               .init   = au_wbr_create_init_mfs,
23227 +               .fin    = au_wbr_create_fin_mfs
23228 +       },
23229 +       [AuWbrCreate_PMFSV] = {
23230 +               .create = au_wbr_create_pmfs,
23231 +               .init   = au_wbr_create_init_mfs,
23232 +               .fin    = au_wbr_create_fin_mfs
23233 +       }
23234 +};
23235 diff -uprN -x .git linux-2.6.31/fs/aufs/whout.c aufs2-2.6.git/fs/aufs/whout.c
23236 --- linux-2.6.31/fs/aufs/whout.c        1970-01-01 00:00:00.000000000 +0000
23237 +++ aufs2-2.6.git/fs/aufs/whout.c       2009-09-21 21:49:23.411607814 +0000
23238 @@ -0,0 +1,1052 @@
23239 +/*
23240 + * Copyright (C) 2005-2009 Junjiro R. Okajima
23241 + *
23242 + * This program, aufs is free software; you can redistribute it and/or modify
23243 + * it under the terms of the GNU General Public License as published by
23244 + * the Free Software Foundation; either version 2 of the License, or
23245 + * (at your option) any later version.
23246 + *
23247 + * This program is distributed in the hope that it will be useful,
23248 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23249 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23250 + * GNU General Public License for more details.
23251 + *
23252 + * You should have received a copy of the GNU General Public License
23253 + * along with this program; if not, write to the Free Software
23254 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23255 + */
23256 +
23257 +/*
23258 + * whiteout for logical deletion and opaque directory
23259 + */
23260 +
23261 +#include <linux/fs.h>
23262 +#include "aufs.h"
23263 +
23264 +#define WH_MASK                        S_IRUGO
23265 +
23266 +/*
23267 + * If a directory contains a file with this name, then it is opaque.  The name
23268 + * starts with the .wh. flag so that it is blocked by lookup.
23269 + */
23270 +static struct qstr diropq_name = {
23271 +       .name = AUFS_WH_DIROPQ,
23272 +       .len = sizeof(AUFS_WH_DIROPQ) - 1 /* exclude the terminating NUL */
23273 +};
23274 +
23275 +/*
23276 + * generate whiteout name, which is NOT terminated by NUL.
23277 + * @wh: whiteout qstr to fill in (name and len)
23278 + * @name: original d_name
23279 + * the result is AUFS_WH_PFX followed by the bytes of @name.
23280 + * returns zero when succeeds, otherwise -ENAMETOOLONG or -ENOMEM.
23281 + * succeeded value as wh->name should be freed by kfree().
23282 + */
23283 +int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
23284 +{
23285 +       char *p;
23286 +
23287 +       if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
23288 +               return -ENAMETOOLONG;
23289 +
23290 +       wh->len = name->len + AUFS_WH_PFX_LEN;
23291 +       p = kmalloc(wh->len, GFP_NOFS); /* exactly wh->len bytes, no room for a terminator */
23292 +       wh->name = p; /* NULL on allocation failure */
23293 +       if (p) {
23294 +               memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
23295 +               memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
23296 +               /* smp_mb(); */
23297 +               return 0;
23298 +       }
23299 +       return -ENOMEM;
23300 +}
23301 +
23302 +/* ---------------------------------------------------------------------- */
23303 +
23304 +/*
23305 + * test if the @wh_name exists under @h_parent.
23306 + * @try_sio specifies the necessity of super-io. returns 1 (regular file), 0 (absent) or a negative errno.
23307 + */
23308 +int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
23309 +              struct au_branch *br, int try_sio)
23310 +{
23311 +       int err;
23312 +       struct dentry *wh_dentry;
23313 +       struct inode *h_dir;
23314 +
23315 +       h_dir = h_parent->d_inode; /* not read again in this function */
23316 +       if (!try_sio)
23317 +               wh_dentry = au_lkup_one(wh_name, h_parent, br, /*nd*/NULL);
23318 +       else
23319 +               wh_dentry = au_sio_lkup_one(wh_name, h_parent, br);
23320 +       err = PTR_ERR(wh_dentry);
23321 +       if (IS_ERR(wh_dentry))
23322 +               goto out;
23323 +
23324 +       err = 0;
23325 +       if (!wh_dentry->d_inode)
23326 +               goto out_wh; /* success */
23327 +
23328 +       err = 1;
23329 +       if (S_ISREG(wh_dentry->d_inode->i_mode))
23330 +               goto out_wh; /* success */
23331 +
23332 +       err = -EIO; /* the entry exists but is not a regular file */
23333 +       AuIOErr("%.*s Invalid whiteout entry type 0%o.\n",
23334 +               AuDLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
23335 +
23336 + out_wh:
23337 +       dput(wh_dentry);
23338 + out:
23339 +       return err;
23340 +}
23341 +
23342 +/*
23343 + * test if the @h_dentry sets opaque or not; the result is that of au_wh_test() for diropq_name.
23344 + */
23345 +int au_diropq_test(struct dentry *h_dentry, struct au_branch *br)
23346 +{
23347 +       int err;
23348 +       struct inode *h_dir;
23349 +
23350 +       h_dir = h_dentry->d_inode;
23351 +       err = au_wh_test(h_dentry, &diropq_name, br,
23352 +                        au_test_h_perm_sio(h_dir, MAY_EXEC)); /* passed as @try_sio */
23353 +       return err;
23354 +}
23355 +
23356 +/* returns a negative dentry under @h_parent whose name is unique and temporary:
23357 + * AUFS_WH_PFX twice, @prefix, '.', then HEX_LEN hex digits taken from a counter.
23358 + * returns ERR_PTR() when the name is too long, on ENOMEM, or when the lookup fails. */
23359 +struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
23360 +                            struct qstr *prefix)
23361 +{
23362 +#define HEX_LEN        4
23363 +       struct dentry *dentry;
23364 +       int i;
23365 +       char defname[AUFS_WH_PFX_LEN * 2 + DNAME_INLINE_LEN_MIN + 1
23366 +                    + HEX_LEN + 1], *name, *p;
23367 +       static unsigned short cnt; /* shared by all callers; no locking is visible here */
23368 +       struct qstr qs;
23369 +
23370 +       name = defname;
23371 +       qs.len = sizeof(defname) - DNAME_INLINE_LEN_MIN + prefix->len - 1;
23372 +       if (unlikely(prefix->len > DNAME_INLINE_LEN_MIN)) { /* too long for the stack buffer */
23373 +               dentry = ERR_PTR(-ENAMETOOLONG);
23374 +               if (unlikely(qs.len >= PATH_MAX))
23375 +                       goto out;
23376 +               dentry = ERR_PTR(-ENOMEM);
23377 +               name = kmalloc(qs.len + 1, GFP_NOFS);
23378 +               if (unlikely(!name))
23379 +                       goto out;
23380 +       }
23381 +
23382 +       /* doubly whiteout-ed */
23383 +       memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
23384 +       p = name + AUFS_WH_PFX_LEN * 2;
23385 +       memcpy(p, prefix->name, prefix->len);
23386 +       p += prefix->len;
23387 +       *p++ = '.';
23388 +       AuDebugOn(name + qs.len + 1 - p <= HEX_LEN); /* room for HEX_LEN digits plus NUL */
23389 +
23390 +       qs.name = name;
23391 +       for (i = 0; i < 3; i++) {
23392 +               sprintf(p, "%.*x", HEX_LEN, cnt++); /* hex: a 16-bit cnt never needs more than HEX_LEN digits, decimal could need 5 */
23393 +               dentry = au_sio_lkup_one(&qs, h_parent, br);
23394 +               if (IS_ERR(dentry) || !dentry->d_inode)
23395 +                       goto out_name; /* lookup error, or an unused (negative) name */
23396 +               dput(dentry);
23397 +       }
23398 +       /* AuWarn("could not get random name\n"); */
23399 +       dentry = ERR_PTR(-EEXIST);
23400 +       AuDbg("%.*s\n", AuLNPair(&qs));
23401 +       BUG();
23402 +
23403 + out_name:
23404 +       if (name != defname)
23405 +               kfree(name);
23406 + out:
23407 +       return dentry;
23408 +#undef HEX_LEN
23409 +}
23410 +
23411 +/*
23412 + * rename the @h_dentry on @br to the whiteouted temporary name; returns 0 or a negative errno.
23413 + */
23414 +int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
23415 +{
23416 +       int err;
23417 +       struct path h_path = {
23418 +               .mnt = br->br_mnt
23419 +       };
23420 +       struct inode *h_dir;
23421 +       struct dentry *h_parent;
23422 +
23423 +       h_parent = h_dentry->d_parent; /* dir inode is locked */
23424 +       h_dir = h_parent->d_inode;
23425 +       IMustLock(h_dir);
23426 +
23427 +       h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name); /* the rename target */
23428 +       err = PTR_ERR(h_path.dentry);
23429 +       if (IS_ERR(h_path.dentry))
23430 +               goto out;
23431 +
23432 +       /* under the same dir, no need to lock_rename() */
23433 +       err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path);
23434 +       AuTraceErr(err);
23435 +       dput(h_path.dentry);
23436 +
23437 + out:
23438 +       return err;
23439 +}
23440 +
23441 +/* ---------------------------------------------------------------------- */
23442 +/*
23443 + * functions for removing a whiteout
23444 + */
23445 +
23446 +static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
23447 +{
23448 +       int force;
23449 +
23450 +       /*
23451 +        * unlink @h_path under @h_dir; forces superio when the dir has a sticky
23452 +        * bit and the current fsuid does not own the entry. this may be a violation of unix fs semantics.
23453 +        */
23454 +       force = (h_dir->i_mode & S_ISVTX)
23455 +               && h_path->dentry->d_inode->i_uid != current_fsuid();
23456 +       return vfsub_unlink(h_dir, h_path, force);
23457 +}
23458 +/* unlink the whiteout @h_path; on success reset the dbwh of @dentry (may be NULL) to -1 */
23459 +int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
23460 +                       struct dentry *dentry)
23461 +{
23462 +       int err;
23463 +
23464 +       err = do_unlink_wh(h_dir, h_path);
23465 +       if (!err && dentry)
23466 +               au_set_dbwh(dentry, -1);
23467 +
23468 +       return err;
23469 +}
23470 +/* look up @wh under @h_parent and unlink it when it is a regular file; returns 0 or a negative errno */
23471 +static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
23472 +                         struct au_branch *br)
23473 +{
23474 +       int err;
23475 +       struct path h_path = {
23476 +               .mnt = br->br_mnt
23477 +       };
23478 +
23479 +       err = 0;
23480 +       h_path.dentry = au_lkup_one(wh, h_parent, br, /*nd*/NULL);
23481 +       if (IS_ERR(h_path.dentry))
23482 +               err = PTR_ERR(h_path.dentry);
23483 +       else {
23484 +               if (h_path.dentry->d_inode
23485 +                   && S_ISREG(h_path.dentry->d_inode->i_mode)) /* absent or non-regular entries are skipped without error */
23486 +                       err = do_unlink_wh(h_parent->d_inode, &h_path);
23487 +               dput(h_path.dentry);
23488 +       }
23489 +
23490 +       return err;
23491 +}
23492 +
23493 +/* ---------------------------------------------------------------------- */
23494 +/*
23495 + * initialize/clean whiteout for a branch
23496 + */
23497 +/* best-effort removal of @whpath under @h_dir (rmdir when @isdir, else unlink); a failure is only warned about */
23498 +static void au_wh_clean(struct inode *h_dir, struct path *whpath,
23499 +                       const int isdir)
23500 +{
23501 +       int err;
23502 +
23503 +       if (!whpath->dentry->d_inode)
23504 +               return; /* nothing exists, nothing to remove */
23505 +
23506 +       err = mnt_want_write(whpath->mnt);
23507 +       if (!err) {
23508 +               if (isdir)
23509 +                       err = vfsub_rmdir(h_dir, whpath);
23510 +               else
23511 +                       err = vfsub_unlink(h_dir, whpath, /*force*/0);
23512 +               mnt_drop_write(whpath->mnt);
23513 +       }
23514 +       if (unlikely(err))
23515 +               AuWarn("failed removing %.*s (%d), ignored.\n",
23516 +                      AuDLNPair(whpath->dentry), err);
23517 +}
23518 +/* returns 0 when the inode of @h_root has an i_op->link handler, otherwise logs an error and returns -ENOSYS */
23519 +static int test_linkable(struct dentry *h_root)
23520 +{
23521 +       struct inode *h_dir = h_root->d_inode;
23522 +
23523 +       if (h_dir->i_op->link)
23524 +               return 0;
23525 +
23526 +       AuErr("%.*s (%s) doesn't support link(2), use noplink and rw+nolwh\n",
23527 +             AuDLNPair(h_root), au_sbtype(h_root->d_sb));
23528 +       return -ENOSYS;
23529 +}
23530 +
23531 +/* make sure @path is a directory, creating it when absent. todo: should this mkdir be done in /sbin/mount.aufs helper? */
23532 +static int au_whdir(struct inode *h_dir, struct path *path)
23533 +{
23534 +       int err;
23535 +
23536 +       err = -EEXIST; /* returned when something other than a directory already exists */
23537 +       if (!path->dentry->d_inode) {
23538 +               int mode = S_IRWXU;
23539 +
23540 +               if (au_test_nfs(path->dentry->d_sb))
23541 +                       mode |= S_IXUGO; /* on NFS, execute permission for group and others too */
23542 +               err = mnt_want_write(path->mnt);
23543 +               if (!err) {
23544 +                       err = vfsub_mkdir(h_dir, path, mode);
23545 +                       mnt_drop_write(path->mnt);
23546 +               }
23547 +       } else if (S_ISDIR(path->dentry->d_inode->i_mode))
23548 +               err = 0; /* already a directory */
23549 +       else
23550 +               AuErr("unknown %.*s exists\n", AuDLNPair(path->dentry));
23551 +
23552 +       return err;
23553 +}
23554 +/* one entry per AuBrWh_* index (BASE, PLINK, ORPH) as used by the au_wh_init_*() helpers */
23555 +struct au_wh_base {
23556 +       const struct qstr *name;
23557 +       struct dentry *dentry; /* the au_wh_init_*() helpers clean, create or dget() this */
23558 +};
23559 +/* remove all three base entries: BASE as a file, PLINK and ORPH as directories; @h_path->dentry is overwritten */
23560 +static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
23561 +                         struct path *h_path)
23562 +{
23563 +       h_path->dentry = base[AuBrWh_BASE].dentry;
23564 +       au_wh_clean(h_dir, h_path, /*isdir*/0);
23565 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
23566 +       au_wh_clean(h_dir, h_path, /*isdir*/1);
23567 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
23568 +       au_wh_clean(h_dir, h_path, /*isdir*/1);
23569 +}
23570 +
23571 +/*
23572 + * returns tri-state,
23573 + * minus: error, caller should print the message
23574 + * zero: success
23575 + * plus: error, caller should NOT print the message
23576 + */
23577 +static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
23578 +                               int do_plink, struct au_wh_base base[],
23579 +                               struct path *h_path)
23580 +{
23581 +       int err;
23582 +       struct inode *h_dir;
23583 +
23584 +       h_dir = h_root->d_inode;
23585 +       h_path->dentry = base[AuBrWh_BASE].dentry;
23586 +       au_wh_clean(h_dir, h_path, /*isdir*/0);
23587 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
23588 +       if (do_plink) {
23589 +               err = test_linkable(h_root);
23590 +               if (unlikely(err)) {
23591 +                       err = 1;
23592 +                       goto out;
23593 +               }
23594 +
23595 +               err = au_whdir(h_dir, h_path);
23596 +               if (unlikely(err))
23597 +                       goto out;
23598 +               wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
23599 +       } else
23600 +               au_wh_clean(h_dir, h_path, /*isdir*/1);
23601 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
23602 +       err = au_whdir(h_dir, h_path);
23603 +       if (unlikely(err))
23604 +               goto out;
23605 +       wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
23606 +
23607 + out:
23608 +       return err;
23609 +}
23610 +
23611 +/*
23612 + * for the moment, aufs supports a branch filesystem which does not support
23613 + * link(2). when tested on FAT, which does not fully support i_op->setattr()
23614 + * either, copyup failed. in the end, such a filesystem will not be used as
23615 + * the writable branch.
23616 + *
23617 + * returns tri-state, see above.
23618 + */
23619 +static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
23620 +                        int do_plink, struct au_wh_base base[],
23621 +                        struct path *h_path)
23622 +{
23623 +       int err;
23624 +       struct inode *h_dir;
23625 +
23626 +       WbrWhMustWriteLock(wbr);
23627 +
23628 +       err = test_linkable(h_root);
23629 +       if (unlikely(err)) {
23630 +               err = 1;
23631 +               goto out;
23632 +       }
23633 +
23634 +       /*
23635 +        * todo: should this create be done in /sbin/mount.aufs helper?
23636 +        */
23637 +       err = -EEXIST;
23638 +       h_dir = h_root->d_inode;
23639 +       if (!base[AuBrWh_BASE].dentry->d_inode) {
23640 +               err = mnt_want_write(h_path->mnt);
23641 +               if (!err) {
23642 +                       h_path->dentry = base[AuBrWh_BASE].dentry;
23643 +                       err = vfsub_create(h_dir, h_path, WH_MASK);
23644 +                       mnt_drop_write(h_path->mnt);
23645 +               }
23646 +       } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
23647 +               err = 0;
23648 +       else
23649 +               AuErr("unknown %.*s/%.*s exists\n",
23650 +                     AuDLNPair(h_root), AuDLNPair(base[AuBrWh_BASE].dentry));
23651 +       if (unlikely(err))
23652 +               goto out;
23653 +
23654 +       h_path->dentry = base[AuBrWh_PLINK].dentry;
23655 +       if (do_plink) {
23656 +               err = au_whdir(h_dir, h_path);
23657 +               if (unlikely(err))
23658 +                       goto out;
23659 +               wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
23660 +       } else
23661 +               au_wh_clean(h_dir, h_path, /*isdir*/1);
23662 +       wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
23663 +
23664 +       h_path->dentry = base[AuBrWh_ORPH].dentry;
23665 +       err = au_whdir(h_dir, h_path);
23666 +       if (unlikely(err))
23667 +               goto out;
23668 +       wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
23669 +
23670 + out:
23671 +       return err;
23672 +}
23673 +
23674 +/*
23675 + * initialize the whiteout base file/dir for @br.
23676 + */
23677 +int au_wh_init(struct dentry *h_root, struct au_branch *br,
23678 +              struct super_block *sb)
23679 +{
23680 +       int err, i;
23681 +       const unsigned char do_plink
23682 +               = !!au_opt_test(au_mntflags(sb), PLINK);
23683 +       struct path path = {
23684 +               .mnt = br->br_mnt
23685 +       };
23686 +       struct inode *h_dir;
23687 +       struct au_wbr *wbr = br->br_wbr;
23688 +       static const struct qstr base_name[] = {
23689 +               [AuBrWh_BASE] = {
23690 +                       .name   = AUFS_BASE_NAME,
23691 +                       .len    = sizeof(AUFS_BASE_NAME) - 1
23692 +               },
23693 +               [AuBrWh_PLINK] = {
23694 +                       .name   = AUFS_PLINKDIR_NAME,
23695 +                       .len    = sizeof(AUFS_PLINKDIR_NAME) - 1
23696 +               },
23697 +               [AuBrWh_ORPH] = {
23698 +                       .name   = AUFS_ORPHDIR_NAME,
23699 +                       .len    = sizeof(AUFS_ORPHDIR_NAME) - 1
23700 +               }
23701 +       };
23702 +       struct au_wh_base base[] = {
23703 +               [AuBrWh_BASE] = {
23704 +                       .name   = base_name + AuBrWh_BASE,
23705 +                       .dentry = NULL
23706 +               },
23707 +               [AuBrWh_PLINK] = {
23708 +                       .name   = base_name + AuBrWh_PLINK,
23709 +                       .dentry = NULL
23710 +               },
23711 +               [AuBrWh_ORPH] = {
23712 +                       .name   = base_name + AuBrWh_ORPH,
23713 +                       .dentry = NULL
23714 +               }
23715 +       };
23716 +
23717 +       if (wbr)
23718 +               WbrWhMustWriteLock(wbr);
23719 +
23720 +       h_dir = h_root->d_inode;
23721 +       for (i = 0; i < AuBrWh_Last; i++) {
23722 +               /* doubly whiteouted */
23723 +               struct dentry *d;
23724 +
23725 +               d = au_wh_lkup(h_root, (void *)base[i].name, br);
23726 +               err = PTR_ERR(d);
23727 +               if (IS_ERR(d))
23728 +                       goto out;
23729 +
23730 +               base[i].dentry = d;
23731 +               AuDebugOn(wbr
23732 +                         && wbr->wbr_wh[i]
23733 +                         && wbr->wbr_wh[i] != base[i].dentry);
23734 +       }
23735 +
23736 +       if (wbr)
23737 +               for (i = 0; i < AuBrWh_Last; i++) {
23738 +                       dput(wbr->wbr_wh[i]);
23739 +                       wbr->wbr_wh[i] = NULL;
23740 +               }
23741 +
23742 +       err = 0;
23743 +
23744 +       switch (br->br_perm) {
23745 +       case AuBrPerm_RO:
23746 +       case AuBrPerm_ROWH:
23747 +       case AuBrPerm_RR:
23748 +       case AuBrPerm_RRWH:
23749 +               au_wh_init_ro(h_dir, base, &path);
23750 +               break;
23751 +
23752 +       case AuBrPerm_RWNoLinkWH:
23753 +               err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
23754 +               if (err > 0)
23755 +                       goto out;
23756 +               else if (err)
23757 +                       goto out_err;
23758 +               break;
23759 +
23760 +       case AuBrPerm_RW:
23761 +               err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
23762 +               if (err > 0)
23763 +                       goto out;
23764 +               else if (err)
23765 +                       goto out_err;
23766 +               break;
23767 +
23768 +       default:
23769 +               BUG();
23770 +       }
23771 +       goto out; /* success */
23772 +
23773 + out_err:
23774 +       AuErr("an error(%d) on the writable branch %.*s(%s)\n",
23775 +             err, AuDLNPair(h_root), au_sbtype(h_root->d_sb));
23776 + out:
23777 +       for (i = 0; i < AuBrWh_Last; i++)
23778 +               dput(base[i].dentry);
23779 +       return err;
23780 +}
23781 +
23782 +/* ---------------------------------------------------------------------- */
23783 +/*
23784 + * whiteouts are all hard-linked usually.
23785 + * when its link count reaches a ceiling, we create a new whiteout base
23786 + * asynchronously.
23787 + */
23788 +
23789 +struct reinit_br_wh {
23790 +       struct super_block *sb;
23791 +       struct au_branch *br;
23792 +};
23793 +
23794 +static void reinit_br_wh(void *arg)
23795 +{
23796 +       int err;
23797 +       aufs_bindex_t bindex;
23798 +       struct path h_path;
23799 +       struct reinit_br_wh *a = arg;
23800 +       struct au_wbr *wbr;
23801 +       struct inode *dir;
23802 +       struct dentry *h_root;
23803 +       struct au_hinode *hdir;
23804 +
23805 +       err = 0;
23806 +       wbr = a->br->br_wbr;
23807 +       /* big aufs lock */
23808 +       si_noflush_write_lock(a->sb);
23809 +       if (!au_br_writable(a->br->br_perm))
23810 +               goto out;
23811 +       bindex = au_br_index(a->sb, a->br->br_id);
23812 +       if (unlikely(bindex < 0))
23813 +               goto out;
23814 +
23815 +       di_read_lock_parent(a->sb->s_root, AuLock_IR);
23816 +       dir = a->sb->s_root->d_inode;
23817 +       hdir = au_hi(dir, bindex);
23818 +       h_root = au_h_dptr(a->sb->s_root, bindex);
23819 +
23820 +       au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
23821 +       wbr_wh_write_lock(wbr);
23822 +       err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
23823 +                         h_root, a->br);
23824 +       if (!err) {
23825 +               err = mnt_want_write(a->br->br_mnt);
23826 +               if (!err) {
23827 +                       h_path.dentry = wbr->wbr_whbase;
23828 +                       h_path.mnt = a->br->br_mnt;
23829 +                       err = vfsub_unlink(hdir->hi_inode, &h_path, /*force*/0);
23830 +                       mnt_drop_write(a->br->br_mnt);
23831 +               }
23832 +       } else {
23833 +               AuWarn("%.*s is moved, ignored\n", AuDLNPair(wbr->wbr_whbase));
23834 +               err = 0;
23835 +       }
23836 +       dput(wbr->wbr_whbase);
23837 +       wbr->wbr_whbase = NULL;
23838 +       if (!err)
23839 +               err = au_wh_init(h_root, a->br, a->sb);
23840 +       wbr_wh_write_unlock(wbr);
23841 +       au_hin_imtx_unlock(hdir);
23842 +       di_read_unlock(a->sb->s_root, AuLock_IR);
23843 +
23844 + out:
23845 +       if (wbr)
23846 +               atomic_dec(&wbr->wbr_wh_running);
23847 +       atomic_dec(&a->br->br_count);
23848 +       au_nwt_done(&au_sbi(a->sb)->si_nowait);
23849 +       si_write_unlock(a->sb);
23850 +       kfree(arg);
23851 +       if (unlikely(err))
23852 +               AuIOErr("err %d\n", err);
23853 +}
23854 +
23855 +static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
23856 +{
23857 +       int do_dec, wkq_err;
23858 +       struct reinit_br_wh *arg;
23859 +
23860 +       do_dec = 1;
23861 +       if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
23862 +               goto out;
23863 +
23864 +       /* ignore ENOMEM */
23865 +       arg = kmalloc(sizeof(*arg), GFP_NOFS);
23866 +       if (arg) {
23867 +               /*
23868 +                * dec(wh_running), kfree(arg) and dec(br_count)
23869 +                * in reinit function
23870 +                */
23871 +               arg->sb = sb;
23872 +               arg->br = br;
23873 +               atomic_inc(&br->br_count);
23874 +               wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb);
23875 +               if (unlikely(wkq_err)) {
23876 +                       atomic_dec(&br->br_wbr->wbr_wh_running);
23877 +                       atomic_dec(&br->br_count);
23878 +                       kfree(arg);
23879 +               }
23880 +               do_dec = 0;
23881 +       }
23882 +
23883 + out:
23884 +       if (do_dec)
23885 +               atomic_dec(&br->br_wbr->wbr_wh_running);
23886 +}
23887 +
23888 +/* ---------------------------------------------------------------------- */
23889 +
23890 +/*
23891 + * create the whiteout @wh.
23892 + */
23893 +static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
23894 +                            struct dentry *wh)
23895 +{
23896 +       int err;
23897 +       struct path h_path = {
23898 +               .dentry = wh
23899 +       };
23900 +       struct au_branch *br;
23901 +       struct au_wbr *wbr;
23902 +       struct dentry *h_parent;
23903 +       struct inode *h_dir;
23904 +
23905 +       h_parent = wh->d_parent; /* dir inode is locked */
23906 +       h_dir = h_parent->d_inode;
23907 +       IMustLock(h_dir);
23908 +
23909 +       br = au_sbr(sb, bindex);
23910 +       h_path.mnt = br->br_mnt;
23911 +       wbr = br->br_wbr;
23912 +       wbr_wh_read_lock(wbr);
23913 +       if (wbr->wbr_whbase) {
23914 +               err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path);
23915 +               if (!err || err != -EMLINK)
23916 +                       goto out;
23917 +
23918 +               /* link count full. re-initialize br_whbase. */
23919 +               kick_reinit_br_wh(sb, br);
23920 +       }
23921 +
23922 +       /* return this error in this context */
23923 +       err = vfsub_create(h_dir, &h_path, WH_MASK);
23924 +
23925 + out:
23926 +       wbr_wh_read_unlock(wbr);
23927 +       return err;
23928 +}
23929 +
23930 +/* ---------------------------------------------------------------------- */
23931 +
23932 +/*
23933 + * create or remove the diropq.
23934 + */
23935 +static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
23936 +                               unsigned int flags)
23937 +{
23938 +       struct dentry *opq_dentry, *h_dentry;
23939 +       struct super_block *sb;
23940 +       struct au_branch *br;
23941 +       int err;
23942 +
23943 +       sb = dentry->d_sb;
23944 +       br = au_sbr(sb, bindex);
23945 +       h_dentry = au_h_dptr(dentry, bindex);
23946 +       opq_dentry = au_lkup_one(&diropq_name, h_dentry, br, /*nd*/NULL);
23947 +       if (IS_ERR(opq_dentry))
23948 +               goto out;
23949 +
23950 +       if (au_ftest_diropq(flags, CREATE)) {
23951 +               err = link_or_create_wh(sb, bindex, opq_dentry);
23952 +               if (!err) {
23953 +                       au_set_dbdiropq(dentry, bindex);
23954 +                       goto out; /* success */
23955 +               }
23956 +       } else {
23957 +               struct path tmp = {
23958 +                       .dentry = opq_dentry,
23959 +                       .mnt    = br->br_mnt
23960 +               };
23961 +               err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
23962 +               if (!err)
23963 +                       au_set_dbdiropq(dentry, -1);
23964 +       }
23965 +       dput(opq_dentry);
23966 +       opq_dentry = ERR_PTR(err);
23967 +
23968 + out:
23969 +       return opq_dentry;
23970 +}
23971 +
23972 +struct do_diropq_args {
23973 +       struct dentry **errp;
23974 +       struct dentry *dentry;
23975 +       aufs_bindex_t bindex;
23976 +       unsigned int flags;
23977 +};
23978 +
23979 +static void call_do_diropq(void *args)
23980 +{
23981 +       struct do_diropq_args *a = args;
23982 +       *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
23983 +}
23984 +
23985 +struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
23986 +                            unsigned int flags)
23987 +{
23988 +       struct dentry *diropq, *h_dentry;
23989 +
23990 +       h_dentry = au_h_dptr(dentry, bindex);
23991 +       if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
23992 +               diropq = do_diropq(dentry, bindex, flags);
23993 +       else {
23994 +               int wkq_err;
23995 +               struct do_diropq_args args = {
23996 +                       .errp           = &diropq,
23997 +                       .dentry         = dentry,
23998 +                       .bindex         = bindex,
23999 +                       .flags          = flags
24000 +               };
24001 +
24002 +               wkq_err = au_wkq_wait(call_do_diropq, &args);
24003 +               if (unlikely(wkq_err))
24004 +                       diropq = ERR_PTR(wkq_err);
24005 +       }
24006 +
24007 +       return diropq;
24008 +}
24009 +
24010 +/* ---------------------------------------------------------------------- */
24011 +
24012 +/*
24013 + * lookup whiteout dentry.
24014 + * @h_parent: lower parent dentry which must exist and be locked
24015 + * @base_name: name of dentry which will be whiteouted
24016 + * returns dentry for whiteout.
24017 + */
24018 +struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24019 +                         struct au_branch *br)
24020 +{
24021 +       int err;
24022 +       struct qstr wh_name;
24023 +       struct dentry *wh_dentry;
24024 +
24025 +       err = au_wh_name_alloc(&wh_name, base_name);
24026 +       wh_dentry = ERR_PTR(err);
24027 +       if (!err) {
24028 +               wh_dentry = au_lkup_one(&wh_name, h_parent, br, /*nd*/NULL);
24029 +               kfree(wh_name.name);
24030 +       }
24031 +       return wh_dentry;
24032 +}
24033 +
24034 +/*
24035 + * link/create a whiteout for @dentry on @bindex.
24036 + */
24037 +struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24038 +                           struct dentry *h_parent)
24039 +{
24040 +       struct dentry *wh_dentry;
24041 +       struct super_block *sb;
24042 +       int err;
24043 +
24044 +       sb = dentry->d_sb;
24045 +       wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
24046 +       if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
24047 +               err = link_or_create_wh(sb, bindex, wh_dentry);
24048 +               if (!err)
24049 +                       au_set_dbwh(dentry, bindex);
24050 +               else {
24051 +                       dput(wh_dentry);
24052 +                       wh_dentry = ERR_PTR(err);
24053 +               }
24054 +       }
24055 +
24056 +       return wh_dentry;
24057 +}
24058 +
24059 +/* ---------------------------------------------------------------------- */
24060 +
24061 +/* Delete all whiteouts in this directory on branch bindex. */
24062 +static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
24063 +                          aufs_bindex_t bindex, struct au_branch *br)
24064 +{
24065 +       int err;
24066 +       unsigned long ul, n;
24067 +       struct qstr wh_name;
24068 +       char *p;
24069 +       struct hlist_head *head;
24070 +       struct au_vdir_wh *tpos;
24071 +       struct hlist_node *pos;
24072 +       struct au_vdir_destr *str;
24073 +
24074 +       err = -ENOMEM;
24075 +       p = __getname();
24076 +       wh_name.name = p;
24077 +       if (unlikely(!wh_name.name))
24078 +               goto out;
24079 +
24080 +       err = 0;
24081 +       memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
24082 +       p += AUFS_WH_PFX_LEN;
24083 +       n = whlist->nh_num;
24084 +       head = whlist->nh_head;
24085 +       for (ul = 0; !err && ul < n; ul++, head++) {
24086 +               hlist_for_each_entry(tpos, pos, head, wh_hash) {
24087 +                       if (tpos->wh_bindex != bindex)
24088 +                               continue;
24089 +
24090 +                       str = &tpos->wh_str;
24091 +                       if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
24092 +                               memcpy(p, str->name, str->len);
24093 +                               wh_name.len = AUFS_WH_PFX_LEN + str->len;
24094 +                               err = unlink_wh_name(h_dentry, &wh_name, br);
24095 +                               if (!err)
24096 +                                       continue;
24097 +                               break;
24098 +                       }
24099 +                       AuIOErr("whiteout name too long %.*s\n",
24100 +                               str->len, str->name);
24101 +                       err = -EIO;
24102 +                       break;
24103 +               }
24104 +       }
24105 +       __putname(wh_name.name);
24106 +
24107 + out:
24108 +       return err;
24109 +}
24110 +
24111 +struct del_wh_children_args {
24112 +       int *errp;
24113 +       struct dentry *h_dentry;
24114 +       struct au_nhash *whlist;
24115 +       aufs_bindex_t bindex;
24116 +       struct au_branch *br;
24117 +};
24118 +
24119 +static void call_del_wh_children(void *args)
24120 +{
24121 +       struct del_wh_children_args *a = args;
24122 +       *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
24123 +}
24124 +
24125 +/* ---------------------------------------------------------------------- */
24126 +
24127 +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
24128 +{
24129 +       struct au_whtmp_rmdir *whtmp;
24130 +       int err;
24131 +       unsigned int rdhash;
24132 +
24133 +       SiMustAnyLock(sb);
24134 +
24135 +       whtmp = kmalloc(sizeof(*whtmp), gfp);
24136 +       if (unlikely(!whtmp)) {
24137 +               whtmp = ERR_PTR(-ENOMEM);
24138 +               goto out;
24139 +       }
24140 +
24141 +       whtmp->dir = NULL;
24142 +       whtmp->wh_dentry = NULL;
24143 +       /* no estimation for dir size */
24144 +       rdhash = au_sbi(sb)->si_rdhash;
24145 +       if (!rdhash)
24146 +               rdhash = AUFS_RDHASH_DEF;
24147 +       err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
24148 +       if (unlikely(err)) {
24149 +               kfree(whtmp);
24150 +               whtmp = ERR_PTR(err);
24151 +       }
24152 +
24153 + out:
24154 +       return whtmp;
24155 +}
24156 +
24157 +void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
24158 +{
24159 +       dput(whtmp->wh_dentry);
24160 +       iput(whtmp->dir);
24161 +       au_nhash_wh_free(&whtmp->whlist);
24162 +       kfree(whtmp);
24163 +}
24164 +
24165 +/*
24166 + * rmdir the whiteouted temporary named dir @h_dentry.
24167 + * @whlist: whiteouted children.
24168 + */
24169 +int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24170 +                  struct dentry *wh_dentry, struct au_nhash *whlist)
24171 +{
24172 +       int err;
24173 +       struct path h_tmp;
24174 +       struct inode *wh_inode, *h_dir;
24175 +       struct au_branch *br;
24176 +
24177 +       h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
24178 +       IMustLock(h_dir);
24179 +
24180 +       br = au_sbr(dir->i_sb, bindex);
24181 +       wh_inode = wh_dentry->d_inode;
24182 +       mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
24183 +
24184 +       /*
24185 +        * someone else might change some whiteouts while we were sleeping.
24186 +        * it means this whlist may have an obsoleted entry.
24187 +        */
24188 +       if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
24189 +               err = del_wh_children(wh_dentry, whlist, bindex, br);
24190 +       else {
24191 +               int wkq_err;
24192 +               struct del_wh_children_args args = {
24193 +                       .errp           = &err,
24194 +                       .h_dentry       = wh_dentry,
24195 +                       .whlist         = whlist,
24196 +                       .bindex         = bindex,
24197 +                       .br             = br
24198 +               };
24199 +
24200 +               wkq_err = au_wkq_wait(call_del_wh_children, &args);
24201 +               if (unlikely(wkq_err))
24202 +                       err = wkq_err;
24203 +       }
24204 +       mutex_unlock(&wh_inode->i_mutex);
24205 +
24206 +       if (!err) {
24207 +               h_tmp.dentry = wh_dentry;
24208 +               h_tmp.mnt = br->br_mnt;
24209 +               err = vfsub_rmdir(h_dir, &h_tmp);
24210 +               /* d_drop(h_dentry); */
24211 +       }
24212 +
24213 +       if (!err) {
24214 +               if (au_ibstart(dir) == bindex) {
24215 +                       au_cpup_attr_timesizes(dir);
24216 +                       drop_nlink(dir);
24217 +               }
24218 +               return 0; /* success */
24219 +       }
24220 +
24221 +       AuWarn("failed removing %.*s(%d), ignored\n",
24222 +              AuDLNPair(wh_dentry), err);
24223 +       return err;
24224 +}
24225 +
24226 +static void call_rmdir_whtmp(void *args)
24227 +{
24228 +       int err;
24229 +       struct au_whtmp_rmdir *a = args;
24230 +       struct super_block *sb;
24231 +       struct dentry *h_parent;
24232 +       struct inode *h_dir;
24233 +       struct au_branch *br;
24234 +       struct au_hinode *hdir;
24235 +
24236 +       /* rmdir by nfsd may cause deadlock with this i_mutex */
24237 +       /* mutex_lock(&a->dir->i_mutex); */
24238 +       sb = a->dir->i_sb;
24239 +       si_noflush_read_lock(sb);
24240 +       err = au_test_ro(sb, a->bindex, NULL);
24241 +       if (unlikely(err))
24242 +               goto out;
24243 +
24244 +       err = -EIO;
24245 +       br = au_sbr(sb, a->bindex);
24246 +       ii_write_lock_parent(a->dir);
24247 +       h_parent = dget_parent(a->wh_dentry);
24248 +       h_dir = h_parent->d_inode;
24249 +       hdir = au_hi(a->dir, a->bindex);
24250 +       au_hin_imtx_lock_nested(hdir, AuLsc_I_PARENT);
24251 +       err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, br);
24252 +       if (!err) {
24253 +               err = mnt_want_write(br->br_mnt);
24254 +               if (!err) {
24255 +                       err = au_whtmp_rmdir(a->dir, a->bindex, a->wh_dentry,
24256 +                                            &a->whlist);
24257 +                       mnt_drop_write(br->br_mnt);
24258 +               }
24259 +       }
24260 +       au_hin_imtx_unlock(hdir);
24261 +       dput(h_parent);
24262 +       ii_write_unlock(a->dir);
24263 +
24264 + out:
24265 +       /* mutex_unlock(&a->dir->i_mutex); */
24266 +       au_nwt_done(&au_sbi(sb)->si_nowait);
24267 +       si_read_unlock(sb);
24268 +       au_whtmp_rmdir_free(a);
24269 +       if (unlikely(err))
24270 +               AuIOErr("err %d\n", err);
24271 +}
24272 +
24273 +void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24274 +                        struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
24275 +{
24276 +       int wkq_err;
24277 +
24278 +       IMustLock(dir);
24279 +
24280 +       /* all post-process will be done in call_rmdir_whtmp(). */
24281 +       args->dir = au_igrab(dir);
24282 +       args->bindex = bindex;
24283 +       args->wh_dentry = dget(wh_dentry);
24284 +       wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, dir->i_sb);
24285 +       if (unlikely(wkq_err)) {
24286 +               AuWarn("rmdir error %.*s (%d), ignored\n",
24287 +                      AuDLNPair(wh_dentry), wkq_err);
24288 +               au_whtmp_rmdir_free(args);
24289 +       }
24290 +}
24291 diff -uprN -x .git linux-2.6.31/fs/aufs/whout.h aufs2-2.6.git/fs/aufs/whout.h
24292 --- linux-2.6.31/fs/aufs/whout.h        1970-01-01 00:00:00.000000000 +0000
24293 +++ aufs2-2.6.git/fs/aufs/whout.h       2009-09-21 21:49:23.414941217 +0000
24294 @@ -0,0 +1,87 @@
24295 +/*
24296 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24297 + *
24298 + * This program, aufs is free software; you can redistribute it and/or modify
24299 + * it under the terms of the GNU General Public License as published by
24300 + * the Free Software Foundation; either version 2 of the License, or
24301 + * (at your option) any later version.
24302 + *
24303 + * This program is distributed in the hope that it will be useful,
24304 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24305 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24306 + * GNU General Public License for more details.
24307 + *
24308 + * You should have received a copy of the GNU General Public License
24309 + * along with this program; if not, write to the Free Software
24310 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24311 + */
24312 +
24313 +/*
24314 + * whiteout for logical deletion and opaque directory
24315 + */
24316 +
24317 +#ifndef __AUFS_WHOUT_H__
24318 +#define __AUFS_WHOUT_H__
24319 +
24320 +#ifdef __KERNEL__
24321 +
24322 +#include <linux/aufs_type.h>
24323 +#include "dir.h"
24324 +
24325 +/* whout.c */
24326 +int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
24327 +struct au_branch;
24328 +int au_wh_test(struct dentry *h_parent, struct qstr *wh_name,
24329 +              struct au_branch *br, int try_sio);
24330 +int au_diropq_test(struct dentry *h_dentry, struct au_branch *br);
24331 +struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
24332 +                            struct qstr *prefix);
24333 +int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
24334 +int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
24335 +                       struct dentry *dentry);
24336 +int au_wh_init(struct dentry *h_parent, struct au_branch *br,
24337 +              struct super_block *sb);
24338 +
24339 +/* diropq flags */
24340 +#define AuDiropq_CREATE        1
24341 +#define au_ftest_diropq(flags, name)   ((flags) & AuDiropq_##name)
24342 +#define au_fset_diropq(flags, name)    { (flags) |= AuDiropq_##name; }
24343 +#define au_fclr_diropq(flags, name)    { (flags) &= ~AuDiropq_##name; }
24344 +
24345 +struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
24346 +                            unsigned int flags);
24347 +struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
24348 +                         struct au_branch *br);
24349 +struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
24350 +                           struct dentry *h_parent);
24351 +
24352 +/* real rmdir for the whiteout-ed dir */
24353 +struct au_whtmp_rmdir {
24354 +       struct inode *dir;
24355 +       aufs_bindex_t bindex;
24356 +       struct dentry *wh_dentry;
24357 +       struct au_nhash whlist;
24358 +};
24359 +
24360 +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
24361 +void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
24362 +int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
24363 +                  struct dentry *wh_dentry, struct au_nhash *whlist);
24364 +void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
24365 +                        struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
24366 +
24367 +/* ---------------------------------------------------------------------- */
24368 +
24369 +static inline struct dentry *au_diropq_create(struct dentry *dentry,
24370 +                                             aufs_bindex_t bindex)
24371 +{
24372 +       return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
24373 +}
24374 +
24375 +static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
24376 +{
24377 +       return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
24378 +}
24379 +
24380 +#endif /* __KERNEL__ */
24381 +#endif /* __AUFS_WHOUT_H__ */
24382 diff -uprN -x .git linux-2.6.31/fs/aufs/wkq.c aufs2-2.6.git/fs/aufs/wkq.c
24383 --- linux-2.6.31/fs/aufs/wkq.c  1970-01-01 00:00:00.000000000 +0000
24384 +++ aufs2-2.6.git/fs/aufs/wkq.c 2009-09-21 21:49:23.414941217 +0000
24385 @@ -0,0 +1,259 @@
24386 +/*
24387 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24388 + *
24389 + * This program, aufs is free software; you can redistribute it and/or modify
24390 + * it under the terms of the GNU General Public License as published by
24391 + * the Free Software Foundation; either version 2 of the License, or
24392 + * (at your option) any later version.
24393 + *
24394 + * This program is distributed in the hope that it will be useful,
24395 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24396 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24397 + * GNU General Public License for more details.
24398 + *
24399 + * You should have received a copy of the GNU General Public License
24400 + * along with this program; if not, write to the Free Software
24401 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24402 + */
24403 +
24404 +/*
24405 + * workqueue for asynchronous/super-io operations
24406 + * todo: try new credential scheme
24407 + */
24408 +
24409 +#include <linux/module.h>
24410 +#include "aufs.h"
24411 +
24412 +/* internal workqueue named AUFS_WKQ_NAME */
24413 +static struct au_wkq {
24414 +       struct workqueue_struct *q;
24415 +
24416 +       /* balancing */
24417 +       atomic_t                busy;
24418 +} *au_wkq;
24419 +
24420 +struct au_wkinfo {
24421 +       struct work_struct wk;
24422 +       struct super_block *sb;
24423 +
24424 +       unsigned int flags; /* see wkq.h */
24425 +
24426 +       au_wkq_func_t func;
24427 +       void *args;
24428 +
24429 +       atomic_t *busyp;
24430 +       struct completion *comp;
24431 +};
24432 +
24433 +/* ---------------------------------------------------------------------- */
24434 +
24435 +static int enqueue(struct au_wkq *wkq, struct au_wkinfo *wkinfo)
24436 +{
24437 +       wkinfo->busyp = &wkq->busy;
24438 +       if (au_ftest_wkq(wkinfo->flags, WAIT))
24439 +               return !queue_work(wkq->q, &wkinfo->wk);
24440 +       else
24441 +               return !schedule_work(&wkinfo->wk);
24442 +}
24443 +
24444 +static void do_wkq(struct au_wkinfo *wkinfo)
24445 +{
24446 +       unsigned int idle, n;
24447 +       int i, idle_idx;
24448 +
24449 +       while (1) {
24450 +               if (au_ftest_wkq(wkinfo->flags, WAIT)) {
24451 +                       idle_idx = 0;
24452 +                       idle = UINT_MAX;
24453 +                       for (i = 0; i < aufs_nwkq; i++) {
24454 +                               n = atomic_inc_return(&au_wkq[i].busy);
24455 +                               if (n == 1 && !enqueue(au_wkq + i, wkinfo))
24456 +                                       return; /* success */
24457 +
24458 +                               if (n < idle) {
24459 +                                       idle_idx = i;
24460 +                                       idle = n;
24461 +                               }
24462 +                               atomic_dec(&au_wkq[i].busy);
24463 +                       }
24464 +               } else
24465 +                       idle_idx = aufs_nwkq;
24466 +
24467 +               atomic_inc(&au_wkq[idle_idx].busy);
24468 +               if (!enqueue(au_wkq + idle_idx, wkinfo))
24469 +                       return; /* success */
24470 +
24471 +               /* impossible? */
24472 +               AuWarn1("failed to queue_work()\n");
24473 +               yield();
24474 +       }
24475 +}
24476 +
24477 +static void wkq_func(struct work_struct *wk)
24478 +{
24479 +       struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
24480 +
24481 +       wkinfo->func(wkinfo->args);
24482 +       atomic_dec_return(wkinfo->busyp);
24483 +       if (au_ftest_wkq(wkinfo->flags, WAIT))
24484 +               complete(wkinfo->comp);
24485 +       else {
24486 +               kobject_put(&au_sbi(wkinfo->sb)->si_kobj);
24487 +               module_put(THIS_MODULE);
24488 +               kfree(wkinfo);
24489 +       }
24490 +}
24491 +
24492 +/*
24493 + * Since struct completion is large, try allocating it dynamically.
24494 + */
24495 +#if defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS)
24496 +#define AuWkqCompDeclare(name) struct completion *comp = NULL
24497 +
24498 +static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24499 +{
24500 +       *comp = kmalloc(sizeof(**comp), GFP_NOFS);
24501 +       if (*comp) {
24502 +               init_completion(*comp);
24503 +               wkinfo->comp = *comp;
24504 +               return 0;
24505 +       }
24506 +       return -ENOMEM;
24507 +}
24508 +
24509 +static void au_wkq_comp_free(struct completion *comp)
24510 +{
24511 +       kfree(comp);
24512 +}
24513 +
24514 +#else
24515 +
24516 +/* no braces */
24517 +#define AuWkqCompDeclare(name) \
24518 +       DECLARE_COMPLETION_ONSTACK(_ ## name); \
24519 +       struct completion *comp = &_ ## name
24520 +
24521 +static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
24522 +{
24523 +       wkinfo->comp = *comp;
24524 +       return 0;
24525 +}
24526 +
24527 +static void au_wkq_comp_free(struct completion *comp __maybe_unused)
24528 +{
24529 +       /* empty */
24530 +}
24531 +#endif /* 4KSTACKS */
24532 +
24533 +static void au_wkq_run(struct au_wkinfo *wkinfo)
24534 +{
24535 +       au_dbg_verify_kthread();
24536 +       INIT_WORK(&wkinfo->wk, wkq_func);
24537 +       do_wkq(wkinfo);
24538 +}
24539 +
24540 +int au_wkq_wait(au_wkq_func_t func, void *args)
24541 +{
24542 +       int err;
24543 +       AuWkqCompDeclare(comp);
24544 +       struct au_wkinfo wkinfo = {
24545 +               .flags  = AuWkq_WAIT,
24546 +               .func   = func,
24547 +               .args   = args
24548 +       };
24549 +
24550 +       err = au_wkq_comp_alloc(&wkinfo, &comp);
24551 +       if (!err) {
24552 +               au_wkq_run(&wkinfo);
24553 +               /* no timeout, no interrupt */
24554 +               wait_for_completion(wkinfo.comp);
24555 +               au_wkq_comp_free(comp);
24556 +       }
24557 +
24558 +       return err;
24559 +
24560 +}
24561 +
24562 +int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb)
24563 +{
24564 +       int err;
24565 +       struct au_wkinfo *wkinfo;
24566 +
24567 +       atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
24568 +
24569 +       /*
24570 +        * wkq_func() must free this wkinfo.
24571 +        * it highly depends upon the implementation of workqueue.
24572 +        */
24573 +       err = 0;
24574 +       wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
24575 +       if (wkinfo) {
24576 +               wkinfo->sb = sb;
24577 +               wkinfo->flags = !AuWkq_WAIT;
24578 +               wkinfo->func = func;
24579 +               wkinfo->args = args;
24580 +               wkinfo->comp = NULL;
24581 +               kobject_get(&au_sbi(sb)->si_kobj);
24582 +               __module_get(THIS_MODULE);
24583 +
24584 +               au_wkq_run(wkinfo);
24585 +       } else {
24586 +               err = -ENOMEM; /* nothing queued, undo the accounting */
24587 +               au_nwt_done(&au_sbi(sb)->si_nowait); /* dec and wake */
24588 +       }
24589 +
24590 +       return err;
24591 +}
24592 +
24593 +/* ---------------------------------------------------------------------- */
24594 +
24595 +void au_nwt_init(struct au_nowait_tasks *nwt)
24596 +{
24597 +       atomic_set(&nwt->nw_len, 0);
24598 +       /* smp_mb();*/ /* atomic_set */
24599 +       init_waitqueue_head(&nwt->nw_wq);
24600 +}
24601 +
24602 +void au_wkq_fin(void)
24603 +{
24604 +       int i;
24605 +
24606 +       for (i = 0; i < aufs_nwkq; i++)
24607 +               if (au_wkq[i].q && !IS_ERR(au_wkq[i].q))
24608 +                       destroy_workqueue(au_wkq[i].q);
24609 +       kfree(au_wkq);
24610 +}
24611 +
24612 +int __init au_wkq_init(void)
24613 +{
24614 +       int err, i;
24615 +       struct au_wkq *nowaitq;
24616 +
24617 +       /* '+1' is for accounting of nowait queue */
24618 +       err = -ENOMEM;
24619 +       au_wkq = kcalloc(aufs_nwkq + 1, sizeof(*au_wkq), GFP_NOFS);
24620 +       if (unlikely(!au_wkq))
24621 +               goto out;
24622 +
24623 +       err = 0;
24624 +       for (i = 0; i < aufs_nwkq; i++) {
24625 +               au_wkq[i].q = create_singlethread_workqueue(AUFS_WKQ_NAME);
24626 +               if (au_wkq[i].q && !IS_ERR(au_wkq[i].q)) {
24627 +                       atomic_set(&au_wkq[i].busy, 0);
24628 +                       continue;
24629 +               }
24630 +               /* NULL also means failure, and PTR_ERR(NULL) would be 0 */
24631 +               err = au_wkq[i].q ? PTR_ERR(au_wkq[i].q) : -ENOMEM;
24632 +               au_wkq_fin();
24633 +               goto out;
24634 +       }
24635 +
24636 +       /* nowait accounting */
24637 +       nowaitq = au_wkq + aufs_nwkq;
24638 +       atomic_set(&nowaitq->busy, 0);
24639 +       nowaitq->q = NULL;
24640 +       /* smp_mb(); */ /* atomic_set */
24641 +
24642 + out:
24643 +       return err;
24644 +}
24645 diff -uprN -x .git linux-2.6.31/fs/aufs/wkq.h aufs2-2.6.git/fs/aufs/wkq.h
24646 --- linux-2.6.31/fs/aufs/wkq.h  1970-01-01 00:00:00.000000000 +0000
24647 +++ aufs2-2.6.git/fs/aufs/wkq.h 2009-09-21 21:49:23.414941217 +0000
24648 @@ -0,0 +1,82 @@
24649 +/*
24650 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24651 + *
24652 + * This program, aufs is free software; you can redistribute it and/or modify
24653 + * it under the terms of the GNU General Public License as published by
24654 + * the Free Software Foundation; either version 2 of the License, or
24655 + * (at your option) any later version.
24656 + *
24657 + * This program is distributed in the hope that it will be useful,
24658 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24659 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24660 + * GNU General Public License for more details.
24661 + *
24662 + * You should have received a copy of the GNU General Public License
24663 + * along with this program; if not, write to the Free Software
24664 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24665 + */
24666 +
24667 +/*
24668 + * workqueue for asynchronous/super-io operations
24669 + * todo: try new credentials management scheme
24670 + */
24671 +
24672 +#ifndef __AUFS_WKQ_H__
24673 +#define __AUFS_WKQ_H__
24674 +
24675 +#ifdef __KERNEL__
24676 +
24677 +#include <linux/sched.h>
24678 +#include <linux/wait.h>
24679 +#include <linux/aufs_type.h>
24680 +
24681 +struct super_block;
24682 +
24683 +/* ---------------------------------------------------------------------- */
24684 +
24685 +/*
24686 + * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
24687 + */
24688 +struct au_nowait_tasks {
24689 +       atomic_t                nw_len;
24690 +       wait_queue_head_t       nw_wq;
24691 +};
24692 +
24693 +/* ---------------------------------------------------------------------- */
24694 +
24695 +typedef void (*au_wkq_func_t)(void *args);
24696 +
24697 +/* wkq flags */
24698 +#define AuWkq_WAIT     1
24699 +#define au_ftest_wkq(flags, name)      ((flags) & AuWkq_##name)
24700 +#define au_fset_wkq(flags, name)       { (flags) |= AuWkq_##name; }
24701 +#define au_fclr_wkq(flags, name)       { (flags) &= ~AuWkq_##name; }
24702 +
24703 +/* wkq.c */
24704 +int au_wkq_wait(au_wkq_func_t func, void *args);
24705 +int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb);
24706 +void au_nwt_init(struct au_nowait_tasks *nwt);
24707 +int __init au_wkq_init(void);
24708 +void au_wkq_fin(void);
24709 +
24710 +/* ---------------------------------------------------------------------- */
24711 +
24712 +static inline int au_test_wkq(struct task_struct *tsk)
24713 +{
24714 +       return !tsk->mm && !strcmp(tsk->comm, AUFS_WKQ_NAME);
24715 +}
24716 +
24717 +static inline void au_nwt_done(struct au_nowait_tasks *nwt)
24718 +{
24719 +       if (!atomic_dec_return(&nwt->nw_len))
24720 +               wake_up_all(&nwt->nw_wq);
24721 +}
24722 +
24723 +static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
24724 +{
24725 +       wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
24726 +       return 0;
24727 +}
24728 +
24729 +#endif /* __KERNEL__ */
24730 +#endif /* __AUFS_WKQ_H__ */
24731 diff -uprN -x .git linux-2.6.31/fs/aufs/xino.c aufs2-2.6.git/fs/aufs/xino.c
24732 --- linux-2.6.31/fs/aufs/xino.c 1970-01-01 00:00:00.000000000 +0000
24733 +++ aufs2-2.6.git/fs/aufs/xino.c        2009-09-21 21:49:23.414941217 +0000
24734 @@ -0,0 +1,1203 @@
24735 +/*
24736 + * Copyright (C) 2005-2009 Junjiro R. Okajima
24737 + *
24738 + * This program, aufs is free software; you can redistribute it and/or modify
24739 + * it under the terms of the GNU General Public License as published by
24740 + * the Free Software Foundation; either version 2 of the License, or
24741 + * (at your option) any later version.
24742 + *
24743 + * This program is distributed in the hope that it will be useful,
24744 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24745 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24746 + * GNU General Public License for more details.
24747 + *
24748 + * You should have received a copy of the GNU General Public License
24749 + * along with this program; if not, write to the Free Software
24750 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24751 + */
24752 +
24753 +/*
24754 + * external inode number translation table and bitmap
24755 + */
24756 +
24757 +#include <linux/file.h>
24758 +#include <linux/seq_file.h>
24759 +#include <linux/uaccess.h>
24760 +#include "aufs.h"
24761 +
24762 +ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
24763 +                  loff_t *pos)
24764 +{
24765 +       ssize_t err;
24766 +       mm_segment_t oldfs;
24767 +
24768 +       oldfs = get_fs();
24769 +       set_fs(KERNEL_DS);
24770 +       do {
24771 +               /* todo: signal_pending? */
24772 +               err = func(file, (char __user *)buf, size, pos);
24773 +       } while (err == -EAGAIN || err == -EINTR);
24774 +       set_fs(oldfs);
24775 +
24776 +#if 0 /* reserved for future use */
24777 +       if (err > 0)
24778 +               fsnotify_access(file->f_dentry);
24779 +#endif
24780 +
24781 +       return err;
24782 +}
24783 +
24784 +/* ---------------------------------------------------------------------- */
24785 +
24786 +static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *buf,
24787 +                             size_t size, loff_t *pos)
24788 +{
24789 +       ssize_t err;
24790 +       mm_segment_t oldfs;
24791 +
24792 +       oldfs = get_fs();
24793 +       set_fs(KERNEL_DS);
24794 +       lockdep_off();
24795 +       do {
24796 +               /* todo: signal_pending? */
24797 +               err = func(file, (const char __user *)buf, size, pos);
24798 +       } while (err == -EAGAIN || err == -EINTR);
24799 +       lockdep_on();
24800 +       set_fs(oldfs);
24801 +
24802 +#if 0 /* reserved for future use */
24803 +       if (err > 0)
24804 +               fsnotify_modify(file->f_dentry);
24805 +#endif
24806 +
24807 +       return err;
24808 +}
24809 +
24810 +struct do_xino_fwrite_args {
24811 +       ssize_t *errp;
24812 +       au_writef_t func;
24813 +       struct file *file;
24814 +       void *buf;
24815 +       size_t size;
24816 +       loff_t *pos;
24817 +};
24818 +
24819 +static void call_do_xino_fwrite(void *args)
24820 +{
24821 +       struct do_xino_fwrite_args *a = args;
24822 +       *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
24823 +}
24824 +
24825 +ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
24826 +                   loff_t *pos)
24827 +{
24828 +       ssize_t err;
24829 +
24830 +       /* todo: signal block and no wkq? */
24831 +       /* todo: new credential scheme */
24832 +       /*
24833 +        * it breaks RLIMIT_FSIZE and normal user's limit,
24834 +        * users should care about quota and real 'filesystem full.'
24835 +        */
24836 +       if (!au_test_wkq(current)) {
24837 +               int wkq_err;
24838 +               struct do_xino_fwrite_args args = {
24839 +                       .errp   = &err,
24840 +                       .func   = func,
24841 +                       .file   = file,
24842 +                       .buf    = buf,
24843 +                       .size   = size,
24844 +                       .pos    = pos
24845 +               };
24846 +
24847 +               wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
24848 +               if (unlikely(wkq_err))
24849 +                       err = wkq_err;
24850 +       } else
24851 +               err = do_xino_fwrite(func, file, buf, size, pos);
24852 +
24853 +       return err;
24854 +}
24855 +
24856 +/* ---------------------------------------------------------------------- */
24857 +
24858 +/*
24859 + * create a new xinofile at the same place/path as @base_file.
24860 + */
24861 +struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
24862 +{
24863 +       struct file *file;
24864 +       struct dentry *base, *dentry, *parent;
24865 +       struct inode *dir;
24866 +       struct qstr *name;
24867 +       int err;
24868 +       struct path path;
24869 +
24870 +       base = base_file->f_dentry;
24871 +       parent = base->d_parent; /* dir inode is locked */
24872 +       dir = parent->d_inode;
24873 +       IMustLock(dir);
24874 +
24875 +       file = ERR_PTR(-EINVAL);
24876 +       name = &base->d_name;
24877 +       dentry = vfsub_lookup_one_len(name->name, parent, name->len);
24878 +       if (IS_ERR(dentry)) {
24879 +               file = (void *)dentry;
24880 +               AuErr("%.*s lookup err %ld\n", AuLNPair(name), PTR_ERR(dentry));
24881 +               goto out;
24882 +       }
24883 +
24884 +       /* no need to mnt_want_write() since we call dentry_open() later */
24885 +       err = vfs_create(dir, dentry, S_IRUGO | S_IWUGO, NULL);
24886 +       if (unlikely(err)) {
24887 +               file = ERR_PTR(err);
24888 +               AuErr("%.*s create err %d\n", AuLNPair(name), err);
24889 +               goto out_dput;
24890 +       }
24891 +
24892 +       path.dentry = dentry;
24893 +       path.mnt = base_file->f_vfsmnt;
24894 +       path_get(&path);
24895 +       file = vfsub_dentry_open(&path, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,
24896 +                                current_cred());
24897 +       if (IS_ERR(file)) {
24898 +               AuErr("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
24899 +               goto out_dput;
24900 +       }
24901 +
24902 +       err = vfsub_unlink(dir, &file->f_path, /*force*/0);
24903 +       if (unlikely(err)) {
24904 +               AuErr("%.*s unlink err %d\n", AuLNPair(name), err);
24905 +               goto out_fput;
24906 +       }
24907 +
24908 +       if (copy_src) {
24909 +               /* no one can touch copy_src xino */
24910 +               err = au_copy_file(file, copy_src,
24911 +                                  i_size_read(copy_src->f_dentry->d_inode));
24912 +               if (unlikely(err)) {
24913 +                       AuErr("%.*s copy err %d\n", AuLNPair(name), err);
24914 +                       goto out_fput;
24915 +               }
24916 +       }
24917 +       goto out_dput; /* success */
24918 +
24919 + out_fput:
24920 +       fput(file);
24921 +       file = ERR_PTR(err);
24922 + out_dput:
24923 +       dput(dentry);
24924 + out:
24925 +       return file;
24926 +}
24927 +
24928 +struct au_xino_lock_dir {
24929 +       struct au_hinode *hdir;
24930 +       struct dentry *parent;
24931 +       struct mutex *mtx;
24932 +};
24933 +
24934 +static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
24935 +                            struct au_xino_lock_dir *ldir)
24936 +{
24937 +       aufs_bindex_t brid, bindex;
24938 +
24939 +       ldir->hdir = NULL;
24940 +       bindex = -1;
24941 +       brid = au_xino_brid(sb);
24942 +       if (brid >= 0)
24943 +               bindex = au_br_index(sb, brid);
24944 +       if (bindex >= 0) {
24945 +               ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
24946 +               au_hin_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
24947 +       } else {
24948 +               ldir->parent = dget_parent(xino->f_dentry);
24949 +               ldir->mtx = &ldir->parent->d_inode->i_mutex;
24950 +               mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
24951 +       }
24952 +}
24953 +
24954 +static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
24955 +{
24956 +       if (ldir->hdir)
24957 +               au_hin_imtx_unlock(ldir->hdir);
24958 +       else {
24959 +               mutex_unlock(ldir->mtx);
24960 +               dput(ldir->parent);
24961 +       }
24962 +}
24963 +
24964 +/* ---------------------------------------------------------------------- */
24965 +
24966 +/* truncate xino files asynchronously */
24967 +
24968 +int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
24969 +{
24970 +       int err;
24971 +       aufs_bindex_t bi, bend;
24972 +       struct au_branch *br;
24973 +       struct file *new_xino, *file;
24974 +       struct super_block *h_sb;
24975 +       struct au_xino_lock_dir ldir;
24976 +
24977 +       err = -EINVAL;
24978 +       bend = au_sbend(sb);
24979 +       if (unlikely(bindex < 0 || bend < bindex))
24980 +               goto out;
24981 +       br = au_sbr(sb, bindex);
24982 +       file = br->br_xino.xi_file;
24983 +       if (!file)
24984 +               goto out;
24985 +
24986 +       au_xino_lock_dir(sb, file, &ldir);
24987 +       /* mnt_want_write() is unnecessary here */
24988 +       new_xino = au_xino_create2(file, file);
24989 +       au_xino_unlock_dir(&ldir);
24990 +       err = PTR_ERR(new_xino);
24991 +       if (IS_ERR(new_xino))
24992 +               goto out;
24993 +       err = 0;
24994 +       fput(file);
24995 +       br->br_xino.xi_file = new_xino;
24996 +
24997 +       h_sb = br->br_mnt->mnt_sb;
24998 +       for (bi = 0; bi <= bend; bi++) {
24999 +               if (unlikely(bi == bindex))
25000 +                       continue;
25001 +               br = au_sbr(sb, bi);
25002 +               if (br->br_mnt->mnt_sb != h_sb)
25003 +                       continue;
25004 +
25005 +               fput(br->br_xino.xi_file);
25006 +               br->br_xino.xi_file = new_xino;
25007 +               get_file(new_xino);
25008 +       }
25009 +
25010 + out:
25011 +       return err;
25012 +}
25013 +
25014 +struct xino_do_trunc_args {
25015 +       struct super_block *sb;
25016 +       struct au_branch *br;
25017 +};
25018 +
25019 +static void xino_do_trunc(void *_args)
25020 +{
25021 +       struct xino_do_trunc_args *args = _args;
25022 +       struct super_block *sb;
25023 +       struct au_branch *br;
25024 +       struct inode *dir;
25025 +       int err;
25026 +       aufs_bindex_t bindex;
25027 +
25028 +       err = 0;
25029 +       sb = args->sb;
25030 +       dir = sb->s_root->d_inode;
25031 +       br = args->br;
25032 +
25033 +       si_noflush_write_lock(sb);
25034 +       ii_read_lock_parent(dir);
25035 +       bindex = au_br_index(sb, br->br_id);
25036 +       err = au_xino_trunc(sb, bindex);
25037 +       if (!err
25038 +           && br->br_xino.xi_file->f_dentry->d_inode->i_blocks
25039 +           >= br->br_xino_upper)
25040 +               br->br_xino_upper += AUFS_XINO_TRUNC_STEP;
25041 +
25042 +       ii_read_unlock(dir);
25043 +       if (unlikely(err))
25044 +               AuWarn("err b%d, (%d)\n", bindex, err);
25045 +       atomic_dec(&br->br_xino_running);
25046 +       atomic_dec(&br->br_count);
25047 +       au_nwt_done(&au_sbi(sb)->si_nowait);
25048 +       si_write_unlock(sb);
25049 +       kfree(args);
25050 +}
25051 +
25052 +static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
25053 +{
25054 +       struct xino_do_trunc_args *args;
25055 +       int wkq_err;
25056 +
25057 +       if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks
25058 +           < br->br_xino_upper)
25059 +               return;
25060 +
25061 +       if (atomic_inc_return(&br->br_xino_running) > 1)
25062 +               goto out;
25063 +
25064 +       /* lock and kfree() will be called in xino_do_trunc() */
25065 +       args = kmalloc(sizeof(*args), GFP_NOFS);
25066 +       if (unlikely(!args)) {
25067 +               AuErr1("no memory\n");
25068 +               goto out_args;
25069 +       }
25070 +
25071 +       atomic_inc_return(&br->br_count);
25072 +       args->sb = sb;
25073 +       args->br = br;
25074 +       wkq_err = au_wkq_nowait(xino_do_trunc, args, sb);
25075 +       if (!wkq_err)
25076 +               return; /* success */
25077 +
25078 +       AuErr("wkq %d\n", wkq_err);
25079 +       atomic_dec_return(&br->br_count);
25080 +
25081 + out_args:
25082 +       kfree(args);
25083 + out:
25084 +       atomic_dec_return(&br->br_xino_running);
25085 +}
25086 +
25087 +/* ---------------------------------------------------------------------- */
25088 +
25089 +static int au_xino_do_write(au_writef_t write, struct file *file,
25090 +                           ino_t h_ino, ino_t ino)
25091 +{
25092 +       loff_t pos;
25093 +       ssize_t sz;
25094 +
25095 +       pos = h_ino;
25096 +       if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
25097 +               AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25098 +               return -EFBIG;
25099 +       }
25100 +       pos *= sizeof(ino);
25101 +       sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
25102 +       if (sz == sizeof(ino))
25103 +               return 0; /* success */
25104 +
25105 +       AuIOErr("write failed (%zd)\n", sz);
25106 +       return -EIO;
25107 +}
25108 +
25109 +/*
25110 + * write @ino to the xinofile for the specified branch{@sb, @bindex}
25111 + * at the position of @h_ino.
25112 + * even if @ino is zero, it is written to the xinofile and means no entry.
25113 + * if the size of the xino file on a specific filesystem exceeds the watermark,
25114 + * try truncating it.
25115 + */
25116 +int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25117 +                 ino_t ino)
25118 +{
25119 +       int err;
25120 +       unsigned int mnt_flags;
25121 +       struct au_branch *br;
25122 +
25123 +       BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
25124 +                    || ((loff_t)-1) > 0);
25125 +       SiMustAnyLock(sb);
25126 +
25127 +       mnt_flags = au_mntflags(sb);
25128 +       if (!au_opt_test(mnt_flags, XINO))
25129 +               return 0;
25130 +
25131 +       br = au_sbr(sb, bindex);
25132 +       err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25133 +                              h_ino, ino);
25134 +       if (!err) {
25135 +               if (au_opt_test(mnt_flags, TRUNC_XINO)
25136 +                   && au_test_fs_trunc_xino(br->br_mnt->mnt_sb))
25137 +                       xino_try_trunc(sb, br);
25138 +               return 0; /* success */
25139 +       }
25140 +
25141 +       AuIOErr("write failed (%d)\n", err);
25142 +       return -EIO;
25143 +}
25144 +
25145 +/* ---------------------------------------------------------------------- */
25146 +
25147 +/* aufs inode number bitmap */
25148 +
25149 +static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
25150 +static ino_t xib_calc_ino(unsigned long pindex, int bit)
25151 +{
25152 +       ino_t ino;
25153 +
25154 +       AuDebugOn(bit < 0 || page_bits <= bit);
25155 +       ino = AUFS_FIRST_INO + pindex * page_bits + bit;
25156 +       return ino;
25157 +}
25158 +
25159 +static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
25160 +{
25161 +       AuDebugOn(ino < AUFS_FIRST_INO);
25162 +       ino -= AUFS_FIRST_INO;
25163 +       *pindex = ino / page_bits;
25164 +       *bit = ino % page_bits;
25165 +}
25166 +
25167 +static int xib_pindex(struct super_block *sb, unsigned long pindex)
25168 +{
25169 +       int err;
25170 +       loff_t pos;
25171 +       ssize_t sz;
25172 +       struct au_sbinfo *sbinfo;
25173 +       struct file *xib;
25174 +       unsigned long *p;
25175 +
25176 +       sbinfo = au_sbi(sb);
25177 +       MtxMustLock(&sbinfo->si_xib_mtx);
25178 +       AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
25179 +                 || !au_opt_test(sbinfo->si_mntflags, XINO));
25180 +
25181 +       if (pindex == sbinfo->si_xib_last_pindex)
25182 +               return 0;
25183 +
25184 +       xib = sbinfo->si_xib;
25185 +       p = sbinfo->si_xib_buf;
25186 +       pos = sbinfo->si_xib_last_pindex;
25187 +       pos *= PAGE_SIZE;
25188 +       sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25189 +       if (unlikely(sz != PAGE_SIZE))
25190 +               goto out;
25191 +
25192 +       pos = pindex;
25193 +       pos *= PAGE_SIZE;
25194 +       if (i_size_read(xib->f_dentry->d_inode) >= pos + PAGE_SIZE)
25195 +               sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
25196 +       else {
25197 +               memset(p, 0, PAGE_SIZE);
25198 +               sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
25199 +       }
25200 +       if (sz == PAGE_SIZE) {
25201 +               sbinfo->si_xib_last_pindex = pindex;
25202 +               return 0; /* success */
25203 +       }
25204 +
25205 + out:
25206 +       AuIOErr1("write failed (%zd)\n", sz);
25207 +       err = sz;
25208 +       if (sz >= 0)
25209 +               err = -EIO;
25210 +       return err;
25211 +}
25212 +
25213 +/* ---------------------------------------------------------------------- */
25214 +
25215 +int au_xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25216 +                  ino_t ino)
25217 +{
25218 +       int err, bit;
25219 +       unsigned long pindex;
25220 +       struct au_sbinfo *sbinfo;
25221 +
25222 +       if (!au_opt_test(au_mntflags(sb), XINO))
25223 +               return 0;
25224 +
25225 +       err = 0;
25226 +       if (ino) {
25227 +               sbinfo = au_sbi(sb);
25228 +               xib_calc_bit(ino, &pindex, &bit);
25229 +               AuDebugOn(page_bits <= bit);
25230 +               mutex_lock(&sbinfo->si_xib_mtx);
25231 +               err = xib_pindex(sb, pindex);
25232 +               if (!err) {
25233 +                       clear_bit(bit, sbinfo->si_xib_buf);
25234 +                       sbinfo->si_xib_next_bit = bit;
25235 +               }
25236 +               mutex_unlock(&sbinfo->si_xib_mtx);
25237 +       }
25238 +
25239 +       if (!err)
25240 +               err = au_xino_write(sb, bindex, h_ino, 0);
25241 +       return err;
25242 +}
25243 +
25244 +/* get an unused inode number from bitmap, zero when the xib I/O fails */
25245 +ino_t au_xino_new_ino(struct super_block *sb)
25246 +{
25247 +       ino_t ino;
25248 +       unsigned long *p, pindex, ul, pend;
25249 +       struct au_sbinfo *sbinfo;
25250 +       struct file *file;
25251 +       int free_bit, err;
25252 +
25253 +       if (!au_opt_test(au_mntflags(sb), XINO))
25254 +               return iunique(sb, AUFS_FIRST_INO);
25255 +
25256 +       sbinfo = au_sbi(sb);
25257 +       mutex_lock(&sbinfo->si_xib_mtx);
25258 +       p = sbinfo->si_xib_buf;
25259 +       free_bit = sbinfo->si_xib_next_bit;
25260 +       if (free_bit < page_bits && !test_bit(free_bit, p))
25261 +               goto out; /* success */
25262 +       free_bit = find_first_zero_bit(p, page_bits);
25263 +       if (free_bit < page_bits)
25264 +               goto out; /* success */
25265 +
25266 +       pindex = sbinfo->si_xib_last_pindex;
25267 +       for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
25268 +               err = xib_pindex(sb, ul);
25269 +               if (unlikely(err))
25270 +                       goto out_err;
25271 +               free_bit = find_first_zero_bit(p, page_bits);
25272 +               if (free_bit < page_bits)
25273 +                       goto out; /* success */
25274 +       }
25275 +
25276 +       file = sbinfo->si_xib;
25277 +       pend = i_size_read(file->f_dentry->d_inode) / PAGE_SIZE;
25278 +       for (ul = pindex + 1; ul <= pend; ul++) {
25279 +               err = xib_pindex(sb, ul);
25280 +               if (unlikely(err))
25281 +                       goto out_err;
25282 +               free_bit = find_first_zero_bit(p, page_bits);
25283 +               if (free_bit < page_bits)
25284 +                       goto out; /* success */
25285 +       }
25286 +       BUG();
25287 +
25288 + out:
25289 +       set_bit(free_bit, p);
25290 +       sbinfo->si_xib_next_bit = free_bit + 1; /* hint: bit after the hit */
25291 +       pindex = sbinfo->si_xib_last_pindex;
25292 +       mutex_unlock(&sbinfo->si_xib_mtx);
25293 +       ino = xib_calc_ino(pindex, free_bit);
25294 +       AuDbg("i%lu\n", (unsigned long)ino);
25295 +       return ino;
25296 + out_err:
25297 +       mutex_unlock(&sbinfo->si_xib_mtx);
25298 +       AuDbg("i0\n");
25299 +       return 0;
25300 +}
25301 +
25302 +/*
25303 + * read @ino from xinofile for the specified branch{@sb, @bindex}
25304 + * at the position of @h_ino. returns 0 on success, or an error.
25305 + * if xino is disabled or no entry is stored there, @ino is set to zero.
25306 + */
25307 +int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
25308 +                ino_t *ino)
25309 +{
25310 +       int err;
25311 +       ssize_t sz;
25312 +       loff_t pos;
25313 +       struct file *file;
25314 +       struct au_sbinfo *sbinfo;
25315 +
25316 +       *ino = 0;
25317 +       if (!au_opt_test(au_mntflags(sb), XINO))
25318 +               return 0; /* no xino */
25319 +
25320 +       err = 0;
25321 +       sbinfo = au_sbi(sb);
25322 +       pos = h_ino;
25323 +       if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
25324 +               AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
25325 +               return -EFBIG;
25326 +       }
25327 +       pos *= sizeof(*ino); /* byte offset of the entry for @h_ino */
25328 +
25329 +       file = au_sbr(sb, bindex)->br_xino.xi_file;
25330 +       if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*ino))
25331 +               return 0; /* no ino */
25332 +
25333 +       sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
25334 +       if (sz == sizeof(*ino))
25335 +               return 0; /* success */
25336 +
25337 +       err = sz;
25338 +       if (unlikely(sz >= 0)) { /* short read */
25339 +               err = -EIO;
25340 +               AuIOErr("xino read error (%zd)\n", sz);
25341 +       }
25342 +
25343 +       return err;
25344 +}
25345 +
25346 +/* ---------------------------------------------------------------------- */
25347 +
25348 +/* create a new xino file at @fname and unlink it, keeping it opened */
25349 +
25350 +struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
25351 +{
25352 +       struct file *file;
25353 +       struct dentry *h_parent, *d;
25354 +       struct inode *h_dir;
25355 +       int err;
25356 +
25357 +       /*
25358 +        * at mount-time, and the xino file is the default path,
25359 +        * hinotify is disabled so we have no inotify events to ignore.
25360 +        * when a user specified the xino, we cannot get au_hdir to be ignored.
25361 +        */
25362 +       file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,
25363 +                              S_IRUGO | S_IWUGO);
25364 +       if (IS_ERR(file)) {
25365 +               if (!silent)
25366 +                       AuErr("open %s(%ld)\n", fname, PTR_ERR(file));
25367 +               return file;
25368 +       }
25369 +
25370 +       /* unlink the name now; the opened @file itself is kept */
25371 +       h_parent = dget_parent(file->f_dentry);
25372 +       h_dir = h_parent->d_inode;
25373 +       mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
25374 +       /* mnt_want_write() is unnecessary here */
25375 +       err = vfsub_unlink(h_dir, &file->f_path, /*force*/0);
25376 +       mutex_unlock(&h_dir->i_mutex);
25377 +       dput(h_parent);
25378 +       if (unlikely(err)) {
25379 +               if (!silent)
25380 +                       AuErr("unlink %s(%d)\n", fname, err);
25381 +               goto out;
25382 +       }
25383 +
25384 +       err = -EINVAL;
25385 +       d = file->f_dentry;
25386 +       if (unlikely(sb == d->d_sb)) { /* the xino file lives in this aufs */
25387 +               if (!silent)
25388 +                       AuErr("%s must be outside\n", fname);
25389 +               goto out;
25390 +       }
25391 +       if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
25392 +               if (!silent)
25393 +                       AuErr("xino doesn't support %s(%s)\n",
25394 +                             fname, au_sbtype(d->d_sb));
25395 +               goto out;
25396 +       }
25397 +       return file; /* success */
25398 +
25399 + out:
25400 +       fput(file);
25401 +       file = ERR_PTR(err); /* failures are reported as ERR_PTR */
25402 +       return file;
25403 +}
25404 +
25405 +/*
25406 + * find another branch which is on the same filesystem as the specified
25407 + * branch{@btgt}, searching up to @bend. returns its index, or -1 if none.
25408 + */
25409 +static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
25410 +                       aufs_bindex_t bend)
25411 +{
25412 +       aufs_bindex_t bindex;
25413 +       struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
25414 +
25415 +       for (bindex = 0; bindex < btgt; bindex++)
25416 +               if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25417 +                       return bindex;
25418 +       for (bindex++; bindex <= bend; bindex++) /* skip @btgt itself */
25419 +               if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
25420 +                       return bindex;
25421 +       return -1;
25422 +}
25423 +
25424 +/* ---------------------------------------------------------------------- */
25425 +
25426 +/*
25427 + * initialize the xinofile for the specified branch @br
25428 + * at the place/path where @base_file indicates.
25429 + * test whether another branch is on the same filesystem or not,
25430 + * if @do_test is true. in that case its xino file is shared with @br.
25431 + */
25432 +int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
25433 +              struct file *base_file, int do_test)
25434 +{
25435 +       int err;
25436 +       ino_t ino;
25437 +       aufs_bindex_t bend, bindex;
25438 +       struct au_branch *shared_br, *b;
25439 +       struct file *file;
25440 +       struct super_block *tgt_sb;
25441 +
25442 +       shared_br = NULL;
25443 +       bend = au_sbend(sb);
25444 +       if (do_test) {
25445 +               tgt_sb = br->br_mnt->mnt_sb;
25446 +               for (bindex = 0; bindex <= bend; bindex++) {
25447 +                       b = au_sbr(sb, bindex);
25448 +                       if (tgt_sb == b->br_mnt->mnt_sb) {
25449 +                               shared_br = b;
25450 +                               break;
25451 +                       }
25452 +               }
25453 +       }
25454 +
25455 +       if (!shared_br || !shared_br->br_xino.xi_file) {
25456 +               struct au_xino_lock_dir ldir;
25457 +
25458 +               au_xino_lock_dir(sb, base_file, &ldir);
25459 +               /* mnt_want_write() is unnecessary here */
25460 +               file = au_xino_create2(base_file, NULL);
25461 +               au_xino_unlock_dir(&ldir);
25462 +               err = PTR_ERR(file);
25463 +               if (IS_ERR(file))
25464 +                       goto out;
25465 +               br->br_xino.xi_file = file;
25466 +       } else {
25467 +               br->br_xino.xi_file = shared_br->br_xino.xi_file;
25468 +               get_file(br->br_xino.xi_file); /* one more ref for @br */
25469 +       }
25470 +
25471 +       ino = AUFS_ROOT_INO;
25472 +       err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
25473 +                              h_ino, ino);
25474 +       if (!err)
25475 +               return 0; /* success */
25476 +       /* NOTE(review): xi_file is left set on error; confirm caller puts it */
25477 +
25478 + out:
25479 +       return err;
25480 +}
25481 +
25482 +/* ---------------------------------------------------------------------- */
25483 +
25484 +/* truncate a xino bitmap file */
25485 +
25486 +/* todo: slow. mark in xib every inode number stored in xino @file */
25487 +static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
25488 +{
25489 +       int err, bit;
25490 +       ssize_t sz;
25491 +       unsigned long pindex;
25492 +       loff_t pos, pend;
25493 +       struct au_sbinfo *sbinfo;
25494 +       au_readf_t func;
25495 +       ino_t *ino;
25496 +       unsigned long *p;
25497 +
25498 +       err = 0;
25499 +       sbinfo = au_sbi(sb);
25500 +       MtxMustLock(&sbinfo->si_xib_mtx);
25501 +       p = sbinfo->si_xib_buf;
25502 +       func = sbinfo->si_xread;
25503 +       pend = i_size_read(file->f_dentry->d_inode);
25504 +       pos = 0;
25505 +       while (pos < pend) { /* scan @file one page-sized read at a time */
25506 +               sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
25507 +               err = sz;
25508 +               if (unlikely(sz <= 0))
25509 +                       goto out;
25510 +
25511 +               err = 0;
25512 +               for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) {
25513 +                       if (unlikely(*ino < AUFS_FIRST_INO))
25514 +                               continue;
25515 +
25516 +                       xib_calc_bit(*ino, &pindex, &bit);
25517 +                       AuDebugOn(page_bits <= bit);
25518 +                       err = xib_pindex(sb, pindex);
25519 +                       if (!err)
25520 +                               set_bit(bit, p);
25521 +                       else
25522 +                               goto out;
25523 +               }
25524 +       }
25525 +
25526 + out:
25527 +       return err;
25528 +}
25529 +
25530 +static int xib_restore(struct super_block *sb)
25531 +{
25532 +       int err;
25533 +       aufs_bindex_t bindex, bend;
25534 +       void *page;
25535 +
25536 +       err = -ENOMEM;
25537 +       page = (void *)__get_free_page(GFP_NOFS); /* scratch read buffer */
25538 +       if (unlikely(!page))
25539 +               goto out;
25540 +
25541 +       err = 0;
25542 +       bend = au_sbend(sb);
25543 +       for (bindex = 0; !err && bindex <= bend; bindex++)
25544 +               if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
25545 +                       err = do_xib_restore
25546 +                               (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
25547 +               else
25548 +                       AuDbg("b%d\n", bindex); /* same fs as a lower index */
25549 +       free_page((unsigned long)page);
25550 +
25551 + out:
25552 +       return err;
25553 +}
25554 +
25555 +int au_xib_trunc(struct super_block *sb)
25556 +{
25557 +       int err;
25558 +       ssize_t sz;
25559 +       loff_t pos;
25560 +       struct au_xino_lock_dir ldir;
25561 +       struct au_sbinfo *sbinfo;
25562 +       unsigned long *p;
25563 +       struct file *file;
25564 +
25565 +       SiMustWriteLock(sb);
25566 +
25567 +       err = 0;
25568 +       sbinfo = au_sbi(sb);
25569 +       if (!au_opt_test(sbinfo->si_mntflags, XINO))
25570 +               goto out;
25571 +
25572 +       file = sbinfo->si_xib;
25573 +       if (i_size_read(file->f_dentry->d_inode) <= PAGE_SIZE)
25574 +               goto out; /* a single page at most, nothing to shrink */
25575 +
25576 +       au_xino_lock_dir(sb, file, &ldir);
25577 +       /* mnt_want_write() is unnecessary here */
25578 +       file = au_xino_create2(sbinfo->si_xib, NULL);
25579 +       au_xino_unlock_dir(&ldir);
25580 +       err = PTR_ERR(file);
25581 +       if (IS_ERR(file))
25582 +               goto out;
25583 +       fput(sbinfo->si_xib); /* replace the old bitmap file by the new one */
25584 +       sbinfo->si_xib = file;
25585 +
25586 +       p = sbinfo->si_xib_buf;
25587 +       memset(p, 0, PAGE_SIZE); /* NOTE(review): last_pindex kept; confirm */
25588 +       pos = 0;
25589 +       sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
25590 +       if (unlikely(sz != PAGE_SIZE)) {
25591 +               err = sz;
25592 +               AuIOErr("err %d\n", err);
25593 +               if (sz >= 0) /* short write */
25594 +                       err = -EIO;
25595 +               goto out;
25596 +       }
25597 +
25598 +       mutex_lock(&sbinfo->si_xib_mtx);
25599 +       /* mnt_want_write() is unnecessary here */
25600 +       err = xib_restore(sb); /* rebuild the bitmap from the xino files */
25601 +       mutex_unlock(&sbinfo->si_xib_mtx);
25602 +
25603 +out:
25604 +       return err;
25605 +}
25606 +
25607 +/* ---------------------------------------------------------------------- */
25608 +
25609 +/*
25610 + * xino mount option handlers
25611 + */
25612 +static au_readf_t find_readf(struct file *h_file)
25613 +{
25614 +       const struct file_operations *fop = h_file->f_op;
25615 +
25616 +       if (fop) {
25617 +               if (fop->read)
25618 +                       return fop->read;
25619 +               if (fop->aio_read)
25620 +                       return do_sync_read; /* only ->aio_read is provided */
25621 +       }
25622 +       return ERR_PTR(-ENOSYS); /* no read method at all */
25623 +}
25624 +
25625 +static au_writef_t find_writef(struct file *h_file)
25626 +{
25627 +       const struct file_operations *fop = h_file->f_op;
25628 +
25629 +       if (fop) {
25630 +               if (fop->write)
25631 +                       return fop->write;
25632 +               if (fop->aio_write)
25633 +                       return do_sync_write; /* only ->aio_write is provided */
25634 +       }
25635 +       return ERR_PTR(-ENOSYS); /* no write method at all */
25636 +}
25637 +
25638 +/* xino bitmap: drop the bitmap file, its page buffer and the I/O methods */
25639 +static void xino_clear_xib(struct super_block *sb)
25640 +{
25641 +       struct au_sbinfo *sbinfo;
25642 +
25643 +       SiMustWriteLock(sb);
25644 +
25645 +       sbinfo = au_sbi(sb);
25646 +       sbinfo->si_xread = NULL;
25647 +       sbinfo->si_xwrite = NULL;
25648 +       if (sbinfo->si_xib)
25649 +               fput(sbinfo->si_xib);
25650 +       sbinfo->si_xib = NULL;
25651 +       free_page((unsigned long)sbinfo->si_xib_buf);
25652 +       sbinfo->si_xib_buf = NULL;
25653 +}
25654 +
25655 +static int au_xino_set_xib(struct super_block *sb, struct file *base)
25656 +{
25657 +       int err;
25658 +       loff_t pos;
25659 +       struct au_sbinfo *sbinfo;
25660 +       struct file *file;
25661 +
25662 +       SiMustWriteLock(sb);
25663 +
25664 +       sbinfo = au_sbi(sb);
25665 +       file = au_xino_create2(base, sbinfo->si_xib);
25666 +       err = PTR_ERR(file);
25667 +       if (IS_ERR(file))
25668 +               goto out;
25669 +       if (sbinfo->si_xib)
25670 +               fput(sbinfo->si_xib);
25671 +       sbinfo->si_xib = file;
25672 +       sbinfo->si_xread = find_readf(file); /* NOTE(review): ERR_PTR unchecked */
25673 +       sbinfo->si_xwrite = find_writef(file); /* NOTE(review): ditto */
25674 +
25675 +       err = -ENOMEM;
25676 +       if (!sbinfo->si_xib_buf) /* an existing buffer is reused as is */
25677 +               sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
25678 +       if (unlikely(!sbinfo->si_xib_buf))
25679 +               goto out_unset;
25680 +
25681 +       sbinfo->si_xib_last_pindex = 0;
25682 +       sbinfo->si_xib_next_bit = 0;
25683 +       if (i_size_read(file->f_dentry->d_inode) < PAGE_SIZE) {
25684 +               pos = 0; /* write the buffer out as the first page */
25685 +               err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
25686 +                                 PAGE_SIZE, &pos);
25687 +               if (unlikely(err != PAGE_SIZE))
25688 +                       goto out_free;
25689 +       }
25690 +       err = 0;
25691 +       goto out; /* success */
25692 +
25693 + out_free:
25694 +       free_page((unsigned long)sbinfo->si_xib_buf);
25695 +       sbinfo->si_xib_buf = NULL;
25696 +       if (err >= 0) /* short write */
25697 +               err = -EIO;
25698 + out_unset:
25699 +       fput(sbinfo->si_xib);
25700 +       sbinfo->si_xib = NULL;
25701 +       sbinfo->si_xread = NULL;
25702 +       sbinfo->si_xwrite = NULL;
25703 + out:
25704 +       return err;
25705 +}
25706 +
25707 +/* xino for each branch: put and forget every branch's xino file */
25708 +static void xino_clear_br(struct super_block *sb)
25709 +{
25710 +       aufs_bindex_t bindex, bend;
25711 +       struct au_branch *br;
25712 +
25713 +       bend = au_sbend(sb);
25714 +       for (bindex = 0; bindex <= bend; bindex++) {
25715 +               br = au_sbr(sb, bindex);
25716 +               if (!br || !br->br_xino.xi_file)
25717 +                       continue; /* nothing to release */
25718 +
25719 +               fput(br->br_xino.xi_file);
25720 +               br->br_xino.xi_file = NULL;
25721 +       }
25722 +}
25723 +
25724 +static int au_xino_set_br(struct super_block *sb, struct file *base)
25725 +{
25726 +       int err;
25727 +       ino_t ino;
25728 +       aufs_bindex_t bindex, bend, bshared;
25729 +       struct {
25730 +               struct file *old, *new;
25731 +       } *fpair, *p; /* one entry per branch */
25732 +       struct au_branch *br;
25733 +       struct inode *inode;
25734 +       au_writef_t writef;
25735 +
25736 +       SiMustWriteLock(sb);
25737 +
25738 +       err = -ENOMEM;
25739 +       bend = au_sbend(sb);
25740 +       fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
25741 +       if (unlikely(!fpair))
25742 +               goto out;
25743 +
25744 +       inode = sb->s_root->d_inode;
25745 +       ino = AUFS_ROOT_INO;
25746 +       writef = au_sbi(sb)->si_xwrite;
25747 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
25748 +               br = au_sbr(sb, bindex);
25749 +               bshared = is_sb_shared(sb, bindex, bindex - 1);
25750 +               if (bshared >= 0) {
25751 +                       /* shared xino */
25752 +                       *p = fpair[bshared];
25753 +                       get_file(p->new);
25754 +               }
25755 +
25756 +               if (!p->new) {
25757 +                       /* new xino */
25758 +                       p->old = br->br_xino.xi_file;
25759 +                       p->new = au_xino_create2(base, br->br_xino.xi_file);
25760 +                       err = PTR_ERR(p->new);
25761 +                       if (IS_ERR(p->new)) {
25762 +                               p->new = NULL; /* keep out_pair from putting it */
25763 +                               goto out_pair;
25764 +                       }
25765 +               }
25766 +
25767 +               err = au_xino_do_write(writef, p->new,
25768 +                                      au_h_iptr(inode, bindex)->i_ino, ino);
25769 +               if (unlikely(err))
25770 +                       goto out_pair;
25771 +       }
25772 +
25773 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
25774 +               br = au_sbr(sb, bindex);
25775 +               if (br->br_xino.xi_file)
25776 +                       fput(br->br_xino.xi_file);
25777 +               get_file(p->new); /* ref for the branch; out_pair drops ours */
25778 +               br->br_xino.xi_file = p->new;
25779 +       }
25780 +
25781 + out_pair:
25782 +       for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
25783 +               if (p->new)
25784 +                       fput(p->new);
25785 +               else
25786 +                       break; /* later slots were never filled */
25787 +       kfree(fpair);
25788 + out:
25789 +       return err;
25790 +}
25791 +
25792 +void au_xino_clr(struct super_block *sb)
25793 +{
25794 +       struct au_sbinfo *sbinfo;
25795 +
25796 +       au_xigen_clr(sb);
25797 +       xino_clear_xib(sb); /* bitmap file, buffer and I/O methods */
25798 +       xino_clear_br(sb); /* every branch's xino file */
25799 +       sbinfo = au_sbi(sb);
25800 +       /* lvalue, do not call au_mntflags() */
25801 +       au_opt_clr(sbinfo->si_mntflags, XINO);
25802 +}
25803 +
25804 +int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
25805 +{
25806 +       int err, skip;
25807 +       struct dentry *parent, *cur_parent;
25808 +       struct qstr *dname, *cur_name;
25809 +       struct file *cur_xino;
25810 +       struct inode *dir;
25811 +       struct au_sbinfo *sbinfo;
25812 +
25813 +       SiMustWriteLock(sb);
25814 +
25815 +       err = 0;
25816 +       sbinfo = au_sbi(sb);
25817 +       parent = dget_parent(xino->file->f_dentry);
25818 +       if (remount) {
25819 +               skip = 0;
25820 +               dname = &xino->file->f_dentry->d_name;
25821 +               cur_xino = sbinfo->si_xib;
25822 +               if (cur_xino) {
25823 +                       cur_parent = dget_parent(cur_xino->f_dentry);
25824 +                       cur_name = &cur_xino->f_dentry->d_name;
25825 +                       skip = (cur_parent == parent
25826 +                               && dname->len == cur_name->len
25827 +                               && !memcmp(dname->name, cur_name->name,
25828 +                                          dname->len));
25829 +                       dput(cur_parent);
25830 +               }
25831 +               if (skip) /* same parent dir and name as the current xib */
25832 +                       goto out;
25833 +       }
25834 +
25835 +       au_opt_set(sbinfo->si_mntflags, XINO);
25836 +       dir = parent->d_inode;
25837 +       mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
25838 +       /* mnt_want_write() is unnecessary here */
25839 +       err = au_xino_set_xib(sb, xino->file);
25840 +       if (!err)
25841 +               err = au_xigen_set(sb, xino->file);
25842 +       if (!err)
25843 +               err = au_xino_set_br(sb, xino->file);
25844 +       mutex_unlock(&dir->i_mutex);
25845 +       if (!err)
25846 +               goto out; /* success */
25847 +
25848 +       /* NOTE(review): said "reset all", but only the error is logged here */
25849 +       AuIOErr("failed creating xino(%d).\n", err);
25850 +
25851 + out:
25852 +       dput(parent);
25853 +       return err;
25854 +}
25855 +
25856 +/* ---------------------------------------------------------------------- */
25857 +
25858 +/*
25859 + * create a xinofile in the first suitable rw branch, or AUFS_XINO_DEFPATH.
25860 + */
25861 +struct file *au_xino_def(struct super_block *sb)
25862 +{
25863 +       struct file *file;
25864 +       char *page, *p;
25865 +       struct au_branch *br;
25866 +       struct super_block *h_sb;
25867 +       struct path path;
25868 +       aufs_bindex_t bend, bindex, bwr;
25869 +
25870 +       br = NULL;
25871 +       bend = au_sbend(sb);
25872 +       bwr = -1; /* no suitable writable branch found yet */
25873 +       for (bindex = 0; bindex <= bend; bindex++) {
25874 +               br = au_sbr(sb, bindex);
25875 +               if (au_br_writable(br->br_perm)
25876 +                   && !au_test_fs_bad_xino(br->br_mnt->mnt_sb)) {
25877 +                       bwr = bindex;
25878 +                       break;
25879 +               }
25880 +       }
25881 +
25882 +       if (bwr >= 0) {
25883 +               file = ERR_PTR(-ENOMEM);
25884 +               page = __getname();
25885 +               if (unlikely(!page))
25886 +                       goto out;
25887 +               path.mnt = br->br_mnt;
25888 +               path.dentry = au_h_dptr(sb->s_root, bwr);
25889 +               p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
25890 +               file = (void *)p; /* hand a d_path() error back as is */
25891 +               if (!IS_ERR(p)) {
25892 +                       /* d_path() above was given a shortened buflen for this */
25893 +                       strcat(p, "/" AUFS_XINO_FNAME);
25894 +                       AuDbg("%s\n", p);
25895 +                       file = au_xino_create(sb, p, /*silent*/0);
25896 +                       if (!IS_ERR(file))
25897 +                               au_xino_brid_set(sb, br->br_id);
25898 +               }
25899 +               __putname(page);
25900 +       } else {
25901 +               file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
25902 +               if (IS_ERR(file))
25903 +                       goto out;
25904 +               h_sb = file->f_dentry->d_sb;
25905 +               if (unlikely(au_test_fs_bad_xino(h_sb))) {
25906 +                       AuErr("xino doesn't support %s(%s)\n",
25907 +                             AUFS_XINO_DEFPATH, au_sbtype(h_sb));
25908 +                       fput(file);
25909 +                       file = ERR_PTR(-EINVAL);
25910 +               }
25911 +               if (!IS_ERR(file))
25912 +                       au_xino_brid_set(sb, -1); /* not placed in any branch */
25913 +       }
25914 +
25915 + out:
25916 +       return file;
25917 +}
25917 +
25918 +/* ---------------------------------------------------------------------- */
25919 +
25920 +int au_xino_path(struct seq_file *seq, struct file *file)
25921 +{
25922 +       int err;
25923 +
25924 +       err = au_seq_path(seq, &file->f_path);
25925 +       if (unlikely(err < 0))
25926 +               goto out;
25927 +
25928 +       err = 0;
25929 +#define Deleted "\\040(deleted)"
25930 +       seq->count -= sizeof(Deleted) - 1; /* cut the trailing deleted mark */
25931 +       AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
25932 +                        sizeof(Deleted) - 1));
25933 +#undef Deleted
25934 +
25935 + out:
25936 +       return err;
25937 +}
25938 diff -uprN -x .git linux-2.6.31/fs/namei.c aufs2-2.6.git/fs/namei.c
25939 --- linux-2.6.31/fs/namei.c     2009-09-09 22:13:59.000000000 +0000
25940 +++ aufs2-2.6.git/fs/namei.c    2009-09-21 21:49:25.001190884 +0000
25941 @@ -1219,7 +1219,7 @@ out:
25942   * needs parent already locked. Doesn't follow mounts.
25943   * SMP-safe.
25944   */
25945 -static struct dentry *lookup_hash(struct nameidata *nd)
25946 +struct dentry *lookup_hash(struct nameidata *nd)
25947  {
25948         int err;
25949  
25950 @@ -1229,7 +1229,7 @@ static struct dentry *lookup_hash(struct
25951         return __lookup_hash(&nd->last, nd->path.dentry, nd);
25952  }
25953  
25954 -static int __lookup_one_len(const char *name, struct qstr *this,
25955 +int __lookup_one_len(const char *name, struct qstr *this,
25956                 struct dentry *base, int len)
25957  {
25958         unsigned long hash;
25959 diff -uprN -x .git linux-2.6.31/fs/splice.c aufs2-2.6.git/fs/splice.c
25960 --- linux-2.6.31/fs/splice.c    2009-09-09 22:13:59.000000000 +0000
25961 +++ aufs2-2.6.git/fs/splice.c   2009-09-21 21:49:25.471607719 +0000
25962 @@ -1057,8 +1057,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
25963  /*
25964   * Attempt to initiate a splice from pipe to file.
25965   */
25966 -static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
25967 -                          loff_t *ppos, size_t len, unsigned int flags)
25968 +long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
25969 +                   loff_t *ppos, size_t len, unsigned int flags)
25970  {
25971         ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
25972                                 loff_t *, size_t, unsigned int);
25973 @@ -1084,9 +1084,9 @@ static long do_splice_from(struct pipe_i
25974  /*
25975   * Attempt to initiate a splice from a file to a pipe.
25976   */
25977 -static long do_splice_to(struct file *in, loff_t *ppos,
25978 -                        struct pipe_inode_info *pipe, size_t len,
25979 -                        unsigned int flags)
25980 +long do_splice_to(struct file *in, loff_t *ppos,
25981 +                 struct pipe_inode_info *pipe, size_t len,
25982 +                 unsigned int flags)
25983  {
25984         ssize_t (*splice_read)(struct file *, loff_t *,
25985                                struct pipe_inode_info *, size_t, unsigned int);
25986 diff -uprN -x .git linux-2.6.31/include/linux/Kbuild aufs2-2.6.git/include/linux/Kbuild
25987 --- linux-2.6.31/include/linux/Kbuild   2009-09-09 22:13:59.000000000 +0000
25988 +++ aufs2-2.6.git/include/linux/Kbuild  2009-09-21 21:49:26.084940677 +0000
25989 @@ -34,6 +34,7 @@ header-y += atmppp.h
25990  header-y += atmsap.h
25991  header-y += atmsvc.h
25992  header-y += atm_zatm.h
25993 +header-y += aufs_type.h
25994  header-y += auto_fs4.h
25995  header-y += ax25.h
25996  header-y += b1lli.h
25997 diff -uprN -x .git linux-2.6.31/include/linux/aufs_type.h aufs2-2.6.git/include/linux/aufs_type.h
25998 --- linux-2.6.31/include/linux/aufs_type.h      1970-01-01 00:00:00.000000000 +0000
25999 +++ aufs2-2.6.git/include/linux/aufs_type.h     2009-09-21 21:49:26.101190816 +0000
26000 @@ -0,0 +1,184 @@
26001 +/*
26002 + * Copyright (C) 2005-2009 Junjiro R. Okajima
26003 + *
26004 + * This program, aufs is free software; you can redistribute it and/or modify
26005 + * it under the terms of the GNU General Public License as published by
26006 + * the Free Software Foundation; either version 2 of the License, or
26007 + * (at your option) any later version.
26008 + *
26009 + * This program is distributed in the hope that it will be useful,
26010 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26011 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26012 + * GNU General Public License for more details.
26013 + *
26014 + * You should have received a copy of the GNU General Public License
26015 + * along with this program; if not, write to the Free Software
26016 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
26017 + */
26018 +
26019 +#ifndef __AUFS_TYPE_H__
26020 +#define __AUFS_TYPE_H__
26021 +
26022 +#include <linux/ioctl.h>
26023 +#include <linux/types.h>
26024 +
26025 +#define AUFS_VERSION   "2-31"
26026 +
26027 +/* todo? move this to linux-2.6.19/include/magic.h */
26028 +#define AUFS_SUPER_MAGIC       ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
26029 +
26030 +/* ---------------------------------------------------------------------- */
26031 +
26032 +#ifdef CONFIG_AUFS_BRANCH_MAX_127
26033 +typedef __s8 aufs_bindex_t;
26034 +#define AUFS_BRANCH_MAX 127
26035 +#else
26036 +typedef __s16 aufs_bindex_t;
26037 +#ifdef CONFIG_AUFS_BRANCH_MAX_511
26038 +#define AUFS_BRANCH_MAX 511
26039 +#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
26040 +#define AUFS_BRANCH_MAX 1023
26041 +#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
26042 +#define AUFS_BRANCH_MAX 32767
26043 +#endif
26044 +#endif
26045 +
26046 +#ifdef __KERNEL__
26047 +#ifndef AUFS_BRANCH_MAX
26048 +#error unknown CONFIG_AUFS_BRANCH_MAX value
26049 +#endif
26050 +#endif /* __KERNEL__ */
26051 +
26052 +/* ---------------------------------------------------------------------- */
26053 +
26054 +#define AUFS_NAME              "aufs"
26055 +#define AUFS_FSTYPE            AUFS_NAME
26056 +
26057 +#define AUFS_ROOT_INO          2
26058 +#define AUFS_FIRST_INO         11
26059 +
26060 +#define AUFS_WH_PFX            ".wh."
26061 +#define AUFS_WH_PFX_LEN                ((int)sizeof(AUFS_WH_PFX) - 1)
26062 +#define AUFS_XINO_FNAME                "." AUFS_NAME ".xino"
26063 +#define AUFS_XINO_DEFPATH      "/tmp/" AUFS_XINO_FNAME
26064 +#define AUFS_XINO_TRUNC_INIT   64 /* blocks */
26065 +#define AUFS_XINO_TRUNC_STEP   4  /* blocks */
26066 +#define AUFS_DIRWH_DEF         3
26067 +#define AUFS_RDCACHE_DEF       10 /* seconds */
26068 +#define AUFS_RDBLK_DEF         512 /* bytes */
26069 +#define AUFS_RDHASH_DEF                32
26070 +#define AUFS_WKQ_NAME          AUFS_NAME "d"
26071 +#define AUFS_NWKQ_DEF          4
26072 +#define AUFS_MFS_SECOND_DEF    30 /* seconds */
26073 +#define AUFS_PLINK_WARN                100 /* number of plinks */
26074 +
26075 +#define AUFS_DIROPQ_NAME       AUFS_WH_PFX ".opq" /* whiteouted doubly */
26076 +#define AUFS_WH_DIROPQ         AUFS_WH_PFX AUFS_DIROPQ_NAME
26077 +
26078 +#define AUFS_BASE_NAME         AUFS_WH_PFX AUFS_NAME
26079 +#define AUFS_PLINKDIR_NAME     AUFS_WH_PFX "plnk"
26080 +#define AUFS_ORPHDIR_NAME      AUFS_WH_PFX "orph"
26081 +
26082 +/* doubly whiteouted */
26083 +#define AUFS_WH_BASE           AUFS_WH_PFX AUFS_BASE_NAME
26084 +#define AUFS_WH_PLINKDIR       AUFS_WH_PFX AUFS_PLINKDIR_NAME
26085 +#define AUFS_WH_ORPHDIR                AUFS_WH_PFX AUFS_ORPHDIR_NAME
26086 +
26087 +/* branch permission */
26088 +#define AUFS_BRPERM_RW         "rw"
26089 +#define AUFS_BRPERM_RO         "ro"
26090 +#define AUFS_BRPERM_RR         "rr"
26091 +#define AUFS_BRPERM_WH         "wh"
26092 +#define AUFS_BRPERM_NLWH       "nolwh"
26093 +#define AUFS_BRPERM_ROWH       AUFS_BRPERM_RO "+" AUFS_BRPERM_WH
26094 +#define AUFS_BRPERM_RRWH       AUFS_BRPERM_RR "+" AUFS_BRPERM_WH
26095 +#define AUFS_BRPERM_RWNLWH     AUFS_BRPERM_RW "+" AUFS_BRPERM_NLWH
26096 +
26097 +/* ---------------------------------------------------------------------- */
26098 +
26099 +/* ioctl */
26100 +enum {
26101 +       AuCtl_PLINK_MAINT,
26102 +       AuCtl_PLINK_CLEAN,
26103 +
26104 +       /* readdir in userspace */
26105 +       AuCtl_RDU,
26106 +       AuCtl_RDU_INO
26107 +};
26108 +
26109 +/* borrowed from linux/include/linux/kernel.h */
26110 +#ifndef ALIGN
26111 +#define ALIGN(x, a)            __ALIGN_MASK(x, (typeof(x))(a)-1)
26112 +#define __ALIGN_MASK(x, mask)  (((x)+(mask))&~(mask))
26113 +#endif
26114 +
26115 +/* borrowed from linux/include/linux/compiler-gcc3.h */
26116 +#ifndef __aligned
26117 +#define __aligned(x)                   __attribute__((aligned(x)))
26118 +#define __packed                       __attribute__((packed))
26119 +#endif
26120 +
26121 +struct au_rdu_cookie {
26122 +       __u64           h_pos;
26123 +       __s16           bindex;
26124 +       __u8            flags;
26125 +       __u8            pad;
26126 +       __u32           generation;
26127 +} __aligned(8);
26128 +
26129 +struct au_rdu_ent { /* variable-length record; au_rdu_len() gives its padded size */
26130 +       __u64           ino;
26131 +       __s16           bindex;
26132 +       __u8            type;
26133 +       __u8            nlen; /* presumably the length of name without its terminator -- TODO confirm */
26134 +       __u8            wh;
26135 +       char            name[0]; /* zero-length array: the name bytes follow the fixed fields */
26136 +} __aligned(8);
26137 +
26138 +static inline int au_rdu_len(int nlen) /* bytes taken by one au_rdu_ent whose name has nlen characters */
26139 +{
26140 +       /* struct size + name + terminating NUL, rounded up to a multiple of sizeof(__u64) */
26141 +       return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
26142 +                    sizeof(__u64));
26143 +}
26144 +
26145 +union au_rdu_ent_ul { /* one entry address, viewable as a pointer or as an integer */
26146 +       struct au_rdu_ent __user        *e; /* user-space pointer to an entry */
26147 +       unsigned long                   ul; /* the same storage read as an integer */
26148 +};
26149 +
26150 +enum { /* presumably the indices of aufs_rdu.verify[] -- TODO confirm */
26151 +       AufsCtlRduV_SZ,
26152 +       AufsCtlRduV_SZ_PTR,
26153 +       AufsCtlRduV_End /* count of the values above; the length of aufs_rdu.verify[] */
26154 +};
26155 +
26156 +struct aufs_rdu { /* argument type of AUFS_CTL_RDU and AUFS_CTL_RDU_INO */
26157 +       /* input */
26158 +       union { /* sz and nent share storage; the ioctl in use selects the meaning */
26159 +               __u64           sz;     /* AuCtl_RDU */
26160 +               __u64           nent;   /* AuCtl_RDU_INO */
26161 +       };
26162 +       union au_rdu_ent_ul     ent;
26163 +       __u16                   verify[AufsCtlRduV_End]; /* AufsCtlRduV_End slots */
26164 +
26165 +       /* input/output */
26166 +       __u32                   blk;
26167 +
26168 +       /* output */
26169 +       union au_rdu_ent_ul     tail;
26170 +       /* number of entries which were added in a single call */
26171 +       __u64                   rent;
26172 +       __u8                    full;
26173 +       __u8                    shwh;
26174 +
26175 +       struct au_rdu_cookie    cookie; /* NOTE(review): direction (in/out) is not stated here -- confirm */
26176 +} __aligned(8);
26177 +
26178 +#define AuCtlType              'A' /* ioctl type character shared by every AUFS_CTL_* request */
26179 +#define AUFS_CTL_PLINK_MAINT   _IO(AuCtlType, AuCtl_PLINK_MAINT)
26180 +#define AUFS_CTL_PLINK_CLEAN   _IO(AuCtlType, AuCtl_PLINK_CLEAN)
26181 +#define AUFS_CTL_RDU           _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
26182 +#define AUFS_CTL_RDU_INO       _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
26183 +
26184 +#endif /* __AUFS_TYPE_H__ */
26185 diff -uprN -x .git linux-2.6.31/include/linux/namei.h aufs2-2.6.git/include/linux/namei.h
26186 --- linux-2.6.31/include/linux/namei.h  2009-09-09 22:13:59.000000000 +0000
26187 +++ aufs2-2.6.git/include/linux/namei.h 2009-09-21 21:49:26.484529184 +0000
26188 @@ -75,6 +75,9 @@ extern struct file *lookup_instantiate_f
26189  extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
26190  extern void release_open_intent(struct nameidata *);
26191  
26192 +extern struct dentry *lookup_hash(struct nameidata *nd);
26193 +extern int __lookup_one_len(const char *name, struct qstr *this,
26194 +                           struct dentry *base, int len);
26195  extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
26196  extern struct dentry *lookup_one_noperm(const char *, struct dentry *);
26197  
26198 diff -uprN -x .git linux-2.6.31/include/linux/splice.h aufs2-2.6.git/include/linux/splice.h
26199 --- linux-2.6.31/include/linux/splice.h 2009-09-09 22:13:59.000000000 +0000
26200 +++ aufs2-2.6.git/include/linux/splice.h        2009-09-21 21:49:26.544523817 +0000
26201 @@ -82,4 +82,10 @@ extern ssize_t splice_to_pipe(struct pip
26202  extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
26203                                       splice_direct_actor *);
26204  
26205 +extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
26206 +                          loff_t *ppos, size_t len, unsigned int flags);
26207 +extern long do_splice_to(struct file *in, loff_t *ppos,
26208 +                        struct pipe_inode_info *pipe, size_t len,
26209 +                        unsigned int flags);
26210 +
26211  #endif
This page took 2.688851 seconds and 3 git commands to generate.