From dece63587f387ad36d1e32eb1c34d4e194fa6852 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Arkadiusz=20Mi=C5=9Bkiewicz?= Date: Tue, 21 Jul 2009 07:46:58 +0000 Subject: [PATCH] - aufs2 updated Changed files: kernel-aufs2.patch -> 1.5 kernel-inittmpfs.patch -> 1.3 kernel-patches.config -> 1.4 --- kernel-aufs2.patch | 15042 ++++++++++++++++++--------------------- kernel-inittmpfs.patch | 16 +- kernel-patches.config | 1 + 3 files changed, 6786 insertions(+), 8273 deletions(-) diff --git a/kernel-aufs2.patch b/kernel-aufs2.patch index f1f71fe1..b78e6fb4 100644 --- a/kernel-aufs2.patch +++ b/kernel-aufs2.patch @@ -1,8 +1,6 @@ -diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs -new file mode 100644 -index 0000000..4110b94 ---- /dev/null -+++ b/Documentation/ABI/testing/debugfs-aufs +diff -urN linux-2.6.30.org/Documentation/ABI/testing/debugfs-aufs linux-2.6.30/Documentation/ABI/testing/debugfs-aufs +--- linux-2.6.30.org/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/Documentation/ABI/testing/debugfs-aufs 2009-07-21 08:54:18.000000000 +0200 @@ -0,0 +1,40 @@ +What: /debug/aufs/si_/ +Date: March 2009 @@ -44,11 +42,9 @@ index 0000000..4110b94 + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see + Documentation/filesystems/aufs/aufs.5 in detail. 
-diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs -new file mode 100644 -index 0000000..ca49330 ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-aufs +diff -urN linux-2.6.30.org/Documentation/ABI/testing/sysfs-aufs linux-2.6.30/Documentation/ABI/testing/sysfs-aufs +--- linux-2.6.30.org/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/Documentation/ABI/testing/sysfs-aufs 2009-07-21 08:54:18.000000000 +0200 @@ -0,0 +1,25 @@ +What: /sys/fs/aufs/si_/ +Date: March 2009 @@ -75,2974 +71,10 @@ index 0000000..ca49330 + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see + Documentation/filesystems/aufs/aufs.5 in detail. -diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README -new file mode 100644 -index 0000000..ed68e31 ---- /dev/null -+++ b/Documentation/filesystems/aufs/README -@@ -0,0 +1,256 @@ -+ -+Aufs2 -- advanced multi layered unification filesystem version 2 -+http://aufs.sf.net -+Junjiro R. Okajima -+ -+ -+0. Introduction -+---------------------------------------- -+In the early days, aufs was entirely re-designed and re-implemented -+Unionfs Version 1.x series. After many original ideas, approaches, -+improvements and implementations, it becomes totally different from -+Unionfs while keeping the basic features. -+Recently, Unionfs Version 2.x series begin taking some of the same -+approaches to aufs1's. -+Unionfs is being developed by Professor Erez Zadok at Stony Brook -+University and his team. -+ -+This version of AUFS, aufs2 has several purposes. -+- to be reviewed easily and widely. -+- to make the source files simpler and smaller by dropping several -+ original features. -+ -+Note: it becomes clear that "Aufs was rejected. Let's give it up." -+According to Christoph Hellwig, linux rejects all union-type filesystems -+but UnionMount. 
-+ -+ -+Through this work, I found some bad things in aufs1 source code and -+fixed them. Some of the dropped features will be reverted in the future, -+but not all I'm afraid. -+Aufs2 supports linux-2.6.27 and later. If you want older kernel version -+support, try aufs1 from CVS on SourceForge. -+ -+ -+1. Features -+---------------------------------------- -+- unite several directories into a single virtual filesystem. The member -+ directory is called as a branch. -+- you can specify the permission flags to the branch, which are 'readonly', -+ 'readwrite' and 'whiteout-able.' -+- by upper writable branch, internal copyup and whiteout, files/dirs on -+ readonly branch are modifiable logically. -+- dynamic branch manipulation, add, del. -+- etc... -+ -+Also there are many enhancements in aufs1, such as: -+- keep inode number by external inode number table -+- keep the timestamps of file/dir in internal copyup operation -+- seekable directory, supporting NFS readdir. -+- support mmap(2) including /proc/PID/exe symlink, without page-copy -+- whiteout is hardlinked in order to reduce the consumption of inodes -+ on branch -+- do not copyup, nor create a whiteout when it is unnecessary -+- revert a single systemcall when an error occurs in aufs -+- remount interface instead of ioctl -+- maintain /etc/mtab by an external command, /sbin/mount.aufs. -+- loopback mounted filesystem as a branch -+- kernel thread for removing the dir who has a plenty of whiteouts -+- support copyup sparse file (a file which has a 'hole' in it) -+- default permission flags for branches -+- selectable permission flags for ro branch, whether whiteout can -+ exist or not -+- export via NFS. -+- support /fs/aufs and /aufs. -+- support multiple writable branches, some policies to select one -+ among multiple writable branches. -+- a new semantics for link(2) and rename(2) to support multiple -+ writable branches. -+- no glibc changes are required. 
-+- pseudo hardlink (hardlink over branches) -+- allow a direct access manually to a file on branch, e.g. bypassing aufs. -+ including NFS or remote filesystem branch. -+- and more... -+ -+Currently these features are dropped temporary from this version, aufs2. -+See design/08plan.txt in detail. -+- test only the highest one for the directory permission (dirperm1) -+- show whiteout mode (shwh) -+- copyup on open (coo=) -+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs -+ (robr) -+- statistics of aufs thread (/sys/fs/aufs/stat) -+- delegation mode (dlgt) -+ a delegation of the internal branch access to support task I/O -+ accounting, which also supports Linux Security Modules (LSM) mainly -+ for Suse AppArmor. -+- intent.open/create (file open in a single lookup) -+ -+Features or just an idea in the future (see also design/*.txt), -+- reorder the branch index without del/re-add. -+- permanent xino files for NFSD -+- an option for refreshing the opened files after add/del branches -+- 'move' policy for copy-up between two writable branches, after -+ checking free space. -+- O_DIRECT -+- light version, without branch manipulation. (unnecessary?) -+- copyup in userspace -+- inotify in userspace -+- readv/writev -+- xattr, acl -+ -+ -+2. Download -+---------------------------------------- -+Kindly one of aufs user, the Center for Scientific Computing and Free -+Software (C3SL), Federal University of Parana offered me a public GIT -+tree space. -+ -+There are three GIT trees, aufs2-2.6, aufs2-standalone and aufs2-util. -+While the aufs2-util is always necessary, you need either of aufs2-2.6 -+or aufs2-standalone. -+ -+The aufs2-2.6 tree includes the whole linux-2.6 GIT tree, -+git://git.kernel.org/.../torvalds/linux-2.6.git. -+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot -+build aufs2 as an externel kernel module. 
-+If you already have linux-2.6 GIT tree, you may want to pull and merge -+the "aufs2" branch from this tree. -+ -+On the other hand, the aufs2-standalone tree has only aufs2 source files -+and a necessary patch, and you can select CONFIG_AUFS_FS=m. In other -+words, the aufs2-standalone tree is generated from aufs2-2.6 tree by, -+- extract new files and modifications. -+- generate a single patch file from modifications. -+- generate a ChangeLog file from git-log. -+- commit the files newly and no log messages. this is not git-pull. -+ -+Both of aufs2-2.6 and aufs2-standalone trees have a branch whose name is -+in form of "aufs2-xx" where "xx" represents the linux kernel version, -+"linux-2.6.xx". -+ -+o aufs2-2.6 tree -+$ git clone --reference /your/linux-2.6/git/tree \ -+ http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-2.6.git \ -+ aufs2-2.6.git -+- if you don't have linux-2.6 GIT tree, then remove "--reference ..." -+$ cd aufs2-2.6.git -+$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27 -+ # aufs2 (no -xx) for the latest -rc version. -+ -+o aufs2-standalone tree -+$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-standalone.git \ -+ aufs2-standalone.git -+$ cd aufs2-standalone.git -+$ git checkout origin/aufs2-xx # for instance, aufs2-27 for linux-2.6.27 -+ # aufs2 (no -xx) for the latest -rc version. -+- apply "aufs2-standalone.patch" to your kernel source files. -+ -+o aufs2-util tree -+$ git clone http://git.c3sl.ufpr.br/pub/scm/aufs/aufs2-util.git \ -+ aufs2-util.git -+$ cd aufs2-util.git -+- no particular tag/branch currently. -+ -+ -+3. Configuration and Compilation -+---------------------------------------- -+For aufs2-2.6 tree, -+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS. -+- set other aufs configurations if necessary. -+ -+For aufs2-standalone tree, -+- enable CONFIG_EXPERIMENTAL and CONFIG_AUFS_FS, you can select =m. -+- edit fs/aufs/config.mk and set other aufs configurations if necessary. 
-+ -+And then, -+- build your kernel (or a module) by "make" -+- install it and reboot your system -+- read README in aufs2-util, build and install it -+ -+ -+4. Usage -+---------------------------------------- -+At first, make sure aufs2-util are installed, and please read the aufs -+manual, ./Documentation/filesystems/aufs/aufs.5. -+$ man -l aufs.5 -+ -+And then, -+$ mkdir /tmp/rw /tmp/aufs -+# mount -t aufs -o br=/tmp/rw:${HOME}=ro none /tmp/aufs -+ -+Here is another example. The result is equivalent. -+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs -+ Or -+# mount -t aufs -o br:/tmp/rw none /tmp/aufs -+# mount -o remount,append:${HOME} /tmp/aufs -+ -+Then, you can see whole tree of your home dir through /tmp/aufs. If -+you modify a file under /tmp/aufs, the one on your home directory is -+not affected, instead the same named file will be newly created under -+/tmp/rw. And all of your modification to a file will be applied to -+the one under /tmp/rw. This is called the file based Copy on Write -+(COW) method. -+Aufs mount options are described in aufs.5. -+ -+Additionally, there are some sample usages of aufs which are a -+diskless system with network booting, and LiveCD over NFS. -+See sample dir in CVS tree on SourceForge. -+ -+ -+5. Contact -+---------------------------------------- -+When you have any problems or strange behaviour in aufs, please let me -+know with: -+- /proc/mounts (instead of the output of mount(8)) -+- /sys/module/aufs/* -+- /sys/fs/aufs/* (if you have them) -+- /debug/aufs/* (if you have them) -+- linux kernel version -+ if your kernel is not plain, for example modified by distributor, -+ the url where i can download its source is necessary too. -+- aufs version which was printed at loading the module or booting the -+ system, instead of the date you downloaded. 
-+- configuration (define/undefine CONFIG_AUFS_xxx) -+- kernel configuration or /proc/config.gz (if you have it) -+- behaviour which you think to be incorrect -+- actual operation, reproducible one is better -+- mailto: aufs-users at lists.sourceforge.net -+ -+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches, -+and Feature Requests) on SourceForge. Please join and write to -+aufs-users ML. -+ -+ -+6. Acknowledgements -+---------------------------------------- -+Thanks to everyone who have tried and are using aufs, whoever -+have reported a bug or any feedback. -+ -+Especially donors: -+Tomas Matejicek(slax.org) made a donation (much more than once). -+Dai Itasaka made a donation (2007/8). -+Chuck Smith made a donation (2008/4, 10 and 12). -+Henk Schoneveld made a donation (2008/9). -+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). -+Francois Dupoux made a donation (2008/11). -+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public GIT -+tree (2009/2). -+William Grant made a donation (2009/3). -+ -+Thank you very much. -+Donations are always, including future donations, very important and -+helpful for me to keep on developing aufs. -+ -+ -+7. -+---------------------------------------- -+If you are an experienced user, no explanation is needed. Aufs is -+just a linux filesystem. -+ -+ -+Enjoy! -+ -+# Local variables: ; -+# mode: text; -+# End: ; -diff --git a/Documentation/filesystems/aufs/aufs.5 b/Documentation/filesystems/aufs/aufs.5 -new file mode 100644 -index 0000000..0b9732f ---- /dev/null -+++ b/Documentation/filesystems/aufs/aufs.5 -@@ -0,0 +1,1681 @@ -+.ds AUFS_VERSION aufs2 -+.ds AUFS_XINO_FNAME .aufs.xino -+.ds AUFS_XINO_DEFPATH /tmp/.aufs.xino -+.ds AUFS_DIRWH_DEF 3 -+.ds AUFS_WH_PFX .wh. 
-+.ds AUFS_WH_PFX_LEN 4 -+.ds AUFS_WKQ_NAME aufsd -+.ds AUFS_NWKQ_DEF 4 -+.ds AUFS_WH_DIROPQ .wh..wh..opq -+.ds AUFS_WH_BASE .wh..wh.aufs -+.ds AUFS_WH_PLINKDIR .wh..wh.plnk -+.ds AUFS_BRANCH_MAX 127 -+.ds AUFS_MFS_SECOND_DEF 30 -+.ds AUFS_RDBLK_DEF 512 -+.ds AUFS_RDHASH_DEF 32 -+.\".so aufs.tmac -+. -+.eo -+.de TQ -+.br -+.ns -+.TP \$1 -+.. -+.de Bu -+.IP \(bu 4 -+.. -+.ec -+.\" end of macro definitions -+. -+.\" ---------------------------------------------------------------------- -+.TH aufs 5 \*[AUFS_VERSION] Linux "Linux Aufs User\[aq]s Manual" -+.SH NAME -+aufs \- advanced multi layered unification filesystem. version \*[AUFS_VERSION] -+ -+.\" ---------------------------------------------------------------------- -+.SH DESCRIPTION -+Aufs is a stackable unification filesystem such as Unionfs, which unifies -+several directories and provides a merged single directory. -+In the early days, aufs was entirely re-designed and re-implemented -+Unionfs Version 1.x series. After -+many original ideas, approaches and improvements, it -+becomes totally different from Unionfs while keeping the basic features. -+See Unionfs Version 1.x series for the basic features. -+Recently, Unionfs Version 2.x series begin taking some of same -+approaches to aufs\[aq]s. -+ -+.\" ---------------------------------------------------------------------- -+.SH MOUNT OPTIONS -+At mount-time, the order of interpreting options is, -+.RS -+.Bu -+simple flags, except xino/noxino and udba=inotify -+.Bu -+branches -+.Bu -+xino/noxino -+.Bu -+udba=inotify -+.RE -+ -+At remount-time, -+the options are interpreted in the given order, -+e.g. left to right. -+.RS -+.Bu -+create or remove -+whiteout-base(\*[AUFS_WH_BASE]) and -+whplink-dir(\*[AUFS_WH_PLINKDIR]) if necessary -+.RE -+. -+.TP -+.B br:BRANCH[:BRANCH ...] (dirs=BRANCH[:BRANCH ...]) -+Adds new branches. -+(cf. Branch Syntax). -+ -+Aufs rejects the branch which is an ancestor or a descendant of another -+branch. It is called overlapped. 
When the branch is loopback-mounted -+directory, aufs also checks the source fs-image file of loopback -+device. If the source file is a descendant of another branch, it will -+be rejected too. -+ -+After mounting aufs or adding a branch, if you move a branch under -+another branch and make it descendant of another branch, aufs will not -+work correctly. -+. -+.TP -+.B [ add | ins ]:index:BRANCH -+Adds a new branch. -+The index begins with 0. -+Aufs creates -+whiteout-base(\*[AUFS_WH_BASE]) and -+whplink-dir(\*[AUFS_WH_PLINKDIR]) if necessary. -+ -+If there is the same named file on the lower branch (larger index), -+aufs will hide the lower file. -+You can only see the highest file. -+You will be confused if the added branch has whiteouts (including -+diropq), they may or may not hide the lower entries. -+.\" It is recommended to make sure that the added branch has no whiteout. -+ -+Even if a process have once mapped a file by mmap(2) with MAP_SHARED -+and the same named file exists on the lower branch, -+the process still refers the file on the lower(hidden) -+branch after adding the branch. -+If you want to update the contents of a process address space after -+adding, you need to restart your process or open/mmap the file again. -+.\" Usually, such files are executables or shared libraries. -+(cf. Branch Syntax). -+. -+.TP -+.B del:dir -+Removes a branch. -+Aufs does not remove -+whiteout-base(\*[AUFS_WH_BASE]) and -+whplink-dir(\*[AUFS_WH_PLINKDIR]) automatically. -+For example, when you add a RO branch which was unified as RW, you -+will see whiteout-base or whplink-dir on the added RO branch. -+ -+If a process is referencing the file/directory on the deleting branch -+(by open, mmap, current working directory, etc.), aufs will return an -+error EBUSY. -+. -+.TP -+.B mod:BRANCH -+Modifies the permission flags of the branch. -+Aufs creates or removes -+whiteout-base(\*[AUFS_WH_BASE]) and/or -+whplink-dir(\*[AUFS_WH_PLINKDIR]) if necessary. 
-+ -+If the branch permission is been changing \[oq]rw\[cq] to \[oq]ro\[cq], and a process -+is mapping a file by mmap(2) -+.\" with MAP_SHARED -+on the branch, the process may or may not -+be able to modify its mapped memory region after modifying branch -+permission flags. -+(cf. Branch Syntax). -+. -+.TP -+.B append:BRANCH -+equivalent to \[oq]add:(last index + 1):BRANCH\[cq]. -+(cf. Branch Syntax). -+. -+.TP -+.B prepend:BRANCH -+equivalent to \[oq]add:0:BRANCH.\[cq] -+(cf. Branch Syntax). -+. -+.TP -+.B xino=filename -+Use external inode number bitmap and translation table. -+When CONFIG_AUFS_EXPORT is enabled, external inode generation table too. -+It is set to -+/\*[AUFS_XINO_FNAME] by default, or -+\*[AUFS_XINO_DEFPATH]. -+Comma character in filename is not allowed. -+ -+The files are created per an aufs and per a branch filesystem, and -+unlinked. So you -+cannot find this file, but it exists and is read/written frequently by -+aufs. -+(cf. External Inode Number Bitmap, Translation Table and Generation Table). -+ -+If you enable CONFIG_SYSFS, the path of xino files are not shown in -+/proc/mounts (and /etc/mtab), instead it is shown in -+/fs/aufs/si_/xi_path. -+Otherwise, it is shown in /proc/mounts unless it is not the default -+path. -+. -+.TP -+.B noxino -+Stop using external inode number bitmap and translation table. -+ -+If you use this option, -+Some applications will not work correctly. -+.\" And pseudo link feature will not work after the inode cache is -+.\" shrunk. -+(cf. External Inode Number Bitmap, Translation Table and Generation Table). -+. -+.TP -+.B trunc_xib -+Truncate the external inode number bitmap file. The truncation is done -+automatically when you delete a branch unless you do not specify -+\[oq]notrunc_xib\[cq] option. -+(cf. External Inode Number Bitmap, Translation Table and Generation Table). -+. -+.TP -+.B notrunc_xib -+Stop truncating the external inode number bitmap file when you delete -+a branch. -+(cf. 
External Inode Number Bitmap, Translation Table and Generation Table). -+. -+.TP -+.B create_policy | create=CREATE_POLICY -+.TQ -+.B copyup_policy | copyup | cpup=COPYUP_POLICY -+Policies to select one among multiple writable branches. The default -+values are \[oq]create=tdp\[cq] and \[oq]cpup=tdp\[cq]. -+link(2) and rename(2) systemcalls have an exception. In aufs, they -+try keeping their operations in the branch where the source exists. -+(cf. Policies to Select One among Multiple Writable Branches). -+. -+.TP -+.B verbose | v -+Print some information. -+Currently, it is only busy file (or inode) at deleting a branch. -+. -+.TP -+.B noverbose | quiet | q | silent -+Disable \[oq]verbose\[cq] option. -+This is default value. -+. -+.TP -+.B sum -+df(1)/statfs(2) returns the total number of blocks and inodes of -+all branches. -+Note that there are cases that systemcalls may return ENOSPC, even if -+df(1)/statfs(2) shows that aufs has some free space/inode. -+. -+.TP -+.B nosum -+Disable \[oq]sum\[cq] option. -+This is default value. -+. -+.TP -+.B dirwh=N -+Watermark to remove a dir actually at rmdir(2) and rename(2). -+ -+If the target dir which is being removed or renamed (destination dir) -+has a huge number of whiteouts, i.e. the dir is empty logically but -+physically, the cost to remove/rename the single -+dir may be very high. -+It is -+required to unlink all of whiteouts internally before issuing -+rmdir/rename to the branch. -+To reduce the cost of single systemcall, -+aufs renames the target dir to a whiteout-ed temporary name and -+invokes a pre-created -+kernel thread to remove whiteout-ed children and the target dir. -+The rmdir/rename systemcall returns just after kicking the thread. -+ -+When the number of whiteout-ed children is less than the value of -+dirwh, aufs remove them in a single systemcall instead of passing -+another thread. -+This value is ignored when the branch is NFS. -+The default value is \*[AUFS_DIRWH_DEF]. -+.\" . 
-+.\" .TP -+.\" .B rdcache=N -+. -+.TP -+.B rdblk=N -+Specifies a size of internal VDIR block which is allocated at a time in -+byte. -+The VDIR block will be allocated several times when necessary. If your -+directory has millions of files, you may want to expand this size. -+The default value is defined as \*[AUFS_RDBLK_DEF]. -+The size has to be lager than NAME_MAX (usually 255) and kmalloc\-able -+(the maximum limit depends on your system. at least 128KB is available -+for every system). -+(cf. Virtual or Vertical Directory Block). -+. -+.TP -+.B rdhash=N -+Specifies a size of internal VDIR hash table which is used to compare -+the file names under the same named directory on multiple branches. -+The VDIR hash table will be allocated in readdir(3)/getdents(2), -+rmdir(2) and rename(2) for the existing target directory. If your -+directory has millions of files, you may want to expand this size. -+The default value is defined as \*[AUFS_RDHASH_DEF]. -+The size has to be lager than zero, and it will be multiplied by 4 or 8 -+(for 32\-bit and 64\-bit respectively, currently). The result must be -+kmalloc\-able -+(the maximum limit depends on your system. at least 128KB is available -+for every system). -+(cf. Virtual or Vertical Directory Block). -+. -+.TP -+.B plink -+.TQ -+.B noplink -+Specifies to use \[oq]pseudo link\[cq] feature or not. -+The default is \[oq]plink\[cq] which means use this feature. -+(cf. Pseudo Link) -+. -+.TP -+.B clean_plink -+Removes all pseudo-links in memory. -+In order to make pseudo-link permanent, use -+\[oq]auplink\[cq] utility just before one of these operations, -+unmounting aufs, -+using \[oq]ro\[cq] or \[oq]noplink\[cq] mount option, -+deleting a branch from aufs, -+adding a branch into aufs, -+or changing your writable branch as readonly. 
-+If you installed both of /sbin/mount.aufs and /sbin/umount.aufs, and your -+mount(8) and umount(8) support them, -+\[oq]auplink\[cq] utility will be executed automatically and flush pseudo-links. -+(cf. Pseudo Link) -+. -+.TP -+.B udba=none | reval | inotify -+Specifies the level of UDBA (User\[aq]s Direct Branch Access) test. -+(cf. User\[aq]s Direct Branch Access and Inotify Limitation). -+. -+.TP -+.B diropq=whiteouted | w | always | a -+Specifies whether mkdir(2) and rename(2) dir case make the created directory -+\[oq]opaque\[cq] or not. -+In other words, to create \[oq]\*[AUFS_WH_DIROPQ]\[cq] under the created or renamed -+directory, or not to create. -+When you specify diropq=w or diropq=whiteouted, aufs will not create -+it if the -+directory was not whiteouted or opaqued. If the directory was whiteouted -+or opaqued, the created or renamed directory will be opaque. -+When you specify diropq=a or diropq==always, aufs will always create -+it regardless -+the directory was whiteouted/opaqued or not. -+The default value is diropq=w, it means not to create when it is unnecessary. -+If you define CONFIG_AUFS_COMPAT at aufs compiling time, the default will be -+diropq=a. -+You need to consider this option if you are planning to add a branch later -+since \[oq]diropq\[cq] affects the same named directory on the added branch. -+. -+.TP -+.B warn_perm -+.TQ -+.B nowarn_perm -+Adding a branch, aufs will issue a warning about uid/gid/permission of -+the adding branch directory, -+when they differ from the existing branch\[aq]s. This difference may or -+may not impose a security risk. -+If you are sure that there is no problem and want to stop the warning, -+use \[oq]nowarn_perm\[cq] option. -+The default is \[oq]warn_perm\[cq] (cf. DIAGNOSTICS). -+ -+.\" ---------------------------------------------------------------------- -+.SH Module Parameters -+.TP -+.B nwkq=N -+The number of kernel thread named \*[AUFS_WKQ_NAME]. 
-+ -+Those threads stay in the system while the aufs module is loaded, -+and handle the special I/O requests from aufs. -+The default value is \*[AUFS_NWKQ_DEF]. -+ -+The special I/O requests from aufs include a part of copy-up, lookup, -+directory handling, pseudo-link, xino file operations and the -+delegated access to branches. -+For example, Unix filesystems allow you to rmdir(2) which has no write -+permission bit, if its parent directory has write permission bit. In aufs, the -+removing directory may or may not have whiteout or \[oq]dir opaque\[cq] mark as its -+child. And aufs needs to unlink(2) them before rmdir(2). -+Therefore aufs delegates the actual unlink(2) and rmdir(2) to another kernel -+thread which has been created already and has a superuser privilege. -+ -+If you enable CONFIG_SYSFS, you can check this value through -+/module/aufs/parameters/nwkq. -+ -+. -+.TP -+.B brs=1 | 0 -+Specifies to use the branch path data file under sysfs or not. -+ -+If the number of your branches is large or their path is long -+and you meet the limitation of mount(8) ro /etc/mtab, you need to -+enable CONFIG_SYSFS and set aufs module parameter brs=1. -+ -+When this parameter is set as 1, aufs does not show \[oq]br:\[cq] (or dirs=) -+mount option through /proc/mounts (and /etc/mtab). So you can -+keep yourself from the page limitation of -+mount(8) or /etc/mtab. -+Aufs shows branch paths through /fs/aufs/si_XXX/brNNN. -+Actually the file under sysfs has also a size limitation, but I don\[aq]t -+think it is harmful. -+ -+There is one more side effect in setting 1 to this parameter. -+If you rename your branch, the branch path written in /etc/mtab will be -+obsoleted and the future remount will meet some error due to the -+unmatched parameters (Remember that mount(8) may take the options from -+/etc/mtab and pass them to the systemcall). -+If you set 1, /etc/mtab will not hold the branch path and you will not -+meet such trouble. 
On the other hand, the entires for the -+branch path under sysfs are generated dynamically. So it must not be obsoleted. -+But I don\[aq]t think users want to rename branches so often. -+ -+If CONFIG_SYSFS is disable, this paramater is always set to 0. -+. -+.TP -+.B sysrq=key -+Specifies MagicSysRq key for debugging aufs. -+You need to enable both of CONFIG_MAGIC_SYSRQ and CONFIG_AUFS_DEBUG. -+Currently this is for developers only. -+The default is \[oq]a\[cq]. -+. -+.TP -+.B debug= 0 | 1 -+Specifies disable(0) or enable(1) debug print in aufs. -+This parameter can be changed dynamically. -+You need to enable CONFIG_AUFS_DEBUG. -+Currently this is for developers only. -+The default is \[oq]0\[cq] (disable). -+ -+.\" ---------------------------------------------------------------------- -+.SH Entries under Sysfs and Debugfs -+See linux/Documentation/ABI/*/{sys,debug}fs-aufs. -+ -+.\" ---------------------------------------------------------------------- -+.SH Branch Syntax -+.TP -+.B dir_path[ =permission [ + attribute ] ] -+.TQ -+.B permission := rw | ro | rr -+.TQ -+.B attribute := wh | nolwh -+dir_path is a directory path. -+The keyword after \[oq]dir_path=\[cq] is a -+permission flags for that branch. -+Comma, colon and the permission flags string (including \[oq]=\[cq])in the path -+are not allowed. -+ -+Any filesystem can be a branch, But some are not accepted such like -+sysfs, procfs and unionfs. -+If you specify such filesystems as an aufs branch, aufs will return an error -+saying it is unsupported. -+ -+Cramfs in linux stable release has strange inodes and it makes aufs -+confused. 
For example, -+.nf -+$ mkdir -p w/d1 w/d2 -+$ > w/z1 -+$ > w/z2 -+$ mkcramfs w cramfs -+$ sudo mount -t cramfs -o ro,loop cramfs /mnt -+$ find /mnt -ls -+ 76 1 drwxr-xr-x 1 jro 232 64 Jan 1 1970 /mnt -+ 1 1 drwxr-xr-x 1 jro 232 0 Jan 1 1970 /mnt/d1 -+ 1 1 drwxr-xr-x 1 jro 232 0 Jan 1 1970 /mnt/d2 -+ 1 1 -rw-r--r-- 1 jro 232 0 Jan 1 1970 /mnt/z1 -+ 1 1 -rw-r--r-- 1 jro 232 0 Jan 1 1970 /mnt/z2 -+.fi -+ -+All these two directories and two files have the same inode with one -+as their link count. Aufs cannot handle such inode correctly. -+Currently, aufs involves a tiny workaround for such inodes. But some -+applications may not work correctly since aufs inode number for such -+inode will change silently. -+If you do not have any empty files, empty directories or special files, -+inodes on cramfs will be all fine. -+ -+A branch should not be shared as the writable branch between multiple -+aufs. A readonly branch can be shared. -+ -+The maximum number of branches is configurable at compile time. -+The current value is \*[AUFS_BRANCH_MAX] which depends upon -+configuration. -+ -+When an unknown permission or attribute is given, aufs sets ro to that -+branch silently. -+ -+.SS Permission -+. -+.TP -+.B rw -+Readable and writable branch. Set as default for the first branch. -+If the branch filesystem is mounted as readonly, you cannot set it \[oq]rw.\[cq] -+.\" A filesystem which does not support link(2) and i_op\->setattr(), for -+.\" example FAT, will not be used as the writable branch. -+. -+.TP -+.B ro -+Readonly branch and it has no whiteouts on it. -+Set as default for all branches except the first one. Aufs never issue -+both of write operation and lookup operation for whiteout to this branch. -+. -+.TP -+.B rr -+Real readonly branch, special case of \[oq]ro\[cq], for natively readonly -+branch. Assuming the branch is natively readonly, aufs can optimize -+some internal operation. 
For example, if you specify \[oq]udba=inotify\[cq] -+option, aufs does not set inotify for the things on rr branch. -+Set by default for a branch whose fs-type is either \[oq]iso9660\[cq], -+\[oq]cramfs\[cq] or \[oq]romfs\[cq]. -+ -+When your branch exists on slower device and you have some -+capacity on your hdd, you may want to try ulobdev tool in ULOOP sample. -+It can cache the contents of the real devices on another faster device, -+so you will be able to get the better access performance. -+The ulobdev tool is for a generic block device, and the ulohttp is for a -+filesystem image on http server. -+If you want to spin down your hdd to save the -+battery life or something, then you may want to use ulobdev to save the -+access to the hdd, too. -+See $AufsCVS/sample/uloop in detail. -+ -+.SS Attribute -+. -+.TP -+.B wh -+Readonly branch and it has/might have whiteouts on it. -+Aufs never issue write operation to this branch, but lookup for whiteout. -+Use this as \[oq]=ro+wh\[cq]. -+. -+.TP -+.B nolwh -+Usually, aufs creates a whiteout as a hardlink on a writable -+branch. This attributes prohibits aufs to create the hardlinked -+whiteout, including the source file of all hardlinked whiteout -+(\*[AUFS_WH_BASE].) -+If you do not like a hardlink, or your writable branch does not support -+link(2), then use this attribute. -+But I am afraid a filesystem which does not support link(2) natively -+will fail in other place such as copy-up. -+Use this as \[oq]=rw+nolwh\[cq]. -+Also you may want to try \[oq]noplink\[cq] mount option, while it is not recommended. -+ -+.\" .SS FUSE as a branch -+.\" A FUSE branch needs special attention. -+.\" The struct fuse_operations has a statfs operation. It is OK, but the -+.\" parameter is struct statvfs* instead of struct statfs*. So almost -+.\" all user\-space implementaion will call statvfs(3)/fstatvfs(3) instead of -+.\" statfs(2)/fstatfs(2). 
-+.\" In glibc, [f]statvfs(3) issues [f]statfs(2), open(2)/read(2) for -+.\" /proc/mounts, -+.\" and stat(2) for the mountpoint. With this situation, a FUSE branch will -+.\" cause a deadlock in creating something in aufs. Here is a sample -+.\" scenario, -+.\" .\" .RS -+.\" .\" .IN -10 -+.\" .Bu -+.\" create/modify a file just under the aufs root dir. -+.\" .Bu -+.\" aufs aquires a write\-lock for the parent directory, ie. the root dir. -+.\" .Bu -+.\" A library function or fuse internal may call statfs for a fuse branch. -+.\" The create=mfs mode in aufs will surely call statfs for each writable -+.\" branches. -+.\" .Bu -+.\" FUSE in kernel\-space converts and redirects the statfs request to the -+.\" user\-space. -+.\" .Bu -+.\" the user\-space statfs handler will call [f]statvfs(3). -+.\" .Bu -+.\" the [f]statvfs(3) in glibc will access /proc/mounts and issue -+.\" stat(2) for the mountpoint. But those require a read\-lock for the aufs -+.\" root directory. -+.\" .Bu -+.\" Then a deadlock occurs. -+.\" .\" .RE 1 -+.\" .\" .IN -+.\" -+.\" In order to avoid this deadlock, I would suggest not to call -+.\" [f]statvfs(3) from fuse. Here is a sample code to do this. 
-+.\" .nf -+.\" struct statvfs stvfs; -+.\" -+.\" main() -+.\" { -+.\" statvfs(..., &stvfs) -+.\" or -+.\" fstatvfs(..., &stvfs) -+.\" stvfs.f_fsid = 0 -+.\" } -+.\" -+.\" statfs_handler(const char *path, struct statvfs *arg) -+.\" { -+.\" struct statfs stfs -+.\" -+.\" memcpy(arg, &stvfs, sizeof(stvfs)) -+.\" -+.\" statfs(..., &stfs) -+.\" or -+.\" fstatfs(..., &stfs) -+.\" -+.\" arg->f_bfree = stfs.f_bfree -+.\" arg->f_bavail = stfs.f_bavail -+.\" arg->f_ffree = stfs.f_ffree -+.\" arg->f_favail = /* any value */ -+.\" } -+.\" .fi -+ -+.\" ---------------------------------------------------------------------- -+.SH External Inode Number Bitmap, Translation Table and Generation Table (xino) -+Aufs uses one external bitmap file and one external inode number -+translation table files per an aufs and per a branch -+filesystem by default. -+Additionally when CONFIG_AUFS_EXPORT is enabled, one external inode -+generation table is added. -+The bitmap (and the generation table) is for recycling aufs inode number -+and the others -+are a table for converting an inode number on a branch to -+an aufs inode number. The default path -+is \[oq]first writable branch\[cq]/\*[AUFS_XINO_FNAME]. -+If there is no writable branch, the -+default path -+will be \*[AUFS_XINO_DEFPATH]. -+.\" A user who executes mount(8) needs the privilege to create xino -+.\" file. -+ -+If you enable CONFIG_SYSFS, the path of xino files are not shown in -+/proc/mounts (and /etc/mtab), instead it is shown in -+/fs/aufs/si_/xi_path. -+Otherwise, it is shown in /proc/mounts unless it is not the default -+path. -+ -+Those files are always opened and read/write by aufs frequently. -+If your writable branch is on flash memory device, it is recommended -+to put xino files on other than flash memory by specifying \[oq]xino=\[cq] -+mount option. -+ -+The -+maximum file size of the bitmap is, basically, the amount of the -+number of all the files on all branches divided by 8 (the number of -+bits in a byte). 
-+For example, on a 4KB page size system, if you have 32,768 (or -+2,599,968) files in aufs world, -+then the maximum file size of the bitmap is 4KB (or 320KB). -+ -+The -+maximum file size of the table will -+be \[oq]max inode number on the branch x size of an inode number\[cq]. -+For example in 32bit environment, -+ -+.nf -+$ df -i /branch_fs -+/dev/hda14 2599968 203127 2396841 8% /branch_fs -+.fi -+ -+and /branch_fs is a branch of the aufs. When the inode number is -+assigned contiguously (without \[oq]hole\[cq]), the maximum xino file size for -+/branch_fs will be 2,599,968 x 4 bytes = about 10 MB. But it might not be -+allocated all of disk blocks. -+When the inode number is assigned discontinuously, the maximum size of -+xino file will be the largest inode number on a branch x 4 bytes. -+Additionally, the file size is limited to LLONG_MAX or the s_maxbytes -+in filesystem\[aq]s superblock (s_maxbytes may be smaller than -+LLONG_MAX). So the -+support-able largest inode number on a branch is less than -+2305843009213693950 (LLONG_MAX/4\-1). -+This is the current limitation of aufs. -+On 64bit environment, this limitation becomes more strict and the -+supported largest inode number is less than LLONG_MAX/8\-1. -+ -+The xino files are always hidden, i.e. removed. So you cannot -+do \[oq]ls \-l xino_file\[cq]. -+If you enable CONFIG_DEBUG_FS, you can check this information through -+/aufs//{xib,xi[0-9]*,xigen}. xib is for the bitmap file, -+xi0 is for the first branch, and xi1 is for the next. xigen is for the -+generation table. -+xib and xigen are in the format of, -+ -+.nf -+x -+.fi -+ -+Note that a filesystem usually has a -+feature called pre-allocation, which means a number of -+blocks are allocated automatically, and then deallocated -+silently when the filesystem thinks they are unnecessary.
-+You do not have to be surprised the sudden changes of the number of -+blocks, when your filesystem which xino files are placed supports the -+pre-allocation feature. -+ -+The rests are hidden xino file information in the format of, -+ -+.nf -+, x -+.fi -+ -+If the file count is larger than 1, it means some of your branches are -+on the same filesystem and the xino file is shared by them. -+Note that the file size may not be equal to the actual consuming blocks -+since xino file is a sparse file, i.e. a hole in a file which does not -+consume any disk blocks. -+ -+Once you unmount aufs, the xino files for that aufs are totally gone. -+It means that the inode number is not permanent across umount or -+shutdown. -+ -+The xino files should be created on the filesystem except NFS. -+If your first writable branch is NFS, you will need to specify xino -+file path other than NFS. -+Also if you are going to remove the branch where xino files exist or -+change the branch permission to readonly, you need to use xino option -+before del/mod the branch. -+ -+The bitmap file can be truncated. -+For example, if you delete a branch which has huge number of files, -+many inode numbers will be recycled and the bitmap will be truncated -+to smaller size. Aufs does this automatically when a branch is -+deleted. -+You can truncate it anytime you like if you specify \[oq]trunc_xib\[cq] mount -+option. But when the accessed inode number was not deleted, nothing -+will be truncated. -+If you do not want to truncate it (it may be slow) when you delete a -+branch, specify \[oq]notrunc_xib\[cq] after \[oq]del\[cq] mount option. -+ -+If you do not want to use xino, use noxino mount option. Use this -+option with care, since the inode number may be changed silently and -+unexpectedly anytime. -+For example, -+rmdir failure, recursive chmod/chown/etc to a large and deep directory -+or anything else. -+And some applications will not work correctly. 
-+.\" When the inode number has been changed, your system -+.\" can be crazy. -+If you want to change the xino default path, use xino mount option. -+ -+After you add branches, the persistence of inode number may not be -+guaranteed. -+At remount time, cached but unused inodes are discarded. -+And the newly appeared inode may have different inode number at the -+next access time. The inodes in use have the persistent inode number. -+ -+When aufs assigned an inode number to a file, and if you create the -+same named file on the upper branch directly, then the next time you -+access the file, aufs may assign another inode number to the file even -+if you use xino option. -+Some applications may treat the file whose inode number has been -+changed as totally different file. -+ -+.\" ---------------------------------------------------------------------- -+.SH Pseudo Link (hardlink over branches) -+Aufs supports \[oq]pseudo link\[cq] which is a logical hard-link over -+branches (cf. ln(1) and link(2)). -+In other words, a copied-up file by link(2) and a copied-up file which was -+hard-linked on a readonly branch filesystem. -+ -+When you have files named fileA and fileB which are -+hardlinked on a readonly branch, if you write something into fileA, -+aufs copies-up fileA to a writable branch, and write(2) the originally -+requested thing to the copied-up fileA. On the writable branch, -+fileA is not hardlinked. -+But aufs remembers it was hardlinked, and handles fileB as if it existed -+on the writable branch, by referencing fileA\[aq]s inode on the writable -+branch as fileB\[aq]s inode. -+ -+Once you unmount aufs, the plink info for that aufs kept in memory are totally -+gone. -+It means that the pseudo-link is not permanent. 
-+If you want to make plink permanent, try \[oq]auplink\[cq] utility just before -+one of these operations, -+unmounting your aufs, -+using \[oq]ro\[cq] or \[oq]noplink\[cq] mount option, -+deleting a branch from aufs, -+adding a branch into aufs, -+or changing your writable branch to readonly. -+ -+This utility reproduces all real hardlinks on a writable branch by linking -+them, and removes pseudo-link info in memory and temporary link on the -+writable branch. -+Since this utility accesses your branches directly, you cannot hide them by -+\[oq]mount \-\-bind /tmp /branch\[cq] or something. -+ -+If you are willing to rebuild your aufs with the same branches later, you -+should use auplink utility before you umount your aufs. -+If you installed both of /sbin/mount.aufs and /sbin/umount.aufs, and your -+mount(8) and umount(8) support them, -+\[oq]auplink\[cq] utility will be executed automatically and flush pseudo-links. -+ -+.nf -+# auplink /your/aufs/root flush -+# umount /your/aufs/root -+or -+# auplink /your/aufs/root flush -+# mount -o remount,mod:/your/writable/branch=ro /your/aufs/root -+or -+# auplink /your/aufs/root flush -+# mount -o remount,noplink /your/aufs/root -+or -+# auplink /your/aufs/root flush -+# mount -o remount,del:/your/aufs/branch /your/aufs/root -+or -+# auplink /your/aufs/root flush -+# mount -o remount,append:/your/aufs/branch /your/aufs/root -+.fi -+ -+The plinks are kept both in memory and on disk. When they consume too much -+resources on your system, you can use the \[oq]auplink\[cq] utility at anytime and -+throw away the unnecessary pseudo-links safely. -+ -+Additionally, the \[oq]auplink\[cq] utility is very useful for some security reasons. -+For example, when you have a directory whose permission flags -+are 0700, and a file which is 0644 under the 0700 directory. Usually, -+all files under the 0700 directory are private and no one else can see -+the file.
But when the directory is 0711 and someone else knows the 0644 -+filename, he can read the file. -+ -+Basically, aufs pseudo-link feature creates a temporary link under the -+directory whose owner is root and the permission flags are 0700. -+But when the writable branch is NFS, aufs sets 0711 to the directory. -+When the 0644 file is pseudo-linked, the temporary link, of course the -+contents of the file is totally equivalent, will be created under the -+0711 directory. The filename will be generated by its inode number. -+While it is hard to know the generated filename, someone else may try peeping -+the temporary pseudo-linked file by his software tool which may try the name -+from one to MAX_INT or something. -+In this case, the 0644 file will be read unexpectedly. -+I am afraid that leaving the temporary pseudo-links can be a security hole. -+It makes sense to execute \[oq]auplink /your/aufs/root flush\[cq] -+periodically, when your writable branch is NFS. -+ -+When your writable branch is not NFS, or all users are careful enough to set 0600 -+to their private files, you do not have to worry about this issue. -+ -+If you do not want this feature, use \[oq]noplink\[cq] mount option. -+ -+.SS The behaviours of plink and noplink -+This sample shows that the \[oq]f_src_linked2\[cq] with \[oq]noplink\[cq] option cannot follow -+the link. 
-+ -+.nf -+none on /dev/shm/u type aufs (rw,xino=/dev/shm/rw/.aufs.xino,br:/dev/shm/rw=rw:/dev/shm/ro=ro) -+$ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied -+ls: ./copied: No such file or directory -+15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked -+15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 -+22 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked -+22 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked2 -+$ echo abc >> f_src_linked -+$ cp f_src_linked copied -+$ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied -+15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked -+15 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 -+36 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ../rw/f_src_linked -+53 -rw-r--r-- 1 jro jro 6 Dec 22 11:03 ./copied -+22 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked -+22 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked2 -+$ cmp copied f_src_linked2 -+$ -+ -+none on /dev/shm/u type aufs (rw,xino=/dev/shm/rw/.aufs.xino,noplink,br:/dev/shm/rw=rw:/dev/shm/ro=ro) -+$ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied -+ls: ./copied: No such file or directory -+17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked -+17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 -+23 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked -+23 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ./f_src_linked2 -+$ echo abc >> f_src_linked -+$ cp f_src_linked copied -+$ ls -li ../r?/f_src_linked* ./f_src_linked* ./copied -+17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked -+17 -rw-r--r-- 2 jro jro 2 Dec 22 11:03 ../ro/f_src_linked2 -+36 -rw-r--r-- 1 jro jro 6 Dec 22 11:03 ../rw/f_src_linked -+53 -rw-r--r-- 1 jro jro 6 Dec 22 11:03 ./copied -+23 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked -+23 -rw-r--r-- 2 jro jro 6 Dec 22 11:03 ./f_src_linked2 -+$ cmp copied f_src_linked2 -+cmp: EOF on f_src_linked2 -+$ -+.fi -+ -+.\" -+.\" If you add/del a branch, or link/unlink the pseudo-linked -+.\" file on a branch -+.\" 
directly, aufs cannot keep the correct link count, but the status of -+.\" \[oq]pseudo-linked.\[cq] -+.\" Those files may or may not keep the file data after you unlink the -+.\" file on the branch directly, especially the case of your branch is -+.\" NFS. -+ -+If you add a branch which has fileA or fileB, aufs does not follow the -+pseudo link. The file on the added branch has no relation to the same -+named file(s) on the lower branch(es). -+If you use noxino mount option, pseudo link will not work after the -+kernel shrinks the inode cache. -+ -+This feature will not work for squashfs before version 3.2 since its -+inode is tricky. -+When the inode is hardlinked, squashfs inodes has the same inode -+number and correct link count, but the inode memory object is -+different. Squashfs inodes (before v3.2) are generated for each, even -+they are hardlinked. -+ -+.\" ---------------------------------------------------------------------- -+.SH User\[aq]s Direct Branch Access (UDBA) -+UDBA means a modification to a branch filesystem manually or directly, -+e.g. bypassing aufs. -+While aufs is designed and implemented to be safe after UDBA, -+it can make yourself and your aufs confused. And some information like -+aufs inode will be incorrect. -+For example, if you rename a file on a branch directly, the file on -+aufs may -+or may not be accessible through both of old and new name. -+Because aufs caches various information about the files on -+branches. And the cache still remains after UDBA. -+ -+Aufs has a mount option named \[oq]udba\[cq] which specifies the test level at -+access time whether UDBA was happened or not. -+. -+.TP -+.B udba=none -+Aufs trusts the dentry and the inode cache on the system, and never -+test about UDBA. With this option, aufs runs fastest, but it may show -+you incorrect data. -+Additionally, if you often modify a branch -+directly, aufs will not be able to trace the changes of inodes on the -+branch. 
It can be a cause of wrong behaviour, deadlock or anything else. -+ -+It is recommended to use this option only when you are sure that -+nobody access a file on a branch. -+It might be difficult for you to achieve real \[oq]no UDBA\[cq] world when you -+cannot stop your users doing \[oq]find / \-ls\[cq] or something. -+If you really want to forbid all of your users to UDBA, here is a trick -+for it. -+With this trick, users cannot see the -+branches directly and aufs runs with no problem, except \[oq]auplink\[cq] utility. -+But if you are not familiar with aufs, this trick may make -+yourself confused. -+ -+.nf -+# d=/tmp/.aufs.hide -+# mkdir $d -+# for i in $branches_you_want_to_hide -+> do -+> mount -n --bind $d $i -+> done -+.fi -+ -+When you unmount the aufs, delete/modify the branch by remount, or you -+want to show the hidden branches again, unmount the bound -+/tmp/.aufs.hide. -+ -+.nf -+# umount -n $branches_you_want_to_unbound -+.fi -+ -+If you use FUSE filesystem as an aufs branch which supports hardlink, -+you should not set this option, since FUSE makes inode objects for -+each hardlinks (at least in linux\-2.6.23). When your FUSE filesystem -+maintains them at link/unlinking, it is equivalent -+to \[oq]direct branch access\[cq] for aufs. -+ -+. -+.TP -+.B udba=reval -+Aufs tests only the existence of the file which existed. If -+the existed file was removed on the branch directly, aufs -+discard the cache about the file and -+re-lookup it. So the data will be updated. -+This test is at minimum level to keep the performance and ensure the -+existence of a file. -+This is default and aufs runs still fast. -+ -+This rule leads to some unexpected situation, but I hope it is -+harmless. Those are totally depends upon cache. Here are just a few -+examples. -+. -+.RS -+.Bu -+If the file is cached as negative or -+not-existed, aufs does not test it. And the file is still handled as -+negative after a user created the file on a branch directly. 
If the -+file is not cached, aufs will lookup normally and find the file. -+. -+.Bu -+When the file is cached as positive or existed, and a user created the -+same named file directly on the upper branch. Aufs detects the cached -+inode of the file is still existing and will show you the old (cached) -+file which is on the lower branch. -+. -+.Bu -+When the file is cached as positive or existed, and a user renamed the -+file by rename(2) directly. Aufs detects the inode of the file is -+still existing. You may or may not see both of the old and new files. -+Todo: If aufs also tests the name, we can detect this case. -+.RE -+ -+If your outer modification (UDBA) is rare and you can ignore the -+temporary and minor differences between virtual aufs world and real -+branch filesystem, then try this mount option. -+. -+.TP -+.B udba=inotify -+Aufs sets \[oq]inotify\[cq] to all the accessed directories on its branches -+and receives the event about the dir and its children. It consumes -+resources, cpu and memory. And I am afraid that the performance will be -+hurt, but it is most strict test level. -+There are some limitations of linux inotify, see also Inotify -+Limitation. -+So it is recommended to leave udba default option usually, and set it -+to inotify by remount when you need it. -+ -+When a user accesses the file which was notified UDBA before, the cached data -+about the file will be discarded and aufs re-lookup it. So the data will -+be updated. -+When an error condition occurs between UDBA and aufs operation, aufs -+will return an error, including EIO. -+To use this option, you need to enable CONFIG_INOTIFY and -+CONFIG_AUFS_UDBA_INOTIFY. -+ -+To rename/rmdir a directory on a branch directory may reveal the same named -+directory on the lower branch. Aufs tries re-lookuping the renamed -+directory and the revealed directory and assigning different inode -+number to them. But the inode number including their children can be a -+problem. 
The inode numbers will be changed silently, and -+aufs may produce a warning. If you rename a directory repeatedly and -+reveal/hide the lower directory, then aufs may confuse their inode -+numbers too. It depends upon the system cache. -+ -+When you make a directory in aufs and mount other filesystem on it, -+the directory in aufs cannot be removed expectedly because it is a -+mount point. But the same named directory on the writable branch can -+be removed, if someone wants. It is just an empty directory, instead -+of a mount point. -+Aufs cannot stop such direct rmdir, but produces a warning about it. -+ -+If the pseudo-linked file is hardlinked or unlinked on the branch -+directly, its inode link count in aufs may be incorrect. It is -+recommended to flush the pseudo-links by auplink script. -+ -+.\" ---------------------------------------------------------------------- -+.SH Linux Inotify Limitation -+Unfortunately, current inotify (linux\-2.6.18) has some limitations, -+and aufs inherits them. -+ -+.SS IN_ATTRIB, updating atime -+When a file/dir on a branch is accessed directly, the inode atime (access -+time, cf. stat(2)) may or may not be updated. In some cases, inotify -+does not fire this event. So the aufs inode atime may remain old. -+ -+.SS IN_ATTRIB, updating nlink -+When the link count of a file on a branch is incremented by link(2) -+directly, -+inotify fires IN_CREATE to the parent -+directory, but not IN_ATTRIB to the file. So the aufs inode nlink may -+remain old. -+ -+.SS IN_DELETE, removing file on NFS -+When a file on a NFS branch is deleted directly, inotify may or may -+not fire -+IN_DELETE event. It depends upon the status of dentry -+(DCACHE_NFSFS_RENAMED flag). -+In this case, the file on aufs still seems to exist. Aufs and any user can see -+the file. -+ -+.SS IN_IGNORED, deleted rename target -+When a file/dir on a branch is unlinked by rename(2) directly, inotify -+fires IN_IGNORED which means the inode is deleted.
Actually, in some -+cases, the inode survives. For example, the rename target is linked or -+opened. In this case, inotify watch set by aufs is removed by VFS and -+inotify. -+And aufs cannot receive the events anymore. So aufs may show you -+incorrect data about the file/dir. -+ -+.\" ---------------------------------------------------------------------- -+.SH Virtual or Vertical Directory Block (VDIR) -+In order to provide the merged view of file listing, aufs builds -+an internal directory block in memory. For readdir, aufs performs readdir() -+internally for each dir on branches, merges their entries with -+eliminating the whiteout\-ed ones, and sets it to the opened file (dir) -+object. So the file object has its entry list until it is closed. The -+entry list will be updated when the file position is zero (by -+rewinddir(3)) and becomes obsoleted. -+ -+Some people may say it can be a security hole or invite DoS attack -+since the opened and once readdir\-ed dir (file object) holds its entry -+list and becomes a pressure for system memory. But I would say it is similar -+to files under /proc or /sys. The virtual files in them also hold a -+memory page (generally) while they are opened. When an idea to reduce -+memory for them is introduced, it will be applied to aufs too. -+ -+The dynamically allocated memory block for the name of entries has a -+unit of \*[AUFS_RDBLK_DEF] bytes by default. -+During building dir blocks, aufs creates hash list (hashed and divided by -+\*[AUFS_RDHASH_DEF] by default) and judges whether -+the entry is whiteouted by its upper branch or already listed. -+ -+These values are suitable for normal environments. But you may have -+millions of files or very long filenames under a single directory. For -+such cases, you may need to customize these values by specifying rdblk= -+and rdhash= aufs mount options. -+ -+For instance, there are 97 files under my /bin, and the total name -+length is 597 bytes.
-+ -+.nf -+$ \\ls -1 /bin | wc -+ 97 97 597 -+.fi -+ -+Strictly speaking, 97 end\-of\-line codes are -+included. But it is OK since aufs VDIR also stores the name length in 1 -+byte. In this case, you do not need to customize the default values. 597 bytes -+filenames will be stored in 2 VDIR memory blocks (597 < -+\*[AUFS_RDBLK_DEF] x 2). -+And 97 filenames are distributed among \*[AUFS_RDHASH_DEF] lists, so one -+list will point to 4 names on average. To judge whether a name is whiteouted or -+not, the number of comparisons will be 4. 2 memory allocations -+and 4 comparisons cost little (even if the directory is opened for a long -+time). So you do not need to customize. -+ -+If your directory has millions of files, then you will need to specify -+rdblk= and rdhash=. -+ -+.nf -+$ ls -U /mnt/rotating-rust | wc -l -+1382438 -+.fi -+ -+In this case, assuming the average length of filenames is 6, in order to -+get better time performance I would -+recommend to set $((128*1024)) or $((64*1024)) for rdblk, and -+$((8*1024)) or $((4*1024)) for rdhash. -+You can change these values of the active aufs mount by "mount -o -+remount". -+ -+This customization is not for -+reducing the memory space, but for reducing time for the number of memory -+allocation and the name comparison. The larger value is faster, in -+general. Of course, you will need system memory. This is a generic -+"time\-vs\-space" problem. -+ -+.\" ---------------------------------------------------------------------- -+.SH Copy On Write, or aufs internal copyup and copydown -+Every stackable filesystem which implements copy\-on\-write supports the -+copyup feature. The feature is to copy a file/dir from the lower branch -+to the upper internally. When you have one readonly branch and one -+upper writable branch, and you append a string to a file which exists on -+the readonly branch, then aufs will copy the file from the readonly -+branch to the writable branch with its directory hierarchy.
It means one -+write(2) involves several logical/internal mkdir(2), creat(2), read(2), -+write(2) and close(2) systemcalls -+before the actual expected write(2) is performed. Sometimes it may take -+a long time, particularly when the file is very large. -+If CONFIG_AUFS_DEBUG is enabled, aufs produces a message saying \[oq]copying -+a large file.\[cq] -+ -+You may see the message when you change the xino file path or -+truncate the xino/xib files. Sometimes those files can be large and may -+take a long time to handle. -+ -+.\" ---------------------------------------------------------------------- -+.SH Policies to Select One among Multiple Writable Branches -+Aufs has some policies to select one among multiple writable branches -+when you are going to write/modify something. There are two kinds of -+policies, one is for newly creating something and the other is for -+internal copy-up. -+You can select them by specifying mount option \[oq]create=CREATE_POLICY\[cq] -+or \[oq]cpup=COPYUP_POLICY.\[cq] -+These policies have no meaning when you have only one writable -+branch. If there is some meaning, it must hurt the performance. -+ -+.SS Exceptions for Policies -+In every case below, even if the policy says that the branch where a -+new file should be created is /rw2, the file will be created on /rw1. -+. -+.Bu -+If there is a readonly branch with \[oq]wh\[cq] attribute above the -+policy-selected branch and the parent dir is marked as opaque, -+or the target (creating) file is whiteouted on the ro+wh branch, then -+the policy will be ignored and the target file will be created on the -+nearest upper writable branch than the ro+wh branch. -+.RS -+.nf -+/aufs = /rw1 + /ro+wh/diropq + /rw2 -+/aufs = /rw1 + /ro+wh/wh.tgt + /rw2 -+.fi -+.RE -+.
-+.Bu -+If there is a writable branch above the policy-selected branch and the -+parent dir is marked as opaque or the target file is whiteouted on the -+branch, then the policy will be ignored and the target file will be -+created on the highest one among the upper writable branches who has -+diropq or whiteout. In case of whiteout, aufs removes it as usual. -+.RS -+.nf -+/aufs = /rw1/diropq + /rw2 -+/aufs = /rw1/wh.tgt + /rw2 -+.fi -+.RE -+. -+.Bu -+link(2) and rename(2) systemcalls are exceptions in every policy. -+They try selecting the branch where the source exists as possible since -+copyup a large file will take long time. If it can\[aq]t be, ie. the -+branch where the source exists is readonly, then they will follow the -+copyup policy. -+. -+.Bu -+There is an exception for rename(2) when the target exists. -+If the rename target exists, aufs compares the index of the branches -+where the source and the target are existing and selects the higher -+one. If the selected branch is readonly, then aufs follows the copyup -+policy. -+ -+.SS Policies for Creating -+. -+.TP -+.B create=tdp | top\-down\-parent -+Selects the highest writable branch where the parent dir exists. If -+the parent dir does not exist on a writable branch, then the internal -+copyup will happen. The policy for this copyup is always \[oq]bottom-up.\[cq] -+This is the default policy. -+. -+.TP -+.B create=rr | round\-robin -+Selects a writable branch in round robin. When you have two writable -+branches and creates 10 new files, 5 files will be created for each -+branch. -+mkdir(2) systemcall is an exception. When you create 10 new directories, -+all are created on the same branch. -+. -+.TP -+.B create=mfs[:second] | most\-free\-space[:second] -+Selects a writable branch which has most free space. In order to keep -+the performance, you can specify the duration (\[oq]second\[cq]) which makes -+aufs hold the index of last selected writable branch until the -+specified seconds expires. 
The first time you create something in aufs -+after the specified seconds expired, aufs checks the amount of free -+space of all writable branches by internal statfs call -+and the held branch index will be updated. -+The default value is \*[AUFS_MFS_SECOND_DEF] seconds. -+. -+.TP -+.B create=mfsrr:low[:second] -+Selects a writable branch in most-free-space mode first, and then -+round-robin mode. If the selected branch has less free space than the -+specified value \[oq]low\[cq] in bytes, then aufs re-tries in round-robin mode. -+.\" \[oq]G\[cq], \[oq]M\[cq] and \[oq]K\[cq] (case insensitive) can be followed after \[oq]low.\[cq] Or -+Try an arithmetic expansion of shell which is defined by POSIX. -+For example, $((10 * 1024 * 1024)) for 10M. -+You can also specify the duration (\[oq]second\[cq]) which is equivalent to -+the \[oq]mfs\[cq] mode. -+. -+.TP -+.B create=pmfs[:second] -+Selects a writable branch where the parent dir exists, such as tdp -+mode. When the parent dir exists on multiple writable branches, aufs -+selects the one which has most free space, such as mfs mode. -+ -+.SS Policies for Copy-Up -+. -+.TP -+.B cpup=tdp | top\-down\-parent -+Equivalent to the same named policy for create. -+This is the default policy. -+. -+.TP -+.B cpup=bup | bottom\-up\-parent -+Selects the writable branch where the parent dir exists and the branch -+is nearest upper one from the copyup-source. -+. -+.TP -+.B cpup=bu | bottom\-up -+Selects the nearest upper writable branch from the copyup-source, -+regardless the existence of the parent dir. -+ -+.\" ---------------------------------------------------------------------- -+.SH Exporting Aufs via NFS -+Aufs is supporting NFS-exporting. -+Since aufs has no actual block device, you need to add NFS \[oq]fsid\[cq] option at -+exporting. Refer to the manual of NFS about the detail of this option. -+ -+There are some limitations or requirements. -+.RS -+.Bu -+The branch filesystem must support NFS-exporting. 
-+.Bu -+NFSv2 is not supported. When you mount the exported aufs from your NFS -+client, you will need to specify some NFS options like v3 or nfsvers=3, -+especially if it is nfsroot. -+.Bu -+If the size of the NFS file handle on your branch filesystem is large, -+aufs will -+not be able to handle it. The maximum size of NFSv3 file -+handle for a filesystem is 64 bytes. Aufs uses 24 bytes for 32bit -+system, plus 12 bytes for 64bit system. The rest is room for a file -+handle of a branch filesystem. -+.Bu -+The External Inode Number Bitmap, Translation Table and Generation Table -+(xino) is -+required since NFS file -+handle is based upon inode number. The mount option \[oq]xino\[cq] is enabled -+by default. -+The external inode generation table and its debugfs entry -+(/aufs/si_*/xigen) are created when CONFIG_AUFS_EXPORT is -+enabled even if you don\[aq]t export aufs actually. -+The size of the external inode generation table only grows and is never -+truncated. You might need to pay attention to the free space of the -+filesystem where xino files are placed. By default, it is the first -+writable branch. -+.Bu -+The branch filesystems must be accessible, which means \[oq]not hidden.\[cq] -+It means you need to \[oq]mount \-\-move\[cq] when you use initramfs and -+switch_root(8), or chroot(8). -+.RE -+ -+.\" ---------------------------------------------------------------------- -+.SH Dentry and Inode Caches -+If you want to clear caches on your system, there are several tricks -+for that. If your system ram is low, -+try \[oq]find /large/dir \-ls > /dev/null\[cq]. -+It will read many inodes and dentries and cache them. Then old caches will be -+discarded. -+But when you have large ram or you do not have such large -+directory, it is not effective. -+ -+If you want to discard cache within a certain filesystem, -+try \[oq]mount \-o remount /your/mntpnt\[cq].
Some filesystem may return an error of -+EINVAL or something, but VFS discards the unused dentry/inode caches on the -+specified filesystem. -+ -+.\" ---------------------------------------------------------------------- -+.SH Compatible/Incompatible with Unionfs Version 1.x Series -+If you compile aufs with \-DCONFIG_AUFS_COMPAT, dirs= option and =nfsro -+branch permission flag are available. They are interpreted as -+br: option and =ro flags respectively. -+ \[oq]debug\[cq], \[oq]delete\[cq], \[oq]imap\[cq] options are ignored silently. When you -+compile aufs without \-DCONFIG_AUFS_COMPAT, these three options are -+also ignored, but a warning message is issued. -+ -+Ignoring \[oq]delete\[cq] option, and to keep filesystem consistency, aufs tries -+writing something to only one branch in a single systemcall. It means -+aufs may copyup even if the copyup-src branch is specified as writable. -+For example, you have two writable branches and a large regular file -+on the lower writable branch. When you issue rename(2) to the file on aufs, -+aufs may copyup it to the upper writable branch. -+If this behaviour is not what you want, then you should rename(2) it -+on the lower branch directly. -+ -+And there is a simple shell -+script \[oq]unionctl\[cq] under sample subdirectory, which is compatible with -+unionctl(8) in -+Unionfs Version 1.x series, except \-\-query action. -+This script executes mount(8) with \[oq]remount\[cq] option and uses -+add/del/mod aufs mount options. -+If you are familiar with Unionfs Version 1.x series and want to use unionctl(8), you can -+try this script instead of using mount \-o remount,... directly. -+Aufs does not support ioctl(2) interface. -+This script is highly depending upon mount(8) in -+util\-linux\-2.12p package, and you need to mount /proc to use this script. -+If your mount(8) version differs, you can try modifying this -+script. It is very easy. -+The unionctl script is just for a sample usage of aufs remount -+interface. 
-+ -+Aufs uses the external inode number bitmap and translation table by -+default. -+ -+The default branch permission for the first branch is \[oq]rw\[cq], and the -+rest is \[oq]ro.\[cq] -+ -+The whiteout is for hiding files on lower branches. Also it is applied -+to stop readdir going lower branches. -+The latter case is called \[oq]opaque directory.\[cq] Any -+whiteout is an empty file, it means whiteout is just an mark. -+In the case of hiding lower files, the name of whiteout is -+\[oq]\*[AUFS_WH_PFX].\[cq] -+And in the case of stopping readdir, the name is -+\[oq]\*[AUFS_WH_PFX]\*[AUFS_WH_PFX].opq\[cq] or -+\[oq]\*[AUFS_WH_PFX]__dir_opaque.\[cq] The name depends upon your compile -+configuration -+CONFIG_AUFS_COMPAT. -+.\" All of newly created or renamed directory will be opaque. -+All whiteouts are hardlinked, -+including \[oq]/\*[AUFS_WH_BASE].\[cq] -+ -+The hardlink on an ordinary (disk based) filesystem does not -+consume inode resource newly. But in linux tmpfs, the number of free -+inodes will be decremented by link(2). It is recommended to specify -+nr_inodes option to your tmpfs if you meet ENOSPC. Use this option -+after checking by \[oq]df \-i.\[cq] -+ -+When you rmdir or rename-to the dir who has a number of whiteouts, -+aufs rename the dir to the temporary whiteouted-name like -+\[oq]\*[AUFS_WH_PFX]..\[cq] Then remove it after actual operation. -+cf. mount option \[oq]dirwh.\[cq] -+ -+.\" ---------------------------------------------------------------------- -+.SH Incompatible with an Ordinary Filesystem -+stat(2) returns the inode info from the first existence inode among -+the branches, except the directory link count. -+Aufs computes the directory link count larger than the exact value usually, in -+order to keep UNIX filesystem semantics, or in order to shut find(1) mouth up. -+The size of a directory may be wrong too, but it has to do no harm. 
-+The timestamp of a directory will not be updated when a file is -+created or removed under it, and it was done on a lower branch. -+ -+The test for permission bits has two cases. One is for a directory, -+and the other is for a non-directory. In the case of a directory, aufs -+checks the permission bits of all existing directories. It means you -+need the correct privilege for the directories including the lower -+branches. -+The test for a non-directory is more simple. It checks only the -+topmost inode. -+ -+statfs(2) returns the information of the first branch info except -+namelen when \[oq]nosum\[cq] is specified (the default). The namelen is -+decreased by the whiteout prefix length. And the block size may differ -+from st_blksize which is obtained by stat(2). -+ -+Remember, seekdir(3) and telldir(3) are not defined in POSIX. They may -+not work as you expect. Try rewinddir(3) or re-open the dir. -+ -+The whiteout prefix (\*[AUFS_WH_PFX]) is reserved on all branches. Users should -+not handle the filename begins with this prefix. -+In order to future whiteout, the maxmum filename length is limited by -+the longest value \- \*[AUFS_WH_PFX_LEN]. It may be a violation of POSIX. -+ -+If you dislike the difference between the aufs entries in /etc/mtab -+and /proc/mounts, and if you are using mount(8) in util\-linux package, -+then try ./mount.aufs utility. Copy the script to /sbin/mount.aufs. -+This simple utility tries updating -+/etc/mtab. If you do not care about /etc/mtab, you can ignore this -+utility. -+Remember this utility is highly depending upon mount(8) in -+util\-linux\-2.12p package, and you need to mount /proc. -+ -+Since aufs uses its own inode and dentry, your system may cache huge -+number of inodes and dentries. It can be as twice as all of the files -+in your union. -+It means that unmounting or remounting readonly at shutdown time may -+take a long time, since mount(2) in VFS tries freeing all of the cache -+on the target filesystem. 
-+ -+When you open a directory, aufs will open several directories -+internally. -+It means you may reach the limit of the number of file descriptor. -+And when the lower directory cannot be opened, aufs will close all the -+opened upper directories and return an error. -+ -+The sub-mount under the branch -+of local filesystem -+is ignored. -+For example, if you have mount another filesystem on -+/branch/another/mntpnt, the files under \[oq]mntpnt\[cq] will be ignored by aufs. -+It is recommended to mount the sub-mount under the mounted aufs. -+For example, -+ -+.nf -+# sudo mount /dev/sdaXX /ro_branch -+# d=another/mntpnt -+# sudo mount /dev/sdbXX /ro_branch/$d -+# mkdir -p /rw_branch/$d -+# sudo mount -t aufs -o br:/rw_branch:/ro_branch none /aufs -+# sudo mount -t aufs -o br:/rw_branch/${d}:/ro_branch/${d} none /aufs/another/$d -+.fi -+ -+There are several characters which are not allowed to use in a branch -+directory path and xino filename. See detail in Branch Syntax and Mount -+Option. -+ -+The file-lock which means fcntl(2) with F_SETLK, F_SETLKW or F_GETLK, flock(2) -+and lockf(3), is applied to virtual aufs file only, not to the file on a -+branch. It means you can break the lock by accessing a branch directly. -+TODO: check \[oq]security\[cq] to hook locks, as inotify does. -+ -+The I/O to the named pipe or local socket are not handled by aufs, even -+if it exists in aufs. After the reader and the writer established their -+connection if the pipe/socket are copied-up, they keep using the old one -+instead of the copied-up one. -+ -+The fsync(2) and fdatasync(2) systemcalls return 0 which means success, even -+if the given file descriptor is not opened for writing. -+I am afraid this behaviour may violate some standards. Checking the -+behaviour of fsync(2) on ext2, aufs decided to return success. -+ -+If you want to use disk-quota, you should set it up to your writable -+branch since aufs does not have its own block device. 
-+ -+When your aufs is the root directory of your system, and your system -+tells you some of the filesystem were not unmounted cleanly, try these -+procedure when you shutdown your system. -+.nf -+# mount -no remount,ro / -+# for i in $writable_branches -+# do mount -no remount,ro $i -+# done -+.fi -+If your xino file is on a hard drive, you also need to specify -+\[oq]noxino\[cq] option or \[oq]xino=/your/tmpfs/xino\[cq] at remounting root -+directory. -+ -+To rename(2) directory may return EXDEV even if both of src and tgt -+are on the same aufs. When the rename-src dir exists on multiple -+branches and the lower dir has child(ren), aufs has to copyup all his -+children. It can be recursive copyup. Current aufs does not support -+such huge copyup operation at one time in kernel space, instead -+produces a warning and returns EXDEV. -+Generally, mv(1) detects this error and tries mkdir(2) and -+rename(2) or copy/unlink recursively. So the result is harmless. -+If your application which issues rename(2) for a directory does not -+support EXDEV, it will not work on aufs. -+Also this specification is applied to the case when the src directroy -+exists on the lower readonly branch and it has child(ren). -+ -+If a sudden accident such like a power failure happens during aufs is -+performing, and regular fsck for branch filesystems is completed after -+the disaster, you need to extra fsck for aufs writable branches. It is -+necessary to check whether the whiteout remains incorrectly or not, -+eg. the real filename and the whiteout for it under the same parent -+directory. If such whiteout remains, aufs cannot handle the file -+correctly. -+To check the consistency from the aufs\[aq] point of view, you can use a -+simple shell script called /sbin/auchk. Its purpose is a fsck tool for -+aufs, and it checks the illegal whiteout, the remained -+pseudo-links and the remained aufs-temp files. If they are found, the -+utility reports you and asks whether to delete or not. 
-+It is recommended to execute /sbin/auchk for every writable branch -+filesystem before mouting aufs if the system experienced crash. -+ -+ -+.\" ---------------------------------------------------------------------- -+.SH EXAMPLES -+The mount options are interpreted from left to right at remount-time. -+These examples -+shows how the options are handled. (assuming /sbin/mount.aufs was -+installed) -+ -+.nf -+# mount -v -t aufs br:/day0:/base none /u -+none on /u type aufs (rw,xino=/day0/.aufs.xino,br:/day0=rw:/base=ro) -+# mount -v -o remount,\\ -+ prepend:/day1,\\ -+ xino=/day1/xino,\\ -+ mod:/day0=ro,\\ -+ del:/day0 \\ -+ /u -+none on /u type aufs (rw,xino=/day1/xino,br:/day1=rw:/base=ro) -+.fi -+ -+.nf -+# mount -t aufs br:/rw none /u -+# mount -o remount,append:/ro /u -+different uid/gid/permission, /ro -+# mount -o remount,del:/ro /u -+# mount -o remount,nowarn_perm,append:/ro /u -+# -+(there is no warning) -+.fi -+ -+.\" If you want to expand your filesystem size, aufs may help you by -+.\" adding an writable branch. Since aufs supports multiple writable -+.\" branches, the old writable branch can be being writable, if you want. -+.\" In this example, any modifications to the files under /ro branch will -+.\" be copied-up to /new, but modifications to the files under /rw branch -+.\" will not. -+.\" And the next example shows the modifications to the files under /rw branch -+.\" will be copied-up to /new/a. -+.\" -+.\" Todo: test multiple writable branches policy. cpup=nearest, cpup=exist_parent. 
-+.\" -+.\" .nf -+.\" # mount -v -t aufs br:/rw:/ro none /u -+.\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/ro=ro) -+.\" # mkfs /new -+.\" # mount -v -o remount,add:1:/new=rw /u -+.\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/new=rw:/ro=ro) -+.\" .fi -+.\" -+.\" .nf -+.\" # mount -v -t aufs br:/rw:/ro none /u -+.\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/rw=rw:/ro=ro) -+.\" # mkfs /new -+.\" # mkdir /new/a new/b -+.\" # mount -v -o remount,add:1:/new/b=rw,prepend:/new/a,mod:/rw=ro /u -+.\" none on /u type aufs (rw,xino=/rw/.aufs.xino,br:/new/a=rw:/rw=ro:/new/b=rw:/ro=ro) -+.\" .fi -+ -+When you use aufs as root filesystem, it is recommended to consider to -+exclude some directories. For example, /tmp and /var/log are not need -+to stack in many cases. They do not usually need to copyup or to whiteout. -+Also the swapfile on aufs (a regular file, not a block device) is not -+supported. -+In order to exclude the specific dir from aufs, try bind mounting. -+ -+And there is a good sample which is for network booted diskless machines. See -+sample/ in detail. -+ -+.\" ---------------------------------------------------------------------- -+.SH DIAGNOSTICS -+When you add a branch to your union, aufs may warn you about the -+privilege or security of the branch, which is the permission bits, -+owner and group of the top directory of the branch. -+For example, when your upper writable branch has a world writable top -+directory, -+a malicious user can create any files on the writable branch directly, -+like copyup and modify manually. I am afraid it can be a security -+issue. -+ -+When you mount or remount your union without \-o ro common mount option -+and without writable branch, aufs will warn you that the first branch -+should be writable. -+ -+.\" It is discouraged to set both of \[oq]udba\[cq] and \[oq]noxino\[cq] mount options. 
In -+.\" this case the inode number under aufs will always be changed and may -+.\" reach the end of inode number which is a maximum of unsigned long. If -+.\" the inode number reaches the end, aufs will return EIO repeatedly. -+ -+When you set udba other than inotify and change something on your -+branch filesystem directly, later aufs may detect some mismatches to -+its cache. If it is a critical mismatch, aufs returns EIO. -+ -+When an error occurs in aufs, aufs prints the kernel message with -+\[oq]errno.\[cq] The priority of the message (log level) is ERR or WARNING which -+depends upon the message itself. -+You can convert the \[oq]errno\[cq] into the error message by perror(3), -+strerror(3) or something. -+For example, the \[oq]errno\[cq] in the message \[oq]I/O Error, write failed (\-28)\[cq] -+is 28 which means ENOSPC or \[oq]No space left on device.\[cq] -+ -+When CONFIG_AUFS_BR_RAMFS is enabled, you can specify ramfs as an aufs -+branch. Since ramfs is simple, it does not set the maximum link count -+originally. In aufs, it is very dangerous, particularly for -+whiteouts. Finally aufs sets the maximum link count for ramfs. The -+value is 32000 which is borrowed from ext2. -+ -+ -+.\" .SH Current Limitation -+. -+.\" ---------------------------------------------------------------------- -+.\" SYNOPSIS -+.\" briefly describes the command or function\[aq]s interface. For commands, this -+.\" shows the syntax of the command and its arguments (including options); bold- -+.\" face is used for as-is text and italics are used to indicate replaceable -+.\" arguments. Brackets ([]) surround optional arguments, vertical bars (|) sep- -+.\" arate choices, and ellipses (...) can be repeated. For functions, it shows -+.\" any required data declarations or #include directives, followed by the func- -+.\" tion declaration. -+. -+.\" DESCRIPTION -+.\" gives an explanation of what the command, function, or format does.
Discuss -+.\" how it interacts with files and standard input, and what it produces on -+.\" standard output or standard error. Omit internals and implementation -+.\" details unless they\[aq]re critical for understanding the interface. Describe -+.\" the usual case; for information on options use the OPTIONS section. If -+.\" there is some kind of input grammar or complex set of subcommands, consider -+.\" describing them in a separate USAGE section (and just place an overview in -+.\" the DESCRIPTION section). -+. -+.\" RETURN VALUE -+.\" gives a list of the values the library routine will return to the caller and -+.\" the conditions that cause these values to be returned. -+. -+.\" EXIT STATUS -+.\" lists the possible exit status values or a program and the conditions that -+.\" cause these values to be returned. -+. -+.\" USAGE -+.\" describes the grammar of any sublanguage this implements. -+. -+.\" FILES -+.\" lists the files the program or function uses, such as configuration files, -+.\" startup files, and files the program directly operates on. Give the full -+.\" pathname of these files, and use the installation process to modify the -+.\" directory part to match user preferences. For many programs, the default -+.\" installation location is in /usr/local, so your base manual page should use -+.\" /usr/local as the base. -+. -+.\" ENVIRONMENT -+.\" lists all environment variables that affect your program or function and how -+.\" they affect it. -+. -+.\" SECURITY -+.\" discusses security issues and implications. Warn about configurations or -+.\" environments that should be avoided, commands that may have security impli- -+.\" cations, and so on, especially if they aren\[aq]t obvious. Discussing security -+.\" in a separate section isn\[aq]t necessary; if it\[aq]s easier to understand, place -+.\" security information in the other sections (such as the DESCRIPTION or USAGE -+.\" section). However, please include security information somewhere! -+. 
-+.\" CONFORMING TO -+.\" describes any standards or conventions this implements. -+. -+.\" NOTES -+.\" provides miscellaneous notes. -+. -+.\" BUGS -+.\" lists limitations, known defects or inconveniences, and other questionable -+.\" activities. -+ -+.SH COPYRIGHT -+Copyright \(co 2005\-2009 Junjiro R. Okajima -+ -+.SH AUTHOR -+Junjiro R. Okajima -+ -+.\" SEE ALSO -+.\" lists related man pages in alphabetical order, possibly followed by other -+.\" related pages or documents. Conventionally this is the last section. -diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt -new file mode 100644 -index 0000000..4955fdc ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/01intro.txt -@@ -0,0 +1,128 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+Introduction -+---------------------------------------- -+ -+aufs [ei ju: ef es] | [a u f s] -+1. abbrev. for "advanced multi-layered unification filesystem". -+2. abbrev. for "another unionfs". -+3. abbrev. for "auf das" in German which means "on the" in English. -+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). -+ But "Filesystem aufs Filesystem" is hard to understand. -+ -+AUFS is a filesystem with features: -+- multi layered stackable unification filesystem, the member directory -+ is called as a branch. -+- branch permission and attribute, 'readonly', 'real-readonly', -+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their -+ combination. -+- internal "file copy-on-write". -+- logical deletion, whiteout. -+- dynamic branch manipulation, adding, deleting and changing permission. -+- allow bypassing aufs, user's direct branch access. 
-+- external inode number translation table and bitmap which maintains the -+ persistent aufs inode number. -+- seekable directory, including NFS readdir. -+- file mapping, mmap and sharing pages. -+- pseudo-link, hardlink over branches. -+- loopback mounted filesystem as a branch. -+- several policies to select one among multiple writable branches. -+- revert a single systemcall when an error occurs in aufs. -+- and more... -+ -+ -+Multi Layered Stackable Unification Filesystem -+---------------------------------------------------------------------- -+Most people already knows what it is. -+It is a filesystem which unifies several directories and provides a -+merged single directory. When users access a file, the access will be -+passed/re-directed/converted (sorry, I am not sure which English word is -+correct) to the real file on the member filesystem. The member -+filesystem is called 'lower filesystem' or 'branch' and has a mode -+'readonly' and 'readwrite.' And the deletion for a file on the lower -+readonly branch is handled by creating 'whiteout' on the upper writable -+branch. -+ -+On LKML, there have been discussions about UnionMount (Jan Blunck and -+Bharata B Rao) and Unionfs (Erez Zadok). They took different approaches -+to implement the merged-view. -+The former tries putting it into VFS, and the latter implements as a -+separate filesystem. -+(If I misunderstand about these implementations, please let me know and -+I shall correct it. Because it is a long time ago when I read their -+source files last time). -+UnionMount's approach will be able to small, but may be hard to share -+branches between several UnionMount since the whiteout in it is -+implemented in the inode on branch filesystem and always -+shared. According to Bharata's post, readdir does not seems to be -+finished yet. -+Unionfs has a longer history. When I started implementing a stacking filesystem -+(Aug 2005), it already existed. 
It has virtual super_block, inode, -+dentry and file objects and they have an array pointing lower same kind -+objects. After contributing many patches for Unionfs, I re-started my -+project AUFS (Jun 2006). -+ -+In AUFS, the structure of filesystem resembles Unionfs, but I -+implemented my own ideas, approaches and enhancements and it became -+totally different one. -+ -+ -+Several characters/aspects of aufs -+---------------------------------------------------------------------- -+ -+Aufs has several characters or aspects. -+1. a filesystem, callee of VFS helper -+2. sub-VFS, caller of VFS helper for branches -+3. a virtual filesystem which maintains persistent inode number -+4. reader/writer of files on branches such like an application -+ -+1. Callee of VFS Helper -+As an ordinary linux filesystem, aufs is a callee of VFS. For instance, -+unlink(2) from an application reaches sys_unlink() kernel function and -+then vfs_unlink() is called. vfs_unlink() is one of the VFS helpers and it -+calls filesystem specific unlink operation. Actually aufs implements the -+unlink operation but it behaves like a redirector. -+ -+2. Caller of VFS Helper for Branches -+aufs_unlink() passes the unlink request to the branch filesystem as if -+it were called from VFS. So the called unlink operation of the branch -+filesystem acts as usual. As a caller of VFS helper, aufs should handle -+every necessary pre/post operation for the branch filesystem. -+- acquire the lock for the parent dir on a branch -+- lookup in a branch -+- revalidate dentry on a branch -+- mnt_want_write() for a branch -+- vfs_unlink() for a branch -+- mnt_drop_write() for a branch -+- release the lock on a branch -+ -+3. Persistent Inode Number -+One of the most important issues for a filesystem is to maintain inode -+numbers. This is particularly important to support exporting a -+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a -+backend block device for its own.
But some storage is necessary to -+maintain inode number. It may be a large space and may not suit to keep -+in memory. Aufs rents some space from its first writable branch -+filesystem (by default) and creates file(s) on it. These files are -+created by aufs internally and removed soon (currently) keeping opened. -+Note: Because these files are removed, they are totally gone after -+ unmounting aufs. It means the inode numbers are not persistent -+ across unmount or reboot. I have a plan to make them really -+ persistent which will be important for aufs on NFS server. -+ -+4. Read/Write Files Internally (copy-on-write) -+Because a branch can be readonly, when you write a file on it, aufs will -+"copy-up" it to the upper writable branch internally. And then write the -+originally requested thing to the file. Generally kernel doesn't -+open/read/write file actively. In aufs, even a single write may cause a -+internal "file copy". This behaviour is very similar to cp(1) command. -+ -+Some people may think it is better to pass such work to user space -+helper, instead of doing in kernel space. Actually I am still thinking -+about it. But currently I have implemented it in kernel space. -diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt -new file mode 100644 -index 0000000..6db666b ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/02struct.txt -@@ -0,0 +1,205 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. 
-+ -+Basic Aufs Internal Structure -+ -+Superblock/Inode/Dentry/File Objects -+---------------------------------------------------------------------- -+As like an ordinary filesystem, aufs has its own -+superblock/inode/dentry/file objects. All these objects have a -+dynamically allocated array and store the same kind of pointers to the -+lower filesystem, branch. -+For example, when you build a union with one readwrite branch and one -+readonly, mounted /au, /rw and /ro respectively. -+- /au = /rw + /ro -+- /ro/fileA exists but /rw/fileA -+ -+Aufs lookup operation finds /ro/fileA and gets dentry for that. These -+pointers are stored in a aufs dentry. The array in aufs dentry will be, -+- [0] = NULL -+- [1] = /ro/fileA -+ -+This style of an array is essentially same to the aufs -+superblock/inode/dentry/file objects. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, these objects has its own generation. When branches are -+changed, the generation in aufs superblock is incremented. And a -+generation in other object are compared when it is accessed. -+When a generation in other objects are obsoleted, aufs refreshes the -+internal array. -+ -+ -+Superblock -+---------------------------------------------------------------------- -+Additionally aufs superblock has some data for policies to select one -+among multiple writable branches, XIB files, pseudo-links and kobject. -+See below in detail. -+About the policies which supports copy-down a directory, see policy.txt -+too. -+ -+ -+Branch and XINO(External Inode Number Translation Table) -+---------------------------------------------------------------------- -+Every branch has its own xino (external inode number translation table) -+file. The xino file is created and unlinked by aufs internally. When two -+members of a union exist on the same filesystem, they share the single -+xino file. 
-+The struct of a xino file is simple, just a sequence of aufs inode -+numbers which is indexed by the lower inode number. -+In the above sample, assume the inode number of /ro/fileA is i111 and -+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as -+4(8) bytes at 111 * 4(8) bytes offset in the xino file. -+ -+When the inode numbers are not contiguous, the xino file will be sparse -+which has a hole in it and doesn't consume as much disk space as it -+might appear. If your branch filesystem consumes disk space for such -+holes, then you should specify 'xino=' option at mounting aufs. -+ -+Also a writable branch has three kinds of "whiteout bases". All these -+are existed when the branch is joined to aufs and the names are -+whiteout-ed doubly, so that users will never see their names in aufs -+hierarchy. -+1. a regular file which will be linked to all whiteouts. -+2. a directory to store a pseudo-link. -+3. a directory to store an "orphan-ed" file temporary. -+ -+1. Whiteout Base -+ When you remove a file on a readonly branch, aufs handles it as a -+ logical deletion and creates a whiteout on the upper writable branch -+ as a hardlink of this file in order not to consume inode on the -+ writable branch. -+2. Pseudo-link Dir -+ See below, Pseudo-link. -+3. Step-Parent Dir -+ When "fileC" exists on the lower readonly branch only and it is -+ opened and removed with its parent dir, and then user writes -+ something into it, then aufs copies-up fileC to this -+ directory. Because there is no other dir to store fileC. After -+ creating a file under this dir, the file is unlinked. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, a branch has its own id. When the branch order changes, aufs -+finds the new index by searching the branch id. 
-+ -+ -+Pseudo-link -+---------------------------------------------------------------------- -+Assume "fileA" exists on the lower readonly branch only and it is -+hardlinked to "fileB" on the branch. When you write something to fileA, -+aufs copies-up it to the upper writable branch. Additionally aufs -+creates a hardlink under the Pseudo-link Directory of the writable -+branch. The inode of a pseudo-link is kept in aufs super_block as a -+simple list. If fileB is read after unlinking fileA, aufs returns -+filedata from the pseudo-link instead of the lower readonly -+branch. Because the pseudo-link is based upon the inode, to keep the -+inode number by xino (see above) is important. -+ -+All the hardlinks under the Pseudo-link Directory of the writable branch -+should be restored in a proper location later. Aufs provides a utility -+to do this. The userspace helpers executed at remounting and unmounting -+aufs by default. -+ -+ -+XIB(external inode number bitmap) -+---------------------------------------------------------------------- -+Addition to the xino file per a branch, aufs has an external inode number -+bitmap in a superblock object. It is also a file such like a xino file. -+It is a simple bitmap to mark whether the aufs inode number is in-use or -+not. -+To reduce the file I/O, aufs prepares a single memory page to cache xib. -+ -+Aufs implements a feature to truncate/refresh both of xino and xib to -+reduce the number of consumed disk blocks for these files. -+ -+ -+Virtual or Vertical Dir -+---------------------------------------------------------------------- -+In order to support multiple layers (branches), aufs readdir operation -+constructs a virtual dir block on memory. For readdir, aufs calls -+vfs_readdir() internally for each dir on branches, merges their entries -+with eliminating the whiteout-ed ones, and sets it to file (dir) -+object. So the file object has its entry list until it is closed. 
The -+entry list will be updated when the file position is zero and becomes -+old. This decision is made in aufs automatically. -+ -+The dynamically allocated memory block for the name of entries has a -+unit of 512 bytes (by default) and stores the names contiguously (no -+padding). Another block for each entry is handled by kmem_cache too. -+During building dir blocks, aufs creates hash list and judging whether -+the entry is whiteouted by its upper branch or already listed. -+ -+Some people may call it can be a security hole or invite DoS attack -+since the opened and once readdir-ed dir (file object) holds its entry -+list and becomes a pressure for system memory. But I'd say it is similar -+to files under /proc or /sys. The virtual files in them also holds a -+memory page (generally) while they are opened. When an idea to reduce -+memory for them is introduced, it will be applied to aufs too. -+ -+ -+Workqueue -+---------------------------------------------------------------------- -+Aufs sometimes requires privilege access to a branch. For instance, -+in copy-up/down operation. When a user process is going to make changes -+to a file which exists in the lower readonly branch only, and the mode -+of one of ancestor directories may not be writable by a user -+process. Here aufs copy-up the file with its ancestors and they may -+require privilege to set its owner/group/mode/etc. -+This is a typical case of a application character of aufs (see -+Introduction). -+ -+Aufs uses workqueue synchronously for this case. It creates its own -+workqueue. The workqueue is a kernel thread and has privilege. Aufs -+passes the request to call mkdir or write (for example), and wait for -+its completion. This approach solves a problem of a signal handler -+simply. 
-+If aufs didn't adopt the workqueue and changed the privilege of the -+process, and if the mkdir/write call arises SIGXFSZ or other signal, -+then the user process might gain a privilege or the generated core file -+was owned by a superuser. But I have a plan to switch to a new -+credential approach which will be introduced in linux-2.6.29. -+ -+Also aufs uses the system global workqueue ("events" kernel thread) too -+for asynchronous tasks, such like handling inotify, re-creating a -+whiteout base and etc. This is unrelated to a privilege. -+Most of aufs operation tries acquiring a rw_semaphore for aufs -+superblock at the beginning, at the same time waits for the completion -+of all queued asynchronous tasks. -+ -+ -+Whiteout -+---------------------------------------------------------------------- -+The whiteout in aufs is very similar to Unionfs's. That is represented -+by its filename. UnionMount takes an approach of a file mode, but I am -+afraid several utilities (find(1) or something) will have to support it. -+ -+Basically the whiteout represents "logical deletion" which stops aufs to -+lookup further, but also it represents "dir is opaque" which also stop -+lookup. -+ -+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. -+In order to make several functions in a single systemcall to be -+revertible, aufs adopts an approach to rename a directory to a temporary -+unique whiteouted name. -+For example, in rename(2) dir where the target dir already existed, aufs -+renames the target dir to a temporary unique whiteouted name before the -+actual rename on a branch and then handles other actions (make it opaque, -+update the attributes, etc). If an error happens in these actions, aufs -+simply renames the whiteouted name back and returns an error. If all are -+succeeded, aufs registers a function to remove the whiteouted unique -+temporary name completely and asynchronously to the system global -+workqueue. 
-+ -+ -+Copy-up -+---------------------------------------------------------------------- -+It is a well-known feature or concept. -+When user modifies a file on a readonly branch, aufs operate "copy-up" -+internally and makes change to the new file on the upper writable branch. -+When the trigger systemcall does not update the timestamps of the parent -+dir, aufs reverts it after copy-up. -diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt -new file mode 100644 -index 0000000..36ddac7 ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/03lookup.txt -@@ -0,0 +1,95 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+Lookup in a Branch -+---------------------------------------------------------------------- -+Since aufs has a character of sub-VFS (see Introduction), it operates -+lookup for branches as VFS does. It may be a heavy work. Generally -+speaking struct nameidata is a bigger structure and includes many -+information. But almost all lookup operation in aufs is the simplest -+case, ie. lookup only an entry directly connected to its parent. Digging -+down the directory hierarchy is unnecessary. -+ -+VFS has a function lookup_one_len() for that use, but it is not usable -+for a branch filesystem which requires struct nameidata. So aufs -+implements a simple lookup wrapper function. When a branch filesystem -+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds -+a simplest nameidata and calls lookup_hash(). -+Here aufs applies "a principle in NFSD", ie. if the filesystem supports -+NFS-export, then it has to support NULL as a nameidata parameter for -+->create(), ->lookup() and ->d_revalidate(). 
So the lookup wrapper in -+aufs tests if ->s_export_op in the branch is NULL or not. -+ -+When a branch is a remote filesystem, aufs trusts its ->d_revalidate(). -+For d_revalidate, aufs implements three levels of revalidate tests. See -+"Revalidate Dentry and UDBA" in detail. -+ -+ -+Loopback Mount -+---------------------------------------------------------------------- -+Basically aufs supports any type of filesystem and block device for a -+branch (actually there are some exceptions). But it is prohibited to add -+a loopback mounted one whose backend file exists in a filesystem which is -+already added to aufs. The reason is to protect aufs from a recursive -+lookup. If it was allowed, the aufs lookup operation might re-enter a -+lookup for the loopback mounted branch in the same context, and will -+cause a deadlock. -+ -+ -+Revalidate Dentry and UDBA (User's Direct Branch Access) -+---------------------------------------------------------------------- -+Generally VFS helpers re-validate a dentry as a part of lookup. -+0. digging down the directory hierarchy. -+1. lock the parent dir by its i_mutex. -+2. lookup the final (child) entry. -+3. revalidate it. -+4. call the actual operation (create, unlink, etc.) -+5. unlock the parent dir -+ -+If the filesystem implements its ->d_revalidate() (step 3), then it is -+called. Actually aufs implements it and checks the dentry on a branch is -+still valid. -+But it is not enough. Because aufs has to release the lock for the -+parent dir on a branch at the end of ->lookup() (step 2) and -+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still -+held by VFS. -+If the file on a branch is changed directly, eg. bypassing aufs, after -+aufs released the lock, then the subsequent operation may cause -+something unpleasant result. -+ -+This situation is a result of VFS architecture, ->lookup() and -+->d_revalidate() is separated. But I never say it is wrong. It is a good -+design from VFS's point of view. 
It is just not suitable for sub-VFS -+character in aufs. -+ -+Aufs supports such case by three level of revalidation which is -+selectable by user. -+1. Simple Revalidate -+ Addition to the native flow in VFS's, confirm the child-parent -+ relationship on the branch just after locking the parent dir on the -+ branch in the "actual operation" (step 4). When this validation -+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still -+ checks the validation of the dentry on branches. -+2. Monitor Changes Internally by Inotify -+ Addition to above, in the "actual operation" (step 4) aufs re-lookup -+ the dentry on the branch, and returns EBUSY if it finds different -+ dentry. -+ Additionally, aufs sets the inotify watch for every dir on branches -+ during it is in cache. When the event is notified, aufs registers a -+ function to kernel 'events' thread by schedule_work(). And the -+ function sets some special status to the cached aufs dentry and inode -+ private data. If they are not cached, then aufs has nothing to -+ do. When the same file is accessed through aufs (step 0-3) later, -+ aufs will detect the status and refresh all necessary data. -+ In this mode, aufs has to ignore the event which is fired by aufs -+ itself. -+3. No Extra Validation -+ This is the simplest test and doesn't add any additional revalidation -+ test, and skip therevalidatin in step 4. It is useful and improves -+ aufs performance when system surely hide the aufs branches from user, -+ by over-mounting something (or another method). -diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt -new file mode 100644 -index 0000000..78432c5 ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/04branch.txt -@@ -0,0 +1,67 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. 
Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+Branch Manipulation -+ -+Since aufs supports dynamic branch manipulation, ie. add/remove a branch -+and changing its permission/attribute, there are a lot of works to do. -+ -+ -+Add a Branch -+---------------------------------------------------------------------- -+o Confirm the adding dir exists outside of aufs, including loopback -+ mount. -+- and other various attributes... -+o Initialize the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o Check the owner/group/mode of the directory -+ When the owner/group/mode of the adding directory differs from the -+ existing branch, aufs issues a warning because it may impose a -+ security risk. -+ For example, when a upper writable branch has a world writable empty -+ top directory, a malicious user can create any files on the writable -+ branch directly, like copy-up and modify manually. If something like -+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper -+ writable branch, and the writable branch is world-writable, then a -+ malicious guy may create /etc/passwd on the writable branch directly -+ and the infected file will be valid in aufs. -+ I am afraid it can be a security issue, but nothing to do except -+ producing a warning. -+ -+ -+Delete a Branch -+---------------------------------------------------------------------- -+o Confirm the deleting branch is not busy -+ To be general, there is one merit to adopt "remount" interface to -+ manipulate branches. It is to discard caches. At deleting a branch, -+ aufs checks the still cached (and connected) dentries and inodes. If -+ there are any, then they are all in-use. 
An inode without its -+ corresponding dentry can be alive alone (for example, inotify case). -+ -+ For the cached one, aufs checks whether the same named entry exists on -+ other branches. -+ If the cached one is a directory, because aufs provides a merged view -+ to users, as long as one dir is left on any branch aufs can show the -+ dir to users. In this case, the branch can be removed from aufs. -+ Otherwise aufs rejects deleting the branch. -+ -+ If any file on the deleting branch is opened by aufs, then aufs -+ rejects deleting. -+ -+ -+Modify the Permission of a Branch -+---------------------------------------------------------------------- -+o Re-initialize or remove the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o rw --> ro: Confirm the modifying branch is not busy -+ Aufs rejects the request if any of these conditions are true. -+ - a file on the branch is mmap-ed. -+ - a regular file on the branch is opened for write and there is no -+ same named entry on the upper branch. -diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt -new file mode 100644 -index 0000000..d9720ca ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt -@@ -0,0 +1,57 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+ -+Policies to Select One among Multiple Writable Branches -+---------------------------------------------------------------------- -+When the number of writable branch is more than one, aufs has to decide -+the target branch for file creation or copy-up. By default, the highest -+writable branch which has the parent (or ancestor) dir of the target -+file is chosen (top-down-parent policy). 
-+By user's request, aufs implements some other policies to select the -+writable branch, for file creation two policies, round-robin and -+most-free-space policies. For copy-up three policies, top-down-parent, -+bottom-up-parent and bottom-up policies. -+ -+As expected, the round-robin policy selects the branch in circular. When -+you have two writable branches and creates 10 new files, 5 files will be -+created for each branch. mkdir(2) systemcall is an exception. When you -+create 10 new directories, all will be created on the same branch. -+And the most-free-space policy selects the one which has most free -+space among the writable branches. The amount of free space will be -+checked by aufs internally, and users can specify its time interval. -+ -+The policies for copy-up is more simple, -+top-down-parent is equivalent to the same named on in create policy, -+bottom-up-parent selects the writable branch where the parent dir -+exists and the nearest upper one from the copyup-source, -+bottom-up selects the nearest upper writable branch from the -+copyup-source, regardless the existence of the parent dir. -+ -+There are some rules or exceptions to apply these policies. -+- If there is a readonly branch above the policy-selected branch and -+ the parent dir is marked as opaque (a variation of whiteout), or the -+ target (creating) file is whiteout-ed on the upper readonly branch, -+ then the result of the policy is ignored and the target file will be -+ created on the nearest upper writable branch than the readonly branch. -+- If there is a writable branch above the policy-selected branch and -+ the parent dir is marked as opaque or the target file is whiteouted -+ on the branch, then the result of the policy is ignored and the target -+ file will be created on the highest one among the upper writable -+ branches who has diropq or whiteout. In case of whiteout, aufs removes -+ it as usual. -+- link(2) and rename(2) systemcalls are exceptions in every policy. 
-+ They try selecting the branch where the source exists as possible -+ since copyup a large file will take long time. If it can't be, -+ ie. the branch where the source exists is readonly, then they will -+ follow the copyup policy. -+- There is an exception for rename(2) when the target exists. -+ If the rename target exists, aufs compares the index of the branches -+ where the source and the target exists and selects the higher -+ one. If the selected branch is readonly, then aufs follows the -+ copyup policy. -diff --git a/Documentation/filesystems/aufs/design/06fmode_exec.txt b/Documentation/filesystems/aufs/design/06fmode_exec.txt -new file mode 100644 -index 0000000..b172cd1 ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/06fmode_exec.txt -@@ -0,0 +1,24 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+FMODE_EXEC and deny_write() -+---------------------------------------------------------------------- -+Generally Unix prevents an executing file from writing its filedata. -+In linux it is implemented by deny_write() and allow_write(). -+When a file is executed by exec() family, open_exec() (and sys_uselib()) -+they opens the file and calls deny_write(). If the file is aufs's virtual -+one, it has no meaning. The file which deny_write() is really necessary -+is the file on a branch. But the FMODE_EXEC flag is not passed to -+->open() operation. So aufs adopt a dirty trick. -+ -+- in order to get FMODE_EXEC, aufs ->lookup() and ->d_revalidate() set -+ nd->intent.open.file->private_data to nd->intent.open.flags temporary. -+- in aufs ->open(), when FMODE_EXEC is set in file->private_data, it -+ calls deny_write() for the file on a branch. 
-+- when the aufs file is released, allow_write() for the file on a branch -+ is called. -diff --git a/Documentation/filesystems/aufs/design/07mmap.txt b/Documentation/filesystems/aufs/design/07mmap.txt -new file mode 100644 -index 0000000..d751c42 ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/07mmap.txt -@@ -0,0 +1,44 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+mmap(2) -- File Memory Mapping -+---------------------------------------------------------------------- -+In aufs, the file-mapped pages are shared between the file on a branch -+and the virtual one in aufs by overriding vm_operation, particularly -+->fault(). -+ -+In aufs_mmap(), -+- get and store vm_ops of the real file on a branch. -+- map the file of aufs by generic_file_mmap() and set aufs's vm -+ operations. -+ -+In aufs_fault(), -+- get the file of aufs from the passed vma, sleep if needed. -+- get the real file on a branch from the aufs file. -+- a race may happen. for instance a multithreaded library. so some lock -+ is implemented. -+- call ->fault() in the previously stored vm_ops with setting the -+ real file on a branch to vm_file. -+- restore vm_file and wake_up if someone else got sleep. -+ -+When a branch is added to or deleted from aufs, the same-named file may -+unveil and its contents will be replaced by the new one when a process -+read(2) through previously opened file. -+(Some users may not want to refresh the filedata. For such users, I -+have a plan to implement a mount option 'refrof' which decides to -+refresh the opened files or not. See plan.txt too.) 
-+In this case, an already mapped file will not be updated since the -+contents are a part of a process already and it should not be changed by -+aufs branch manipulation. (Even if MAP_SHARED is specified, currently). -+Of course, in case of the deleting branch has a busy file, it cannot be -+deleted from the union. -+ -+In Unionfs, it took an approach which the memory pages mapped to -+filedata are copied from the lower (real) file into the Unionfs's -+virtual one and handles it by address_space operations. Recently Unionfs -+changed it to this approach which aufs adopted since Jul 2006. -diff --git a/Documentation/filesystems/aufs/design/08export.txt b/Documentation/filesystems/aufs/design/08export.txt -new file mode 100644 -index 0000000..8a7b9e5 ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/08export.txt +diff -urN linux-2.6.30.org/fs/aufs/aufs.h linux-2.6.30/fs/aufs/aufs.h +--- linux-2.6.30.org/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/aufs.h 2009-07-21 08:54:18.000000000 +0200 @@ -0,0 +1,51 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+ -+Export Aufs via NFS -+---------------------------------------------------------------------- -+Here is an approach. -+- like xino/xib, add a new file 'xigen' which stores aufs inode -+ generation. -+- iget_locked(): initialize aufs inode generation for a new inode, and -+ store it in xigen file. -+- destroy_inode(): increment aufs inode generation and store it in xigen -+ file. it is necessary even if it is not unlinked, because any data of -+ inode may be changed by UDBA. -+- encode_fh(): for a root dir, simply return FILEID_ROOT. 
otherwise -+ build file handle by -+ + branch id (4 bytes) -+ + superblock generation (4 bytes) -+ + inode number (4 or 8 bytes) -+ + parent dir inode number (4 or 8 bytes) -+ + inode generation (4 bytes)) -+ + return value of exportfs_encode_fh() for the parent on a branch (4 -+ bytes) -+ + file handle for a branch (by exportfs_encode_fh()) -+- fh_to_dentry(): -+ + find the index of a branch from its id in handle, and check it is -+ still exist in aufs. -+ + 1st level: get the inode number from handle and search it in cache. -+ + 2nd level: if not found, get the parent inode number from handle and -+ search it in cache. and then open the parent dir, find the matching -+ inode number by vfs_readdir() and get its name, and call -+ lookup_one_len() for the target dentry. -+ + 3rd level: if the parent dir is not cached, call -+ exportfs_decode_fh() for a branch and get the parent on a branch, -+ build a pathname of it, convert it a pathname in aufs, call -+ path_lookup(). now aufs gets a parent dir dentry, then handle it as -+ the 2nd level. -+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount -+ for every branch, but not itself. to get this, (currently) aufs -+ searches in current->nsproxy->mnt_ns list. it may not be a good -+ idea, but I didn't get other approach. -+ + test the generation of the gotten inode. -+- every inode operation: they may get EBUSY due to UDBA. in this case, -+ convert it into ESTALE for NFSD. -+- readdir(): call lockdep_on/off() because filldir in NFSD calls -+ lookup_one_len(), vfs_getattr(), encode_fh() and others. -diff --git a/Documentation/filesystems/aufs/design/99plan.txt b/Documentation/filesystems/aufs/design/99plan.txt -new file mode 100644 -index 0000000..3167d0e ---- /dev/null -+++ b/Documentation/filesystems/aufs/design/99plan.txt -@@ -0,0 +1,125 @@ -+ -+# Copyright (C) 2005-2009 Junjiro R. 
Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+ -+Plan -+ -+Restoring some features which was implemented in aufs1. -+They were dropped in aufs2 in order to make source files simpler and -+easier to be reviewed. -+ -+ -+Test Only the Highest One for the Directory Permission (dirperm1 option) -+---------------------------------------------------------------------- -+Let's try case study. -+- aufs has two branches, upper readwrite and lower readonly. -+ /au = /rw + /ro -+- "dirA" exists under /ro, but /rw. and its mode is 0700. -+- user invoked "chmod a+rx /au/dirA" -+- then "dirA" becomes world readable? -+ -+In this case, /ro/dirA is still 0700 since it exists in readonly branch, -+or it may be a natively readonly filesystem. If aufs respects the lower -+branch, it should not respond readdir request from other users. But user -+allowed it by chmod. Should really aufs rejects showing the entries -+under /ro/dirA? -+ -+To be honest, I don't have a best solution for this case. So I -+implemented 'dirperm1' and 'nodirperm1' option in aufs1, and leave it to -+users. -+When dirperm1 is specified, aufs checks only the highest one for the -+directory permission, and shows the entries. Otherwise, as usual, checks -+every dir existing on all branches and rejects the request. -+ -+As a side effect, dirperm1 option improves the performance of aufs -+because the number of permission check is reduced. -+ -+ -+Show Whiteout Mode (shwh) -+---------------------------------------------------------------------- -+Generally aufs hides the name of whiteouts. But in some cases, to show -+them is very useful for users. For instance, creating a new middle layer -+(branch) by merging existing layers. 
-+ -+(borrowing aufs1 HOW-TO from a user, Michael Towers) -+When you have three branches, -+- Bottom: 'system', squashfs (underlying base system), read-only -+- Middle: 'mods', squashfs, read-only -+- Top: 'overlay', ram (tmpfs), read-write -+ -+The top layer is loaded at boot time and saved at shutdown, to preserve -+the changes made to the system during the session. -+When larger changes have been made, or smaller changes have accumulated, -+the size of the saved top layer data grows. At this point, it would be -+nice to be able to merge the two overlay branches ('mods' and 'overlay') -+and rewrite the 'mods' squashfs, clearing the top layer and thus -+restoring save and load speed. -+ -+This merging is simplified by the use of another aufs mount, of just the -+two overlay branches using the 'shwh' option. -+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ -+ aufs /livesys/merge_union -+ -+A merged view of these two branches is then available at -+/livesys/merge_union, and the new feature is that the whiteouts are -+visible! -+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable -+writing to all branches. Also the default mode for all branches is 'ro'. -+It is now possible to save the combined contents of the two overlay -+branches to a new squashfs, e.g.: -+# mksquashfs /livesys/merge_union /path/to/newmods.squash -+ -+This new squashfs archive can be stored on the boot device and the -+initramfs will use it to replace the old one at the next boot. -+ -+ -+Being Another Aufs's Readonly Branch (robr) -+---------------------------------------------------------------------- -+Aufs1 allows aufs to be another aufs's readonly branch. -+This feature was developed by a user's request. But it may not be used -+currecnly. -+ -+ -+Copy-up on Open (coo=) -+---------------------------------------------------------------------- -+By default the internal copy-up is executed when it is really necessary. 
-+It is not done when a file is opened for writing, but when write(2) is -+done. Users who have many (over 100) branches want to know and analyse -+when and what file is copied-up. To insert a new upper branch which -+contains such files only may improve the performance of aufs. -+ -+Aufs1 implemented "coo=none | leaf | all" option. -+ -+ -+Refresh the Opened File (refrof) -+---------------------------------------------------------------------- -+This option is implemented in aufs1 but incomplete. -+ -+When user reads from a file, he expects to get its latest filedata -+generally. If the file is removed and a new same named file is created, -+the content he gets is unchanged, ie. the unlinked filedata. -+ -+Let's try case study again. -+- aufs has two branches. -+ /au = /rw + /ro -+- "fileA" exists under /ro, but /rw. -+- user opened "/au/fileA". -+- he or someone else inserts a branch (/new) between /rw and /ro. -+ /au = /rw + /new + /ro -+- the new branch has "fileA". -+- user reads from the opened "fileA" -+- which filedata should aufs return, from /ro or /new? -+ -+Some people says it has to be "from /ro" and it is a semantics of Unix. -+The others say it should be "from /new" because the file is not removed -+and it is equivalent to the case of someone else modifies the file. -+ -+Here again I don't have a best and final answer. I got an idea to -+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the -+Opened File) is specified (by default), aufs returns the filedata from -+/new. -+Otherwise from /new. 
-diff --git a/fs/Kconfig b/fs/Kconfig -index 93945dd..75156dd 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -222,6 +222,7 @@ source "fs/qnx4/Kconfig" - source "fs/romfs/Kconfig" - source "fs/sysv/Kconfig" - source "fs/ufs/Kconfig" -+source "fs/aufs/Kconfig" - - endif # MISC_FILESYSTEMS - -diff --git a/fs/Makefile b/fs/Makefile -index dc20db3..a4e9a65 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -124,3 +124,4 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ - obj-$(CONFIG_OCFS2_FS) += ocfs2/ - obj-$(CONFIG_BTRFS_FS) += btrfs/ - obj-$(CONFIG_GFS2_FS) += gfs2/ -+obj-$(CONFIG_AUFS_FS) += aufs/ -diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig -new file mode 100644 -index 0000000..04b2060 ---- /dev/null -+++ b/fs/aufs/Kconfig -@@ -0,0 +1,109 @@ -+config AUFS_FS -+ bool "Aufs (Advanced multi layered unification filesystem) support" -+ depends on EXPERIMENTAL -+ help -+ Aufs is a stackable unification filesystem such as Unionfs, -+ which unifies several directories and provides a merged single -+ directory. -+ In the early days, aufs was entirely re-designed and -+ re-implemented Unionfs Version 1.x series. Introducing many -+ original ideas, approaches and improvements, it becomes totally -+ different from Unionfs while keeping the basic features. -+ -+if AUFS_FS -+choice -+ prompt "Maximum number of branches" -+ default AUFS_BRANCH_MAX_127 -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_127 -+ bool "127" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_511 -+ bool "511" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. 
-+config AUFS_BRANCH_MAX_1023 -+ bool "1023" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_32767 -+ bool "32767" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+endchoice -+ -+config AUFS_HINOTIFY -+ bool "Use inotify to detect actions on a branch" -+ depends on INOTIFY -+ help -+ If you want to modify files on branches directly, eg. bypassing aufs, -+ and want aufs to detect the changes of them fully, then enable this -+ option and use 'udba=inotify' mount option. -+ It will have a negative impact to the performance. -+ See detail in aufs.5. -+ -+config AUFS_EXPORT -+ bool "NFS-exportable aufs" -+ depends on EXPORTFS = y -+ help -+ If you want to export your mounted aufs via NFS, then enable this -+ option. There are several requirements for this configuration. -+ See detail in aufs.5. -+ -+config AUFS_BR_RAMFS -+ bool "Ramfs (initramfs/rootfs) as an aufs branch" -+ help -+ If you want to use ramfs as an aufs branch fs, then enable this -+ option. Generally tmpfs is recommended. -+ Aufs prohibited them to be a branch fs by default, because -+ initramfs becomes unusable after switch_root or something -+ generally. If you sets initramfs as an aufs branch and boot your -+ system by switch_root, you will meet a problem easily since the -+ files in initramfs may be inaccessible. -+ Unless you are going to use ramfs as an aufs branch fs without -+ switch_root or something, leave it N. -+ -+config AUFS_DEBUG -+ bool "Debug aufs" -+ help -+ Enable this to compile aufs internal debug code. -+ It will have a negative impact to the performance. 
-+ -+config AUFS_MAGIC_SYSRQ -+ bool -+ depends on AUFS_DEBUG && MAGIC_SYSRQ -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq, enabled automatically. -+ -+config AUFS_BDEV_LOOP -+ bool -+ depends on BLK_DEV_LOOP -+ default y -+ help -+ Automatic configuration for internal use. -+ Convert =[ym] into =y. -+ -+config AUFS_INO_T_64 -+ bool -+ depends on AUFS_EXPORT -+ depends on 64BIT && !(ALPHA || S390) -+ default y -+ help -+ Automatic configuration for internal use. -+ /* typedef unsigned long/int __kernel_ino_t */ -+ /* alpha and s390x are int */ -+endif -diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile -new file mode 100644 -index 0000000..477d245 ---- /dev/null -+++ b/fs/aufs/Makefile -@@ -0,0 +1,21 @@ -+ -+include ${srctree}/${src}/magic.mk -+ -+obj-$(CONFIG_AUFS_FS) += aufs.o -+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ -+ wkq.o vfsub.o dcsub.o \ -+ cpup.o whout.o plink.o wbr_policy.o \ -+ dinfo.o dentry.o \ -+ finfo.o file.o f_op.o \ -+ dir.o vdir.o \ -+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ -+ ioctl.o -+ -+# all are boolean -+aufs-$(CONFIG_SYSFS) += sysfs.o -+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o -+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o -+aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o -+aufs-$(CONFIG_AUFS_EXPORT) += export.o -+aufs-$(CONFIG_AUFS_DEBUG) += debug.o -+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o -diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h -new file mode 100644 -index 0000000..132e84e ---- /dev/null -+++ b/fs/aufs/aufs.h -@@ -0,0 +1,44 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -3050,6 +82,15 @@ index 0000000..132e84e + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -3061,8 +102,6 @@ index 0000000..132e84e + +#ifdef __KERNEL__ + -+#include -+ +#include "debug.h" + +#include "branch.h" @@ -3087,12 +126,10 @@ index 0000000..132e84e + +#endif /* __KERNEL__ */ +#endif /* __AUFS_H__ */ -diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c -new file mode 100644 -index 0000000..7ef8a18 ---- /dev/null -+++ b/fs/aufs/branch.c -@@ -0,0 +1,946 @@ +diff -urN linux-2.6.30.org/fs/aufs/branch.c linux-2.6.30/fs/aufs/branch.c +--- linux-2.6.30.org/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/branch.c 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,973 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -3100,12 +137,22 @@ index 0000000..7ef8a18 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * branch management + */ + ++#include +#include "aufs.h" + +/* @@ -3127,7 +174,7 @@ index 0000000..7ef8a18 + for (i = 0; i < AuBrWh_Last; i++) + dput(wbr->wbr_wh[i]); + AuDebugOn(atomic_read(&wbr->wbr_wh_running)); -+ au_rwsem_destroy(&wbr->wbr_wh_rwsem); ++ AuRwDestroy(&wbr->wbr_wh_rwsem); + } + + /* some filesystems acquire extra lock */ @@ -3147,6 +194,8 @@ index 0000000..7ef8a18 + aufs_bindex_t bmax; + struct au_branch **br; + ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ + bmax = sbinfo->si_bend + 1; + br = sbinfo->si_branch; + while (bmax--) @@ -3382,7 +431,7 @@ index 0000000..7ef8a18 + struct au_wbr *wbr; + + wbr = br->br_wbr; -+ init_rwsem(&wbr->wbr_wh_rwsem); ++ au_rw_init(&wbr->wbr_wh_rwsem); + memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); + atomic_set(&wbr->wbr_wh_running, 0); + wbr->wbr_bytes = 0; @@ -3436,6 +485,8 @@ index 0000000..7ef8a18 +{ + struct au_branch **brp; + ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ + brp = sbinfo->si_branch + bindex; + memmove(brp + 1, brp, sizeof(*brp) * amount); + *brp = br; @@ -3698,6 +749,8 @@ index 0000000..7ef8a18 +{ + struct au_branch **brp, **p; + ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ + brp = sbinfo->si_branch + bindex; + if (bindex < bend) + memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); @@ -3750,6 +803,8 @@ index 0000000..7ef8a18 + struct dentry *root; + struct inode *inode; + ++ SiMustWriteLock(sb); ++ + root = sb->s_root; + inode = root->d_inode; + au_plink_block_maintain(sb); @@ -3865,6 +920,13 @@ index 0000000..7ef8a18 + * change a branch permission + */ + ++static void au_warn_ima(void) ++{ ++#ifdef CONFIG_IMA ++ AuWarn("RW -> RO makes IMA to produce wrong message"); ++#endif ++} ++ +static int do_need_sigen_inc(int a, int b) +{ + return au_br_whable(a) && 
!au_br_whable(b); @@ -3938,6 +1000,8 @@ index 0000000..7ef8a18 + } + + err = 0; ++ if (n) ++ au_warn_ima(); + for (ul = 0; ul < n; ul++) { + /* todo: already flushed? */ + /* cf. fs/super.c:mark_files_ro() */ @@ -4039,12 +1103,10 @@ index 0000000..7ef8a18 + out: + return err; +} -diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h -new file mode 100644 -index 0000000..e448692 ---- /dev/null -+++ b/fs/aufs/branch.h -@@ -0,0 +1,207 @@ +diff -urN linux-2.6.30.org/fs/aufs/branch.h linux-2.6.30/fs/aufs/branch.h +--- linux-2.6.30.org/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/branch.h 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,219 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -4052,6 +1114,15 @@ index 0000000..e448692 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -4065,7 +1136,6 @@ index 0000000..e448692 + +#include +#include -+#include +#include +#include "rwsem.h" +#include "super.h" @@ -4087,7 +1157,7 @@ index 0000000..e448692 +/* members for writable branch only */ +enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; +struct au_wbr { -+ struct rw_semaphore wbr_wh_rwsem; ++ struct au_rwsem wbr_wh_rwsem; + struct dentry *wbr_wh[AuBrWh_Last]; + atomic_t wbr_wh_running; +#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ @@ -4229,7 +1299,7 @@ index 0000000..e448692 + +static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) +{ -+ atomic_dec(&au_sbr(sb, bindex)->br_count); ++ atomic_dec_return(&au_sbr(sb, bindex)->br_count); +} + +static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) @@ -4250,14 +1320,16 @@ index 0000000..e448692 + */ +AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); + ++#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) ++#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) ++#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) ++ +#endif /* __KERNEL__ */ +#endif /* __AUFS_BRANCH_H__ */ -diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c -new file mode 100644 -index 0000000..91808b1 ---- /dev/null -+++ b/fs/aufs/cpup.c -@@ -0,0 +1,1028 @@ +diff -urN linux-2.6.30.org/fs/aufs/cpup.c linux-2.6.30/fs/aufs/cpup.c +--- linux-2.6.30.org/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/cpup.c 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,1039 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -4265,13 +1337,24 @@ index 0000000..91808b1 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * copy-up functions, see wbr_policy.c for copy-down + */ + ++#include +#include ++#include +#include +#include "aufs.h" + @@ -5150,7 +2233,7 @@ index 0000000..91808b1 + h_inode = h_dentry->d_inode; + IMustLock(h_inode); + mutex_unlock(&h_inode->i_mutex); -+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT2); ++ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); + mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); + } + @@ -5286,12 +2369,10 @@ index 0000000..91808b1 + dput(parent); + return err; +} -diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h -new file mode 100644 -index 0000000..240014a ---- /dev/null -+++ b/fs/aufs/cpup.h -@@ -0,0 +1,68 @@ +diff -urN linux-2.6.30.org/fs/aufs/cpup.h linux-2.6.30/fs/aufs/cpup.h +--- linux-2.6.30.org/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/cpup.h 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,81 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -5299,6 +2380,15 @@ index 0000000..240014a + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -5310,9 +2400,13 @@ index 0000000..240014a + +#ifdef __KERNEL__ + -+#include ++#include ++#include +#include + ++struct inode; ++struct file; ++ +void au_cpup_attr_flags(struct inode *dst, struct inode *src); +void au_cpup_attr_timesizes(struct inode *inode); +void au_cpup_attr_nlink(struct inode *inode, int force); @@ -5360,12 +2454,10 @@ index 0000000..240014a + +#endif /* __KERNEL__ */ +#endif /* __AUFS_CPUP_H__ */ -diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c -new file mode 100644 -index 0000000..cb04b5d ---- /dev/null -+++ b/fs/aufs/dbgaufs.c -@@ -0,0 +1,304 @@ +diff -urN linux-2.6.30.org/fs/aufs/dbgaufs.c linux-2.6.30/fs/aufs/dbgaufs.c +--- linux-2.6.30.org/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dbgaufs.c 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,331 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -5373,6 +2465,15 @@ index 0000000..cb04b5d + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -5594,6 +2695,12 @@ index 0000000..cb04b5d +{ + int err; + ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ + err = -EIO; + sbinfo->si_dbgaufs_xigen = debugfs_create_file + ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, @@ -5614,6 +2721,12 @@ index 0000000..cb04b5d + +void dbgaufs_si_fin(struct au_sbinfo *sbinfo) +{ ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ + debugfs_remove_recursive(sbinfo->si_dbgaufs); + sbinfo->si_dbgaufs = NULL; + kobject_put(&sbinfo->si_kobj); @@ -5624,6 +2737,12 @@ index 0000000..cb04b5d + int err; + char name[SysaufsSiNameLen]; + ++ /* ++ * This function is a dynamic '__init' fucntion actually, ++ * so the tiny check for si_rwsem is unnecessary. ++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ ++ + err = -ENOENT; + if (!dbgaufs) { + AuErr1("/debug/aufs is uninitialized\n"); @@ -5670,12 +2789,10 @@ index 0000000..cb04b5d + err = 0; + return err; +} -diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h -new file mode 100644 -index 0000000..dfb0f0a ---- /dev/null -+++ b/fs/aufs/dbgaufs.h -@@ -0,0 +1,68 @@ +diff -urN linux-2.6.30.org/fs/aufs/dbgaufs.h linux-2.6.30/fs/aufs/dbgaufs.h +--- linux-2.6.30.org/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dbgaufs.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,79 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -5683,6 +2800,15 @@ index 0000000..dfb0f0a + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -5694,10 +2820,12 @@ index 0000000..dfb0f0a + +#ifdef __KERNEL__ + -+#include ++#include +#include + ++struct super_block; +struct au_sbinfo; ++ +#ifdef CONFIG_DEBUG_FS +/* dbgaufs.c */ +void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); @@ -5744,12 +2872,10 @@ index 0000000..dfb0f0a + +#endif /* __KERNEL__ */ +#endif /* __DBGAUFS_H__ */ -diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c -new file mode 100644 -index 0000000..e4597e4 ---- /dev/null -+++ b/fs/aufs/dcsub.c -@@ -0,0 +1,214 @@ +diff -urN linux-2.6.30.org/fs/aufs/dcsub.c linux-2.6.30/fs/aufs/dcsub.c +--- linux-2.6.30.org/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dcsub.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,223 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -5757,6 +2883,15 @@ index 0000000..e4597e4 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -5964,12 +3099,10 @@ index 0000000..e4597e4 + out: + return trap; +} -diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h -new file mode 100644 -index 0000000..89a7745 ---- /dev/null -+++ b/fs/aufs/dcsub.h -@@ -0,0 +1,43 @@ +diff -urN linux-2.6.30.org/fs/aufs/dcsub.h linux-2.6.30/fs/aufs/dcsub.h +--- linux-2.6.30.org/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dcsub.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,54 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -5977,6 +3110,15 @@ index 0000000..89a7745 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -5988,7 +3130,9 @@ index 0000000..89a7745 + +#ifdef __KERNEL__ + -+#include ++#include ++ ++struct dentry; + +struct au_dpage { + int ndentry; @@ -6013,12 +3157,10 @@ index 0000000..89a7745 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DCSUB_H__ */ -diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c -new file mode 100644 -index 0000000..de38931 ---- /dev/null -+++ b/fs/aufs/debug.c -@@ -0,0 +1,416 @@ +diff -urN linux-2.6.30.org/fs/aufs/debug.c linux-2.6.30/fs/aufs/debug.c +--- linux-2.6.30.org/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/debug.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,427 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -6026,12 +3168,23 @@ index 0000000..de38931 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * debug print functions + */ + ++#include ++#include +#include "aufs.h" + +int aufs_debug; @@ -6435,12 +3588,10 @@ index 0000000..de38931 + + return 0; +} -diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h -new file mode 100644 -index 0000000..36ff0ea ---- /dev/null -+++ b/fs/aufs/debug.h -@@ -0,0 +1,243 @@ +diff -urN linux-2.6.30.org/fs/aufs/debug.h linux-2.6.30/fs/aufs/debug.h +--- linux-2.6.30.org/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/debug.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,260 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -6448,6 +3599,15 @@ index 0000000..36ff0ea + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -6459,10 +3619,13 @@ index 0000000..36ff0ea + +#ifdef __KERNEL__ + ++#include ++/* #include */ ++/* #include */ ++/* #include */ +#include -+#include -+#include -+#include ++/* #include */ ++/* #include */ +#include +#include + @@ -6549,18 +3712,23 @@ index 0000000..36ff0ea + +struct au_sbinfo; +struct au_finfo; ++struct dentry; +#ifdef CONFIG_AUFS_DEBUG +extern char *au_plevel; +struct au_nhash; +void au_dpri_whlist(struct au_nhash *whlist); +struct au_vdir; +void au_dpri_vdir(struct au_vdir *vdir); ++struct inode; +void au_dpri_inode(struct inode *inode); +void au_dpri_dentry(struct dentry *dentry); ++struct file; +void au_dpri_file(struct file *filp); ++struct super_block; +void au_dpri_sb(struct super_block *sb); + +void au_dbg_sleep_jiffy(int jiffy); ++struct iattr; +void au_dbg_iattr(struct iattr *ia); + +void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); @@ -6622,7 +3790,7 @@ index 0000000..36ff0ea + /* empty */ +} +static inline void au_dbg_verify_nondir_parent(struct dentry *dentry, -+ unsigned int sigen) ++ unsigned int sigen) +{ + /* empty */ +} @@ -6684,12 +3852,10 @@ index 0000000..36ff0ea + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DEBUG_H__ */ -diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c -new file mode 100644 -index 0000000..f36956a ---- /dev/null -+++ b/fs/aufs/dentry.c -@@ -0,0 +1,858 @@ +diff -urN linux-2.6.30.org/fs/aufs/dentry.c linux-2.6.30/fs/aufs/dentry.c +--- linux-2.6.30.org/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dentry.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,876 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -6697,12 +3863,22 @@ index 0000000..f36956a + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * lookup and dentry operations + */ + ++#include +#include "aufs.h" + +static void au_h_nd(struct nameidata *h_nd, struct nameidata *nd) @@ -6849,6 +4025,14 @@ index 0000000..f36956a + return h_dentry; +} + ++static int au_test_shwh(struct super_block *sb, const struct qstr *name) ++{ ++ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) ++ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ return -EPERM; ++ return 0; ++} ++ +/* + * returns the number of lower positive dentries, + * otherwise an error. 
@@ -6870,9 +4054,9 @@ index 0000000..f36956a + struct dentry *parent; + struct inode *inode; + -+ err = -EPERM; + parent = dget_parent(dentry); -+ if (unlikely(!strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ err = au_test_shwh(dentry->d_sb, name); ++ if (unlikely(err)) + goto out; + + err = au_wh_name_alloc(&whname, name); @@ -7539,7 +4723,7 @@ index 0000000..f36956a + } + } + kfree(dinfo->di_hdentry); -+ au_rwsem_destroy(&dinfo->di_rwsem); ++ AuRwDestroy(&dinfo->di_rwsem); + au_cache_free_dinfo(dinfo); + au_hin_di_reinit(dentry); +} @@ -7548,12 +4732,10 @@ index 0000000..f36956a + .d_revalidate = aufs_d_revalidate, + .d_release = aufs_d_release +}; -diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h -new file mode 100644 -index 0000000..8708db8 ---- /dev/null -+++ b/fs/aufs/dentry.h -@@ -0,0 +1,213 @@ +diff -urN linux-2.6.30.org/fs/aufs/dentry.h linux-2.6.30/fs/aufs/dentry.h +--- linux-2.6.30.org/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dentry.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,223 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -7561,6 +4743,15 @@ index 0000000..8708db8 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -7572,8 +4763,7 @@ index 0000000..8708db8 + +#ifdef __KERNEL__ + -+#include -+#include ++#include +#include +#include "rwsem.h" + @@ -7586,7 +4776,7 @@ index 0000000..8708db8 +struct au_dinfo { + atomic_t di_generation; + -+ struct rw_semaphore di_rwsem; ++ struct au_rwsem di_rwsem; + aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; + struct au_hdentry *di_hdentry; +}; @@ -7685,6 +4875,8 @@ index 0000000..8708db8 +#undef AuRWLockFuncs + +#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) ++#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) ++#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) + +/* ---------------------------------------------------------------------- */ + @@ -7751,7 +4943,7 @@ index 0000000..8708db8 +#ifdef CONFIG_AUFS_HINOTIFY +static inline void au_digen_dec(struct dentry *d) +{ -+ atomic_dec(&au_di(d)->di_generation); ++ atomic_dec_return(&au_di(d)->di_generation); +} + +static inline void au_hin_di_reinit(struct dentry *dentry) @@ -7767,12 +4959,10 @@ index 0000000..8708db8 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DENTRY_H__ */ -diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c -new file mode 100644 -index 0000000..b0645aa ---- /dev/null -+++ b/fs/aufs/dinfo.c -@@ -0,0 +1,351 @@ +diff -urN linux-2.6.30.org/fs/aufs/dinfo.c linux-2.6.30/fs/aufs/dinfo.c +--- linux-2.6.30.org/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dinfo.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,359 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -7780,6 +4970,15 @@ index 0000000..b0645aa + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -7808,8 +5007,7 @@ index 0000000..b0645aa + + atomic_set(&dinfo->di_generation, au_sigen(sb)); + /* smp_mb(); */ /* atomic_set */ -+ init_rwsem(&dinfo->di_rwsem); -+ down_write_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD); ++ au_rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_CHILD); + dinfo->di_bstart = -1; + dinfo->di_bend = -1; + dinfo->di_bwh = -1; @@ -7899,7 +5097,7 @@ index 0000000..b0645aa + +void di_read_lock(struct dentry *d, int flags, unsigned int lsc) +{ -+ down_read_nested(&au_di(d)->di_rwsem, lsc); ++ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); + if (d->d_inode) { + if (au_ftest_lock(flags, IW)) + do_ii_write_lock(d->d_inode, lsc); @@ -7916,19 +5114,19 @@ index 0000000..b0645aa + else if (au_ftest_lock(flags, IR)) + ii_read_unlock(d->d_inode); + } -+ up_read(&au_di(d)->di_rwsem); ++ au_rw_read_unlock(&au_di(d)->di_rwsem); +} + +void di_downgrade_lock(struct dentry *d, int flags) +{ -+ downgrade_write(&au_di(d)->di_rwsem); + if (d->d_inode && au_ftest_lock(flags, IR)) + ii_downgrade_lock(d->d_inode); ++ au_rw_dgrade_lock(&au_di(d)->di_rwsem); +} + +void di_write_lock(struct dentry *d, unsigned int lsc) +{ -+ down_write_nested(&au_di(d)->di_rwsem, lsc); ++ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); + if (d->d_inode) + 
do_ii_write_lock(d->d_inode, lsc); +} @@ -7937,7 +5135,7 @@ index 0000000..b0645aa +{ + if (d->d_inode) + ii_write_unlock(d->d_inode); -+ up_write(&au_di(d)->di_rwsem); ++ au_rw_write_unlock(&au_di(d)->di_rwsem); +} + +void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) @@ -7976,7 +5174,7 @@ index 0000000..b0645aa +{ + di_write_unlock(d1); + if (d1->d_inode == d2->d_inode) -+ up_write(&au_di(d2)->di_rwsem); ++ au_rw_write_unlock(&au_di(d2)->di_rwsem); + else + di_write_unlock(d2); +} @@ -8124,12 +5322,10 @@ index 0000000..b0645aa + return bindex; + return -1; +} -diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c -new file mode 100644 -index 0000000..9fc6196 ---- /dev/null -+++ b/fs/aufs/dir.c -@@ -0,0 +1,513 @@ +diff -urN linux-2.6.30.org/fs/aufs/dir.c linux-2.6.30/fs/aufs/dir.c +--- linux-2.6.30.org/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dir.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,535 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -8137,12 +5333,22 @@ index 0000000..9fc6196 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * directory operations + */ + ++#include +#include +#include "aufs.h" + @@ -8273,7 +5479,8 @@ index 0000000..9fc6196 + au_vdir_free(vdir_cache); + if (au_fi(file)->fi_maintain_plink) { + sbinfo = au_sbi(sb); -+ au_fclr_si(sbinfo, MAINTAIN_PLINK); ++ /* clear the flag without write-lock */ ++ sbinfo->au_si_status &= ~AuSi_MAINTAIN_PLINK; + wake_up_all(&sbinfo->si_plink_wq); + } + fi_write_unlock(file); @@ -8450,21 +5657,27 @@ index 0000000..9fc6196 +/* ---------------------------------------------------------------------- */ + +#define AuTestEmpty_WHONLY 1 -+#define AuTestEmpty_CALLED (1 << 2) ++#define AuTestEmpty_CALLED (1 << 1) ++#define AuTestEmpty_SHWH (1 << 2) +#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) +#define au_fset_testempty(flags, name) { (flags) |= AuTestEmpty_##name; } +#define au_fclr_testempty(flags, name) { (flags) &= ~AuTestEmpty_##name; } + ++#ifndef CONFIG_AUFS_SHWH ++#undef AuTestEmpty_SHWH ++#define AuTestEmpty_SHWH 0 ++#endif ++ +struct test_empty_arg { -+ struct au_nhash *whlist; ++ struct au_nhash whlist; + unsigned int flags; + int err; + aufs_bindex_t bindex; +}; + +static int test_empty_cb(void *__arg, const char *__name, int namelen, -+ loff_t offset __maybe_unused, u64 ino __maybe_unused, -+ unsigned int d_type __maybe_unused) ++ loff_t offset __maybe_unused, u64 ino, ++ unsigned int d_type) +{ + struct test_empty_arg *arg = __arg; + char *name = (void *)__name; @@ -8479,16 +5692,17 @@ index 0000000..9fc6196 + if (namelen <= AUFS_WH_PFX_LEN + || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { + if (au_ftest_testempty(arg->flags, WHONLY) -+ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ && !au_nhash_test_known_wh(&arg->whlist, name, namelen)) + arg->err = -ENOTEMPTY; + 
goto out; + } + + name += AUFS_WH_PFX_LEN; + namelen -= AUFS_WH_PFX_LEN; -+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) ++ if (!au_nhash_test_known_wh(&arg->whlist, name, namelen)) + arg->err = au_nhash_append_wh -+ (arg->whlist, name, namelen, arg->bindex); ++ (&arg->whlist, name, namelen, ino, d_type, arg->bindex, ++ au_ftest_testempty(arg->flags, SHWH)); + + out: + /* smp_mb(); */ @@ -8576,16 +5790,18 @@ index 0000000..9fc6196 + int err; + aufs_bindex_t bindex, bstart, btail; + struct test_empty_arg arg; -+ struct au_nhash *whlist; + -+ whlist = au_nhash_alloc(dentry->d_sb, /*bend*/0, GFP_NOFS); -+ err = PTR_ERR(whlist); -+ if (IS_ERR(whlist)) ++ SiMustAnyLock(dentry->d_sb); ++ ++ err = au_nhash_alloc(&arg.whlist, au_sbi(dentry->d_sb)->si_rdhash, ++ GFP_NOFS); ++ if (unlikely(err)) + goto out; + + bstart = au_dbstart(dentry); -+ arg.whlist = whlist; + arg.flags = 0; ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); + arg.bindex = bstart; + err = do_test_empty(dentry, &arg); + if (unlikely(err)) @@ -8604,7 +5820,7 @@ index 0000000..9fc6196 + } + + out_whlist: -+ au_nhash_wh_free(whlist, /*bend*/0); ++ au_nhash_wh_free(&arg.whlist); + out: + return err; +} @@ -8616,8 +5832,10 @@ index 0000000..9fc6196 + aufs_bindex_t bindex, btail; + + err = 0; -+ arg.whlist = whlist; ++ arg.whlist = *whlist; + arg.flags = AuTestEmpty_WHONLY; ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) ++ au_fset_testempty(arg.flags, SHWH); + btail = au_dbtaildir(dentry); + for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { + struct dentry *h_dentry; @@ -8643,12 +5861,10 @@ index 0000000..9fc6196 + .flush = aufs_flush, + .fsync = aufs_fsync_dir +}; -diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h -new file mode 100644 -index 0000000..f41f050 ---- /dev/null -+++ b/fs/aufs/dir.h -@@ -0,0 +1,98 @@ +diff -urN linux-2.6.30.org/fs/aufs/dir.h linux-2.6.30/fs/aufs/dir.h +--- linux-2.6.30.org/fs/aufs/dir.h 1970-01-01 
01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/dir.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,114 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -8656,6 +5872,15 @@ index 0000000..f41f050 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -8675,8 +5900,8 @@ index 0000000..f41f050 +/* need to be faster and smaller */ + +struct au_nhash { -+ unsigned int nh_num; -+ struct hlist_head *nh_head; ++ unsigned int nh_num; ++ struct hlist_head *nh_head; +}; + +struct au_vdir_destr { @@ -8698,7 +5923,14 @@ index 0000000..f41f050 + +struct au_vdir_wh { + struct hlist_node wh_hash; ++#ifdef CONFIG_AUFS_SHWH ++ ino_t wh_ino; + aufs_bindex_t wh_bindex; ++ unsigned char wh_type; ++#else ++ aufs_bindex_t wh_bindex; ++#endif ++ /* caution: packed */ + struct au_vdir_destr wh_str; +} __packed; + @@ -8710,13 +5942,13 @@ index 0000000..f41f050 +struct au_vdir { + unsigned char **vd_deblk; + unsigned long vd_nblk; -+ unsigned int vd_deblk_sz; + struct { + unsigned long ul; + union au_vdir_deblk_p p; + } vd_last; + + unsigned long vd_version; ++ unsigned int vd_deblk_sz; + unsigned long vd_jiffy; +}; + @@ -8730,14 +5962,14 @@ index 0000000..f41f050 +int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); + +/* vdir.c */ -+struct au_nhash *au_nhash_alloc(struct super_block *sb, aufs_bindex_t bend, -+ gfp_t gfp); -+void 
au_nhash_wh_free(struct au_nhash *whlist, aufs_bindex_t bend); ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); ++void au_nhash_wh_free(struct au_nhash *whlist); +int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, + int limit); -+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int namelen); -+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int namelen, -+ aufs_bindex_t bindex); ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh); +void au_vdir_free(struct au_vdir *vdir); +int au_vdir_init(struct file *file); +int au_vdir_fill_de(struct file *file, void *dirent, filldir_t filldir); @@ -8747,19 +5979,26 @@ index 0000000..f41f050 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DIR_H__ */ -diff --git a/fs/aufs/export.c b/fs/aufs/export.c -new file mode 100644 -index 0000000..7dd7f33 ---- /dev/null -+++ b/fs/aufs/export.c -@@ -0,0 +1,725 @@ +diff -urN linux-2.6.30.org/fs/aufs/export.c linux-2.6.30/fs/aufs/export.c +--- linux-2.6.30.org/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/export.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,745 @@ +/* -+ * Copyright (C) 2005-2009 Junjiro Okajima ++ * Copyright (C) 2005-2009 Junjiro R. Okajima + * + * This program, aufs is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -8767,7 +6006,9 @@ index 0000000..7dd7f33 + */ + +#include ++#include +#include ++#include +#include +#include "aufs.h" + @@ -8844,13 +6085,17 @@ index 0000000..7dd7f33 + + err = 0; + sb = inode->i_sb; -+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) ++ sbinfo = au_sbi(sb); ++ /* ++ * temporary workaround for escaping from SiMustAnyLock() in ++ * au_mntflags(), since this function is called from au_iinfo_fin(). ++ */ ++ if (unlikely(!au_opt_test(sbinfo->si_mntflags, XINO))) + goto out; + + pos = inode->i_ino; + pos *= sizeof(igen); + igen = inode->i_generation + 1; -+ sbinfo = au_sbi(sb); + sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, + sizeof(igen), &pos); + if (sz == sizeof(igen)) @@ -8880,6 +6125,7 @@ index 0000000..7dd7f33 + if (inode->i_ino == AUFS_ROOT_INO) + goto out; + sb = inode->i_sb; ++ SiMustAnyLock(sb); + if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) + goto out; + @@ -8923,6 +6169,8 @@ index 0000000..7dd7f33 + struct au_sbinfo *sbinfo; + struct file *file; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + file = au_xino_create2(base, sbinfo->si_xigen); + err = PTR_ERR(file); @@ -8941,6 +6189,8 @@ index 0000000..7dd7f33 +{ + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + if (sbinfo->si_xigen) { + fput(sbinfo->si_xigen); @@ -9478,12 +6728,10 @@ index 0000000..7dd7f33 + BUILD_BUG_ON(sizeof(u) != sizeof(int)); + atomic_set(&sbinfo->si_xigen_next, u); +} -diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c -new file mode 100644 -index 0000000..7841d99 ---- /dev/null -+++ b/fs/aufs/f_op.c -@@ -0,0 +1,556 @@ +diff -urN linux-2.6.30.org/fs/aufs/file.c linux-2.6.30/fs/aufs/file.c +--- linux-2.6.30.org/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-2.6.30/fs/aufs/file.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,572 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -9491,561 +6739,575 @@ index 0000000..7841d99 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * file and vm operations ++ * handling file/dir, and address_space operation + */ + -+#include -+#include ++#include ++#include ++#include ++#include +#include "aufs.h" + -+/* common function to regular file and dir */ -+int aufs_flush(struct file *file, fl_owner_t id) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ struct dentry *dentry; -+ struct file *h_file; -+ -+ dentry = file->f_dentry; -+ si_noflush_read_lock(dentry->d_sb); -+ fi_read_lock(file); -+ di_read_lock_child(dentry, AuLock_IW); -+ -+ err = 0; -+ bend = au_fbend(file); -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { -+ h_file = au_h_fptr(file, bindex); -+ if (!h_file || !h_file->f_op || !h_file->f_op->flush) -+ continue; -+ -+ err = h_file->f_op->flush(h_file, id); -+ if (!err) -+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); -+ /*ignore*/ -+ } -+ au_cpup_attr_timesizes(dentry->d_inode); -+ -+ di_read_unlock(dentry, AuLock_IW); -+ fi_read_unlock(file); -+ si_read_unlock(dentry->d_sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int do_open_nondir(struct 
file *file, int flags) ++/* ++ * a dirty trick for handling deny_write_access(). ++ * because FMODE_EXEC flag is not passed to f_op->open(), ++ * set it to file->private_data temporary. ++ */ ++void au_store_oflag(struct nameidata *nd, struct inode *inode) +{ -+ int err; -+ aufs_bindex_t bindex; -+ struct file *h_file; -+ struct dentry *dentry; -+ -+ err = 0; -+ dentry = file->f_dentry; -+ au_fi(file)->fi_h_vm_ops = NULL; -+ bindex = au_dbstart(dentry); -+ /* O_TRUNC is processed already */ -+ BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode) -+ && (flags & O_TRUNC)); -+ -+ h_file = au_h_open(dentry, bindex, flags, file); -+ if (IS_ERR(h_file)) -+ err = PTR_ERR(h_file); -+ else { -+ au_set_fbstart(file, bindex); -+ au_set_fbend(file, bindex); -+ au_set_h_fptr(file, bindex, h_file); -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ ++ if (nd ++ /* && !(nd->flags & LOOKUP_CONTINUE) */ ++ && (nd->flags & LOOKUP_OPEN) ++ && (nd->intent.open.flags & vfsub_fmode_to_uint(FMODE_EXEC)) ++ && inode ++ && S_ISREG(inode->i_mode)) { ++ /* suppress a warning in lp64 */ ++ unsigned long flags = nd->intent.open.flags; ++ nd->intent.open.file->private_data = (void *)flags; ++ /* smp_mb(); */ + } -+ return err; -+} -+ -+static int aufs_open_nondir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ return au_do_open(file, do_open_nondir); +} + -+static int aufs_release_nondir(struct inode *inode __maybe_unused, -+ struct file *file) ++/* drop flags for writing */ ++unsigned int au_file_roflags(unsigned int flags) +{ -+ struct super_block *sb = file->f_dentry->d_sb; -+ -+ si_noflush_read_lock(sb); -+ au_finfo_fin(file); -+ si_read_unlock(sb); -+ return 0; ++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); ++ flags |= O_RDONLY | O_NOATIME; ++ return flags; +} + -+/* ---------------------------------------------------------------------- */ -+ -+static ssize_t aufs_read(struct file *file, char __user *buf, size_t 
count, -+ loff_t *ppos) ++/* common functions to regular file and dir */ ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file) +{ -+ ssize_t err; -+ struct dentry *dentry; + struct file *h_file; ++ struct dentry *h_dentry; ++ struct inode *h_inode; + struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ ++ /* a race condition can happen between open and unlink/rmdir */ ++ h_file = ERR_PTR(-ENOENT); ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (au_test_nfsd(current) && !h_dentry) ++ goto out; ++ h_inode = h_dentry->d_inode; ++ if (au_test_nfsd(current) && !h_inode) ++ goto out; ++ if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry)) ++ || !h_inode)) ++ goto out; + -+ dentry = file->f_dentry; + sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) ++ br = au_sbr(sb, bindex); ++ h_file = ERR_PTR(-EACCES); ++ if (file && (file->f_mode & FMODE_EXEC) ++ && (br->br_mnt->mnt_flags & MNT_NOEXEC)) + goto out; + -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ err = vfsub_read_u(h_file, buf, count, ppos); -+ /* todo: necessary? 
*/ -+ /* file->f_ra = h_file->f_ra; */ -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ /* drop flags for writing */ ++ if (au_test_ro(sb, bindex, dentry->d_inode)) ++ flags = au_file_roflags(flags); ++ flags &= ~O_CREAT; ++ atomic_inc(&br->br_count); ++ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags, ++ current_cred()); ++ if (IS_ERR(h_file)) ++ goto out_br; + -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); ++ if (file && (file->f_mode & FMODE_EXEC)) { ++ h_file->f_mode |= FMODE_EXEC; ++ err = deny_write_access(h_file); ++ if (unlikely(err)) { ++ fput(h_file); ++ h_file = ERR_PTR(err); ++ goto out_br; ++ } ++ } ++ fsnotify_open(h_dentry); ++ goto out; /* success */ ++ ++ out_br: ++ atomic_dec(&br->br_count); + out: -+ si_read_unlock(sb); -+ return err; ++ return h_file; +} + -+static ssize_t aufs_write(struct file *file, const char __user *ubuf, -+ size_t count, loff_t *ppos) ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags)) +{ -+ ssize_t err; -+ aufs_bindex_t bstart; -+ struct au_pin pin; ++ int err; + struct dentry *dentry; -+ struct inode *inode; + struct super_block *sb; -+ struct file *h_file; -+ char __user *buf = (char __user *)ubuf; + + dentry = file->f_dentry; + sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ mutex_lock(&inode->i_mutex); + si_read_lock(sb, AuLock_FLUSH); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ err = au_finfo_init(file); + if (unlikely(err)) + goto out; + -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ bstart = au_fbstart(file); -+ h_file = au_h_fptr(file, bstart); -+ au_unpin(&pin); -+ err = vfsub_write_u(h_file, buf, count, ppos); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = h_file->f_dentry->d_inode->i_mode; -+ -+ out_unlock: ++ di_read_lock_child(dentry, AuLock_IR); ++ err = open(file, file->f_flags); + 
di_read_unlock(dentry, AuLock_IR); ++ + fi_write_unlock(file); ++ if (unlikely(err)) ++ au_finfo_fin(file); + out: + si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); + return err; +} + -+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) ++int au_reopen_nondir(struct file *file) +{ -+ ssize_t err; -+ struct file *h_file; ++ int err; ++ aufs_bindex_t bstart, bindex, bend; + struct dentry *dentry; -+ struct super_block *sb; ++ struct file *h_file, *h_file_tmp; + + dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ err = -EINVAL; -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ if (au_test_loopback_kthread()) { -+ file->f_mapping = h_file->f_mapping; -+ smp_mb(); /* unnecessary? */ ++ bstart = au_dbstart(dentry); ++ h_file_tmp = NULL; ++ if (au_fbstart(file) == bstart) { ++ h_file = au_h_fptr(file, bstart); ++ if (file->f_mode == h_file->f_mode) ++ return 0; /* success */ ++ h_file_tmp = h_file; ++ get_file(h_file_tmp); ++ au_set_h_fptr(file, bstart, NULL); + } -+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); -+ /* todo: necessasry? */ ++ AuDebugOn(au_fbstart(file) < bstart ++ || au_fi(file)->fi_hfile[0 + bstart].hf_file); ++ ++ h_file = au_h_open(dentry, bstart, file->f_flags & ~O_TRUNC, file); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; /* todo: close all? */ ++ ++ err = 0; ++ au_set_fbstart(file, bstart); ++ au_set_h_fptr(file, bstart, h_file); ++ au_update_figen(file); ++ /* todo: necessary? 
*/ + /* file->f_ra = h_file->f_ra; */ -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); + -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); ++ /* close lower files */ ++ bend = au_fbend(file); ++ for (bindex = bstart + 1; bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ au_set_fbend(file, bstart); + + out: -+ si_read_unlock(sb); ++ if (h_file_tmp) ++ fput(h_file_tmp); + return err; +} + -+static ssize_t -+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, -+ size_t len, unsigned int flags) -+{ -+ ssize_t err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct super_block *sb; -+ struct file *h_file; ++/* ---------------------------------------------------------------------- */ + -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ mutex_lock(&inode->i_mutex); -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ au_unpin(&pin); -+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = h_file->f_dentry->d_inode->i_mode; -+ -+ out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ out: -+ si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct file *au_safe_file(struct vm_area_struct *vma) -+{ -+ struct file *file; -+ -+ file = vma->vm_file; -+ if (file->private_data && au_test_aufs(file->f_dentry->d_sb)) -+ return file; -+ return NULL; -+} -+ -+static void au_reset_file(struct vm_area_struct *vma, struct file *file) -+{ -+ vma->vm_file = file; -+ /* smp_mb(); */ /* 
flush vm_file */ -+} -+ -+static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, ++ struct dentry *hi_wh) +{ + int err; -+ static DECLARE_WAIT_QUEUE_HEAD(wq); -+ struct file *file, *h_file; -+ struct au_finfo *finfo; -+ -+ /* todo: non-robr mode, user vm_file as it is? */ -+ wait_event(wq, (file = au_safe_file(vma))); -+ -+ /* do not revalidate, no si lock */ -+ finfo = au_fi(file); -+ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file; -+ AuDebugOn(!h_file || !au_test_mmapped(file)); ++ aufs_bindex_t bstart; ++ struct au_dinfo *dinfo; ++ struct dentry *h_dentry; + -+ fi_write_lock(file); -+ vma->vm_file = h_file; -+ err = finfo->fi_h_vm_ops->fault(vma, vmf); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ au_reset_file(vma, file); -+ fi_write_unlock(file); -+#if 0 /* def CONFIG_SMP */ -+ /* wake_up_nr(&wq, online_cpu - 1); */ -+ wake_up_all(&wq); -+#else -+ wake_up(&wq); -+#endif ++ dinfo = au_di(file->f_dentry); ++ bstart = dinfo->di_bstart; ++ dinfo->di_bstart = btgt; ++ h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry; ++ dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh; ++ err = au_reopen_nondir(file); ++ dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry; ++ dinfo->di_bstart = bstart; + + return err; +} + -+static struct vm_operations_struct aufs_vm_ops = { -+ .fault = aufs_fault -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct vm_operations_struct *au_vm_ops(struct file *h_file, -+ struct vm_area_struct *vma) ++static int au_ready_to_write_wh(struct file *file, loff_t len, ++ aufs_bindex_t bcpup) +{ -+ struct vm_operations_struct *vm_ops; + int err; ++ struct inode *inode; ++ struct dentry *dentry, *hi_wh; ++ struct super_block *sb; + -+ vm_ops = ERR_PTR(-ENODEV); -+ if (!h_file->f_op || !h_file->f_op->mmap) -+ goto out; -+ -+ err = h_file->f_op->mmap(h_file, vma); -+ vm_ops = ERR_PTR(err); -+ if 
(unlikely(err)) -+ goto out; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ hi_wh = au_hi_wh(inode, bcpup); ++ if (!hi_wh) ++ err = au_sio_cpup_wh(dentry, bcpup, len, file); ++ else ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bcpup, hi_wh); + -+ vm_ops = vma->vm_ops; -+ err = do_munmap(current->mm, vma->vm_start, -+ vma->vm_end - vma->vm_start); -+ if (unlikely(err)) { -+ AuIOErr("failed internal unmapping %.*s, %d\n", -+ AuDLNPair(h_file->f_dentry), err); -+ vm_ops = ERR_PTR(-EIO); -+ } ++ sb = dentry->d_sb; ++ if (!err && inode->i_nlink > 1 && au_opt_test(au_mntflags(sb), PLINK)) ++ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); + -+ out: -+ return vm_ops; ++ return err; +} + -+static int aufs_mmap(struct file *file, struct vm_area_struct *vma) ++/* ++ * prepare the @file for writing. ++ */ ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) +{ + int err; -+ unsigned char wlock, mmapped; -+ struct dentry *dentry; ++ aufs_bindex_t bstart, bcpup; ++ struct dentry *dentry, *parent, *h_dentry; ++ struct inode *h_inode, *inode; + struct super_block *sb; -+ struct file *h_file; -+ struct vm_operations_struct *vm_ops; + + dentry = file->f_dentry; -+ mmapped = !!au_test_mmapped(file); /* can be harmless race condition */ -+ wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); + sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, wlock | !mmapped); -+ if (unlikely(err)) ++ bstart = au_fbstart(file); ++ inode = dentry->d_inode; ++ err = au_test_ro(sb, bstart, inode); ++ if (!err && (au_h_fptr(file, bstart)->f_mode & FMODE_WRITE)) { ++ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); + goto out; ++ } + -+ if (wlock) { -+ struct au_pin pin; ++ /* need to cpup */ ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto 
out_dgrade; ++ err = 0; + -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); ++ if (!au_h_dptr(parent, bcpup)) { ++ err = au_cpup_dirs(dentry, bcpup); + if (unlikely(err)) -+ goto out_unlock; -+ au_unpin(&pin); -+ } else if (!mmapped) -+ di_downgrade_lock(dentry, AuLock_IR); -+ -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ if (au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) { -+ /* -+ * by this assignment, f_mapping will differs from aufs inode -+ * i_mapping. -+ * if someone else mixes the use of f_dentry->d_inode and -+ * f_mapping->host, then a problem may arise. -+ */ -+ file->f_mapping = h_file->f_mapping; -+ } -+ -+ vm_ops = NULL; -+ if (!mmapped) { -+ vm_ops = au_vm_ops(h_file, vma); -+ err = PTR_ERR(vm_ops); -+ if (IS_ERR(vm_ops)) -+ goto out_unlock; ++ goto out_dgrade; + } + -+ /* -+ * unnecessary to handle MAP_DENYWRITE and deny_write_access()? -+ * currently MAP_DENYWRITE from userspace is ignored, but elf loader -+ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()), -+ * both of the aufs file and the lower file is deny_write_access()-ed. -+ * finally I hope we can skip handlling MAP_DENYWRITE here. 
-+ */ -+ err = generic_file_mmap(file, vma); ++ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); + if (unlikely(err)) -+ goto out_unlock; -+ vma->vm_ops = &aufs_vm_ops; -+ /* test again */ -+ if (!au_test_mmapped(file)) -+ au_fi(file)->fi_h_vm_ops = vm_ops; ++ goto out_dgrade; + -+ vfsub_file_accessed(h_file); -+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); ++ h_dentry = au_h_fptr(file, bstart)->f_dentry; ++ h_inode = h_dentry->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ if (d_unhashed(dentry) /* || d_unhashed(h_dentry) */ ++ /* || !h_inode->i_nlink */) { ++ err = au_ready_to_write_wh(file, len, bcpup); ++ di_downgrade_lock(parent, AuLock_IR); ++ } else { ++ di_downgrade_lock(parent, AuLock_IR); ++ if (!au_h_dptr(dentry, bcpup)) ++ err = au_sio_cpup_simple(dentry, bcpup, len, ++ AuCpup_DTIME); ++ if (!err) ++ err = au_reopen_nondir(file); ++ } ++ mutex_unlock(&h_inode->i_mutex); ++ ++ if (!err) { ++ au_pin_set_parent_lflag(pin, /*lflag*/0); ++ goto out_dput; /* success */ ++ } ++ au_unpin(pin); ++ goto out_unlock; + ++ out_dgrade: ++ di_downgrade_lock(parent, AuLock_IR); + out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ if (!wlock && mmapped) -+ fi_read_unlock(file); -+ else -+ fi_write_unlock(file); ++ di_read_unlock(parent, AuLock_IR); ++ out_dput: ++ dput(parent); + out: -+ si_read_unlock(sb); + return err; +} + +/* ---------------------------------------------------------------------- */ + -+static unsigned int aufs_poll(struct file *file, poll_table *wait) ++static int au_file_refresh_by_inode(struct file *file, int *need_reopen) +{ -+ unsigned int mask; + int err; -+ struct file *h_file; -+ struct dentry *dentry; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct au_finfo *finfo; ++ struct dentry *dentry, *parent, *hi_wh; ++ struct inode *inode; + struct super_block *sb; + -+ /* We should pretend an error happened. 
*/ -+ mask = POLLERR /* | POLLIN | POLLOUT */; ++ err = 0; ++ finfo = au_fi(file); + dentry = file->f_dentry; + sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) ++ inode = dentry->d_inode; ++ bstart = au_ibstart(inode); ++ if (bstart == finfo->fi_bstart) + goto out; + -+ /* it is not an error if h_file has no operation */ -+ mask = DEFAULT_POLLMASK; -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ if (h_file->f_op && h_file->f_op->poll) -+ mask = h_file->f_op->poll(h_file, wait); ++ parent = dget_parent(dentry); ++ if (au_test_ro(sb, bstart, inode)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ bstart = err; ++ di_read_unlock(parent, !AuLock_IR); ++ if (unlikely(err < 0)) ++ goto out_parent; ++ err = 0; ++ } + -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); ++ di_read_lock_parent(parent, AuLock_IR); ++ hi_wh = au_hi_wh(inode, bstart); ++ if (au_opt_test(au_mntflags(sb), PLINK) ++ && au_plink_test(inode) ++ && !d_unhashed(dentry)) { ++ err = au_test_and_cpup_dirs(dentry, bstart); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ /* always superio. 
*/ ++ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (!err) ++ err = au_sio_cpup_simple(dentry, bstart, -1, ++ AuCpup_DTIME); ++ au_unpin(&pin); ++ } else if (hi_wh) { ++ /* already copied-up after unlink */ ++ err = au_reopen_wh(file, bstart, hi_wh); ++ *need_reopen = 0; ++ } + ++ out_unlock: ++ di_read_unlock(parent, AuLock_IR); ++ out_parent: ++ dput(parent); + out: -+ si_read_unlock(sb); -+ AuTraceErr((int)mask); -+ return mask; ++ return err; +} + -+static int aufs_fsync_nondir(struct file *file, struct dentry *dentry, -+ int datasync) ++static void au_do_refresh_file(struct file *file) +{ -+ int err; -+ struct au_pin pin; -+ struct inode *inode; -+ struct file *h_file; ++ aufs_bindex_t bindex, bend, new_bindex, brid; ++ struct au_hfile *p, tmp, *q; ++ struct au_finfo *finfo; + struct super_block *sb; + -+ inode = dentry->d_inode; -+ IMustLock(file->f_mapping->host); -+ if (inode != file->f_mapping->host) { -+ mutex_unlock(&file->f_mapping->host->i_mutex); -+ mutex_lock(&inode->i_mutex); ++ sb = file->f_dentry->d_sb; ++ finfo = au_fi(file); ++ p = finfo->fi_hfile + finfo->fi_bstart; ++ brid = p->hf_br->br_id; ++ bend = finfo->fi_bend; ++ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) { ++ if (!p->hf_file) ++ continue; ++ ++ new_bindex = au_br_index(sb, p->hf_br->br_id); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { ++ au_set_h_fptr(file, bindex, NULL); ++ continue; ++ } ++ ++ /* swap two lower inode, and loop again */ ++ q = finfo->fi_hfile + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hf_file) { ++ bindex--; ++ p--; ++ } + } -+ IMustLock(inode); + -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); ++ p = finfo->fi_hfile; ++ if (!au_test_mmapped(file) && !d_unhashed(file->f_dentry)) { ++ bend = au_sbend(sb); ++ for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend; ++ finfo->fi_bstart++, p++) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && 
p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ } else { ++ bend = au_br_index(sb, brid); ++ for (finfo->fi_bstart = 0; finfo->fi_bstart < bend; ++ finfo->fi_bstart++, p++) ++ if (p->hf_file) ++ au_hfput(p, file); ++ bend = au_sbend(sb); ++ } + -+ err = 0; /* -EBADF; */ /* posix? */ -+ if (unlikely(!(file->f_mode & FMODE_WRITE))) -+ goto out; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; ++ p = finfo->fi_hfile + bend; ++ for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart; ++ finfo->fi_bend--, p--) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p, file); ++ } ++ AuDebugOn(finfo->fi_bend < finfo->fi_bstart); ++} + -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ au_unpin(&pin); ++/* ++ * after branch manipulating, refresh the file. ++ */ ++static int refresh_file(struct file *file, int (*reopen)(struct file *file)) ++{ ++ int err, need_reopen; ++ struct dentry *dentry; ++ aufs_bindex_t bend, bindex; + -+ err = -EINVAL; -+ h_file = au_h_fptr(file, au_fbstart(file)); -+ if (h_file->f_op && h_file->f_op->fsync) { -+ struct dentry *h_d; -+ struct mutex *h_mtx; ++ dentry = file->f_dentry; ++ err = au_fi_realloc(au_fi(file), au_sbend(dentry->d_sb) + 1); ++ if (unlikely(err)) ++ goto out; ++ au_do_refresh_file(file); + -+ /* -+ * no filemap_fdatawrite() since aufs file has no its own -+ * mapping, but dir. 
-+ */ -+ h_d = h_file->f_dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ err = h_file->f_op->fsync(h_file, h_d, datasync); -+ if (!err) -+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); -+ /*ignore*/ -+ au_cpup_attr_timesizes(inode); -+ mutex_unlock(h_mtx); ++ err = 0; ++ need_reopen = 1; ++ if (!au_test_mmapped(file)) ++ err = au_file_refresh_by_inode(file, &need_reopen); ++ if (!err && need_reopen && !d_unhashed(dentry)) ++ err = reopen(file); ++ if (!err) { ++ au_update_figen(file); ++ return 0; /* success */ + } + -+ out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); ++ /* error, close all lower files */ ++ bend = au_fbend(file); ++ for (bindex = au_fbstart(file); bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); ++ + out: -+ si_read_unlock(sb); -+ if (inode != file->f_mapping->host) { -+ mutex_unlock(&inode->i_mutex); -+ mutex_lock(&file->f_mapping->host->i_mutex); -+ } + return err; +} + -+static int aufs_fasync(int fd, struct file *file, int flag) ++/* common function to regular file and dir */ ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock) +{ + int err; -+ struct file *h_file; ++ unsigned int sigen, figen; ++ aufs_bindex_t bstart; ++ unsigned char pseudo_link; + struct dentry *dentry; -+ struct super_block *sb; + ++ err = 0; + dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; ++ sigen = au_sigen(dentry->d_sb); ++ fi_write_lock(file); ++ figen = au_figen(file); ++ di_write_lock_child(dentry); ++ bstart = au_dbstart(dentry); ++ pseudo_link = (bstart != au_ibstart(dentry->d_inode)); ++ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ goto out; /* success */ ++ } + -+ h_file = 
au_h_fptr(file, au_fbstart(file)); -+ if (h_file->f_op && h_file->f_op->fasync) -+ err = h_file->f_op->fasync(fd, h_file, flag); ++ AuDbg("sigen %d, figen %d\n", sigen, figen); ++ if (sigen != au_digen(dentry) ++ || sigen != au_iigen(dentry->d_inode)) { ++ err = au_reval_dpath(dentry, sigen); ++ if (unlikely(err < 0)) ++ goto out; ++ AuDebugOn(au_digen(dentry) != sigen ++ || au_iigen(dentry->d_inode) != sigen); ++ } + -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); ++ err = refresh_file(file, reopen); ++ if (!err) { ++ if (!wlock) { ++ di_downgrade_lock(dentry, AuLock_IR); ++ fi_downgrade_lock(file); ++ } ++ } else { ++ di_write_unlock(dentry); ++ fi_write_unlock(file); ++ } + + out: -+ si_read_unlock(sb); + return err; +} + +/* ---------------------------------------------------------------------- */ + -+const struct file_operations aufs_file_fop = { -+ /* -+ * while generic_file_llseek/_unlocked() don't use BKL, -+ * don't use it since it operates file->f_mapping->host. -+ * in aufs, it may be a real file and may confuse users by UDBA. -+ */ -+ /* .llseek = generic_file_llseek, */ ++/* cf. aufs_nopage() */ ++/* for madvise(2) */ ++static int aufs_readpage(struct file *file __maybe_unused, struct page *page) ++{ ++ unlock_page(page); ++ return 0; ++} + -+ .read = aufs_read, -+ .write = aufs_write, -+ .poll = aufs_poll, -+ .mmap = aufs_mmap, -+ .open = aufs_open_nondir, -+ .flush = aufs_flush, -+ .release = aufs_release_nondir, -+ .fsync = aufs_fsync_nondir, -+ .fasync = aufs_fasync, -+ .splice_write = aufs_splice_write, -+ .splice_read = aufs_splice_read ++/* they will never be called. 
*/ ++#ifdef CONFIG_AUFS_DEBUG ++static int aufs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ AuUnsupport(); return 0; } ++static int aufs_writepage(struct page *page, struct writeback_control *wbc) ++{ AuUnsupport(); return 0; } ++static void aufs_sync_page(struct page *page) ++{ AuUnsupport(); } ++ ++static int aufs_set_page_dirty(struct page *page) ++{ AuUnsupport(); return 0; } ++static void aufs_invalidatepage(struct page *page, unsigned long offset) ++{ AuUnsupport(); } ++static int aufs_releasepage(struct page *page, gfp_t gfp) ++{ AuUnsupport(); return 0; } ++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{ AuUnsupport(); return 0; } ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++struct address_space_operations aufs_aop = { ++ .readpage = aufs_readpage, ++#ifdef CONFIG_AUFS_DEBUG ++ .writepage = aufs_writepage, ++ .sync_page = aufs_sync_page, ++ .set_page_dirty = aufs_set_page_dirty, ++ .write_begin = aufs_write_begin, ++ .write_end = aufs_write_end, ++ .invalidatepage = aufs_invalidatepage, ++ .releasepage = aufs_releasepage, ++ .direct_IO = aufs_direct_IO, ++#endif /* CONFIG_AUFS_DEBUG */ +}; -diff --git a/fs/aufs/file.c b/fs/aufs/file.c -new file mode 100644 -index 0000000..d472587 ---- /dev/null -+++ b/fs/aufs/file.c -@@ -0,0 +1,557 @@ +diff -urN linux-2.6.30.org/fs/aufs/file.h linux-2.6.30/fs/aufs/file.h +--- linux-2.6.30.org/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/file.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,167 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -10053,1165 +7315,1440 @@ index 0000000..d472587 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * handling file/dir, and address_space operation ++ * file operations + */ + -+#include -+#include -+#include "aufs.h" ++#ifndef __AUFS_FILE_H__ ++#define __AUFS_FILE_H__ + -+/* -+ * a dirty trick for handling deny_write_access(). -+ * because FMODE_EXEC flag is not passed to f_op->open(), -+ * set it to file->private_data temporary. 
-+ */ -+void au_store_oflag(struct nameidata *nd, struct inode *inode) -+{ -+ if (nd -+ /* && !(nd->flags & LOOKUP_CONTINUE) */ -+ && (nd->flags & LOOKUP_OPEN) -+ && (nd->intent.open.flags & vfsub_fmode_to_uint(FMODE_EXEC)) -+ && inode -+ && S_ISREG(inode->i_mode)) { -+ /* suppress a warning in lp64 */ -+ unsigned long flags = nd->intent.open.flags; -+ nd->intent.open.file->private_data = (void *)flags; -+ /* smp_mb(); */ -+ } -+} ++#ifdef __KERNEL__ + -+/* drop flags for writing */ -+unsigned int au_file_roflags(unsigned int flags) -+{ -+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); -+ flags |= O_RDONLY | O_NOATIME; -+ return flags; -+} ++#include ++#include ++#include ++#include "rwsem.h" + -+/* common functions to regular file and dir */ -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ struct file *file) -+{ -+ struct file *h_file; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; -+ int err; ++struct au_branch; ++struct au_hfile { ++ struct file *hf_file; ++ struct au_branch *hf_br; ++}; + -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ /* a race condition can happen between open and unlink/rmdir */ -+ h_file = ERR_PTR(-ENOENT); -+ if (unlikely((!d_unhashed(dentry) && d_unhashed(h_dentry)) -+ || !h_inode)) -+ goto out; ++struct au_vdir; ++struct au_finfo { ++ atomic_t fi_generation; + -+ sb = dentry->d_sb; -+ br = au_sbr(sb, bindex); -+ h_file = ERR_PTR(-EACCES); -+ if (file && (file->f_mode & FMODE_EXEC) -+ && (br->br_mnt->mnt_flags & MNT_NOEXEC)) -+ goto out; ++ struct au_rwsem fi_rwsem; ++ struct au_hfile *fi_hfile; ++ aufs_bindex_t fi_bstart, fi_bend; + -+ /* drop flags for writing */ -+ if (au_test_ro(sb, bindex, dentry->d_inode)) -+ flags = au_file_roflags(flags); -+ flags &= ~O_CREAT; -+ atomic_inc(&br->br_count); -+ h_file = dentry_open(dget(h_dentry), mntget(br->br_mnt), flags, -+ current_cred()); -+ if 
(IS_ERR(h_file)) -+ goto out_br; ++ union { ++ /* non-dir only */ ++ struct { ++ struct vm_operations_struct *fi_h_vm_ops; ++ struct vm_operations_struct *fi_vm_ops; ++ }; + -+ if (file && (file->f_mode & FMODE_EXEC)) { -+ h_file->f_mode |= FMODE_EXEC; -+ err = deny_write_access(h_file); -+ if (unlikely(err)) { -+ fput(h_file); -+ h_file = ERR_PTR(err); -+ goto out_br; -+ } -+ } -+ fsnotify_open(h_dentry); -+ goto out; /* success */ ++ /* dir only */ ++ struct { ++ struct au_vdir *fi_vdir_cache; ++ int fi_maintain_plink; ++ }; ++ }; ++}; + -+ out_br: -+ atomic_dec(&br->br_count); -+ out: -+ return h_file; -+} ++/* ---------------------------------------------------------------------- */ + -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags)) -+{ -+ int err; -+ struct dentry *dentry; -+ struct super_block *sb; ++/* file.c */ ++extern struct address_space_operations aufs_aop; ++void au_store_oflag(struct nameidata *nd, struct inode *inode); ++unsigned int au_file_roflags(unsigned int flags); ++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, ++ struct file *file); ++int au_do_open(struct file *file, int (*open)(struct file *file, int flags)); ++int au_reopen_nondir(struct file *file); ++struct au_pin; ++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); ++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), ++ int wlock); + -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_finfo_init(file); -+ if (unlikely(err)) -+ goto out; ++/* poll.c */ ++#ifdef CONFIG_AUFS_POLL ++unsigned int aufs_poll(struct file *file, poll_table *wait); ++#endif + -+ di_read_lock_child(dentry, AuLock_IR); -+ err = open(file, file->f_flags); -+ di_read_unlock(dentry, AuLock_IR); ++/* f_op.c */ ++extern const struct file_operations aufs_file_fop; ++int aufs_flush(struct file *file, fl_owner_t id); + -+ fi_write_unlock(file); -+ if (unlikely(err)) 
-+ au_finfo_fin(file); -+ out: -+ si_read_unlock(sb); -+ return err; -+} ++/* finfo.c */ ++void au_hfput(struct au_hfile *hf, struct file *file); ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, ++ struct file *h_file); + -+int au_reopen_nondir(struct file *file) -+{ -+ int err; -+ aufs_bindex_t bstart, bindex, bend; -+ struct dentry *dentry; -+ struct file *h_file, *h_file_tmp; ++void au_update_figen(struct file *file); + -+ dentry = file->f_dentry; -+ bstart = au_dbstart(dentry); -+ h_file_tmp = NULL; -+ if (au_fbstart(file) == bstart) { -+ h_file = au_h_fptr(file, bstart); -+ if (file->f_mode == h_file->f_mode) -+ return 0; /* success */ -+ h_file_tmp = h_file; -+ get_file(h_file_tmp); -+ au_set_h_fptr(file, bstart, NULL); -+ } -+ AuDebugOn(au_fbstart(file) < bstart -+ || au_fi(file)->fi_hfile[0 + bstart].hf_file); ++void au_finfo_fin(struct file *file); ++int au_finfo_init(struct file *file); ++int au_fi_realloc(struct au_finfo *finfo, int nbr); + -+ h_file = au_h_open(dentry, bstart, file->f_flags & ~O_TRUNC, file); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; /* todo: close all? */ ++/* ---------------------------------------------------------------------- */ + -+ err = 0; -+ au_set_fbstart(file, bstart); -+ au_set_h_fptr(file, bstart, h_file); -+ au_update_figen(file); -+ /* todo: necessary? 
*/ -+ /* file->f_ra = h_file->f_ra; */ ++static inline struct au_finfo *au_fi(struct file *file) ++{ ++ return file->private_data; ++} + -+ /* close lower files */ -+ bend = au_fbend(file); -+ for (bindex = bstart + 1; bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbend(file, bstart); ++/* ---------------------------------------------------------------------- */ + -+ out: -+ if (h_file_tmp) -+ fput(h_file_tmp); -+ return err; -+} ++/* ++ * fi_read_lock, fi_write_lock, ++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock ++ */ ++AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ ++#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) ++#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) ++#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) + +/* ---------------------------------------------------------------------- */ + -+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, -+ struct dentry *hi_wh) ++/* todo: hard/soft set? 
*/ ++static inline aufs_bindex_t au_fbstart(struct file *file) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct au_dinfo *dinfo; -+ struct dentry *h_dentry; ++ return au_fi(file)->fi_bstart; ++} + -+ dinfo = au_di(file->f_dentry); -+ bstart = dinfo->di_bstart; -+ dinfo->di_bstart = btgt; -+ h_dentry = dinfo->di_hdentry[0 + btgt].hd_dentry; -+ dinfo->di_hdentry[0 + btgt].hd_dentry = hi_wh; -+ err = au_reopen_nondir(file); -+ dinfo->di_hdentry[0 + btgt].hd_dentry = h_dentry; -+ dinfo->di_bstart = bstart; ++static inline aufs_bindex_t au_fbend(struct file *file) ++{ ++ return au_fi(file)->fi_bend; ++} + -+ return err; ++static inline struct au_vdir *au_fvdir_cache(struct file *file) ++{ ++ return au_fi(file)->fi_vdir_cache; +} + -+static int au_ready_to_write_wh(struct file *file, loff_t len, -+ aufs_bindex_t bcpup) ++static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) +{ -+ int err; -+ struct inode *inode; -+ struct dentry *dentry, *hi_wh; -+ struct super_block *sb; ++ au_fi(file)->fi_bstart = bindex; ++} + -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ hi_wh = au_hi_wh(inode, bcpup); -+ if (!hi_wh) -+ err = au_sio_cpup_wh(dentry, bcpup, len, file); -+ else -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, bcpup, hi_wh); ++static inline void au_set_fbend(struct file *file, aufs_bindex_t bindex) ++{ ++ au_fi(file)->fi_bend = bindex; ++} + -+ sb = dentry->d_sb; -+ if (!err && inode->i_nlink > 1 && au_opt_test(au_mntflags(sb), PLINK)) -+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup)); ++static inline void au_set_fvdir_cache(struct file *file, ++ struct au_vdir *vdir_cache) ++{ ++ au_fi(file)->fi_vdir_cache = vdir_cache; ++} + -+ return err; ++static inline struct file *au_h_fptr(struct file *file, aufs_bindex_t bindex) ++{ ++ return au_fi(file)->fi_hfile[0 + bindex].hf_file; +} + -+/* -+ * prepare the @file for writing. 
-+ */ -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) ++/* todo: memory barrier? */ ++static inline unsigned int au_figen(struct file *f) +{ -+ int err; -+ aufs_bindex_t bstart, bcpup; -+ struct dentry *dentry, *parent, *h_dentry; -+ struct inode *h_inode, *inode; -+ struct super_block *sb; ++ return atomic_read(&au_fi(f)->fi_generation); ++} + -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ bstart = au_fbstart(file); -+ inode = dentry->d_inode; -+ err = au_test_ro(sb, bstart, inode); -+ if (!err && (au_h_fptr(file, bstart)->f_mode & FMODE_WRITE)) { -+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0); -+ goto out; -+ } ++static inline int au_test_mmapped(struct file *f) ++{ ++ return !!(au_fi(f)->fi_h_vm_ops); ++} + -+ /* need to cpup */ -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ bcpup = err; -+ if (unlikely(err < 0)) -+ goto out_dgrade; -+ err = 0; ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FILE_H__ */ +diff -urN linux-2.6.30.org/fs/aufs/finfo.c linux-2.6.30/fs/aufs/finfo.c +--- linux-2.6.30.org/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/finfo.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ if (!au_h_dptr(parent, bcpup)) { -+ err = au_cpup_dirs(dentry, bcpup); -+ if (unlikely(err)) -+ goto out_dgrade; -+ } ++/* ++ * file private data ++ */ + -+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_dgrade; ++#include ++#include "aufs.h" + -+ h_dentry = au_h_fptr(file, bstart)->f_dentry; -+ h_inode = h_dentry->d_inode; -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ if (d_unhashed(dentry) /* || d_unhashed(h_dentry) */ -+ /* || !h_inode->i_nlink */) { -+ err = au_ready_to_write_wh(file, len, bcpup); -+ di_downgrade_lock(parent, AuLock_IR); -+ } else { -+ di_downgrade_lock(parent, AuLock_IR); -+ if (!au_h_dptr(dentry, bcpup)) -+ err = au_sio_cpup_simple(dentry, bcpup, len, -+ AuCpup_DTIME); -+ if (!err) -+ err = au_reopen_nondir(file); -+ } -+ mutex_unlock(&h_inode->i_mutex); ++void au_hfput(struct au_hfile *hf, struct file *file) ++{ ++ if (file->f_mode & FMODE_EXEC) ++ allow_write_access(hf->hf_file); ++ fput(hf->hf_file); ++ hf->hf_file = NULL; ++ atomic_dec_return(&hf->hf_br->br_count); ++ hf->hf_br = NULL; ++} + -+ if (!err) { -+ au_pin_set_parent_lflag(pin, /*lflag*/0); -+ goto out_dput; /* success */ ++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++{ ++ struct au_finfo *finfo = au_fi(file); ++ struct au_hfile *hf; ++ ++ hf = finfo->fi_hfile + bindex; ++ if (hf->hf_file) ++ au_hfput(hf, file); ++ if (val) { ++ hf->hf_file = val; ++ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); + } -+ au_unpin(pin); -+ goto out_unlock; ++} + -+ out_dgrade: -+ di_downgrade_lock(parent, AuLock_IR); -+ out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+ out_dput: -+ dput(parent); -+ out: -+ return err; ++void 
au_update_figen(struct file *file) ++{ ++ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); ++ /* smp_mb(); */ /* atomic_set */ +} + +/* ---------------------------------------------------------------------- */ + -+static int au_file_refresh_by_inode(struct file *file, int *need_reopen) ++void au_finfo_fin(struct file *file) +{ -+ int err; -+ aufs_bindex_t bstart; -+ struct au_pin pin; + struct au_finfo *finfo; -+ struct dentry *dentry, *parent, *hi_wh; -+ struct inode *inode; -+ struct super_block *sb; ++ aufs_bindex_t bindex, bend; ++ ++ fi_write_lock(file); ++ bend = au_fbend(file); ++ bindex = au_fbstart(file); ++ if (bindex >= 0) ++ /* ++ * calls fput() instead of filp_close(), ++ * since no dnotify or lock for the lower file. ++ */ ++ for (; bindex <= bend; bindex++) ++ au_set_h_fptr(file, bindex, NULL); + -+ err = 0; + finfo = au_fi(file); ++ au_dbg_verify_hf(finfo); ++ kfree(finfo->fi_hfile); ++ fi_write_unlock(file); ++ AuRwDestroy(&finfo->fi_rwsem); ++ au_cache_free_finfo(finfo); ++} ++ ++int au_finfo_init(struct file *file) ++{ ++ struct au_finfo *finfo; ++ struct dentry *dentry; ++ unsigned long ul; ++ + dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ bstart = au_ibstart(inode); -+ if (bstart == finfo->fi_bstart) ++ finfo = au_cache_alloc_finfo(); ++ if (unlikely(!finfo)) + goto out; + -+ parent = dget_parent(dentry); -+ if (au_test_ro(sb, bstart, inode)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ bstart = err; -+ di_read_unlock(parent, !AuLock_IR); -+ if (unlikely(err < 0)) -+ goto out_parent; -+ err = 0; -+ } ++ finfo->fi_hfile = kcalloc(au_sbend(dentry->d_sb) + 1, ++ sizeof(*finfo->fi_hfile), GFP_NOFS); ++ if (unlikely(!finfo->fi_hfile)) ++ goto out_finfo; + -+ di_read_lock_parent(parent, AuLock_IR); -+ hi_wh = au_hi_wh(inode, bstart); -+ if (au_opt_test(au_mntflags(sb), PLINK) -+ && au_plink_test(inode) -+ && !d_unhashed(dentry)) { -+ err = 
au_test_and_cpup_dirs(dentry, bstart); -+ if (unlikely(err)) -+ goto out_unlock; ++ au_rw_init_wlock(&finfo->fi_rwsem); ++ finfo->fi_bstart = -1; ++ finfo->fi_bend = -1; ++ atomic_set(&finfo->fi_generation, au_digen(dentry)); ++ /* smp_mb(); */ /* atomic_set */ + -+ /* always superio. */ -+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (!err) -+ err = au_sio_cpup_simple(dentry, bstart, -1, -+ AuCpup_DTIME); -+ au_unpin(&pin); -+ } else if (hi_wh) { -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, bstart, hi_wh); -+ *need_reopen = 0; -+ } ++ /* cf. au_store_oflag() */ ++ /* suppress a warning in lp64 */ ++ ul = (unsigned long)file->private_data; ++ file->f_mode |= (vfsub_uint_to_fmode(ul) & FMODE_EXEC); ++ file->private_data = finfo; ++ return 0; /* success */ + -+ out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+ out_parent: -+ dput(parent); ++ out_finfo: ++ au_cache_free_finfo(finfo); + out: -+ return err; ++ return -ENOMEM; +} + -+static void au_do_refresh_file(struct file *file) ++int au_fi_realloc(struct au_finfo *finfo, int nbr) +{ -+ aufs_bindex_t bindex, bend, new_bindex, brid; -+ struct au_hfile *p, tmp, *q; -+ struct au_finfo *finfo; -+ struct super_block *sb; -+ -+ sb = file->f_dentry->d_sb; -+ finfo = au_fi(file); -+ p = finfo->fi_hfile + finfo->fi_bstart; -+ brid = p->hf_br->br_id; -+ bend = finfo->fi_bend; -+ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) { -+ if (!p->hf_file) -+ continue; -+ -+ new_bindex = au_br_index(sb, p->hf_br->br_id); -+ if (new_bindex == bindex) -+ continue; -+ if (new_bindex < 0) { -+ au_set_h_fptr(file, bindex, NULL); -+ continue; -+ } -+ -+ /* swap two lower inode, and loop again */ -+ q = finfo->fi_hfile + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hf_file) { -+ bindex--; -+ p--; -+ } -+ } ++ int err, sz; ++ struct au_hfile *hfp; + -+ p = finfo->fi_hfile; -+ if (!au_test_mmapped(file) && !d_unhashed(file->f_dentry)) { 
-+ bend = au_sbend(sb); -+ for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend; -+ finfo->fi_bstart++, p++) -+ if (p->hf_file) { -+ if (p->hf_file->f_dentry -+ && p->hf_file->f_dentry->d_inode) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ } else { -+ bend = au_br_index(sb, brid); -+ for (finfo->fi_bstart = 0; finfo->fi_bstart < bend; -+ finfo->fi_bstart++, p++) -+ if (p->hf_file) -+ au_hfput(p, file); -+ bend = au_sbend(sb); ++ err = -ENOMEM; ++ sz = sizeof(*hfp) * (finfo->fi_bend + 1); ++ if (!sz) ++ sz = sizeof(*hfp); ++ hfp = au_kzrealloc(finfo->fi_hfile, sz, sizeof(*hfp) * nbr, GFP_NOFS); ++ if (hfp) { ++ finfo->fi_hfile = hfp; ++ err = 0; + } + -+ p = finfo->fi_hfile + bend; -+ for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart; -+ finfo->fi_bend--, p--) -+ if (p->hf_file) { -+ if (p->hf_file->f_dentry -+ && p->hf_file->f_dentry->d_inode) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ AuDebugOn(finfo->fi_bend < finfo->fi_bstart); ++ return err; +} +diff -urN linux-2.6.30.org/fs/aufs/f_op.c linux-2.6.30/fs/aufs/f_op.c +--- linux-2.6.30.org/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/f_op.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,801 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + +/* -+ * after branch manipulating, refresh the file. ++ * file and vm operations + */ -+static int refresh_file(struct file *file, int (*reopen)(struct file *file)) ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++/* common function to regular file and dir */ ++int aufs_flush(struct file *file, fl_owner_t id) +{ -+ int err, need_reopen; ++ int err; ++ aufs_bindex_t bindex, bend; + struct dentry *dentry; -+ aufs_bindex_t bend, bindex; ++ struct file *h_file; + + dentry = file->f_dentry; -+ err = au_fi_realloc(au_fi(file), au_sbend(dentry->d_sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ au_do_refresh_file(file); ++ si_noflush_read_lock(dentry->d_sb); ++ fi_read_lock(file); ++ di_read_lock_child(dentry, AuLock_IW); + + err = 0; -+ need_reopen = 1; -+ if (!au_test_mmapped(file)) -+ err = au_file_refresh_by_inode(file, &need_reopen); -+ if (!err && need_reopen && !d_unhashed(dentry)) -+ err = reopen(file); -+ if (!err) { -+ au_update_figen(file); -+ return 0; /* success */ -+ } -+ -+ /* error, close all lower files */ + bend = au_fbend(file); -+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); ++ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ h_file = au_h_fptr(file, bindex); ++ if (!h_file || !h_file->f_op || !h_file->f_op->flush) ++ continue; + -+ out: ++ err = h_file->f_op->flush(h_file, id); ++ if (!err) ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); ++ /*ignore*/ ++ } ++ au_cpup_attr_timesizes(dentry->d_inode); ++ ++ di_read_unlock(dentry, AuLock_IW); ++ fi_read_unlock(file); ++ si_read_unlock(dentry->d_sb); + return err; +} + -+/* common function to regular file and dir */ -+int au_reval_and_lock_fdi(struct file *file, int 
(*reopen)(struct file *file), -+ int wlock) ++/* ---------------------------------------------------------------------- */ ++ ++static int do_open_nondir(struct file *file, int flags) +{ + int err; -+ unsigned int sigen, figen; -+ aufs_bindex_t bstart; -+ unsigned char pseudo_link; ++ aufs_bindex_t bindex; ++ struct file *h_file; + struct dentry *dentry; ++ struct au_finfo *finfo; + + err = 0; + dentry = file->f_dentry; -+ sigen = au_sigen(dentry->d_sb); -+ fi_write_lock(file); -+ figen = au_figen(file); -+ di_write_lock_child(dentry); -+ bstart = au_dbstart(dentry); -+ pseudo_link = (bstart != au_ibstart(dentry->d_inode)); -+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ goto out; /* success */ -+ } -+ -+ AuDbg("sigen %d, figen %d\n", sigen, figen); -+ if (sigen != au_digen(dentry) -+ || sigen != au_iigen(dentry->d_inode)) { -+ err = au_reval_dpath(dentry, sigen); -+ if (unlikely(err < 0)) -+ goto out; -+ AuDebugOn(au_digen(dentry) != sigen -+ || au_iigen(dentry->d_inode) != sigen); -+ } ++ finfo = au_fi(file); ++ finfo->fi_h_vm_ops = NULL; ++ finfo->fi_vm_ops = NULL; ++ bindex = au_dbstart(dentry); ++ /* O_TRUNC is processed already */ ++ BUG_ON(au_test_ro(dentry->d_sb, bindex, dentry->d_inode) ++ && (flags & O_TRUNC)); + -+ err = refresh_file(file, reopen); -+ if (!err) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ } else { -+ di_write_unlock(dentry); -+ fi_write_unlock(file); ++ h_file = au_h_open(dentry, bindex, flags, file); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ au_set_fbstart(file, bindex); ++ au_set_fbend(file, bindex); ++ au_set_h_fptr(file, bindex, h_file); ++ au_update_figen(file); ++ /* todo: necessary? 
*/ ++ /* file->f_ra = h_file->f_ra; */ + } -+ -+ out: + return err; +} + -+/* ---------------------------------------------------------------------- */ ++static int aufs_open_nondir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ return au_do_open(file, do_open_nondir); ++} + -+/* cf. aufs_nopage() */ -+/* for madvise(2) */ -+static int aufs_readpage(struct file *file __maybe_unused, struct page *page) ++static int aufs_release_nondir(struct inode *inode __maybe_unused, ++ struct file *file) +{ -+ unlock_page(page); ++ struct super_block *sb = file->f_dentry->d_sb; ++ ++ si_noflush_read_lock(sb); ++ kfree(au_fi(file)->fi_vm_ops); ++ au_finfo_fin(file); ++ si_read_unlock(sb); + return 0; +} + -+/* they will never be called. */ -+#ifdef CONFIG_AUFS_DEBUG -+static int aufs_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_writepage(struct page *page, struct writeback_control *wbc) -+{ AuUnsupport(); return 0; } -+static void aufs_sync_page(struct page *page) -+{ AuUnsupport(); } ++/* ---------------------------------------------------------------------- */ + -+static int aufs_set_page_dirty(struct page *page) -+{ AuUnsupport(); return 0; } -+static void aufs_invalidatepage(struct page *page, unsigned long offset) -+{ AuUnsupport(); } -+static int aufs_releasepage(struct page *page, gfp_t gfp) -+{ AuUnsupport(); return 0; } -+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, -+ const struct iovec *iov, loff_t offset, -+ unsigned long nr_segs) -+{ AuUnsupport(); return 0; } -+#endif /* CONFIG_AUFS_DEBUG */ ++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t 
err; ++ struct dentry *dentry; ++ struct file *h_file; ++ struct super_block *sb; + -+struct address_space_operations aufs_aop = { -+ .readpage = aufs_readpage, -+#ifdef CONFIG_AUFS_DEBUG -+ .writepage = aufs_writepage, -+ .sync_page = aufs_sync_page, -+ .set_page_dirty = aufs_set_page_dirty, -+ .write_begin = aufs_write_begin, -+ .write_end = aufs_write_end, -+ .invalidatepage = aufs_invalidatepage, -+ .releasepage = aufs_releasepage, -+ .direct_IO = aufs_direct_IO, -+#endif /* CONFIG_AUFS_DEBUG */ -+}; -diff --git a/fs/aufs/file.h b/fs/aufs/file.h -new file mode 100644 -index 0000000..dd9ee61 ---- /dev/null -+++ b/fs/aufs/file.h -@@ -0,0 +1,148 @@ -+/* -+ * Copyright (C) 2005-2009 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ -+ -+/* -+ * file operations -+ */ -+ -+#ifndef __AUFS_FILE_H__ -+#define __AUFS_FILE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+#include "rwsem.h" ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; + -+struct au_branch; -+struct au_hfile { -+ struct file *hf_file; -+ struct au_branch *hf_br; -+}; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ err = vfsub_read_u(h_file, buf, count, ppos); ++ /* todo: necessary? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); + -+struct au_vdir; -+struct au_finfo { -+ atomic_t fi_generation; ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ out: ++ si_read_unlock(sb); ++ return err; ++} + -+ struct rw_semaphore fi_rwsem; -+ struct au_hfile *fi_hfile; -+ aufs_bindex_t fi_bstart, fi_bend; ++static ssize_t aufs_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *h_file; ++ char __user *buf = (char __user *)ubuf; + -+ union { -+ /* non-dir only */ -+ struct vm_operations_struct *fi_h_vm_ops; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ mutex_lock(&inode->i_mutex); ++ si_read_lock(sb, AuLock_FLUSH); + -+ /* dir only */ -+ struct { -+ struct au_vdir *fi_vdir_cache; -+ int fi_maintain_plink; -+ }; -+ }; -+}; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; + -+/* ---------------------------------------------------------------------- */ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; + -+/* file.c */ -+extern struct address_space_operations aufs_aop; -+void au_store_oflag(struct nameidata *nd, struct inode *inode); -+unsigned int au_file_roflags(unsigned int flags); -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ struct file *file); -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags)); -+int au_reopen_nondir(struct file *file); -+struct au_pin; -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); -+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), -+ int wlock); ++ bstart = au_fbstart(file); ++ h_file = 
au_h_fptr(file, bstart); ++ au_unpin(&pin); ++ err = vfsub_write_u(h_file, buf, count, ppos); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; + -+/* f_op.c */ -+extern const struct file_operations aufs_file_fop; -+int aufs_flush(struct file *file, fl_owner_t id); ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} + -+/* finfo.c */ -+void au_hfput(struct au_hfile *hf, struct file *file); -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, -+ struct file *h_file); ++static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) ++{ ++ ssize_t err; ++ struct file *file, *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; + -+void au_update_figen(struct file *file); ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; + -+void au_finfo_fin(struct file *file); -+int au_finfo_init(struct file *file); -+int au_fi_realloc(struct au_finfo *finfo, int nbr); ++ err = -ENOSYS; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (h_file->f_op && h_file->f_op->aio_read) { ++ err = security_file_permission(h_file, MAY_READ); ++ if (unlikely(err)) ++ goto out_unlock; ++ if (!is_sync_kiocb(kio)) { ++ get_file(h_file); ++ fput(file); ++ } ++ kio->ki_filp = h_file; ++ err = h_file->f_op->aio_read(kio, iov, nv, pos); ++ /* todo: necessary? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ fsstack_copy_attr_atime(dentry->d_inode, ++ h_file->f_dentry->d_inode); ++ } else ++ /* currently there is no such fs */ ++ WARN_ON_ONCE(h_file->f_op && h_file->f_op->read); + -+/* ---------------------------------------------------------------------- */ ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ out: ++ si_read_unlock(sb); ++ return err; ++} + -+static inline struct au_finfo *au_fi(struct file *file) ++static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov, ++ unsigned long nv, loff_t pos) +{ -+ return file->private_data; -+} ++ ssize_t err; ++ aufs_bindex_t bstart; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *file, *h_file; + -+/* ---------------------------------------------------------------------- */ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ mutex_lock(&inode->i_mutex); ++ si_read_lock(sb, AuLock_FLUSH); + -+/* -+ * fi_read_lock, fi_write_lock, -+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock -+ */ -+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; + -+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; + -+/* ---------------------------------------------------------------------- */ ++ err = -ENOSYS; ++ bstart = au_fbstart(file); ++ h_file = au_h_fptr(file, bstart); ++ au_unpin(&pin); ++ if (h_file->f_op && h_file->f_op->aio_write) { ++ err = security_file_permission(h_file, MAY_WRITE); ++ if (unlikely(err)) ++ goto out_unlock; ++ if (!is_sync_kiocb(kio)) { ++ get_file(h_file); ++ fput(file); ++ } ++ kio->ki_filp = h_file; ++ err = h_file->f_op->aio_write(kio, iov, nv, pos); 
++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; ++ } else ++ /* currently there is no such fs */ ++ WARN_ON_ONCE(h_file->f_op && h_file->f_op->write); + -+/* todo: hard/soft set? */ -+static inline aufs_bindex_t au_fbstart(struct file *file) -+{ -+ return au_fi(file)->fi_bstart; ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; +} + -+static inline aufs_bindex_t au_fbend(struct file *file) ++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) +{ -+ return au_fi(file)->fi_bend; -+} ++ ssize_t err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; + -+static inline struct au_vdir *au_fvdir_cache(struct file *file) -+{ -+ return au_fi(file)->fi_vdir_cache; -+} ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; + -+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) -+{ -+ au_fi(file)->fi_bstart = bindex; -+} ++ err = -EINVAL; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (au_test_loopback_kthread()) { ++ file->f_mapping = h_file->f_mapping; ++ smp_mb(); /* unnecessary? */ ++ } ++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); ++ /* todo: necessasry? 
*/ ++ /* file->f_ra = h_file->f_ra; */ ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); + -+static inline void au_set_fbend(struct file *file, aufs_bindex_t bindex) -+{ -+ au_fi(file)->fi_bend = bindex; -+} ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); + -+static inline void au_set_fvdir_cache(struct file *file, -+ struct au_vdir *vdir_cache) -+{ -+ au_fi(file)->fi_vdir_cache = vdir_cache; ++ out: ++ si_read_unlock(sb); ++ return err; +} + -+static inline struct file *au_h_fptr(struct file *file, aufs_bindex_t bindex) -+{ -+ return au_fi(file)->fi_hfile[0 + bindex].hf_file; -+} -+ -+/* todo: memory barrier? */ -+static inline unsigned int au_figen(struct file *f) ++static ssize_t ++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) +{ -+ return atomic_read(&au_fi(f)->fi_generation); -+} ++ ssize_t err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *h_file; + -+static inline int au_test_mmapped(struct file *f) -+{ -+ return !!(au_fi(f)->fi_h_vm_ops); -+} ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ mutex_lock(&inode->i_mutex); ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_FILE_H__ */ -diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c -new file mode 100644 -index 0000000..dfb4851 ---- /dev/null -+++ b/fs/aufs/finfo.c -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (C) 2005-2009 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ */ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; + -+/* -+ * file private data -+ */ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; + -+#include "aufs.h" ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ au_unpin(&pin); ++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); ++ au_cpup_attr_timesizes(inode); ++ inode->i_mode = h_file->f_dentry->d_inode->i_mode; + -+void au_hfput(struct au_hfile *hf, struct file *file) -+{ -+ if (file->f_mode & FMODE_EXEC) -+ allow_write_access(hf->hf_file); -+ fput(hf->hf_file); -+ hf->hf_file = NULL; -+ atomic_dec(&hf->hf_br->br_count); -+ hf->hf_br = NULL; ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; +} + -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++/* ---------------------------------------------------------------------- */ ++ ++static struct file *au_safe_file(struct vm_area_struct *vma) +{ -+ struct au_finfo *finfo = au_fi(file); -+ struct au_hfile *hf; ++ struct file *file; + -+ hf = finfo->fi_hfile + bindex; -+ if (hf->hf_file) -+ au_hfput(hf, file); -+ if (val) { -+ hf->hf_file = val; -+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); -+ } ++ file = vma->vm_file; ++ if (file->private_data && au_test_aufs(file->f_dentry->d_sb)) ++ return file; ++ return NULL; +} + -+void au_update_figen(struct file *file) ++static void au_reset_file(struct vm_area_struct *vma, struct file *file) +{ -+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); -+ /* smp_mb(); */ /* atomic_set */ ++ vma->vm_file = file; ++ /* smp_mb(); */ /* flush vm_file */ +} + -+/* ---------------------------------------------------------------------- */ -+ -+void au_finfo_fin(struct file *file) ++static int aufs_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf) +{ ++ int err; ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ struct file *file, *h_file; + struct au_finfo *finfo; -+ aufs_bindex_t bindex, bend; + -+ fi_write_lock(file); -+ bend = au_fbend(file); -+ bindex = au_fbstart(file); -+ if (bindex >= 0) -+ /* -+ * calls fput() instead of filp_close(), -+ * since no dnotify or lock for the lower file. -+ */ -+ for (; bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); ++ /* todo: non-robr mode, user vm_file as it is? */ ++ wait_event(wq, (file = au_safe_file(vma))); + ++ /* do not revalidate, no si lock */ + finfo = au_fi(file); -+ au_dbg_verify_hf(finfo); -+ kfree(finfo->fi_hfile); ++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file; ++ AuDebugOn(!h_file || !au_test_mmapped(file)); ++ ++ fi_write_lock(file); ++ vma->vm_file = h_file; ++ err = finfo->fi_h_vm_ops->fault(vma, vmf); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ au_reset_file(vma, file); + fi_write_unlock(file); -+ au_rwsem_destroy(&finfo->fi_rwsem); -+ au_cache_free_finfo(finfo); ++#if 0 /* def CONFIG_SMP */ ++ /* wake_up_nr(&wq, online_cpu - 1); */ ++ wake_up_all(&wq); ++#else ++ wake_up(&wq); ++#endif ++ ++ return err; +} + -+int au_finfo_init(struct file *file) ++static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ ++ int err; ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ struct file *file, *h_file; + struct au_finfo *finfo; -+ struct dentry *dentry; -+ unsigned long ul; -+ -+ dentry = file->f_dentry; -+ finfo = au_cache_alloc_finfo(); -+ if (unlikely(!finfo)) -+ goto out; + -+ finfo->fi_hfile = kcalloc(au_sbend(dentry->d_sb) + 1, -+ sizeof(*finfo->fi_hfile), GFP_NOFS); -+ if (unlikely(!finfo->fi_hfile)) -+ goto out_finfo; ++ wait_event(wq, (file = au_safe_file(vma))); + -+ init_rwsem(&finfo->fi_rwsem); -+ down_write(&finfo->fi_rwsem); -+ finfo->fi_bstart = -1; -+ finfo->fi_bend = -1; -+ atomic_set(&finfo->fi_generation, au_digen(dentry)); -+ /* smp_mb(); */ /* atomic_set */ ++ finfo 
= au_fi(file); ++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file; ++ AuDebugOn(!h_file || !au_test_mmapped(file)); + -+ /* cf. au_store_oflag() */ -+ /* suppress a warning in lp64 */ -+ ul = (unsigned long)file->private_data; -+ file->f_mode |= (vfsub_uint_to_fmode(ul) & FMODE_EXEC); -+ file->private_data = finfo; -+ return 0; /* success */ ++ fi_write_lock(file); ++ vma->vm_file = h_file; ++ err = finfo->fi_h_vm_ops->page_mkwrite(vma, vmf); ++ au_reset_file(vma, file); ++ fi_write_unlock(file); ++ wake_up(&wq); + -+ out_finfo: -+ au_cache_free_finfo(finfo); -+ out: -+ return -ENOMEM; ++ return err; +} + -+int au_fi_realloc(struct au_finfo *finfo, int nbr) ++static void aufs_vm_close(struct vm_area_struct *vma) +{ -+ int err, sz; -+ struct au_hfile *hfp; ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ struct file *file, *h_file; ++ struct au_finfo *finfo; + -+ err = -ENOMEM; -+ sz = sizeof(*hfp) * (finfo->fi_bend + 1); -+ if (!sz) -+ sz = sizeof(*hfp); -+ hfp = au_kzrealloc(finfo->fi_hfile, sz, sizeof(*hfp) * nbr, GFP_NOFS); -+ if (hfp) { -+ finfo->fi_hfile = hfp; -+ err = 0; -+ } ++ wait_event(wq, (file = au_safe_file(vma))); + -+ return err; ++ finfo = au_fi(file); ++ h_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file; ++ AuDebugOn(!h_file || !au_test_mmapped(file)); ++ ++ fi_write_lock(file); ++ vma->vm_file = h_file; ++ finfo->fi_h_vm_ops->close(vma); ++ au_reset_file(vma, file); ++ fi_write_unlock(file); ++ wake_up(&wq); +} -diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h -new file mode 100644 -index 0000000..54896c6 ---- /dev/null -+++ b/fs/aufs/fstype.h -@@ -0,0 +1,464 @@ -+/* -+ * Copyright (C) 2005-2009 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ */ + -+/* -+ * judging filesystem type -+ */ ++static struct vm_operations_struct aufs_vm_ops = { ++ /* .close and .page_mkwrite are not set by default */ ++ .fault = aufs_fault, ++}; + -+#ifndef __AUFS_FSTYPE_H__ -+#define __AUFS_FSTYPE_H__ ++/* ---------------------------------------------------------------------- */ + -+#ifdef __KERNEL__ ++static struct vm_operations_struct *au_vm_ops(struct file *h_file, ++ struct vm_area_struct *vma) ++{ ++ struct vm_operations_struct *vm_ops; ++ int err; + -+#include -+#include -+#include -+#include -+#include ++ vm_ops = ERR_PTR(-ENODEV); ++ if (!h_file->f_op || !h_file->f_op->mmap) ++ goto out; + -+static inline int au_test_aufs(struct super_block *sb) -+{ -+ return sb->s_magic == AUFS_SUPER_MAGIC; -+} ++ err = h_file->f_op->mmap(h_file, vma); ++ vm_ops = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; + -+static inline const char *au_sbtype(struct super_block *sb) -+{ -+ return sb->s_type->name; -+} ++ vm_ops = vma->vm_ops; ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (unlikely(err)) { ++ AuIOErr("failed internal unmapping %.*s, %d\n", ++ AuDLNPair(h_file->f_dentry), err); ++ vm_ops = ERR_PTR(-EIO); ++ } + -+static inline int au_test_iso9660(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) -+ return sb->s_magic == ROMFS_MAGIC; -+#else -+ return 0; -+#endif ++ out: ++ return vm_ops; +} + -+static inline int au_test_romfs(struct super_block *sb __maybe_unused) ++static int au_custom_vm_ops(struct au_finfo *finfo, struct vm_area_struct *vma) +{ -+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) -+ return sb->s_magic == ISOFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ struct vm_operations_struct *h_ops; + -+static inline int au_test_cramfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) -+ return sb->s_magic == CRAMFS_MAGIC; 
-+#endif -+ return 0; -+} ++ err = 0; ++ h_ops = finfo->fi_h_vm_ops; ++ AuDebugOn(!h_ops); ++ if ((!h_ops->page_mkwrite && !h_ops->close) ++ || finfo->fi_vm_ops) ++ goto out; + -+static inline int au_test_nfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) -+ return sb->s_magic == NFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ err = -ENOMEM; ++ finfo->fi_vm_ops = kmemdup(&aufs_vm_ops, sizeof(aufs_vm_ops), GFP_NOFS); ++ if (unlikely(!finfo->fi_vm_ops)) ++ goto out; + -+static inline int au_test_fuse(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) -+ return sb->s_magic == FUSE_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ err = 0; ++ if (h_ops->page_mkwrite) ++ finfo->fi_vm_ops->page_mkwrite = aufs_page_mkwrite; ++ if (h_ops->close) ++ finfo->fi_vm_ops->close = aufs_vm_close; + -+static inline int au_test_xfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) -+ return sb->s_magic == XFS_SB_MAGIC; -+#else -+ return 0; -+#endif -+} ++ vma->vm_ops = finfo->fi_vm_ops; + -+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_TMPFS -+ return sb->s_magic == TMPFS_MAGIC; -+#else -+ return 0; -+#endif ++ out: ++ return err; +} + -+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) ++static int aufs_mmap(struct file *file, struct vm_area_struct *vma) +{ -+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "ecryptfs"); -+#else -+ return 0; -+#endif -+} ++ int err; ++ unsigned char wlock, mmapped; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct vm_operations_struct *vm_ops; + -+static inline int au_test_smbfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE) -+ return sb->s_magic == SMB_SUPER_MAGIC; 
-+#else -+ return 0; -+#endif -+} ++ dentry = file->f_dentry; ++ mmapped = !!au_test_mmapped(file); /* can be harmless race condition */ ++ wlock = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, wlock | !mmapped); ++ if (unlikely(err)) ++ goto out; + -+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) -+ return sb->s_magic == OCFS2_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ if (wlock) { ++ struct au_pin pin; + -+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) -+ return sb->s_magic == DLMFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ } else if (!mmapped) ++ di_downgrade_lock(dentry, AuLock_IR); + -+static inline int au_test_coda(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) -+ return sb->s_magic == CODA_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (au_test_fs_bad_mapping(h_file->f_dentry->d_sb)) { ++ /* ++ * by this assignment, f_mapping will differs from aufs inode ++ * i_mapping. ++ * if someone else mixes the use of f_dentry->d_inode and ++ * f_mapping->host, then a problem may arise. 
++ */ ++ file->f_mapping = h_file->f_mapping; ++ } + -+static inline int au_test_v9fs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) -+ return sb->s_magic == V9FS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ vm_ops = NULL; ++ if (!mmapped) { ++ vm_ops = au_vm_ops(h_file, vma); ++ err = PTR_ERR(vm_ops); ++ if (IS_ERR(vm_ops)) ++ goto out_unlock; ++ } + -+static inline int au_test_ext4(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE) -+ return sb->s_magic == EXT4_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ /* ++ * unnecessary to handle MAP_DENYWRITE and deny_write_access()? ++ * currently MAP_DENYWRITE from userspace is ignored, but elf loader ++ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()), ++ * both of the aufs file and the lower file is deny_write_access()-ed. ++ * finally I hope we can skip handlling MAP_DENYWRITE here. ++ */ ++ err = generic_file_mmap(file, vma); ++ if (unlikely(err)) ++ goto out_unlock; + -+static inline int au_test_sysv(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "sysv"); -+#else -+ return 0; -+#endif -+} ++ vma->vm_ops = &aufs_vm_ops; ++ /* test again */ ++ if (!au_test_mmapped(file)) ++ au_fi(file)->fi_h_vm_ops = vm_ops; + -+static inline int au_test_ramfs(struct super_block *sb) -+{ -+ return sb->s_magic == RAMFS_MAGIC; -+} ++ err = au_custom_vm_ops(au_fi(file), vma); ++ if (unlikely(err)) ++ goto out_unlock; + -+static inline int au_test_ubifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE) -+ return sb->s_magic == UBIFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} ++ vfsub_file_accessed(h_file); ++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode); + -+static inline int au_test_procfs(struct super_block *sb 
__maybe_unused) -+{ -+#ifdef CONFIG_PROC_FS -+ return sb->s_magic == PROC_SUPER_MAGIC; -+#else -+ return 0; -+#endif ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ if (!wlock && mmapped) ++ fi_read_unlock(file); ++ else ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ return err; +} + -+static inline int au_test_sysfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_SYSFS -+ return sb->s_magic == SYSFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++/* ---------------------------------------------------------------------- */ + -+static inline int au_test_configfs(struct super_block *sb __maybe_unused) ++static int aufs_fsync_nondir(struct file *file, struct dentry *dentry, ++ int datasync) +{ -+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) -+ return sb->s_magic == CONFIGFS_MAGIC; -+#else -+ return 0; -+#endif -+} ++ int err; ++ struct au_pin pin; ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *sb; + -+static inline int au_test_minix(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) -+ return sb->s_magic == MINIX3_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC2 -+ || sb->s_magic == MINIX_SUPER_MAGIC -+ || sb->s_magic == MINIX_SUPER_MAGIC2; -+#else -+ return 0; -+#endif -+} ++ inode = dentry->d_inode; ++ IMustLock(file->f_mapping->host); ++ if (inode != file->f_mapping->host) { ++ mutex_unlock(&file->f_mapping->host->i_mutex); ++ mutex_lock(&inode->i_mutex); ++ } ++ IMustLock(inode); + -+static inline int au_test_cifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) -+ return sb->s_magic == CIFS_MAGIC_NUMBER; -+#else -+ return 0; -+#endif -+} ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); + -+static inline int au_test_fat(struct super_block *sb __maybe_unused) ++ err = 0; /* -EBADF; */ /* posix? 
*/ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -EINVAL; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (h_file->f_op && h_file->f_op->fsync) { ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ /* ++ * no filemap_fdatawrite() since aufs file has no its own ++ * mapping, but dir. ++ */ ++ h_d = h_file->f_dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ err = h_file->f_op->fsync(h_file, h_d, datasync); ++ if (!err) ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); ++ /*ignore*/ ++ au_cpup_attr_timesizes(inode); ++ mutex_unlock(h_mtx); ++ } ++ ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ if (inode != file->f_mapping->host) { ++ mutex_unlock(&inode->i_mutex); ++ mutex_lock(&file->f_mapping->host->i_mutex); ++ } ++ return err; ++} ++ ++/* no one supports this operation, currently */ ++#if 0 ++static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync) +{ -+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) -+ return sb->s_magic == MSDOS_SUPER_MAGIC; -+#else -+ return 0; ++ int err; ++ struct au_pin pin; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *file, *h_file; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ dentry = file->f_dentry; ++ inode = dentry->d_inode; ++ mutex_lock(&inode->i_mutex); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ++ err = 0; /* -EBADF; */ /* posix? 
*/ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_ready_to_write(file, -1, &pin); ++ di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_unpin(&pin); ++ ++ err = -ENOSYS; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (h_file->f_op && h_file->f_op->aio_fsync) { ++ struct dentry *h_d; ++ struct mutex *h_mtx; ++ ++ h_d = h_file->f_dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ if (!is_sync_kiocb(kio)) { ++ get_file(h_file); ++ fput(file); ++ } ++ kio->ki_filp = h_file; ++ err = h_file->f_op->aio_fsync(kio, datasync); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ if (!err) ++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); ++ /*ignore*/ ++ au_cpup_attr_timesizes(inode); ++ mutex_unlock(h_mtx); ++ } ++ ++ out_unlock: ++ di_read_unlock(dentry, AuLock_IR); ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ mutex_unlock(&inode->i_mutex); ++ return err; ++} +#endif ++ ++static int aufs_fasync(int fd, struct file *file, int flag) ++{ ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (h_file->f_op && h_file->f_op->fasync) ++ err = h_file->f_op->fasync(fd, h_file, flag); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ out: ++ si_read_unlock(sb); ++ return err; +} + -+static inline int au_test_msdos(struct super_block *sb) ++/* ---------------------------------------------------------------------- */ ++ ++/* no one supports this operation, currently */ ++#if 0 ++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, ++ size_t len, loff_t *pos , int more) +{ -+ return 
au_test_fat(sb); +} ++#endif + -+static inline int au_test_vfat(struct super_block *sb) ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_file_fop = { ++ /* ++ * while generic_file_llseek/_unlocked() don't use BKL, ++ * don't use it since it operates file->f_mapping->host. ++ * in aufs, it may be a real file and may confuse users by UDBA. ++ */ ++ /* .llseek = generic_file_llseek, */ ++ ++ .read = aufs_read, ++ .write = aufs_write, ++ .aio_read = aufs_aio_read, ++ .aio_write = aufs_aio_write, ++#ifdef CONFIG_AUFS_POLL ++ .poll = aufs_poll, ++#endif ++ .mmap = aufs_mmap, ++ .open = aufs_open_nondir, ++ .flush = aufs_flush, ++ .release = aufs_release_nondir, ++ .fsync = aufs_fsync_nondir, ++ /* .aio_fsync = aufs_aio_fsync_nondir, */ ++ .fasync = aufs_fasync, ++ /* .sendpage = aufs_sendpage, */ ++ .splice_write = aufs_splice_write, ++ .splice_read = aufs_splice_read, ++#if 0 ++ .aio_splice_write = aufs_aio_splice_write, ++ .aio_splice_read = aufs_aio_splice_read ++#endif ++}; +diff -urN linux-2.6.30.org/fs/aufs/fstype.h linux-2.6.30/fs/aufs/fstype.h +--- linux-2.6.30.org/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/fstype.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,474 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * judging filesystem type ++ */ ++ ++#ifndef __AUFS_FSTYPE_H__ ++#define __AUFS_FSTYPE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++static inline int au_test_aufs(struct super_block *sb) +{ -+ return au_test_fat(sb); ++ return sb->s_magic == AUFS_SUPER_MAGIC; +} + -+static inline int au_test_securityfs(struct super_block *sb __maybe_unused) ++static inline const char *au_sbtype(struct super_block *sb) +{ -+#ifdef CONFIG_SECURITYFS -+ return sb->s_magic == SECURITYFS_MAGIC; ++ return sb->s_type->name; ++} ++ ++static inline int au_test_iso9660(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) ++ return sb->s_magic == ROMFS_MAGIC; +#else + return 0; +#endif +} + -+static inline int au_test_squashfs(struct super_block *sb __maybe_unused) ++static inline int au_test_romfs(struct super_block *sb __maybe_unused) +{ -+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) -+ return sb->s_magic == SQUASHFS_MAGIC; ++#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) ++ return sb->s_magic == ISOFS_SUPER_MAGIC; +#else + return 0; +#endif +} + -+static inline int au_test_btrfs(struct super_block *sb __maybe_unused) ++static inline int au_test_cramfs(struct super_block *sb __maybe_unused) +{ -+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) -+ return sb->s_magic == BTRFS_SUPER_MAGIC; ++#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) ++ return sb->s_magic == CRAMFS_MAGIC; ++#endif ++ return 0; ++} ++ ++static inline int au_test_nfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) ++ return sb->s_magic == NFS_SUPER_MAGIC; +#else + return 
0; +#endif +} + -+static inline int au_test_xenfs(struct super_block *sb __maybe_unused) ++static inline int au_test_fuse(struct super_block *sb __maybe_unused) +{ -+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) -+ return sb->s_magic == XENFS_SUPER_MAGIC; ++#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) ++ return sb->s_magic == FUSE_SUPER_MAGIC; +#else + return 0; +#endif +} + -+static inline int au_test_debugfs(struct super_block *sb __maybe_unused) ++static inline int au_test_xfs(struct super_block *sb __maybe_unused) +{ -+#ifdef CONFIG_DEBUG_FS -+ return sb->s_magic == DEBUGFS_MAGIC; ++#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) ++ return sb->s_magic == XFS_SB_MAGIC; +#else + return 0; +#endif +} + -+/* ---------------------------------------------------------------------- */ -+/* -+ * they can't be an aufs branch. -+ */ -+static inline int au_test_fs_unsuppoted(struct super_block *sb) ++static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) +{ -+ return -+#ifndef CONFIG_AUFS_BR_RAMFS -+ au_test_ramfs(sb) || ++#ifdef CONFIG_TMPFS ++ return sb->s_magic == TMPFS_MAGIC; ++#else ++ return 0; +#endif -+ au_test_procfs(sb) -+ || au_test_sysfs(sb) -+ || au_test_configfs(sb) -+ || au_test_debugfs(sb) -+ || au_test_securityfs(sb) -+ || au_test_xenfs(sb) -+ /* || !strcmp(au_sbtype(sb), "unionfs") */ -+ || au_test_aufs(sb); /* will be supported in next version */ +} + -+/* -+ * If the filesystem supports NFS-export, then it has to support NULL as -+ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate(). -+ * We can apply this principle when we handle a lower filesystem. 
++static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "ecryptfs"); ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_smbfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SMB_FS) || defined(CONFIG_SMB_FS_MODULE) ++ return sb->s_magic == SMB_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) ++ return sb->s_magic == OCFS2_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) ++ return sb->s_magic == DLMFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_coda(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) ++ return sb->s_magic == CODA_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_v9fs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) ++ return sb->s_magic == V9FS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ext4(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_EXT4DEV_FS) || defined(CONFIG_EXT4DEV_FS_MODULE) ++ return sb->s_magic == EXT4_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_sysv(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "sysv"); ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_ramfs(struct super_block *sb) ++{ ++ return sb->s_magic == RAMFS_MAGIC; ++} ++ ++static inline int au_test_ubifs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_UBIFS_FS) || 
defined(CONFIG_UBIFS_FS_MODULE) ++ return sb->s_magic == UBIFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_procfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_PROC_FS ++ return sb->s_magic == PROC_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_sysfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SYSFS ++ return sb->s_magic == SYSFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_configfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) ++ return sb->s_magic == CONFIGFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_minix(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) ++ return sb->s_magic == MINIX3_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC ++ || sb->s_magic == MINIX2_SUPER_MAGIC2 ++ || sb->s_magic == MINIX_SUPER_MAGIC ++ || sb->s_magic == MINIX_SUPER_MAGIC2; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_cifs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) ++ return sb->s_magic == CIFS_MAGIC_NUMBER; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_fat(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) ++ return sb->s_magic == MSDOS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_msdos(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} ++ ++static inline int au_test_vfat(struct super_block *sb) ++{ ++ return au_test_fat(sb); ++} ++ ++static inline int au_test_securityfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_SECURITYFS ++ return sb->s_magic == SECURITYFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_squashfs(struct super_block *sb __maybe_unused) ++{ ++#if 
defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) ++ return sb->s_magic == SQUASHFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_btrfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) ++ return sb->s_magic == BTRFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_xenfs(struct super_block *sb __maybe_unused) ++{ ++#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) ++ return sb->s_magic == XENFS_SUPER_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_test_debugfs(struct super_block *sb __maybe_unused) ++{ ++#ifdef CONFIG_DEBUG_FS ++ return sb->s_magic == DEBUGFS_MAGIC; ++#else ++ return 0; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * they can't be an aufs branch. ++ */ ++static inline int au_test_fs_unsuppoted(struct super_block *sb) ++{ ++ return ++#ifndef CONFIG_AUFS_BR_RAMFS ++ au_test_ramfs(sb) || ++#endif ++ au_test_procfs(sb) ++ || au_test_sysfs(sb) ++ || au_test_configfs(sb) ++ || au_test_debugfs(sb) ++ || au_test_securityfs(sb) ++ || au_test_xenfs(sb) ++ /* || !strcmp(au_sbtype(sb), "unionfs") */ ++ || au_test_aufs(sb); /* will be supported in next version */ ++} ++ ++/* ++ * If the filesystem supports NFS-export, then it has to support NULL as ++ * a nameidata parameter for ->create(), ->lookup() and ->d_revalidate(). ++ * We can apply this principle when we handle a lower filesystem. 
+ */ +static inline int au_test_fs_null_nd(struct super_block *sb) +{ @@ -11287,7 +8824,7 @@ index 0000000..54896c6 +#ifdef CONFIG_AUFS_BR_RAMFS + || au_test_ramfs(sb) +#endif -+ ; ++ || au_test_ubifs(sb); +} + +/* @@ -11297,8 +8834,8 @@ index 0000000..54896c6 +{ + return au_test_nfs(sb) + || au_test_fuse(sb) ++ || au_test_ubifs(sb) + /* || au_test_cifs(sb) */ /* untested */ -+ /* || au_test_ubifs(sb) */ /* untested */ + ; +} + @@ -11307,7 +8844,8 @@ index 0000000..54896c6 + */ +static inline int au_test_fs_bad_mapping(struct super_block *sb) +{ -+ return au_test_fuse(sb); ++ return au_test_fuse(sb) ++ || au_test_ubifs(sb); +} + +/* temporary support for i#1 in cramfs */ @@ -11357,12 +8895,10 @@ index 0000000..54896c6 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_FSTYPE_H__ */ -diff --git a/fs/aufs/hinotify.c b/fs/aufs/hinotify.c -new file mode 100644 -index 0000000..af8ebdb ---- /dev/null -+++ b/fs/aufs/hinotify.c -@@ -0,0 +1,746 @@ +diff -urN linux-2.6.30.org/fs/aufs/hinotify.c linux-2.6.30/fs/aufs/hinotify.c +--- linux-2.6.30.org/fs/aufs/hinotify.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/hinotify.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,755 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -11370,6 +8906,15 @@ index 0000000..af8ebdb + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -12109,12 +9654,10 @@ index 0000000..af8ebdb + if (au_cachep[AuCache_HINOTIFY]) + au_hin_destroy_cache(); +} -diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c -new file mode 100644 -index 0000000..a64d6b0 ---- /dev/null -+++ b/fs/aufs/i_op.c -@@ -0,0 +1,862 @@ +diff -urN linux-2.6.30.org/fs/aufs/iinfo.c linux-2.6.30/fs/aufs/iinfo.c +--- linux-2.6.30.org/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/iinfo.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,271 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -12122,1498 +9665,1204 @@ index 0000000..a64d6b0 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * inode operations (except add/del/rename) ++ * inode private data + */ + -+#include -+#include -+#include +#include "aufs.h" + -+static int h_permission(struct inode *h_inode, int mask, -+ struct vfsmount *h_mnt, int brperm) ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) +{ -+ int err; -+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ struct inode *h_inode; + -+ err = -EACCES; -+ if ((write_mask && IS_IMMUTABLE(h_inode)) -+ || ((mask & MAY_EXEC) -+ && S_ISREG(h_inode->i_mode) -+ && ((h_mnt->mnt_flags & MNT_NOEXEC) -+ || !(h_inode->i_mode & S_IXUGO)))) -+ goto out; ++ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ return h_inode; ++} + -+ /* -+ * - skip the lower fs test in the case of write to ro branch. -+ * - nfs dir permission write check is optimized, but a policy for -+ * link/rename requires a real check. -+ */ -+ if ((write_mask && !au_br_writable(brperm)) -+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) -+ && write_mask && !(mask & MAY_READ)) -+ || !h_inode->i_op->permission) { -+ /* AuLabel(generic_permission); */ -+ err = generic_permission(h_inode, mask, NULL); -+ } else { -+ /* AuLabel(h_inode->permission); */ -+ err = h_inode->i_op->permission(h_inode, mask); -+ AuTraceErr(err); -+ } ++/* todo: hard/soft set? 
*/ ++void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct au_iinfo *iinfo = au_ii(inode); ++ struct inode *h_inode; + -+ if (!err) -+ err = devcgroup_inode_permission(h_inode, mask); -+ if (!err) -+ err = security_inode_permission -+ (h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC -+ | MAY_APPEND)); ++ iinfo->ii_bstart = bindex; ++ h_inode = iinfo->ii_hinode[bindex + 0].hi_inode; ++ if (h_inode) ++ au_cpup_igen(inode, h_inode); ++} + -+ out: -+ return err; ++void au_hiput(struct au_hinode *hinode) ++{ ++ au_hin_free(hinode); ++ dput(hinode->hi_whdentry); ++ iput(hinode->hi_inode); +} + -+static int aufs_permission(struct inode *inode, int mask) ++unsigned int au_hi_flags(struct inode *inode, int isdir) +{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ const unsigned char isdir = !!S_ISDIR(inode->i_mode); -+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; ++ unsigned int flags; ++ const unsigned int mnt_flags = au_mntflags(inode->i_sb); + -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ ii_read_lock_child(inode); ++ flags = 0; ++ if (au_opt_test(mnt_flags, XINO)) ++ au_fset_hi(flags, XINO); ++ if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY)) ++ au_fset_hi(flags, HINOTIFY); ++ return flags; ++} + -+ if (!isdir || write_mask) { -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ AuDebugOn(!h_inode -+ || ((h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT))); -+ err = 0; -+ bindex = au_ibstart(inode); -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm); ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) ++{ ++ struct au_hinode *hinode; ++ struct inode *hi; ++ struct au_iinfo *iinfo = au_ii(inode); + -+ if (write_mask && !err) { -+ /* test whether the upper writable branch exists */ -+ err = -EROFS; -+ for (; bindex >= 0; bindex--) 
-+ if (!au_br_rdonly(au_sbr(sb, bindex))) { -+ err = 0; -+ break; -+ } ++ hinode = iinfo->ii_hinode + bindex; ++ hi = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ AuDebugOn(h_inode && hi); ++ ++ if (hi) ++ au_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ struct au_branch *br; ++ ++ if (bindex == iinfo->ii_bstart) ++ au_cpup_igen(inode, h_inode); ++ br = au_sbr(sb, bindex); ++ hinode->hi_id = br->br_id; ++ if (au_ftest_hi(flags, XINO)) { ++ err = au_xino_write(sb, bindex, h_inode->i_ino, ++ inode->i_ino); ++ if (unlikely(err)) ++ AuIOErr1("failed au_xino_write() %d\n", err); + } -+ goto out; -+ } + -+ /* non-write to dir */ -+ err = 0; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode) { -+ AuDebugOn(!S_ISDIR(h_inode->i_mode)); -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, br->br_mnt, -+ br->br_perm); ++ if (au_ftest_hi(flags, HINOTIFY) ++ && au_br_hinotifyable(br->br_perm)) { ++ err = au_hin_alloc(hinode, inode, h_inode); ++ if (unlikely(err)) ++ AuIOErr1("au_hin_alloc() %d\n", err); + } + } ++} + -+ out: -+ ii_read_unlock(inode); -+ si_read_unlock(sb); -+ return err; ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh) ++{ ++ struct au_hinode *hinode; ++ ++ hinode = au_ii(inode)->ii_hinode + bindex; ++ AuDebugOn(hinode->hi_whdentry); ++ hinode->hi_whdentry = h_wh; +} + -+/* ---------------------------------------------------------------------- */ ++void au_update_iigen(struct inode *inode) ++{ ++ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb)); ++ /* smp_mb(); */ /* atomic_set */ ++} + -+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, -+ struct nameidata *nd) ++/* it may be called at remount time, too */ ++void au_update_brange(struct inode *inode, int 
do_put_zero) +{ -+ struct dentry *ret, *parent; -+ struct inode *inode, *h_inode; -+ struct mutex *mtx; -+ struct super_block *sb; -+ int err, npositive; -+ aufs_bindex_t bstart; ++ struct au_iinfo *iinfo; + -+ /* temporary workaround for a bug in NFSD readdir */ -+ if (!au_test_nfsd(current)) -+ IMustLock(dir); -+ else -+ WARN_ONCE(!mutex_is_locked(&dir->i_mutex), -+ "a known problem of NFSD readdir since 2.6.28\n"); ++ iinfo = au_ii(inode); ++ if (!iinfo || iinfo->ii_bstart < 0) ++ return; + -+ sb = dir->i_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_alloc_dinfo(dentry); -+ ret = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; ++ if (do_put_zero) { ++ aufs_bindex_t bindex; + -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_read_lock_parent(parent, AuLock_IR); -+ npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd); -+ di_read_unlock(parent, AuLock_IR); -+ err = npositive; -+ ret = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out_unlock; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++) { ++ struct inode *h_i; + -+ inode = NULL; -+ if (npositive) { -+ bstart = au_dbstart(dentry); -+ h_inode = au_h_dptr(dentry, bstart)->d_inode; -+ if (!S_ISDIR(h_inode->i_mode)) { -+ /* -+ * stop 'race'-ing between hardlinks under different -+ * parents. 
-+ */ -+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; -+ mutex_lock(mtx); -+ inode = au_new_inode(dentry, /*must_new*/0); -+ mutex_unlock(mtx); -+ } else -+ inode = au_new_inode(dentry, /*must_new*/0); -+ ret = (void *)inode; ++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; ++ if (h_i && !h_i->i_nlink) ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ } + } -+ if (IS_ERR(inode)) -+ goto out_unlock; + -+ ret = d_splice_alias(inode, dentry); -+ if (unlikely(IS_ERR(ret) && inode)) -+ ii_write_unlock(inode); -+ au_store_oflag(nd, inode); ++ iinfo->ii_bstart = -1; ++ while (++iinfo->ii_bstart <= iinfo->ii_bend) ++ if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode) ++ break; ++ if (iinfo->ii_bstart > iinfo->ii_bend) { ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ return; ++ } + -+ out_unlock: -+ di_write_unlock(dentry); -+ out: -+ si_read_unlock(sb); -+ return ret; ++ iinfo->ii_bend++; ++ while (0 <= --iinfo->ii_bend) ++ if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode) ++ break; ++ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0); +} + +/* ---------------------------------------------------------------------- */ + -+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, -+ const unsigned char add_entry, aufs_bindex_t bcpup, -+ aufs_bindex_t bstart) ++int au_iinfo_init(struct inode *inode) +{ -+ int err; -+ struct dentry *h_parent; -+ struct inode *h_dir; ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ int nbr, i; + -+ if (add_entry) { -+ au_update_dbstart(dentry); -+ IMustLock(parent->d_inode); -+ } else -+ di_write_lock_parent(parent); ++ sb = inode->i_sb; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ nbr = au_sbend(sb) + 1; ++ if (unlikely(nbr <= 0)) ++ nbr = 1; ++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); ++ if (iinfo->ii_hinode) { ++ for (i = 0; i < nbr; i++) ++ iinfo->ii_hinode[i].hi_id = -1; + -+ err = 0; -+ if (!au_h_dptr(parent, bcpup)) { -+ if (bstart < 
bcpup) -+ err = au_cpdown_dirs(dentry, bcpup); -+ else -+ err = au_cpup_dirs(dentry, bcpup); -+ } -+ if (!err && add_entry) { -+ h_parent = au_h_dptr(parent, bcpup); -+ h_dir = h_parent->d_inode; -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ err = au_lkup_neg(dentry, bcpup); -+ /* todo: no unlock here */ -+ mutex_unlock(&h_dir->i_mutex); -+ if (bstart < bcpup && au_dbstart(dentry) < 0) { -+ au_set_dbstart(dentry, 0); -+ au_update_dbrange(dentry, /*do_put_zero*/0); -+ } ++ atomic_set(&iinfo->ii_generation, au_sigen(sb)); ++ /* smp_mb(); */ /* atomic_set */ ++ au_rw_init(&iinfo->ii_rwsem); ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ iinfo->ii_vdir = NULL; ++ return 0; + } ++ return -ENOMEM; ++} + -+ if (!add_entry) -+ di_write_unlock(parent); -+ if (!err) -+ err = bcpup; /* success */ ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr) ++{ ++ int err, sz; ++ struct au_hinode *hip; ++ ++ err = -ENOMEM; ++ sz = sizeof(*hip) * (iinfo->ii_bend + 1); ++ if (!sz) ++ sz = sizeof(*hip); ++ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); ++ if (hip) { ++ iinfo->ii_hinode = hip; ++ err = 0; ++ } + + return err; +} + -+/* -+ * decide the branch and the parent dir where we will create a new entry. -+ * returns new bindex or an error. -+ * copyup the parent dir if needed. -+ */ -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args) ++static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode, ++ ino_t ino) +{ + int err; -+ aufs_bindex_t bcpup, bstart, src_bstart; -+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags, -+ ADD_ENTRY); ++ aufs_bindex_t bindex; ++ unsigned char locked; ++ ++ err = 0; ++ locked = !!si_noflush_read_trylock(sb); ++ bindex = au_br_index(sb, hinode->hi_id); ++ if (bindex >= 0) ++ err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino); ++ /* error action? 
*/ ++ if (locked) ++ si_read_unlock(sb); ++ return err; ++} ++ ++void au_iinfo_fin(struct inode *inode) ++{ ++ ino_t ino; ++ aufs_bindex_t bend; ++ unsigned char unlinked = !inode->i_nlink; ++ struct au_iinfo *iinfo; ++ struct au_hinode *hi; + struct super_block *sb; -+ struct dentry *parent; -+ struct au_sbinfo *sbinfo; + -+ sb = dentry->d_sb; -+ sbinfo = au_sbi(sb); -+ parent = dget_parent(dentry); -+ bstart = au_dbstart(dentry); -+ bcpup = bstart; -+ if (args->force_btgt < 0) { -+ if (src_dentry) { -+ src_bstart = au_dbstart(src_dentry); -+ if (src_bstart < bstart) -+ bcpup = src_bstart; -+ } else if (add_entry) { -+ err = AuWbrCreate(sbinfo, dentry, -+ au_ftest_wrdir(args->flags, ISDIR)); -+ bcpup = err; -+ } ++ if (unlinked) { ++ int err = au_xigen_inc(inode); ++ if (unlikely(err)) ++ AuWarn1("failed resetting i_generation, %d\n", err); ++ } + -+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { -+ if (add_entry) -+ err = AuWbrCopyup(sbinfo, dentry); -+ else { -+ if (!IS_ROOT(dentry)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(sbinfo, dentry); -+ di_read_unlock(parent, !AuLock_IR); -+ } else -+ err = AuWbrCopyup(sbinfo, dentry); ++ iinfo = au_ii(inode); ++ /* bad_inode case */ ++ if (!iinfo) ++ return; ++ ++ if (iinfo->ii_vdir) ++ au_vdir_free(iinfo->ii_vdir); ++ ++ if (iinfo->ii_bstart >= 0) { ++ sb = inode->i_sb; ++ ino = 0; ++ if (unlinked) ++ ino = inode->i_ino; ++ hi = iinfo->ii_hinode + iinfo->ii_bstart; ++ bend = iinfo->ii_bend; ++ while (iinfo->ii_bstart++ <= bend) { ++ if (hi->hi_inode) { ++ if (unlinked || !hi->hi_inode->i_nlink) { ++ au_iinfo_write0(sb, hi, ino); ++ /* ignore this error */ ++ ino = 0; ++ } ++ au_hiput(hi); + } -+ bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; ++ hi++; + } -+ } else { -+ bcpup = args->force_btgt; -+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); + } -+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); -+ if (bstart < bcpup) -+ au_update_dbrange(dentry, /*do_put_zero*/1); -+ -+ 
err = bcpup; -+ if (bcpup == bstart) -+ goto out; /* success */ -+ -+ /* copyup the new parent into the branch we process */ -+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); + -+ out: -+ dput(parent); -+ return err; ++ kfree(iinfo->ii_hinode); ++ AuRwDestroy(&iinfo->ii_rwsem); +} +diff -urN linux-2.6.30.org/fs/aufs/inode.c linux-2.6.30/fs/aufs/inode.c +--- linux-2.6.30.org/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/inode.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,376 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+/* ---------------------------------------------------------------------- */ ++/* ++ * inode functions ++ */ + -+struct dentry *au_pinned_h_parent(struct au_pin *pin) ++#include "aufs.h" ++ ++struct inode *au_igrab(struct inode *inode) +{ -+ if (pin && pin->parent) -+ return au_h_dptr(pin->parent, pin->bindex); -+ return NULL; ++ if (inode) { ++ AuDebugOn(!atomic_read(&inode->i_count)); ++ atomic_inc_return(&inode->i_count); ++ } ++ return inode; +} + -+void au_unpin(struct au_pin *p) ++static void au_refresh_hinode_attr(struct inode *inode, int do_version) +{ -+ if (au_ftest_pin(p->flags, MNT_WRITE)) -+ mnt_drop_write(p->h_mnt); -+ if (!p->hdir) -+ return; -+ -+ au_hin_imtx_unlock(p->hdir); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ iput(p->hdir->hi_inode); -+ dput(p->parent); -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; ++ au_cpup_attr_all(inode, /*force*/0); ++ au_update_iigen(inode); ++ if (do_version) ++ inode->i_version++; +} + -+int au_do_pin(struct au_pin *p) ++int au_refresh_hinode_self(struct inode *inode, int do_attr) +{ + int err; ++ aufs_bindex_t bindex, new_bindex; ++ unsigned char update; ++ struct inode *first; ++ struct au_hinode *p, *q, tmp; + struct super_block *sb; -+ struct dentry *h_dentry, *h_parent; -+ struct au_branch *br; -+ struct inode *h_dir; ++ struct au_iinfo *iinfo; + -+ err = 0; -+ sb = p->dentry->d_sb; -+ br = au_sbr(sb, p->bindex); -+ if (IS_ROOT(p->dentry)) { -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = br->br_mnt; -+ err = mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_err; -+ } -+ } ++ update = 0; ++ sb = inode->i_sb; ++ iinfo = au_ii(inode); ++ err = au_ii_realloc(iinfo, 
au_sbend(sb) + 1); ++ if (unlikely(err)) + goto out; -+ } -+ -+ h_dentry = NULL; -+ if (p->bindex <= au_dbend(p->dentry)) -+ h_dentry = au_h_dptr(p->dentry, p->bindex); -+ -+ p->parent = dget_parent(p->dentry); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_lock(p->parent, AuLock_IR, p->lsc_di); -+ -+ h_dir = NULL; -+ h_parent = au_h_dptr(p->parent, p->bindex); -+ p->hdir = au_hi(p->parent->d_inode, p->bindex); -+ if (p->hdir) -+ h_dir = p->hdir->hi_inode; + -+ /* udba case */ -+ if (unlikely(!p->hdir || !h_dir)) { -+ err = au_busy_or_stale(); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ dput(p->parent); -+ p->parent = NULL; -+ goto out_err; -+ } ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ first = p->hi_inode; ++ err = 0; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++, p++) { ++ if (!p->hi_inode) ++ continue; + -+ au_igrab(h_dir); -+ au_hin_imtx_lock_nested(p->hdir, p->lsc_hi); ++ new_bindex = au_br_index(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; + -+ if (h_dentry) { -+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_unpin; ++ if (new_bindex < 0) { ++ update++; ++ au_hiput(p); ++ p->hi_inode = NULL; ++ continue; + } -+ } + -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = br->br_mnt; -+ err = mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_unpin; ++ if (new_bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = new_bindex; ++ if (iinfo->ii_bend < new_bindex) ++ iinfo->ii_bend = new_bindex; ++ /* swap two lower inode, and loop again */ ++ q = iinfo->ii_hinode + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; + } + } -+ goto out; /* success */ ++ au_update_brange(inode, /*do_put_zero*/0); ++ if (do_attr) ++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); + -+ out_unpin: -+ 
au_unpin(p); -+ out_err: -+ AuErr("err %d\n", err); -+ err = au_busy_or_stale(); + out: + return err; +} + -+void au_pin_init(struct au_pin *p, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags) ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) +{ -+ p->dentry = dentry; -+ p->udba = udba; -+ p->lsc_di = lsc_di; -+ p->lsc_hi = lsc_hi; -+ p->flags = flags; -+ p->bindex = bindex; ++ int err, update; ++ unsigned int flags; ++ aufs_bindex_t bindex, bend; ++ unsigned char isdir; ++ struct inode *first; ++ struct au_hinode *p; ++ struct au_iinfo *iinfo; + -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; -+} ++ err = au_refresh_hinode_self(inode, /*do_attr*/0); ++ if (unlikely(err)) ++ goto out; + -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) -+{ -+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, -+ udba, flags); -+ return au_do_pin(pin); -+} ++ update = 0; ++ iinfo = au_ii(inode); ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ first = p->hi_inode; ++ isdir = S_ISDIR(inode->i_mode); ++ flags = au_hi_flags(inode, isdir); ++ bend = au_dbend(dentry); ++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { ++ struct inode *h_i; ++ struct dentry *h_d; + -+/* ---------------------------------------------------------------------- */ ++ h_d = au_h_dptr(dentry, bindex); ++ if (!h_d || !h_d->d_inode) ++ continue; + -+#define AuIcpup_DID_CPUP 1 -+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) -+#define au_fset_icpup(flags, name) { (flags) |= AuIcpup_##name; } -+#define au_fclr_icpup(flags, name) { (flags) &= ~AuIcpup_##name; } ++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { ++ h_i = au_h_iptr(inode, bindex); ++ if (h_i) { ++ if (h_i == h_d->d_inode) ++ continue; ++ err = -EIO; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = bindex; 
++ if (iinfo->ii_bend < bindex) ++ iinfo->ii_bend = bindex; ++ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); ++ update = 1; ++ } ++ au_update_brange(inode, /*do_put_zero*/0); + -+struct au_icpup_args { -+ unsigned char flags; -+ unsigned char pin_flags; -+ aufs_bindex_t btgt; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *h_inode; -+}; ++ if (unlikely(err)) ++ goto out; + -+static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia, -+ struct au_icpup_args *a) ++ au_refresh_hinode_attr(inode, update && isdir); ++ ++ out: ++ return err; ++} ++ ++static int set_inode(struct inode *inode, struct dentry *dentry) +{ + int err; -+ unsigned int udba; -+ loff_t sz; -+ aufs_bindex_t bstart; -+ struct dentry *hi_wh, *parent; -+ struct inode *inode; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = 0 -+ }; -+ -+ di_write_lock_child(dentry); -+ bstart = au_dbstart(dentry); -+ inode = dentry->d_inode; -+ if (S_ISDIR(inode->i_mode)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ /* plink or hi_wh() case */ -+ if (bstart != au_ibstart(inode)) -+ wr_dir_args.force_btgt = au_ibstart(inode); -+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); -+ if (unlikely(err < 0)) -+ goto out_dentry; -+ a->btgt = err; -+ if (err != bstart) -+ au_fset_icpup(a->flags, DID_CPUP); ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bstart, btail; ++ unsigned char isdir; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct au_iinfo *iinfo; + + err = 0; -+ a->pin_flags = AuPin_MNT_WRITE; -+ parent = NULL; -+ if (!IS_ROOT(dentry)) { -+ au_fset_pin(a->pin_flags, DI_LOCKED); -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); -+ } -+ -+ udba = au_opt_udba(dentry->d_sb); -+ if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE)) -+ udba = AuOpt_UDBA_NONE; -+ err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags); -+ if (unlikely(err)) { -+ if (parent) { -+ di_write_unlock(parent); -+ 
dput(parent); -+ } -+ goto out_dentry; -+ } -+ a->h_path.dentry = au_h_dptr(dentry, bstart); -+ a->h_inode = a->h_path.dentry->d_inode; -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ sz = -1; -+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) -+ sz = ia->ia_size; -+ -+ hi_wh = NULL; -+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) { -+ hi_wh = au_hi_wh(inode, a->btgt); -+ if (!hi_wh) { -+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL); -+ if (unlikely(err)) -+ goto out_unlock; -+ hi_wh = au_hi_wh(inode, a->btgt); -+ /* todo: revalidate hi_wh? */ -+ } -+ } -+ -+ if (parent) { -+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); -+ di_downgrade_lock(parent, AuLock_IR); -+ dput(parent); ++ isdir = 0; ++ bstart = au_dbstart(dentry); ++ h_inode = au_h_dptr(dentry, bstart)->d_inode; ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ inode->i_fop = &aufs_file_fop; ++ inode->i_mapping->a_ops = &aufs_aop; ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = au_dbtaildir(dentry); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_symlink_iop; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = au_dbtail(dentry); ++ inode->i_op = &aufs_iop; ++ init_special_inode(inode, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; + } -+ if (!au_ftest_icpup(a->flags, DID_CPUP)) -+ goto out; /* success */ -+ -+ if (!d_unhashed(dentry)) { -+ err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME); -+ if (!err) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ } else if (!hi_wh) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ else -+ a->h_path.dentry = hi_wh; /* do not dget here */ + -+ out_unlock: -+ mutex_unlock(&a->h_inode->i_mutex); -+ 
a->h_inode = a->h_path.dentry->d_inode; -+ if (!err) { -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ goto out; /* success */ ++ /* do not set inotify for whiteouted dirs (SHWH mode) */ ++ flags = au_hi_flags(inode, isdir); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) ++ && au_ftest_hi(flags, HINOTIFY) ++ && dentry->d_name.len > AUFS_WH_PFX_LEN ++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ au_fclr_hi(flags, HINOTIFY); ++ iinfo = au_ii(inode); ++ iinfo->ii_bstart = bstart; ++ iinfo->ii_bend = btail; ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) ++ au_set_h_iptr(inode, bindex, ++ au_igrab(h_dentry->d_inode), flags); + } ++ au_cpup_attr_all(inode, /*force*/1); + -+ au_unpin(&a->pin); -+ -+ out_dentry: -+ di_write_unlock(dentry); + out: + return err; +} + -+static int aufs_setattr(struct dentry *dentry, struct iattr *ia) ++/* successful returns with iinfo write_locked */ ++static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched) +{ + int err; -+ struct inode *inode; -+ struct super_block *sb; -+ struct file *file; -+ struct au_icpup_args *a; ++ aufs_bindex_t bindex, bend; ++ struct inode *h_inode, *h_dinode; + -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; ++ *matched = 0; + -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. 
++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; + -+ file = NULL; -+ if (ia->ia_valid & ATTR_FILE) { -+ /* currently ftruncate(2) only */ -+ file = ia->ia_file; -+ fi_write_lock(file); -+ ia->ia_file = au_h_fptr(file, au_fbstart(file)); ++ err = 0; ++ ii_write_lock_new_child(inode); ++ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode && h_inode == h_dinode) { ++ *matched = 1; ++ err = 0; ++ if (au_iigen(inode) != au_digen(dentry)) ++ err = au_refresh_hinode(inode, dentry); ++ break; ++ } + } + -+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) -+ ia->ia_valid &= ~ATTR_MODE; ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++ out: ++ return err; ++} + -+ err = au_lock_and_icpup(dentry, ia, a); -+ if (unlikely(err < 0)) -+ goto out_si; -+ if (au_ftest_icpup(a->flags, DID_CPUP)) { -+ ia->ia_file = NULL; -+ ia->ia_valid &= ~ATTR_FILE; ++/* successful returns with iinfo write_locked */ ++/* todo: return with unlocked? 
*/ ++struct inode *au_new_inode(struct dentry *dentry, int must_new) ++{ ++ struct inode *inode; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ ino_t h_ino, ino; ++ int err, match; ++ aufs_bindex_t bstart; ++ ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ h_dentry = au_h_dptr(dentry, bstart); ++ h_ino = h_dentry->d_inode->i_ino; ++ err = au_xino_read(sb, bstart, h_ino, &ino); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ new_ino: ++ if (!ino) { ++ ino = au_xino_new_ino(sb); ++ if (unlikely(!ino)) { ++ inode = ERR_PTR(-EIO); ++ goto out; ++ } + } + -+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); -+ if (ia->ia_valid & ATTR_SIZE) { -+ struct file *f; ++ AuDbg("i%lu\n", (unsigned long)ino); ++ inode = au_iget_locked(sb, ino); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; + -+ if (ia->ia_size < i_size_read(inode)) { -+ /* unmap only */ -+ err = vmtruncate(inode, ia->ia_size); -+ if (unlikely(err)) -+ goto out_unlock; -+ } ++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ ii_write_lock_new_child(inode); ++ err = set_inode(inode, dentry); ++ unlock_new_inode(inode); ++ if (!err) ++ goto out; /* success */ + -+ f = NULL; -+ if (ia->ia_valid & ATTR_FILE) -+ f = ia->ia_file; -+ mutex_unlock(&a->h_inode->i_mutex); -+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ } else -+ err = vfsub_notify_change(&a->h_path, ia); -+ if (!err) -+ au_cpup_attr_changeable(inode); ++ iget_failed(inode); ++ ii_write_unlock(inode); ++ goto out_iput; ++ } else if (!must_new) { ++ err = reval_inode(inode, dentry, &match); ++ if (!err) ++ goto out; /* success */ ++ else if (match) ++ goto out_iput; ++ } + -+ out_unlock: -+ mutex_unlock(&a->h_inode->i_mutex); -+ au_unpin(&a->pin); -+ di_write_unlock(dentry); -+ out_si: -+ if (file) { -+ fi_write_unlock(file); -+ ia->ia_file = file; -+ ia->ia_valid |= ATTR_FILE; ++ if 
(unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) ++ AuWarn1("Un-notified UDBA or repeatedly renamed dir," ++ " b%d, %s, %.*s, hi%lu, i%lu.\n", ++ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), ++ (unsigned long)h_ino, (unsigned long)ino); ++ ino = 0; ++ err = au_xino_write0(sb, bstart, h_ino, 0); ++ if (!err) { ++ iput(inode); ++ goto new_ino; + } -+ si_read_unlock(sb); -+ kfree(a); ++ ++ out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); + out: -+ return err; ++ return inode; +} + -+static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen) ++/* ---------------------------------------------------------------------- */ ++ ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) +{ + int err; -+ struct inode *inode; -+ struct dentry *parent; + -+ err = 0; -+ inode = dentry->d_inode; -+ di_write_lock_child(dentry); -+ if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) { -+ parent = dget_parent(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ /* returns a number of positive dentries */ -+ err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT); -+ if (err > 0) -+ err = au_refresh_hinode(inode, dentry); -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+ if (unlikely(!err)) -+ err = -EIO; ++ err = au_br_rdonly(au_sbr(sb, bindex)); ++ ++ /* pseudo-link after flushed may happen out of bounds */ ++ if (!err ++ && inode ++ && au_ibstart(inode) <= bindex ++ && bindex <= au_ibend(inode)) { ++ /* ++ * permission check is unnecessary since vfsub routine ++ * will be called later ++ */ ++ struct inode *hi = au_h_iptr(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? 
-EROFS : 0; + } -+ di_downgrade_lock(dentry, AuLock_IR); + + return err; +} + -+static void au_refresh_iattr(struct inode *inode, struct kstat *st, -+ unsigned int nlink) ++int au_test_h_perm(struct inode *h_inode, int mask) +{ -+ inode->i_mode = st->mode; -+ inode->i_uid = st->uid; -+ inode->i_gid = st->gid; -+ inode->i_atime = st->atime; -+ inode->i_mtime = st->mtime; -+ inode->i_ctime = st->ctime; -+ -+ au_cpup_attr_nlink(inode, /*force*/0); -+ if (S_ISDIR(inode->i_mode)) { -+ inode->i_nlink -= nlink; -+ inode->i_nlink += st->nlink; -+ } -+ -+ spin_lock(&inode->i_lock); -+ inode->i_blocks = st->blocks; -+ i_size_write(inode, st->size); -+ spin_unlock(&inode->i_lock); ++ if (!current_fsuid()) ++ return 0; ++ return inode_permission(h_inode, mask); +} + -+static int aufs_getattr(struct vfsmount *mnt __maybe_unused, -+ struct dentry *dentry, struct kstat *st) ++int au_test_h_perm_sio(struct inode *h_inode, int mask) +{ -+ int err; -+ unsigned int mnt_flags; -+ aufs_bindex_t bindex; -+ unsigned char udba_none, positive, did_lock; -+ struct super_block *sb, *h_sb; -+ struct inode *inode; -+ struct vfsmount *h_mnt; -+ struct dentry *h_dentry; ++ if (au_test_nfs(h_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(h_inode->i_mode)) ++ mask |= MAY_READ; /* force permission check */ ++ return au_test_h_perm(h_inode, mask); ++} +diff -urN linux-2.6.30.org/fs/aufs/inode.h linux-2.6.30/fs/aufs/inode.h +--- linux-2.6.30.org/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/inode.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,475 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + -+ err = 0; -+ did_lock = 0; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ si_read_lock(sb, AuLock_FLUSH); -+ if (IS_ROOT(dentry)) { -+ /* lock free root dinfo */ -+ h_dentry = dget(au_di(dentry)->di_hdentry->hd_dentry); -+ h_mnt = au_sbr_mnt(sb, 0); -+ goto getattr; -+ } ++/* ++ * inode operations ++ */ + -+ did_lock = 1; -+ mnt_flags = au_mntflags(sb); -+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ + -+ /* support fstat(2) */ -+ if (!d_unhashed(dentry) && !udba_none) { -+ unsigned int sigen = au_sigen(sb); -+ if (au_digen(dentry) == sigen && au_iigen(inode) == sigen) -+ di_read_lock_child(dentry, AuLock_IR); -+ else { -+ err = au_getattr_lock_reval(dentry, sigen); -+ if (unlikely(err)) -+ goto out; -+ } -+ } else -+ di_read_lock_child(dentry, AuLock_IR); -+ -+ bindex = au_ibstart(inode); -+ h_mnt = au_sbr_mnt(sb, bindex); -+ h_sb = h_mnt->mnt_sb; -+ if (!au_test_fs_bad_iattr(h_sb) && udba_none) -+ goto out_fill; /* success */ -+ -+ if (au_dbstart(dentry) == bindex) -+ h_dentry = dget(au_h_dptr(dentry, bindex)); -+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { -+ h_dentry = au_plink_lkup(inode, bindex); -+ if (IS_ERR(h_dentry)) -+ goto out_fill; /* pretending success */ -+ } else -+ /* illegally overlapped or something */ -+ goto out_fill; /* pretending success */ -+ -+ getattr: -+ positive = !!h_dentry->d_inode; -+ if (positive) -+ err = vfs_getattr(h_mnt, h_dentry, st); -+ dput(h_dentry); -+ if (!err) 
{ -+ if (positive) -+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink); -+ goto out_fill; /* success */ -+ } -+ goto out; -+ -+ out_fill: -+ generic_fillattr(inode, st); -+ out: -+ if (did_lock) -+ di_read_unlock(dentry, AuLock_IR); -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, -+ int bufsiz) -+{ -+ int err; -+ struct super_block *sb; -+ struct dentry *h_dentry; -+ -+ err = -EINVAL; -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (unlikely(/* !h_dentry -+ || !h_dentry->d_inode -+ || !h_dentry->d_inode->i_op -+ || */ !h_dentry->d_inode->i_op->readlink)) -+ goto out; -+ -+ err = security_inode_readlink(h_dentry); -+ if (unlikely(err)) -+ goto out; ++#ifdef __KERNEL__ + -+ sb = dentry->d_sb; -+ if (!au_test_ro(sb, bindex, dentry->d_inode)) { -+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); -+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); -+ } -+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); ++#include ++#include ++#include ++#include "rwsem.h" + -+ out: -+ return err; -+} ++struct vfsmount; + -+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) -+{ -+ int err; ++struct au_hinotify { ++#ifdef CONFIG_AUFS_HINOTIFY ++ struct inotify_watch hin_watch; ++ struct inode *hin_aufs_inode; /* no get/put */ ++#endif ++}; + -+ aufs_read_lock(dentry, AuLock_IR); -+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); -+ aufs_read_unlock(dentry, AuLock_IR); ++struct au_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++#ifdef CONFIG_AUFS_HINOTIFY ++ struct au_hinotify *hi_notify; ++#endif + -+ return err; -+} ++ /* reference to the copied-up whiteout with get/put */ ++ struct dentry *hi_whdentry; ++}; + -+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ int err; -+ char *buf; -+ mm_segment_t old_fs; 
++struct au_vdir; ++struct au_iinfo { ++ atomic_t ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ + -+ err = -ENOMEM; -+ buf = __getname(); -+ if (unlikely(!buf)) -+ goto out; ++ struct au_rwsem ii_rwsem; ++ aufs_bindex_t ii_bstart, ii_bend; ++ __u32 ii_higen; ++ struct au_hinode *ii_hinode; ++ struct au_vdir *ii_vdir; ++}; + -+ aufs_read_lock(dentry, AuLock_IR); -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf, -+ PATH_MAX); -+ set_fs(old_fs); -+ aufs_read_unlock(dentry, AuLock_IR); ++struct au_icntnr { ++ struct au_iinfo iinfo; ++ struct inode vfs_inode; ++}; + -+ if (err >= 0) { -+ buf[err] = 0; -+ /* will be freed by put_link */ -+ nd_set_link(nd, buf); -+ return NULL; /* success */ -+ } -+ __putname(buf); ++/* au_pin flags */ ++#define AuPin_DI_LOCKED 1 ++#define AuPin_MNT_WRITE (1 << 1) ++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) ++#define au_fset_pin(flags, name) { (flags) |= AuPin_##name; } ++#define au_fclr_pin(flags, name) { (flags) &= ~AuPin_##name; } + -+ out: -+ path_put(&nd->path); -+ AuTraceErr(err); -+ return ERR_PTR(err); -+} ++struct au_pin { ++ /* input */ ++ struct dentry *dentry; ++ unsigned int udba; ++ unsigned char lsc_di, lsc_hi, flags; ++ aufs_bindex_t bindex; + -+static void aufs_put_link(struct dentry *dentry __maybe_unused, -+ struct nameidata *nd, void *cookie __maybe_unused) -+{ -+ __putname(nd_get_link(nd)); -+} ++ /* output */ ++ struct dentry *parent; ++ struct au_hinode *hdir; ++ struct vfsmount *h_mnt; ++}; + +/* ---------------------------------------------------------------------- */ + -+static void aufs_truncate_range(struct inode *inode __maybe_unused, -+ loff_t start __maybe_unused, -+ loff_t end __maybe_unused) ++static inline struct au_iinfo *au_ii(struct inode *inode) +{ -+ AuUnsupport(); ++ struct au_iinfo *iinfo; ++ ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ if (iinfo->ii_hinode) ++ return iinfo; 
++ return NULL; /* debugging bad_inode case */ +} + +/* ---------------------------------------------------------------------- */ + -+struct inode_operations aufs_symlink_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, -+ .readlink = aufs_readlink, -+ .follow_link = aufs_follow_link, -+ .put_link = aufs_put_link -+}; ++/* inode.c */ ++struct inode *au_igrab(struct inode *inode); ++int au_refresh_hinode_self(struct inode *inode, int do_attr); ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++struct inode *au_new_inode(struct dentry *dentry, int must_new); ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++int au_test_h_perm(struct inode *h_inode, int mask); ++int au_test_h_perm_sio(struct inode *h_inode, int mask); + -+struct inode_operations aufs_dir_iop = { -+ .create = aufs_create, -+ .lookup = aufs_lookup, -+ .link = aufs_link, -+ .unlink = aufs_unlink, -+ .symlink = aufs_symlink, -+ .mkdir = aufs_mkdir, -+ .rmdir = aufs_rmdir, -+ .mknod = aufs_mknod, -+ .rename = aufs_rename, ++/* i_op.c */ ++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; + -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr -+}; ++/* au_wr_dir flags */ ++#define AuWrDir_ADD_ENTRY 1 ++#define AuWrDir_ISDIR (1 << 1) ++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) ++#define au_fset_wrdir(flags, name) { (flags) |= AuWrDir_##name; } ++#define au_fclr_wrdir(flags, name) { (flags) &= ~AuWrDir_##name; } + -+struct inode_operations aufs_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, -+ .truncate_range = aufs_truncate_range ++struct au_wr_dir_args { ++ aufs_bindex_t force_btgt; ++ unsigned char flags; +}; -diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c -new file mode 100644 -index 0000000..516120d ---- /dev/null -+++ b/fs/aufs/i_op_add.c -@@ -0,0 +1,625 @@ -+/* 
-+ * Copyright (C) 2005-2009 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args); + -+/* -+ * inode operations (add entry) -+ */ ++struct dentry *au_pinned_h_parent(struct au_pin *pin); ++void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) __must_check; ++int au_do_pin(struct au_pin *pin) __must_check; ++void au_unpin(struct au_pin *pin); + -+#include "aufs.h" ++/* i_op_add.c */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode); + -+/* -+ * final procedure of adding a new entry, except link(2). -+ * remove whiteout, instantiate, copyup the parent dir's times and size -+ * and update version. -+ * if it failed, re-create the removed whiteout. 
-+ */ -+static int epilog(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct dentry *dentry) -+{ -+ int err, rerr; -+ aufs_bindex_t bwh; -+ struct path h_path; -+ struct inode *inode, *h_dir; -+ struct dentry *wh; ++/* i_op_del.c */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); + -+ bwh = -1; -+ if (wh_dentry) { -+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(h_dir); -+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); -+ bwh = au_dbwh(dentry); -+ h_path.dentry = wh_dentry; -+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); -+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out; -+ } ++/* i_op_ren.c */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry); + -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (!IS_ERR(inode)) { -+ d_instantiate(dentry, inode); -+ dir = dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(dir); -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; -+ return 0; /* success */ -+ } ++/* iinfo.c */ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); ++void au_hiput(struct au_hinode *hinode); ++void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex); ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh); ++unsigned int au_hi_flags(struct inode *inode, int isdir); + -+ err = PTR_ERR(inode); -+ if (!wh_dentry) -+ goto out; ++/* hinode flags */ ++#define AuHi_XINO 1 ++#define AuHi_HINOTIFY (1 << 1) ++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) 
++#define au_fset_hi(flags, name) { (flags) |= AuHi_##name; } ++#define au_fclr_hi(flags, name) { (flags) &= ~AuHi_##name; } + -+ /* revert */ -+ /* dir inode is locked */ -+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); -+ rerr = PTR_ERR(wh); -+ if (IS_ERR(wh)) { -+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } else -+ dput(wh); ++#ifndef CONFIG_AUFS_HINOTIFY ++#undef AuHi_HINOTIFY ++#define AuHi_HINOTIFY 0 ++#endif + -+ out: -+ return err; ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); ++ ++void au_update_iigen(struct inode *inode); ++void au_update_brange(struct inode *inode, int do_put_zero); ++ ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++int au_ii_realloc(struct au_iinfo *iinfo, int nbr); ++ ++/* plink.c */ ++void au_plink_block_maintain(struct super_block *sb); ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb); ++#else ++static inline void au_plink_list(struct super_block *sb) ++{ ++ /* nothing */ +} ++#endif ++int au_plink_test(struct inode *inode); ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++void au_plink_put(struct super_block *sb); ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hinotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, /* copyup dirs */ ++ AuLsc_II_NEW_CHILD ++}; + +/* -+ * simple tests for the adding inode operations. -+ * following the checks in vfs, plus the parent-child relationship. 
++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new_child, ii_write_lock_new_child, + */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) -+{ -+ int err; -+ umode_t h_mode; -+ struct dentry *h_dentry; -+ struct inode *h_inode; ++#define AuReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} + -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ if (!dentry->d_inode) { -+ err = -EEXIST; -+ if (unlikely(h_inode)) -+ goto out; -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; ++#define AuWriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} + -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) + -+ err = -EIO; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ goto out; -+ err = 0; ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++AuRWLockFuncs(new_child, NEW_CHILD); + -+ out: -+ return err; -+} ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs + +/* -+ * initial procedure of adding a new entry. 
-+ * prepare writable branch and the parent dir, lock it, -+ * and lookup whiteout for the new entry. ++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock + */ -+static struct dentry* -+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, -+ struct dentry *src_dentry, struct au_pin *pin, -+ struct au_wr_dir_args *wr_dir_args) -+{ -+ struct dentry *wh_dentry, *h_parent; -+ struct super_block *sb; -+ struct au_branch *br; -+ int err; -+ unsigned int udba; -+ aufs_bindex_t bcpup; -+ -+ err = au_wr_dir(dentry, src_dentry, wr_dir_args); -+ bcpup = err; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; ++AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); + -+ h_parent = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbstart(dentry) == bcpup) { -+ err = au_may_add(dentry, bcpup, h_parent, -+ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_unpin; -+ } ++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) ++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) ++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) + -+ br = au_sbr(sb, bcpup); -+ if (dt) { -+ struct path tmp = { -+ .dentry = h_parent, -+ .mnt = br->br_mnt -+ }; -+ au_dtime_store(dt, au_pinned_parent(pin), &tmp); -+ } ++/* ---------------------------------------------------------------------- */ + -+ wh_dentry = NULL; -+ if (bcpup != au_dbwh(dentry)) -+ goto out; /* success */ ++static inline unsigned int au_iigen(struct inode *inode) ++{ ++ return atomic_read(&au_ii(inode)->ii_generation); ++} + -+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int 
au_test_higen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo; + -+ out_unpin: -+ if (IS_ERR(wh_dentry)) -+ au_unpin(pin); -+ out: -+ return wh_dentry; ++ iinfo = au_ii(inode); ++ return !(iinfo->ii_hsb1 == h_inode->i_sb ++ && iinfo->ii_higen == h_inode->i_generation); +} + +/* ---------------------------------------------------------------------- */ + -+enum { Mknod, Symlink, Creat }; -+struct simple_arg { -+ int type; -+ union { -+ struct { -+ int mode; -+ struct nameidata *nd; -+ } c; -+ struct { -+ const char *symname; -+ } s; -+ struct { -+ int mode; -+ dev_t dev; -+ } m; -+ } u; -+}; -+ -+static int add_simple(struct inode *dir, struct dentry *dentry, -+ struct simple_arg *arg) ++static inline aufs_bindex_t au_ii_br_id(struct inode *inode, ++ aufs_bindex_t bindex) +{ -+ int err; -+ aufs_bindex_t bstart; -+ unsigned char created; -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *wh_dentry, *parent; -+ struct inode *h_dir; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ IMustLock(dir); -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ aufs_read_lock(dentry, AuLock_DW); -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out; ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; ++} + -+ bstart = au_dbstart(dentry); -+ h_path.dentry = au_h_dptr(dentry, bstart); -+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ h_dir = au_pinned_h_dir(&pin); -+ switch (arg->type) { -+ case Creat: -+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode); -+ break; -+ case Symlink: -+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname); -+ break; -+ case Mknod: -+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); -+ break; -+ default: -+ BUG(); -+ } -+ created = !err; -+ if (!err) -+ err = epilog(dir, bstart, 
wh_dentry, dentry); ++static inline aufs_bindex_t au_ibstart(struct inode *inode) ++{ ++ return au_ii(inode)->ii_bstart; ++} + -+ /* revert */ -+ if (unlikely(created && err && h_path.dentry->d_inode)) { -+ int rerr; -+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); -+ if (rerr) { -+ AuIOErr("%.*s revert failure(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&dt); -+ d_drop(dentry); -+ } ++static inline aufs_bindex_t au_ibend(struct inode *inode) ++{ ++ return au_ii(inode)->ii_bend; ++} + -+ au_unpin(&pin); -+ dput(wh_dentry); ++static inline struct au_vdir *au_ivdir(struct inode *inode) ++{ ++ return au_ii(inode)->ii_vdir; ++} + -+ out: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ di_write_unlock(parent); -+ aufs_read_unlock(dentry, AuLock_DW); -+ return err; ++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) ++{ ++ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; +} + -+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) +{ -+ struct simple_arg arg = { -+ .type = Mknod, -+ .u.m = { -+ .mode = mode, -+ .dev = dev -+ } -+ }; -+ return add_simple(dir, dentry, &arg); ++ au_ii(inode)->ii_bend = bindex; +} + -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) +{ -+ struct simple_arg arg = { -+ .type = Symlink, -+ .u.s.symname = symname -+ }; -+ return add_simple(dir, dentry, &arg); ++ au_ii(inode)->ii_vdir = vdir; +} + -+int aufs_create(struct inode *dir, struct dentry *dentry, int mode, -+ struct nameidata *nd) ++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) +{ -+ struct simple_arg arg = { -+ .type = Creat, -+ .u.c = { -+ .mode = mode, -+ .nd = nd -+ } -+ }; -+ return add_simple(dir, dentry, &arg); ++ return 
au_ii(inode)->ii_hinode + bindex; +} + +/* ---------------------------------------------------------------------- */ + -+struct au_link_args { -+ aufs_bindex_t bdst, bsrc; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *src_parent, *parent; -+}; -+ -+static int au_cpup_before_link(struct dentry *src_dentry, -+ struct au_link_args *a) ++static inline struct dentry *au_pinned_parent(struct au_pin *pin) +{ -+ int err; -+ struct dentry *h_src_dentry; -+ struct mutex *h_mtx; -+ -+ di_read_lock_parent(a->src_parent, AuLock_IR); -+ err = au_test_and_cpup_dirs(src_dentry, a->bdst); -+ if (unlikely(err)) -+ goto out; -+ -+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); -+ h_mtx = &h_src_dentry->d_inode->i_mutex; -+ err = au_pin(&a->pin, src_dentry, a->bdst, -+ au_opt_udba(src_dentry->d_sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, -+ AuCpup_DTIME /* | AuCpup_KEEPLINO */); -+ mutex_unlock(h_mtx); -+ au_unpin(&a->pin); -+ -+ out: -+ di_read_unlock(a->src_parent, AuLock_IR); -+ return err; ++ if (pin) ++ return pin->parent; ++ return NULL; +} + -+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a) ++static inline struct inode *au_pinned_h_dir(struct au_pin *pin) +{ -+ int err; -+ unsigned char plink; -+ struct inode *h_inode, *inode; -+ struct dentry *h_src_dentry; -+ struct super_block *sb; ++ if (pin && pin->hdir) ++ return pin->hdir->hi_inode; ++ return NULL; ++} + -+ plink = 0; -+ h_inode = NULL; -+ sb = src_dentry->d_sb; -+ inode = src_dentry->d_inode; -+ if (au_ibstart(inode) <= a->bdst) -+ h_inode = au_h_iptr(inode, a->bdst); -+ if (!h_inode || !h_inode->i_nlink) { -+ /* copyup src_dentry as the name of dentry. 
*/ -+ au_set_dbstart(src_dentry, a->bdst); -+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry)); -+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode; -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1, -+ AuCpup_KEEPLINO, a->parent); -+ mutex_unlock(&h_inode->i_mutex); -+ au_set_h_dptr(src_dentry, a->bdst, NULL); -+ au_set_dbstart(src_dentry, a->bsrc); -+ } else { -+ /* the inode of src_dentry already exists on a.bdst branch */ -+ h_src_dentry = d_find_alias(h_inode); -+ if (!h_src_dentry && au_plink_test(inode)) { -+ plink = 1; -+ h_src_dentry = au_plink_lkup(inode, a->bdst); -+ err = PTR_ERR(h_src_dentry); -+ if (IS_ERR(h_src_dentry)) -+ goto out; ++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->hdir; ++ return NULL; ++} + -+ if (unlikely(!h_src_dentry->d_inode)) { -+ dput(h_src_dentry); -+ h_src_dentry = NULL; -+ } ++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++{ ++ if (pin) ++ pin->dentry = dentry; ++} + -+ } -+ if (h_src_dentry) { -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ dput(h_src_dentry); ++static inline void au_pin_set_parent_lflag(struct au_pin *pin, ++ unsigned char lflag) ++{ ++ if (pin) { ++ /* dirty macros require brackets */ ++ if (lflag) { ++ au_fset_pin(pin->flags, DI_LOCKED); + } else { -+ AuIOErr("no dentry found for hi%lu on b%d\n", -+ h_inode->i_ino, a->bdst); -+ err = -EIO; ++ au_fclr_pin(pin->flags, DI_LOCKED); + } + } -+ -+ if (!err && !plink) -+ au_plink_append(inode, a->bdst, a->h_path.dentry); -+ -+out: -+ return err; +} + -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry) ++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) +{ -+ int err, rerr; -+ struct au_dtime dt; -+ struct au_link_args *a; -+ struct dentry *wh_dentry, *h_src_dentry; -+ struct inode *inode; -+ struct 
super_block *sb; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ IMustLock(dir); -+ inode = src_dentry->d_inode; -+ IMustLock(inode); ++ if (pin) { ++ dput(pin->parent); ++ pin->parent = dget(parent); ++ } ++} + -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; ++/* ---------------------------------------------------------------------- */ + -+ a->parent = dentry->d_parent; /* dir inode is locked */ -+ aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0); -+ a->src_parent = dget_parent(src_dentry); -+ wr_dir_args.force_btgt = au_dbstart(src_dentry); ++#ifdef CONFIG_AUFS_HINOTIFY ++/* hinotify.c */ ++int au_hin_alloc(struct au_hinode *hinode, struct inode *inode, ++ struct inode *h_inode); ++void au_hin_free(struct au_hinode *hinode); ++void au_hin_ctl(struct au_hinode *hinode, int do_set); ++void au_reset_hinotify(struct inode *inode, unsigned int flags); + -+ di_write_lock_parent(a->parent); -+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_unlock; ++int __init au_hinotify_init(void); ++void au_hinotify_fin(void); + -+ err = 0; -+ sb = dentry->d_sb; -+ a->bdst = au_dbstart(dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); -+ a->bsrc = au_dbstart(src_dentry); -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) -+ err = au_cpup_or_link(src_dentry, a); -+ else { -+ h_src_dentry = au_h_dptr(src_dentry, a->bdst); -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ } -+ } else { -+ /* -+ * copyup src_dentry to the branch we process, -+ * and then link(2) to it. 
-+ */ -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { -+ au_unpin(&a->pin); -+ di_write_unlock(a->parent); -+ err = au_cpup_before_link(src_dentry, a); -+ if (!err) { -+ di_write_lock_parent(a->parent); -+ err = au_pin(&a->pin, dentry, a->bdst, -+ au_opt_udba(sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_wh; -+ } -+ } -+ if (!err) { -+ h_src_dentry = au_h_dptr(src_dentry, a->bdst); -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path); -+ } -+ } -+ if (unlikely(err)) -+ goto out_unpin; ++static inline ++void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val) ++{ ++ hinode->hi_notify = val; ++} + -+ if (wh_dentry) { -+ a->h_path.dentry = wh_dentry; -+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out_revert; -+ } ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ atomic_dec_return(&au_ii(inode)->ii_generation); ++} + -+ dir->i_version++; -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ inc_nlink(inode); -+ inode->i_ctime = dir->i_ctime; -+ if (!d_unhashed(a->h_path.dentry)) -+ d_instantiate(dentry, au_igrab(inode)); -+ else -+ /* some filesystem calls d_drop() */ -+ d_drop(dentry); -+ goto out_unpin; /* success */ ++#else ++static inline ++int au_hin_alloc(struct au_hinode *hinode __maybe_unused, ++ struct inode *inode __maybe_unused, ++ struct inode *h_inode __maybe_unused) ++{ ++ return -EOPNOTSUPP; ++} + -+ out_revert: -+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); -+ if (!rerr) -+ goto out_dt; -+ AuIOErr("%.*s reverting failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ out_dt: -+ d_drop(dentry); -+ au_dtime_revert(&dt); -+ out_unpin: -+ au_unpin(&a->pin); -+ out_wh: -+ dput(wh_dentry); -+ out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ di_write_unlock(a->parent); -+ 
dput(a->src_parent); -+ aufs_read_and_write_unlock2(dentry, src_dentry); -+ kfree(a); -+ out: -+ return err; ++static inline void au_hin_free(struct au_hinode *hinode __maybe_unused) ++{ ++ /* nothing */ +} + -+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++static inline void au_hin_ctl(struct au_hinode *hinode __maybe_unused, ++ int do_set __maybe_unused) +{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ unsigned char diropq; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *wh_dentry, *parent, *opq_dentry; -+ struct mutex *h_mtx; -+ struct super_block *sb; -+ struct au_dtime dt; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR -+ }; ++ /* nothing */ ++} + -+ IMustLock(dir); ++static inline void au_reset_hinotify(struct inode *inode __maybe_unused, ++ unsigned int flags __maybe_unused) ++{ ++ /* nothing */ ++} + -+ aufs_read_lock(dentry, AuLock_DW); -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out; ++static inline int au_hinotify_init(void) ++{ ++ return 0; ++} + -+ sb = dentry->d_sb; -+ bindex = au_dbstart(dentry); -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_mkdir(au_pinned_h_dir(&pin), &h_path, mode); -+ if (unlikely(err)) -+ goto out_unlock; ++#define au_hinotify_fin() do {} while (0) + -+ /* make the dir opaque */ -+ diropq = 0; -+ h_mtx = &h_path.dentry->d_inode->i_mutex; -+ if (wh_dentry -+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ opq_dentry = au_diropq_create(dentry, bindex); -+ mutex_unlock(h_mtx); -+ err = PTR_ERR(opq_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out_dir; -+ dput(opq_dentry); -+ diropq = 1; -+ } ++static inline ++void au_hin_init(struct 
au_hinode *hinode __maybe_unused, ++ struct au_hinotify *val __maybe_unused) ++{ ++ /* empty */ ++} ++#endif /* CONFIG_AUFS_HINOTIFY */ + -+ err = epilog(dir, bindex, wh_dentry, dentry); -+ if (!err) { -+ inc_nlink(dir); -+ goto out_unlock; /* success */ -+ } ++static inline void au_hin_suspend(struct au_hinode *hdir) ++{ ++ au_hin_ctl(hdir, /*do_set*/0); ++} + -+ /* revert */ -+ if (diropq) { -+ AuLabel(revert opq); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(dentry, bindex); -+ mutex_unlock(h_mtx); -+ if (rerr) { -+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; -+ } -+ } ++static inline void au_hin_resume(struct au_hinode *hdir) ++{ ++ au_hin_ctl(hdir, /*do_set*/1); ++} + -+ out_dir: -+ AuLabel(revert dir); -+ rerr = vfsub_rmdir(au_pinned_h_dir(&pin), &h_path); -+ if (rerr) { -+ AuIOErr("%.*s reverting dir failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ err = -EIO; ++static inline void au_hin_imtx_lock(struct au_hinode *hdir) ++{ ++ mutex_lock(&hdir->hi_inode->i_mutex); ++ au_hin_suspend(hdir); ++} ++ ++static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir, ++ unsigned int sc __maybe_unused) ++{ ++ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); ++ au_hin_suspend(hdir); ++} ++ ++static inline void au_hin_imtx_unlock(struct au_hinode *hdir) ++{ ++ au_hin_resume(hdir); ++ mutex_unlock(&hdir->hi_inode->i_mutex); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +diff -urN linux-2.6.30.org/fs/aufs/ioctl.c linux-2.6.30/fs/aufs/ioctl.c +--- linux-2.6.30.org/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/ioctl.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. 
Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * ioctl ++ * currently plink-management only. ++ */ ++ ++#include ++#include "aufs.h" ++ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg __maybe_unused) ++{ ++ long err; ++ struct super_block *sb; ++ struct au_sbinfo *sbinfo; ++ ++ err = -EACCES; ++ if (!capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ err = 0; ++ sb = file->f_dentry->d_sb; ++ sbinfo = au_sbi(sb); ++ switch (cmd) { ++ case AUFS_CTL_PLINK_MAINT: ++ /* ++ * pseudo-link maintenance mode, ++ * cleared by aufs_release_dir() ++ */ ++ si_write_lock(sb); ++ if (!au_ftest_si(sbinfo, MAINTAIN_PLINK)) { ++ au_fset_si(sbinfo, MAINTAIN_PLINK); ++ au_fi(file)->fi_maintain_plink = 1; ++ } else ++ err = -EBUSY; ++ si_write_unlock(sb); ++ break; ++ case AUFS_CTL_PLINK_CLEAN: ++ aufs_write_lock(sb->s_root); ++ if (au_opt_test(sbinfo->si_mntflags, PLINK)) ++ au_plink_put(sb); ++ aufs_write_unlock(sb->s_root); ++ break; ++ default: ++ err = -EINVAL; + } -+ d_drop(dentry); -+ au_dtime_revert(&dt); -+ out_unlock: -+ au_unpin(&pin); -+ dput(wh_dentry); ++ + out: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ di_write_unlock(parent); -+ aufs_read_unlock(dentry, AuLock_DW); + return err; +} -diff --git 
a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c -new file mode 100644 -index 0000000..a9756ae ---- /dev/null -+++ b/fs/aufs/i_op_del.c -@@ -0,0 +1,463 @@ +diff -urN linux-2.6.30.org/fs/aufs/i_op_add.c linux-2.6.30/fs/aufs/i_op_add.c +--- linux-2.6.30.org/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/i_op_add.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,638 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -13621,94 +10870,103 @@ index 0000000..a9756ae + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * inode operations (del entry) ++ * inode operations (add entry) + */ + +#include "aufs.h" + +/* -+ * decide if a new whiteout for @dentry is necessary or not. -+ * when it is necessary, prepare the parent dir for the upper branch whose -+ * branch index is @bcpup for creation. the actual creation of the whiteout will -+ * be done by caller. -+ * return value: -+ * 0: wh is unnecessary -+ * plus: wh is necessary -+ * minus: error ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. 
+ */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) ++static int epilog(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct dentry *dentry) +{ -+ int need_wh, err; -+ aufs_bindex_t bstart; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ if (*bcpup < 0) { -+ *bcpup = bstart; -+ if (au_test_ro(sb, bstart, dentry->d_inode)) { -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ *bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; -+ } -+ } else -+ AuDebugOn(bstart < *bcpup -+ || au_test_ro(sb, *bcpup, dentry->d_inode)); -+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct path h_path; ++ struct inode *inode, *h_dir; ++ struct dentry *wh; + -+ if (*bcpup != bstart) { -+ err = au_cpup_dirs(dentry, *bcpup); ++ bwh = -1; ++ if (wh_dentry) { ++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(h_dir); ++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); ++ bwh = au_dbwh(dentry); ++ h_path.dentry = wh_dentry; ++ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, ++ dentry); + if (unlikely(err)) + goto out; -+ need_wh = 1; -+ } else { -+ aufs_bindex_t old_bend, new_bend, bdiropq = -1; ++ } + -+ old_bend = au_dbend(dentry); -+ if (isdir) { -+ bdiropq = au_dbdiropq(dentry); -+ au_set_dbdiropq(dentry, -1); -+ } -+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0, -+ /*nd*/NULL); -+ err = need_wh; -+ if (isdir) -+ au_set_dbdiropq(dentry, bdiropq); -+ if (unlikely(err < 0)) -+ goto out; -+ new_bend = au_dbend(dentry); -+ if (!need_wh && old_bend != new_bend) { -+ au_set_h_dptr(dentry, new_bend, NULL); -+ au_set_dbend(dentry, old_bend); -+ } ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = dentry->d_parent->d_inode; /* dir inode is locked */ ++ IMustLock(dir); ++ if (au_ibstart(dir) == 
au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++ return 0; /* success */ + } -+ AuDbg("need_wh %d\n", need_wh); -+ err = need_wh; ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ /* dir inode is locked */ ++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); ++ rerr = PTR_ERR(wh); ++ if (IS_ERR(wh)) { ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } else ++ dput(wh); + + out: + return err; +} + +/* -+ * simple tests for the del-entry operations. ++ * simple tests for the adding inode operations. + * following the checks in vfs, plus the parent-child relationship. + */ -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, + struct dentry *h_parent, int isdir) +{ + int err; + umode_t h_mode; -+ struct dentry *h_dentry, *h_latest; ++ struct dentry *h_dentry; + struct inode *h_inode; + + h_dentry = au_h_dptr(dentry, bindex); + h_inode = h_dentry->d_inode; -+ if (dentry->d_inode) { -+ err = -ENOENT; ++ if (!dentry->d_inode) { ++ err = -EEXIST; ++ if (unlikely(h_inode)) ++ goto out; ++ } else { ++ /* rename(2) case */ ++ err = -EIO; + if (unlikely(!h_inode || !h_inode->i_nlink)) + goto out; + @@ -13716,373 +10974,537 @@ index 0000000..a9756ae + if (!isdir) { + err = -EISDIR; + if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(h_inode)) -+ goto out; -+ } -+ -+ err = -ENOENT; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ goto out; -+ err = 0; -+ -+ /* -+ * rmdir a dir may break the consistency on some filesystem. -+ * let's try heavy test. 
-+ */ -+ err = -EACCES; -+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) -+ goto out; ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } + -+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, -+ au_sbr(dentry->d_sb, bindex)); + err = -EIO; -+ if (IS_ERR(h_latest)) ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) + goto out; -+ if (h_latest == h_dentry) -+ err = 0; -+ dput(h_latest); ++ err = 0; + + out: + return err; +} + +/* -+ * decide the branch where we operate for @dentry. the branch index will be set -+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent -+ * dir for reverting. -+ * when a new whiteout is necessary, create it. ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * and lookup whiteout for the new entry. + */ +static struct dentry* -+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, -+ struct au_dtime *dt, struct au_pin *pin) ++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, ++ struct dentry *src_dentry, struct au_pin *pin, ++ struct au_wr_dir_args *wr_dir_args) +{ -+ struct dentry *wh_dentry; ++ struct dentry *wh_dentry, *h_parent; + struct super_block *sb; -+ struct path h_path; -+ int err, need_wh; ++ struct au_branch *br; ++ int err; + unsigned int udba; + aufs_bindex_t bcpup; + -+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); -+ wh_dentry = ERR_PTR(need_wh); -+ if (unlikely(need_wh < 0)) ++ err = au_wr_dir(dentry, src_dentry, wr_dir_args); ++ bcpup = err; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) + goto out; + + sb = dentry->d_sb; + udba = au_opt_udba(sb); -+ bcpup = *rbcpup; + err = au_pin(pin, dentry, bcpup, udba, + AuPin_DI_LOCKED | AuPin_MNT_WRITE); + wh_dentry = ERR_PTR(err); + if (unlikely(err)) + goto out; + -+ h_path.dentry = au_pinned_h_parent(pin); ++ h_parent = 
au_pinned_h_parent(pin); + if (udba != AuOpt_UDBA_NONE + && au_dbstart(dentry) == bcpup) { -+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); ++ err = au_may_add(dentry, bcpup, h_parent, ++ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); + wh_dentry = ERR_PTR(err); + if (unlikely(err)) + goto out_unpin; + } + -+ h_path.mnt = au_sbr_mnt(sb, bcpup); -+ au_dtime_store(dt, au_pinned_parent(pin), &h_path); -+ wh_dentry = NULL; -+ if (!need_wh) -+ goto out; /* success, no need to create whiteout */ ++ br = au_sbr(sb, bcpup); ++ if (dt) { ++ struct path tmp = { ++ .dentry = h_parent, ++ .mnt = br->br_mnt ++ }; ++ au_dtime_store(dt, au_pinned_parent(pin), &tmp); ++ } + -+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); -+ if (!IS_ERR(wh_dentry)) ++ wh_dentry = NULL; ++ if (bcpup != au_dbwh(dentry)) + goto out; /* success */ -+ /* returns with the parent is locked and wh_dentry is dget-ed */ ++ ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); + + out_unpin: -+ au_unpin(pin); ++ if (IS_ERR(wh_dentry)) ++ au_unpin(pin); + out: + return wh_dentry; +} + -+/* -+ * when removing a dir, rename it to a unique temporary whiteout-ed name first -+ * in order to be revertible and save time for removing many child whiteouts -+ * under the dir. -+ * returns 1 when there are too many child whiteout and caller should remove -+ * them asynchronously. returns 0 when the number of children is enough small to -+ * remove now or the branch fs is a remote fs. -+ * otherwise return an error. 
-+ */ -+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, -+ struct au_nhash *whlist, struct inode *dir) ++/* ---------------------------------------------------------------------- */ ++ ++enum { Mknod, Symlink, Creat }; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ int mode; ++ struct nameidata *nd; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ int mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) +{ -+ int rmdir_later, err, dirwh; -+ struct dentry *h_dentry; -+ struct super_block *sb; ++ int err; ++ aufs_bindex_t bstart; ++ unsigned char created; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent; ++ struct inode *h_dir; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY ++ }; + -+ sb = dentry->d_sb; -+ h_dentry = au_h_dptr(dentry, bindex); -+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); -+ if (unlikely(err)) -+ goto out; ++ IMustLock(dir); + -+ /* stop monitoring */ -+ au_hin_free(au_hi(dentry->d_inode, bindex)); ++ parent = dentry->d_parent; /* dir inode is locked */ ++ aufs_read_lock(dentry, AuLock_DW); ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; + -+ if (!au_test_fs_remote(h_dentry->d_sb)) { -+ dirwh = au_sbi(sb)->si_dirwh; -+ rmdir_later = (dirwh <= 1); -+ if (!rmdir_later) -+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, -+ dirwh); -+ if (rmdir_later) -+ return rmdir_later; ++ bstart = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_dir = au_pinned_h_dir(&pin); ++ switch (arg->type) { ++ case Creat: ++ err = vfsub_create(h_dir, &h_path, arg->u.c.mode); ++ break; ++ case Symlink: ++ err = 
vfsub_symlink(h_dir, &h_path, arg->u.s.symname); ++ break; ++ case Mknod: ++ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev); ++ break; ++ default: ++ BUG(); + } ++ created = !err; ++ if (!err) ++ err = epilog(dir, bstart, wh_dentry, dentry); + -+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); -+ if (unlikely(err)) { -+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n", -+ AuDLNPair(h_dentry), bindex, err); -+ err = 0; ++ /* revert */ ++ if (unlikely(created && err && h_path.dentry->d_inode)) { ++ int rerr; ++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ if (rerr) { ++ AuIOErr("%.*s revert failure(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++ d_drop(dentry); + } + ++ au_unpin(&pin); ++ dput(wh_dentry); ++ + out: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AuLock_DW); + return err; +} + -+/* -+ * final procedure for deleting a entry. -+ * maintain dentry and iattr. -+ */ -+static void epilog(struct inode *dir, struct dentry *dentry, -+ aufs_bindex_t bindex) ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ -+ struct inode *inode; ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = { ++ .mode = mode, ++ .dev = dev ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} + -+ /* -+ * even if this is not a dir, -+ * set S_DEAD here since we need to detect the dead inode. 
-+ */ -+ inode = dentry->d_inode; -+ if (!inode->i_nlink) -+ inode->i_flags |= S_DEAD; -+ d_drop(dentry); -+ inode->i_ctime = dir->i_ctime; ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = mode, ++ .nd = nd ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_link_args { ++ aufs_bindex_t bdst, bsrc; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *src_parent, *parent; ++}; ++ ++static int au_cpup_before_link(struct dentry *src_dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ struct dentry *h_src_dentry; ++ struct mutex *h_mtx; ++ ++ di_read_lock_parent(a->src_parent, AuLock_IR); ++ err = au_test_and_cpup_dirs(src_dentry, a->bdst); ++ if (unlikely(err)) ++ goto out; ++ ++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); ++ h_mtx = &h_src_dentry->d_inode->i_mutex; ++ err = au_pin(&a->pin, src_dentry, a->bdst, ++ au_opt_udba(src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ err = au_sio_cpup_simple(src_dentry, a->bdst, -1, ++ AuCpup_DTIME /* | AuCpup_KEEPLINO */); ++ mutex_unlock(h_mtx); ++ au_unpin(&a->pin); + -+ if (atomic_read(&dentry->d_count) == 1) { -+ au_set_h_dptr(dentry, au_dbstart(dentry), NULL); -+ au_update_dbstart(dentry); -+ } -+ if (au_ibstart(dir) == bindex) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; ++ out: ++ di_read_unlock(a->src_parent, AuLock_IR); ++ return err; +} + -+/* -+ * when an error happened, remove the created whiteout and revert everything. 
-+ */ -+static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh, -+ struct dentry *wh_dentry, struct dentry *dentry, -+ struct au_dtime *dt) ++static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a) +{ -+ int rerr; -+ struct path h_path = { -+ .dentry = wh_dentry, -+ .mnt = au_sbr_mnt(dir->i_sb, bwh) -+ }; ++ int err; ++ unsigned char plink; ++ struct inode *h_inode, *inode; ++ struct dentry *h_src_dentry; ++ struct super_block *sb; + -+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry); -+ if (!rerr) { -+ au_set_dbwh(dentry, bwh); -+ au_dtime_revert(dt); -+ return 0; ++ plink = 0; ++ h_inode = NULL; ++ sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ if (au_ibstart(inode) <= a->bdst) ++ h_inode = au_h_iptr(inode, a->bdst); ++ if (!h_inode || !h_inode->i_nlink) { ++ /* copyup src_dentry as the name of dentry. */ ++ au_set_dbstart(src_dentry, a->bdst); ++ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry)); ++ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode; ++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); ++ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1, ++ AuCpup_KEEPLINO, a->parent); ++ mutex_unlock(&h_inode->i_mutex); ++ au_set_h_dptr(src_dentry, a->bdst, NULL); ++ au_set_dbstart(src_dentry, a->bsrc); ++ } else { ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_src_dentry = d_find_alias(h_inode); ++ if (!h_src_dentry && au_plink_test(inode)) { ++ plink = 1; ++ h_src_dentry = au_plink_lkup(inode, a->bdst); ++ err = PTR_ERR(h_src_dentry); ++ if (IS_ERR(h_src_dentry)) ++ goto out; ++ ++ if (unlikely(!h_src_dentry->d_inode)) { ++ dput(h_src_dentry); ++ h_src_dentry = NULL; ++ } ++ ++ } ++ if (h_src_dentry) { ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ dput(h_src_dentry); ++ } else { ++ AuIOErr("no dentry found for hi%lu on b%d\n", ++ h_inode->i_ino, a->bdst); ++ err = -EIO; ++ } + } + -+ AuIOErr("%.*s reverting whiteout 
failed(%d, %d)\n", -+ AuDLNPair(dentry), err, rerr); -+ return -EIO; -+} ++ if (!err && !plink) ++ au_plink_append(inode, a->bdst, a->h_path.dentry); + -+/* ---------------------------------------------------------------------- */ ++out: ++ return err; ++} + -+int aufs_unlink(struct inode *dir, struct dentry *dentry) ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) +{ -+ int err; -+ aufs_bindex_t bwh, bindex, bstart; ++ int err, rerr; + struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *inode, *h_dir; -+ struct dentry *parent, *wh_dentry; ++ struct au_link_args *a; ++ struct dentry *wh_dentry, *h_src_dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; + + IMustLock(dir); -+ inode = dentry->d_inode; -+ if (unlikely(!inode)) -+ return -ENOENT; /* possible? */ ++ inode = src_dentry->d_inode; + IMustLock(inode); + -+ aufs_read_lock(dentry, AuLock_DW); -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); ++ err = -ENOENT; ++ if (unlikely(!inode->i_nlink)) ++ goto out; + -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) + goto out; + -+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ h_path.dentry = au_h_dptr(dentry, bstart); -+ dget(h_path.dentry); -+ if (bindex == bstart) { -+ h_dir = au_pinned_h_dir(&pin); -+ err = vfsub_unlink(h_dir, &h_path, /*force*/0); -+ } else { -+ /* dir inode is locked */ -+ h_dir = wh_dentry->d_parent->d_inode; -+ IMustLock(h_dir); -+ err = 0; -+ } ++ a->parent = dentry->d_parent; /* dir inode is locked */ ++ aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0); ++ a->src_parent = 
dget_parent(src_dentry); ++ wr_dir_args.force_btgt = au_dbstart(src_dentry); + -+ if (!err) { -+ drop_nlink(inode); -+ epilog(dir, dentry, bindex); ++ di_write_lock_parent(a->parent); ++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_unlock; + -+ /* update target timestamps */ -+ if (bindex == bstart) { -+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ -+ inode->i_ctime = h_path.dentry->d_inode->i_ctime; -+ } else -+ /* todo: this timestamp may be reverted later */ -+ inode->i_ctime = h_dir->i_ctime; -+ goto out_unlock; /* success */ ++ err = 0; ++ sb = dentry->d_sb; ++ a->bdst = au_dbstart(dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); ++ a->bsrc = au_dbstart(src_dentry); ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) ++ err = au_cpup_or_link(src_dentry, a); ++ else { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ } ++ } else { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. 
++ */ ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { ++ au_unpin(&a->pin); ++ di_write_unlock(a->parent); ++ err = au_cpup_before_link(src_dentry, a); ++ if (!err) { ++ di_write_lock_parent(a->parent); ++ err = au_pin(&a->pin, dentry, a->bdst, ++ au_opt_udba(sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_wh; ++ } ++ } ++ if (!err) { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path); ++ } + } ++ if (unlikely(err)) ++ goto out_unpin; + -+ /* revert */ + if (wh_dentry) { -+ int rerr; -+ -+ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt); -+ if (rerr) -+ err = rerr; ++ a->h_path.dentry = wh_dentry; ++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out_revert; + } + -+ out_unlock: -+ au_unpin(&pin); ++ dir->i_version++; ++ if (au_ibstart(dir) == au_dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ inc_nlink(inode); ++ inode->i_ctime = dir->i_ctime; ++ if (!d_unhashed(a->h_path.dentry)) ++ d_instantiate(dentry, au_igrab(inode)); ++ else ++ /* some filesystem calls d_drop() */ ++ d_drop(dentry); ++ goto out_unpin; /* success */ ++ ++ out_revert: ++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0); ++ if (!rerr) ++ goto out_dt; ++ AuIOErr("%.*s reverting failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ out_dt: ++ d_drop(dentry); ++ au_dtime_revert(&dt); ++ out_unpin: ++ au_unpin(&a->pin); ++ out_wh: + dput(wh_dentry); -+ dput(h_path.dentry); ++ out_unlock: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ di_write_unlock(a->parent); ++ dput(a->src_parent); ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++ kfree(a); + out: -+ di_write_unlock(parent); -+ aufs_read_unlock(dentry, AuLock_DW); + return err; +} + -+int aufs_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ int err, 
rmdir_later; -+ aufs_bindex_t bwh, bindex, bstart; -+ struct au_dtime dt; ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ unsigned char diropq; + struct au_pin pin; -+ struct inode *inode; -+ struct dentry *parent, *wh_dentry, *h_dentry; -+ struct au_whtmp_rmdir *args; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent, *opq_dentry; ++ struct mutex *h_mtx; ++ struct super_block *sb; ++ struct au_dtime dt; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR ++ }; + + IMustLock(dir); -+ inode = dentry->d_inode; -+ err = -ENOENT; /* possible? */ -+ if (unlikely(!inode)) -+ goto out; -+ IMustLock(inode); -+ -+ err = -ENOMEM; -+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); -+ if (unlikely(!args)) -+ goto out; + -+ aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH); ++ aufs_read_lock(dentry, AuLock_DW); + parent = dentry->d_parent; /* dir inode is locked */ + di_write_lock_parent(parent); -+ err = au_test_empty(dentry, args->whlist); -+ if (unlikely(err)) -+ goto out_args; -+ -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin, ++ &wr_dir_args); + err = PTR_ERR(wh_dentry); + if (IS_ERR(wh_dentry)) -+ goto out_args; ++ goto out; + -+ h_dentry = au_h_dptr(dentry, bstart); -+ dget(h_dentry); -+ rmdir_later = 0; -+ if (bindex == bstart) { -+ err = renwh_and_rmdir(dentry, bstart, args->whlist, dir); -+ if (err > 0) { -+ rmdir_later = err; -+ err = 0; -+ } -+ } else { -+ /* stop monitoring */ -+ au_hin_free(au_hi(inode, bstart)); ++ sb = dentry->d_sb; ++ bindex = au_dbstart(dentry); ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mkdir(au_pinned_h_dir(&pin), &h_path, mode); ++ if (unlikely(err)) ++ goto out_unlock; + -+ /* 
dir inode is locked */ -+ IMustLock(wh_dentry->d_parent->d_inode); -+ err = 0; ++ /* make the dir opaque */ ++ diropq = 0; ++ h_mtx = &h_path.dentry->d_inode->i_mutex; ++ if (wh_dentry ++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ opq_dentry = au_diropq_create(dentry, bindex); ++ mutex_unlock(h_mtx); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; + } + ++ err = epilog(dir, bindex, wh_dentry, dentry); + if (!err) { -+ clear_nlink(inode); -+ au_set_dbdiropq(dentry, -1); -+ epilog(dir, dentry, bindex); -+ -+ if (rmdir_later) { -+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); -+ args = NULL; -+ } -+ ++ inc_nlink(dir); + goto out_unlock; /* success */ + } + + /* revert */ -+ AuLabel(revert); -+ if (wh_dentry) { -+ int rerr; -+ -+ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt); -+ if (rerr) -+ err = rerr; ++ if (diropq) { ++ AuLabel(revert opq); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bindex); ++ mutex_unlock(h_mtx); ++ if (rerr) { ++ AuIOErr("%.*s reverting diropq failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } + } + ++ out_dir: ++ AuLabel(revert dir); ++ rerr = vfsub_rmdir(au_pinned_h_dir(&pin), &h_path); ++ if (rerr) { ++ AuIOErr("%.*s reverting dir failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ d_drop(dentry); ++ au_dtime_revert(&dt); + out_unlock: + au_unpin(&pin); + dput(wh_dentry); -+ dput(h_dentry); -+ out_args: ++ out: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } + di_write_unlock(parent); + aufs_read_unlock(dentry, AuLock_DW); -+ if (args) -+ au_whtmp_rmdir_free(args); -+ out: + return err; +} -diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c -new file mode 100644 -index 0000000..3617d94 ---- /dev/null -+++ b/fs/aufs/i_op_ren.c -@@ -0,0 +1,940 @@ +diff -urN linux-2.6.30.org/fs/aufs/i_op.c 
linux-2.6.30/fs/aufs/i_op.c +--- linux-2.6.30.org/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/i_op.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,877 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -14090,945 +11512,880 @@ index 0000000..3617d94 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * inode operation (rename entry) -+ * todo: this is crazy monster -+ */ -+ -+#include "aufs.h" -+ -+enum { AuSRC, AuDST, AuSrcDst }; -+enum { AuPARENT, AuCHILD, AuParentChild }; -+ -+#define AuRen_ISDIR 1 -+#define AuRen_ISSAMEDIR (1 << 1) -+#define AuRen_WHSRC (1 << 2) -+#define AuRen_WHDST (1 << 3) -+#define AuRen_MNT_WRITE (1 << 4) -+#define AuRen_DT_DSTDIR (1 << 5) -+#define AuRen_DIROPQ (1 << 6) -+#define AuRen_CPUP (1 << 7) -+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) -+#define au_fset_ren(flags, name) { (flags) |= AuRen_##name; } -+#define au_fclr_ren(flags, name) { (flags) &= ~AuRen_##name; } -+ -+struct au_ren_args { -+ struct { -+ struct dentry *dentry, *h_dentry, *parent, *h_parent, -+ *wh_dentry; -+ struct inode *dir, *inode; -+ struct au_hinode *hdir; -+ struct au_dtime dt[AuParentChild]; -+ aufs_bindex_t bstart; -+ } sd[AuSrcDst]; -+ -+#define src_dentry sd[AuSRC].dentry -+#define src_dir sd[AuSRC].dir -+#define src_inode sd[AuSRC].inode -+#define src_h_dentry sd[AuSRC].h_dentry 
-+#define src_parent sd[AuSRC].parent -+#define src_h_parent sd[AuSRC].h_parent -+#define src_wh_dentry sd[AuSRC].wh_dentry -+#define src_hdir sd[AuSRC].hdir -+#define src_h_dir sd[AuSRC].hdir->hi_inode -+#define src_dt sd[AuSRC].dt -+#define src_bstart sd[AuSRC].bstart -+ -+#define dst_dentry sd[AuDST].dentry -+#define dst_dir sd[AuDST].dir -+#define dst_inode sd[AuDST].inode -+#define dst_h_dentry sd[AuDST].h_dentry -+#define dst_parent sd[AuDST].parent -+#define dst_h_parent sd[AuDST].h_parent -+#define dst_wh_dentry sd[AuDST].wh_dentry -+#define dst_hdir sd[AuDST].hdir -+#define dst_h_dir sd[AuDST].hdir->hi_inode -+#define dst_dt sd[AuDST].dt -+#define dst_bstart sd[AuDST].bstart -+ -+ struct dentry *h_trap; -+ struct au_branch *br; -+ struct au_hinode *src_hinode; -+ struct path h_path; -+ struct au_nhash *whlist; -+ aufs_bindex_t btgt; -+ -+ unsigned int flags; -+ -+ struct au_whtmp_rmdir *thargs; -+ struct dentry *h_dst; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * functions for reverting. -+ * when an error happened in a single rename systemcall, we should revert -+ * everything as if nothing happend. -+ * we don't need to revert the copied-up/down the parent dir since they are -+ * harmless. ++ * inode operations (except add/del/rename) + */ + -+#define RevertFailure(fmt, args...) 
do { \ -+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ -+ ##args, err, rerr); \ -+ err = -EIO; \ -+} while (0) -+ -+static void au_ren_rev_diropq(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(a->src_dentry, a->btgt); -+ au_hin_imtx_unlock(a->src_hinode); -+ if (rerr) -+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); -+} -+ -+ -+static void au_ren_rev_rename(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent, -+ a->br, /*nd*/NULL); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry)); -+ return; -+ } -+ -+ rerr = vfsub_rename(a->dst_h_dir, -+ au_h_dptr(a->src_dentry, a->btgt), -+ a->src_h_dir, &a->h_path); -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ -+ if (rerr) -+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); -+} -+ -+static void au_ren_rev_cpup(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ a->h_path.dentry = a->dst_h_dentry; -+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); -+ au_set_h_dptr(a->src_dentry, a->btgt, NULL); -+ au_set_dbstart(a->src_dentry, a->src_bstart); -+ if (rerr) -+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); -+} -+ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "aufs.h" + -+static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++static int h_permission(struct inode *h_inode, int mask, ++ struct vfsmount *h_mnt, int brperm) +{ -+ int rerr; ++ int err; ++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); + -+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent, -+ a->br, /*nd*/NULL); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("lookup %.*s", 
AuDLNPair(a->dst_dentry)); -+ return; -+ } -+ if (a->h_path.dentry->d_inode) { -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ return; ++ err = -EACCES; ++ if ((write_mask && IS_IMMUTABLE(h_inode)) ++ || ((mask & MAY_EXEC) ++ && S_ISREG(h_inode->i_mode) ++ && ((h_mnt->mnt_flags & MNT_NOEXEC) ++ || !(h_inode->i_mode & S_IXUGO)))) ++ goto out; ++ ++ /* ++ * - skip the lower fs test in the case of write to ro branch. ++ * - nfs dir permission write check is optimized, but a policy for ++ * link/rename requires a real check. ++ */ ++ if ((write_mask && !au_br_writable(brperm)) ++ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) ++ && write_mask && !(mask & MAY_READ)) ++ || !h_inode->i_op->permission) { ++ /* AuLabel(generic_permission); */ ++ err = generic_permission(h_inode, mask, NULL); ++ } else { ++ /* AuLabel(h_inode->permission); */ ++ err = h_inode->i_op->permission(h_inode, mask); ++ AuTraceErr(err); + } + -+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ if (!rerr) { -+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); -+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); -+ } else -+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); ++ if (!err) ++ err = devcgroup_inode_permission(h_inode, mask); ++ if (!err) ++ err = security_inode_permission ++ (h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC ++ | MAY_APPEND)); ++ ++ out: ++ return err; +} + -+static void au_ren_rev_whsrc(int err, struct au_ren_args *a) ++static int aufs_permission(struct inode *inode, int mask) +{ -+ int rerr; ++ int err; ++ aufs_bindex_t bindex, bend; ++ const unsigned char isdir = !!S_ISDIR(inode->i_mode); ++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct au_branch *br; + -+ a->h_path.dentry = a->src_wh_dentry; -+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); -+ if (rerr) -+ 
RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); -+} ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ii_read_lock_child(inode); + -+static void au_ren_rev_drop(struct au_ren_args *a) -+{ -+ struct dentry *d, *h_d; -+ int i; -+ aufs_bindex_t bend, bindex; ++ if (!isdir || write_mask) { ++ h_inode = au_h_iptr(inode, au_ibstart(inode)); ++ AuDebugOn(!h_inode ++ || ((h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT))); ++ err = 0; ++ bindex = au_ibstart(inode); ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, br->br_perm); + -+ for (i = 0; i < AuSrcDst; i++) { -+ d = a->sd[i].dentry; -+ d_drop(d); -+ bend = au_dbend(d); -+ for (bindex = au_dbstart(d); bindex <= bend; bindex++) { -+ h_d = au_h_dptr(d, bindex); -+ if (h_d) -+ d_drop(h_d); ++ if (write_mask && !err) { ++ /* test whether the upper writable branch exists */ ++ err = -EROFS; ++ for (; bindex >= 0; bindex--) ++ if (!au_br_rdonly(au_sbr(sb, bindex))) { ++ err = 0; ++ break; ++ } + } ++ goto out; + } + -+ au_update_dbstart(a->dst_dentry); -+ if (a->thargs) -+ d_drop(a->h_dst); ++ /* non-write to dir */ ++ err = 0; ++ bend = au_ibend(inode); ++ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (h_inode) { ++ AuDebugOn(!S_ISDIR(h_inode->i_mode)); ++ br = au_sbr(sb, bindex); ++ err = h_permission(h_inode, mask, br->br_mnt, ++ br->br_perm); ++ } ++ } ++ ++ out: ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++ return err; +} -+#undef RevertFailure + +/* ---------------------------------------------------------------------- */ + -+/* -+ * when we have to copyup the renaming entry, do it with the rename-target name -+ * in order to minimize the cost (the later actual rename is unnecessary). -+ * otherwise rename it on the target branch. 
-+ */ -+static int au_ren_or_cpup(struct au_ren_args *a) ++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) +{ -+ int err; -+ struct dentry *d; ++ struct dentry *ret, *parent; ++ struct inode *inode, *h_inode; ++ struct mutex *mtx; ++ struct super_block *sb; ++ int err, npositive; ++ aufs_bindex_t bstart; + -+ d = a->src_dentry; -+ if (au_dbstart(d) == a->btgt) { -+ a->h_path.dentry = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, DIROPQ) -+ && au_dbdiropq(d) == a->btgt) -+ au_fclr_ren(a->flags, DIROPQ); -+ AuDebugOn(au_dbstart(d) != a->btgt); -+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), -+ a->dst_h_dir, &a->h_path); -+ } else { -+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++ /* temporary workaround for a bug in NFSD readdir */ ++ if (!au_test_nfsd(current)) ++ IMustLock(dir); ++ else ++ WARN_ONCE(!mutex_is_locked(&dir->i_mutex), ++ "a known problem of NFSD readdir since 2.6.28\n"); + -+ au_fset_ren(a->flags, CPUP); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_set_dbstart(d, a->btgt); -+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); -+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, -+ !AuCpup_DTIME, a->dst_parent); -+ if (unlikely(err)) { -+ au_set_h_dptr(d, a->btgt, NULL); -+ au_set_dbstart(d, a->src_bstart); -+ } -+ mutex_unlock(h_mtx); ++ sb = dir->i_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_alloc_dinfo(dentry); ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_read_lock_parent(parent, AuLock_IR); ++ npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd); ++ di_read_unlock(parent, AuLock_IR); ++ err = npositive; ++ ret = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ inode = NULL; ++ if (npositive) { ++ bstart = au_dbstart(dentry); ++ h_inode = au_h_dptr(dentry, bstart)->d_inode; ++ if (!S_ISDIR(h_inode->i_mode)) { ++ /* ++ * stop 'race'-ing between 
hardlinks under different ++ * parents. ++ */ ++ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; ++ mutex_lock(mtx); ++ inode = au_new_inode(dentry, /*must_new*/0); ++ mutex_unlock(mtx); ++ } else ++ inode = au_new_inode(dentry, /*must_new*/0); ++ ret = (void *)inode; + } ++ if (IS_ERR(inode)) ++ goto out_unlock; + -+ return err; ++ ret = d_splice_alias(inode, dentry); ++ if (unlikely(IS_ERR(ret) && inode)) ++ ii_write_unlock(inode); ++ au_store_oflag(nd, inode); ++ ++ out_unlock: ++ di_write_unlock(dentry); ++ out: ++ si_read_unlock(sb); ++ return ret; +} + -+/* cf. aufs_rmdir() */ -+static int au_ren_del_whtmp(struct au_ren_args *a) ++/* ---------------------------------------------------------------------- */ ++ ++static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent, ++ const unsigned char add_entry, aufs_bindex_t bcpup, ++ aufs_bindex_t bstart) +{ + int err; -+ struct inode *dir; ++ struct dentry *h_parent; ++ struct inode *h_dir; + -+ dir = a->dst_dir; -+ if (!au_nhash_test_longer_wh(a->whlist, a->btgt, -+ au_sbi(dir->i_sb)->si_dirwh) -+ || au_test_fs_remote(a->h_dst->d_sb)) { -+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, a->whlist); -+ if (unlikely(err)) -+ AuWarn("failed removing whtmp dir %.*s (%d), " -+ "ignored.\n", AuDLNPair(a->h_dst), err); -+ } else { -+ au_nhash_wh_free(a->thargs->whlist, /*bend*/0); -+ a->thargs->whlist = a->whlist; -+ a->whlist = NULL; -+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); -+ dput(a->h_dst); -+ a->thargs = NULL; ++ if (add_entry) { ++ au_update_dbstart(dentry); ++ IMustLock(parent->d_inode); ++ } else ++ di_write_lock_parent(parent); ++ ++ err = 0; ++ if (!au_h_dptr(parent, bcpup)) { ++ if (bstart < bcpup) ++ err = au_cpdown_dirs(dentry, bcpup); ++ else ++ err = au_cpup_dirs(dentry, bcpup); ++ } ++ if (!err && add_entry) { ++ h_parent = au_h_dptr(parent, bcpup); ++ h_dir = h_parent->d_inode; ++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); ++ err = au_lkup_neg(dentry, bcpup); ++ /* 
todo: no unlock here */ ++ mutex_unlock(&h_dir->i_mutex); ++ if (bstart < bcpup && au_dbstart(dentry) < 0) { ++ au_set_dbstart(dentry, 0); ++ au_update_dbrange(dentry, /*do_put_zero*/0); ++ } + } + -+ return 0; -+} -+ -+/* make it 'opaque' dir. */ -+static int au_ren_diropq(struct au_ren_args *a) -+{ -+ int err; -+ struct dentry *diropq; -+ -+ err = 0; -+ a->src_hinode = au_hi(a->src_inode, a->btgt); -+ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ diropq = au_diropq_create(a->src_dentry, a->btgt); -+ au_hin_imtx_unlock(a->src_hinode); -+ if (IS_ERR(diropq)) -+ err = PTR_ERR(diropq); -+ dput(diropq); ++ if (!add_entry) ++ di_write_unlock(parent); ++ if (!err) ++ err = bcpup; /* success */ + + return err; +} + -+static int do_rename(struct au_ren_args *a) ++/* ++ * decide the branch and the parent dir where we will create a new entry. ++ * returns new bindex or an error. ++ * copyup the parent dir if needed. ++ */ ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args) +{ + int err; -+ struct dentry *d, *h_d; ++ aufs_bindex_t bcpup, bstart, src_bstart; ++ const unsigned char add_entry = !!au_ftest_wrdir(args->flags, ++ ADD_ENTRY); ++ struct super_block *sb; ++ struct dentry *parent; ++ struct au_sbinfo *sbinfo; + -+ /* prepare workqueue args for asynchronous rmdir */ -+ h_d = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { -+ err = -ENOMEM; -+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); -+ if (unlikely(!a->thargs)) -+ goto out; -+ a->h_dst = dget(h_d); -+ } ++ sb = dentry->d_sb; ++ sbinfo = au_sbi(sb); ++ parent = dget_parent(dentry); ++ bstart = au_dbstart(dentry); ++ bcpup = bstart; ++ if (args->force_btgt < 0) { ++ if (src_dentry) { ++ src_bstart = au_dbstart(src_dentry); ++ if (src_bstart < bstart) ++ bcpup = src_bstart; ++ } else if (add_entry) { ++ err = AuWbrCreate(sbinfo, dentry, ++ au_ftest_wrdir(args->flags, ISDIR)); ++ bcpup = err; ++ } + -+ /* create 
whiteout for src_dentry */ -+ if (au_ftest_ren(a->flags, WHSRC)) { -+ a->src_wh_dentry -+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); -+ err = PTR_ERR(a->src_wh_dentry); -+ if (IS_ERR(a->src_wh_dentry)) -+ goto out_thargs; ++ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { ++ if (add_entry) ++ err = AuWbrCopyup(sbinfo, dentry); ++ else { ++ if (!IS_ROOT(dentry)) { ++ di_read_lock_parent(parent, !AuLock_IR); ++ err = AuWbrCopyup(sbinfo, dentry); ++ di_read_unlock(parent, !AuLock_IR); ++ } else ++ err = AuWbrCopyup(sbinfo, dentry); ++ } ++ bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ bcpup = args->force_btgt; ++ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); + } ++ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); ++ if (bstart < bcpup) ++ au_update_dbrange(dentry, /*do_put_zero*/1); + -+ /* lookup whiteout for dentry */ -+ if (au_ftest_ren(a->flags, WHDST)) { -+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, -+ a->br); -+ err = PTR_ERR(h_d); -+ if (IS_ERR(h_d)) -+ goto out_whsrc; -+ if (!h_d->d_inode) -+ dput(h_d); -+ else -+ a->dst_wh_dentry = h_d; -+ } ++ err = bcpup; ++ if (bcpup == bstart) ++ goto out; /* success */ + -+ /* rename dentry to tmpwh */ -+ if (a->thargs) { -+ err = au_whtmp_ren(a->dst_h_dentry, a->br); -+ if (unlikely(err)) -+ goto out_whdst; ++ /* copyup the new parent into the branch we process */ ++ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); + -+ d = a->dst_dentry; -+ au_set_h_dptr(d, a->btgt, NULL); -+ err = au_lkup_neg(d, a->btgt); -+ if (unlikely(err)) -+ goto out_whtmp; -+ a->dst_h_dentry = au_h_dptr(d, a->btgt); -+ } ++ out: ++ dput(parent); ++ return err; ++} + -+ /* cpup src */ -+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { -+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; ++/* ---------------------------------------------------------------------- */ + -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ err = 
au_sio_cpup_simple(a->src_dentry, a->btgt, -1, -+ !AuCpup_DTIME); -+ mutex_unlock(h_mtx); -+ if (unlikely(err)) -+ goto out_whtmp; -+ } ++struct dentry *au_pinned_h_parent(struct au_pin *pin) ++{ ++ if (pin && pin->parent) ++ return au_h_dptr(pin->parent, pin->bindex); ++ return NULL; ++} + -+ /* rename by vfs_rename or cpup */ -+ d = a->dst_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) -+ && (a->dst_wh_dentry -+ || au_dbdiropq(d) == a->btgt -+ /* hide the lower to keep xino */ -+ || a->btgt < au_dbend(d) -+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) -+ au_fset_ren(a->flags, DIROPQ); -+ err = au_ren_or_cpup(a); -+ if (unlikely(err)) -+ /* leave the copied-up one */ -+ goto out_whtmp; ++void au_unpin(struct au_pin *p) ++{ ++ if (au_ftest_pin(p->flags, MNT_WRITE)) ++ mnt_drop_write(p->h_mnt); ++ if (!p->hdir) ++ return; + -+ /* make dir opaque */ -+ if (au_ftest_ren(a->flags, DIROPQ)) { -+ err = au_ren_diropq(a); -+ if (unlikely(err)) -+ goto out_rename; -+ } ++ au_hin_imtx_unlock(p->hdir); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ iput(p->hdir->hi_inode); ++ dput(p->parent); ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} + -+ /* update target timestamps */ -+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); -+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); -+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ -+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; ++int au_do_pin(struct au_pin *p) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct inode *h_dir; + -+ /* remove whiteout for dentry */ -+ if (a->dst_wh_dentry) { -+ a->h_path.dentry = a->dst_wh_dentry; -+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, -+ a->dst_dentry); -+ if (unlikely(err)) -+ goto out_diropq; ++ err = 0; ++ sb = p->dentry->d_sb; ++ br = au_sbr(sb, p->bindex); ++ if (IS_ROOT(p->dentry)) { ++ if (au_ftest_pin(p->flags, 
MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_err; ++ } ++ } ++ goto out; + } + -+ /* remove whtmp */ -+ if (a->thargs) -+ au_ren_del_whtmp(a); /* ignore this error */ ++ h_dentry = NULL; ++ if (p->bindex <= au_dbend(p->dentry)) ++ h_dentry = au_h_dptr(p->dentry, p->bindex); + -+ err = 0; -+ goto out_success; ++ p->parent = dget_parent(p->dentry); ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_lock(p->parent, AuLock_IR, p->lsc_di); + -+ out_diropq: -+ if (au_ftest_ren(a->flags, DIROPQ)) -+ au_ren_rev_diropq(err, a); -+ out_rename: -+ if (!au_ftest_ren(a->flags, CPUP)) -+ au_ren_rev_rename(err, a); -+ else -+ au_ren_rev_cpup(err, a); -+ out_whtmp: -+ if (a->thargs) -+ au_ren_rev_whtmp(err, a); -+ out_whdst: -+ dput(a->dst_wh_dentry); -+ a->dst_wh_dentry = NULL; -+ out_whsrc: -+ if (a->src_wh_dentry) -+ au_ren_rev_whsrc(err, a); -+ au_ren_rev_drop(a); -+ out_success: -+ dput(a->src_wh_dentry); -+ dput(a->dst_wh_dentry); -+ out_thargs: -+ if (a->thargs) { -+ dput(a->h_dst); -+ au_whtmp_rmdir_free(a->thargs); -+ a->thargs = NULL; ++ h_dir = NULL; ++ h_parent = au_h_dptr(p->parent, p->bindex); ++ p->hdir = au_hi(p->parent->d_inode, p->bindex); ++ if (p->hdir) ++ h_dir = p->hdir->hi_inode; ++ ++ /* udba case */ ++ if (unlikely(!p->hdir || !h_dir)) { ++ if (!au_ftest_pin(p->flags, DI_LOCKED)) ++ di_read_unlock(p->parent, AuLock_IR); ++ dput(p->parent); ++ p->parent = NULL; ++ goto out_err; ++ } ++ ++ au_igrab(h_dir); ++ au_hin_imtx_lock_nested(p->hdir, p->lsc_hi); ++ ++ if (h_dentry) { ++ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } + } ++ ++ if (au_ftest_pin(p->flags, MNT_WRITE)) { ++ p->h_mnt = br->br_mnt; ++ err = mnt_want_write(p->h_mnt); ++ if (unlikely(err)) { ++ au_fclr_pin(p->flags, MNT_WRITE); ++ goto out_unpin; ++ } ++ } ++ goto out; /* success */ ++ ++ 
out_unpin: ++ au_unpin(p); ++ out_err: ++ AuErr("err %d\n", err); ++ err = au_busy_or_stale(); + out: + return err; +} + -+/* ---------------------------------------------------------------------- */ ++void au_pin_init(struct au_pin *p, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags) ++{ ++ p->dentry = dentry; ++ p->udba = udba; ++ p->lsc_di = lsc_di; ++ p->lsc_hi = lsc_hi; ++ p->flags = flags; ++ p->bindex = bindex; + -+/* -+ * test if @dentry dir can be rename destination or not. -+ * success means, it is a logically empty dir. -+ */ -+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++ p->parent = NULL; ++ p->hdir = NULL; ++ p->h_mnt = NULL; ++} ++ ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) +{ -+ return au_test_empty(dentry, whlist); ++ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, ++ udba, flags); ++ return au_do_pin(pin); +} + -+/* -+ * test if @dentry dir can be rename source or not. -+ * if it can, return 0 and @children is filled. -+ * success means, -+ * - it is a logically empty dir. -+ * - or, it exists on writable branch and has no children including whiteouts -+ * on the lower branch. 
-+ */ -+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) ++/* ---------------------------------------------------------------------- */ ++ ++#define AuIcpup_DID_CPUP 1 ++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) ++#define au_fset_icpup(flags, name) { (flags) |= AuIcpup_##name; } ++#define au_fclr_icpup(flags, name) { (flags) &= ~AuIcpup_##name; } ++ ++struct au_icpup_args { ++ unsigned char flags; ++ unsigned char pin_flags; ++ aufs_bindex_t btgt; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *h_inode; ++}; ++ ++static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia, ++ struct au_icpup_args *a) +{ + int err; ++ unsigned int udba; ++ loff_t sz; + aufs_bindex_t bstart; ++ struct dentry *hi_wh, *parent; ++ struct inode *inode; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = 0 ++ }; + ++ di_write_lock_child(dentry); + bstart = au_dbstart(dentry); -+ if (bstart != btgt) { -+ struct au_nhash *whlist; ++ inode = dentry->d_inode; ++ if (S_ISDIR(inode->i_mode)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ /* plink or hi_wh() case */ ++ if (bstart != au_ibstart(inode)) ++ wr_dir_args.force_btgt = au_ibstart(inode); ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ if (unlikely(err < 0)) ++ goto out_dentry; ++ a->btgt = err; ++ if (err != bstart) ++ au_fset_icpup(a->flags, DID_CPUP); + -+ whlist = au_nhash_alloc(dentry->d_sb, /*bend*/0, GFP_NOFS); -+ err = PTR_ERR(whlist); -+ if (IS_ERR(whlist)) -+ goto out; -+ err = au_test_empty(dentry, whlist); -+ au_nhash_wh_free(whlist, /*bend*/0); -+ goto out; ++ err = 0; ++ a->pin_flags = AuPin_MNT_WRITE; ++ parent = NULL; ++ if (!IS_ROOT(dentry)) { ++ au_fset_pin(a->pin_flags, DI_LOCKED); ++ parent = dget_parent(dentry); ++ di_write_lock_parent(parent); + } + -+ if (bstart == au_dbtaildir(dentry)) -+ return 0; /* success */ ++ udba = au_opt_udba(dentry->d_sb); ++ if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE)) ++ 
udba = AuOpt_UDBA_NONE; ++ err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags); ++ if (unlikely(err)) { ++ if (parent) { ++ di_write_unlock(parent); ++ dput(parent); ++ } ++ goto out_dentry; ++ } ++ a->h_path.dentry = au_h_dptr(dentry, bstart); ++ a->h_inode = a->h_path.dentry->d_inode; ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ sz = -1; ++ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) ++ sz = ia->ia_size; + -+ err = au_test_empty_lower(dentry); ++ hi_wh = NULL; ++ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) { ++ hi_wh = au_hi_wh(inode, a->btgt); ++ if (!hi_wh) { ++ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL); ++ if (unlikely(err)) ++ goto out_unlock; ++ hi_wh = au_hi_wh(inode, a->btgt); ++ /* todo: revalidate hi_wh? */ ++ } ++ } + -+ out: -+ if (err == -ENOTEMPTY) { -+ AuWarn1("renaming dir who has child(ren) on multiple branches," -+ " is not supported\n"); -+ err = -EXDEV; ++ if (parent) { ++ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); ++ di_downgrade_lock(parent, AuLock_IR); ++ dput(parent); ++ } ++ if (!au_ftest_icpup(a->flags, DID_CPUP)) ++ goto out; /* success */ ++ ++ if (!d_unhashed(dentry)) { ++ err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME); ++ if (!err) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ } else if (!hi_wh) ++ a->h_path.dentry = au_h_dptr(dentry, a->btgt); ++ else ++ a->h_path.dentry = hi_wh; /* do not dget here */ ++ ++ out_unlock: ++ mutex_unlock(&a->h_inode->i_mutex); ++ a->h_inode = a->h_path.dentry->d_inode; ++ if (!err) { ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ goto out; /* success */ + } ++ ++ au_unpin(&a->pin); ++ ++ out_dentry: ++ di_write_unlock(dentry); ++ out: + return err; +} + -+/* side effect: sets whlist and h_dentry */ -+static int au_ren_may_dir(struct au_ren_args *a) ++static int aufs_setattr(struct dentry *dentry, struct iattr *ia) +{ + int err; -+ struct dentry *d; ++ struct inode *inode; ++ struct 
super_block *sb; ++ struct file *file; ++ struct au_icpup_args *a; + + err = -ENOMEM; -+ d = a->dst_dentry; -+ a->whlist = au_nhash_alloc(d->d_sb, /*bend*/0, GFP_NOFS); -+ if (unlikely(!a->whlist)) ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) + goto out; + -+ err = 0; -+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { -+ au_set_dbstart(d, a->dst_bstart); -+ err = may_rename_dstdir(d, a->whlist); -+ au_set_dbstart(d, a->btgt); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ ++ file = NULL; ++ if (ia->ia_valid & ATTR_FILE) { ++ /* currently ftruncate(2) only */ ++ file = ia->ia_file; ++ fi_write_lock(file); ++ ia->ia_file = au_h_fptr(file, au_fbstart(file)); + } -+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); -+ if (unlikely(err)) -+ goto out; + -+ d = a->src_dentry; -+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ err = may_rename_srcdir(d, a->btgt); -+ if (unlikely(err)) { -+ au_nhash_wh_free(a->whlist, /*bend*/0); -+ a->whlist = NULL; ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; ++ ++ err = au_lock_and_icpup(dentry, ia, a); ++ if (unlikely(err < 0)) ++ goto out_si; ++ if (au_ftest_icpup(a->flags, DID_CPUP)) { ++ ia->ia_file = NULL; ++ ia->ia_valid &= ~ATTR_FILE; ++ } ++ ++ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); ++ if (ia->ia_valid & ATTR_SIZE) { ++ struct file *f; ++ ++ if (ia->ia_size < i_size_read(inode)) { ++ /* unmap only */ ++ err = vmtruncate(inode, ia->ia_size); ++ if (unlikely(err)) ++ goto out_unlock; + } ++ ++ f = NULL; ++ if (ia->ia_valid & ATTR_FILE) ++ f = ia->ia_file; ++ mutex_unlock(&a->h_inode->i_mutex); ++ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f); ++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); ++ } else ++ err = vfsub_notify_change(&a->h_path, ia); ++ if (!err) ++ au_cpup_attr_changeable(inode); ++ ++ out_unlock: ++ 
mutex_unlock(&a->h_inode->i_mutex); ++ au_unpin(&a->pin); ++ di_write_unlock(dentry); ++ out_si: ++ if (file) { ++ fi_write_unlock(file); ++ ia->ia_file = file; ++ ia->ia_valid |= ATTR_FILE; + } ++ si_read_unlock(sb); ++ kfree(a); + out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * simple tests for rename. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+static int au_may_ren(struct au_ren_args *a) ++static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen) +{ -+ int err, isdir; -+ struct inode *h_inode; -+ -+ if (a->src_bstart == a->btgt) { -+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, -+ au_ftest_ren(a->flags, ISDIR)); -+ if (unlikely(err)) -+ goto out; -+ err = -EINVAL; -+ if (unlikely(a->src_h_dentry == a->h_trap)) -+ goto out; -+ } ++ int err; ++ struct inode *inode; ++ struct dentry *parent; + + err = 0; -+ if (a->dst_bstart != a->btgt) -+ goto out; -+ -+ err = -EIO; -+ h_inode = a->dst_h_dentry->d_inode; -+ isdir = !!au_ftest_ren(a->flags, ISDIR); -+ if (!a->dst_dentry->d_inode) { -+ if (unlikely(h_inode)) -+ goto out; -+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ } else { -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ if (unlikely(err)) -+ goto out; -+ err = -ENOTEMPTY; -+ if (unlikely(a->dst_h_dentry == a->h_trap)) -+ goto out; -+ err = 0; ++ inode = dentry->d_inode; ++ di_write_lock_child(dentry); ++ if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) { ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AuLock_IR); ++ /* returns a number of positive dentries */ ++ err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT); ++ if (err > 0) ++ err = au_refresh_hinode(inode, dentry); ++ di_read_unlock(parent, AuLock_IR); ++ dput(parent); ++ if (unlikely(!err)) ++ err = -EIO; + } ++ 
di_downgrade_lock(dentry, AuLock_IR); ++ if (unlikely(err)) ++ di_read_unlock(dentry, AuLock_IR); + -+ out: -+ if (unlikely(err == -ENOENT || err == -EEXIST)) -+ err = -EIO; + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * locking order -+ * (VFS) -+ * - src_dir and dir by lock_rename() -+ * - inode if exitsts -+ * (aufs) -+ * - lock all -+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, -+ * + si_read_lock -+ * + di_write_lock2_child() -+ * + di_write_lock_child() -+ * + ii_write_lock_child() -+ * + di_write_lock_child2() -+ * + ii_write_lock_child2() -+ * + src_parent and parent -+ * + di_write_lock_parent() -+ * + ii_write_lock_parent() -+ * + di_write_lock_parent2() -+ * + ii_write_lock_parent2() -+ * + lower src_dir and dir by vfsub_lock_rename() -+ * + verify the every relationships between child and parent. if any -+ * of them failed, unlock all and return -EBUSY. -+ */ -+static void au_ren_unlock(struct au_ren_args *a) ++static void au_refresh_iattr(struct inode *inode, struct kstat *st, ++ unsigned int nlink) +{ -+ struct super_block *sb; ++ inode->i_mode = st->mode; ++ inode->i_uid = st->uid; ++ inode->i_gid = st->gid; ++ inode->i_atime = st->atime; ++ inode->i_mtime = st->mtime; ++ inode->i_ctime = st->ctime; + -+ sb = a->dst_dentry->d_sb; -+ if (au_ftest_ren(a->flags, MNT_WRITE)) -+ mnt_drop_write(a->br->br_mnt); -+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); ++ au_cpup_attr_nlink(inode, /*force*/0); ++ if (S_ISDIR(inode->i_mode)) { ++ inode->i_nlink -= nlink; ++ inode->i_nlink += st->nlink; ++ } ++ ++ spin_lock(&inode->i_lock); ++ inode->i_blocks = st->blocks; ++ i_size_write(inode, st->size); ++ spin_unlock(&inode->i_lock); +} + -+static int au_ren_lock(struct au_ren_args *a) ++static int aufs_getattr(struct vfsmount *mnt __maybe_unused, ++ struct dentry *dentry, struct kstat *st) +{ + int err; -+ unsigned int udba; ++ unsigned 
int mnt_flags; ++ aufs_bindex_t bindex; ++ unsigned char udba_none, positive, did_lock; ++ struct super_block *sb, *h_sb; ++ struct inode *inode; ++ struct vfsmount *h_mnt; ++ struct dentry *h_dentry; + + err = 0; -+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); -+ a->src_hdir = au_hi(a->src_dir, a->btgt); -+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); -+ a->dst_hdir = au_hi(a->dst_dir, a->btgt); -+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); -+ udba = au_opt_udba(a->src_dentry->d_sb); -+ if (au_dbstart(a->src_dentry) == a->btgt) -+ err = au_h_verify(a->src_h_dentry, udba, -+ a->src_h_parent->d_inode, a->src_h_parent, -+ a->br); -+ if (!err && au_dbstart(a->dst_dentry) == a->btgt) -+ err = au_h_verify(a->dst_h_dentry, udba, -+ a->dst_h_parent->d_inode, a->dst_h_parent, -+ a->br); -+ if (!err) { -+ err = mnt_want_write(a->br->br_mnt); -+ if (unlikely(err)) -+ goto out_unlock; -+ au_fset_ren(a->flags, MNT_WRITE); -+ goto out; /* success */ ++ did_lock = 0; ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ si_read_lock(sb, AuLock_FLUSH); ++ if (IS_ROOT(dentry)) { ++ /* lock free root dinfo */ ++ h_dentry = dget(au_di(dentry)->di_hdentry->hd_dentry); ++ h_mnt = au_sbr_mnt(sb, 0); ++ goto getattr; + } + -+ err = au_busy_or_stale(); -+ -+ out_unlock: -+ au_ren_unlock(a); -+ out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_ren_refresh_dir(struct au_ren_args *a) -+{ -+ struct inode *dir; ++ mnt_flags = au_mntflags(sb); ++ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); + -+ dir = a->dst_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ /* is this updating defined in POSIX? 
*/ -+ au_cpup_attr_timesizes(a->src_inode); -+ au_cpup_attr_nlink(dir, /*force*/1); -+ if (a->dst_inode) { -+ clear_nlink(a->dst_inode); -+ au_cpup_attr_timesizes(a->dst_inode); ++ /* support fstat(2) */ ++ if (!d_unhashed(dentry) && !udba_none) { ++ unsigned int sigen = au_sigen(sb); ++ if (au_digen(dentry) == sigen && au_iigen(inode) == sigen) ++ di_read_lock_child(dentry, AuLock_IR); ++ else { ++ err = au_getattr_lock_reval(dentry, sigen); ++ if (unlikely(err)) ++ goto out; + } -+ } -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); -+ -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ return; -+ -+ dir = a->src_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_cpup_attr_nlink(dir, /*force*/1); -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); -+} ++ } else ++ di_read_lock_child(dentry, AuLock_IR); ++ did_lock = 1; + -+static void au_ren_refresh(struct au_ren_args *a) -+{ -+ aufs_bindex_t bend, bindex; -+ struct dentry *d, *h_d; -+ struct inode *i, *h_i; -+ struct super_block *sb; ++ bindex = au_ibstart(inode); ++ h_mnt = au_sbr_mnt(sb, bindex); ++ h_sb = h_mnt->mnt_sb; ++ if (!au_test_fs_bad_iattr(h_sb) && udba_none) ++ goto out_fill; /* success */ + -+ d = a->src_dentry; -+ au_set_dbwh(d, -1); -+ bend = au_dbend(d); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_d = au_h_dptr(d, bindex); -+ if (h_d) -+ au_set_h_dptr(d, bindex, NULL); ++ h_dentry = NULL; ++ if (au_dbstart(dentry) == bindex) ++ h_dentry = dget(au_h_dptr(dentry, bindex)); ++ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { ++ h_dentry = au_plink_lkup(inode, bindex); ++ if (IS_ERR(h_dentry)) ++ goto out_fill; /* pretending success */ + } -+ au_set_dbend(d, a->btgt); -+ -+ sb = d->d_sb; -+ i = a->src_inode; -+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) -+ return; /* success */ ++ /* illegally overlapped or something */ ++ if (unlikely(!h_dentry)) ++ goto out_fill; /* pretending success */ + -+ bend = 
au_ibend(i); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_i = au_h_iptr(i, bindex); -+ if (h_i) { -+ au_xino_write0(sb, bindex, h_i->i_ino, 0); -+ /* ignore this error */ -+ au_set_h_iptr(i, bindex, NULL, 0); -+ } ++ getattr: ++ positive = !!h_dentry->d_inode; ++ if (positive) ++ err = vfs_getattr(h_mnt, h_dentry, st); ++ dput(h_dentry); ++ if (!err) { ++ if (positive) ++ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink); ++ goto out_fill; /* success */ + } -+ au_set_ibend(i, a->btgt); ++ goto out; ++ ++ out_fill: ++ generic_fillattr(inode, st); ++ out: ++ if (did_lock) ++ di_read_unlock(dentry, AuLock_IR); ++ si_read_unlock(sb); ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+/* mainly for link(2) and rename(2) */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) -+{ -+ aufs_bindex_t bdiropq, bwh; -+ struct dentry *parent; -+ struct au_branch *br; -+ -+ parent = dentry->d_parent; -+ IMustLock(parent->d_inode); /* dir is locked */ -+ -+ bdiropq = au_dbdiropq(parent); -+ bwh = au_dbwh(dentry); -+ br = au_sbr(dentry->d_sb, btgt); -+ if (au_br_rdonly(br) -+ || (0 <= bdiropq && bdiropq < btgt) -+ || (0 <= bwh && bwh < btgt)) -+ btgt = -1; -+ -+ AuDbg("btgt %d\n", btgt); -+ return btgt; -+} -+ -+/* sets src_bstart, dst_bstart and btgt */ -+static int au_ren_wbr(struct au_ren_args *a) ++static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, ++ int bufsiz) +{ + int err; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; ++ struct super_block *sb; ++ struct dentry *h_dentry; + -+ a->src_bstart = au_dbstart(a->src_dentry); -+ a->dst_bstart = au_dbstart(a->dst_dentry); -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ wr_dir_args.force_btgt = a->src_bstart; -+ if (a->dst_inode && a->dst_bstart < a->src_bstart) -+ wr_dir_args.force_btgt = a->dst_bstart; -+ wr_dir_args.force_btgt = 
au_wbr(a->dst_dentry, wr_dir_args.force_btgt); -+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); -+ a->btgt = err; ++ err = -EINVAL; ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (unlikely(/* !h_dentry ++ || !h_dentry->d_inode ++ || !h_dentry->d_inode->i_op ++ || */ !h_dentry->d_inode->i_op->readlink)) ++ goto out; + -+ return err; -+} ++ err = security_inode_readlink(h_dentry); ++ if (unlikely(err)) ++ goto out; + -+static void au_ren_dt(struct au_ren_args *a) -+{ -+ a->h_path.dentry = a->src_h_parent; -+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { -+ a->h_path.dentry = a->dst_h_parent; -+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); ++ sb = dentry->d_sb; ++ if (!au_test_ro(sb, bindex, dentry->d_inode)) { ++ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); ++ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); + } ++ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); + -+ au_fclr_ren(a->flags, DT_DSTDIR); -+ if (!au_ftest_ren(a->flags, ISDIR)) -+ return; -+ -+ a->h_path.dentry = a->src_h_dentry; -+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); -+ if (a->dst_h_dentry->d_inode) { -+ au_fset_ren(a->flags, DT_DSTDIR); -+ a->h_path.dentry = a->dst_h_dentry; -+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); -+ } ++ out: ++ return err; +} + -+static void au_ren_rev_dt(int err, struct au_ren_args *a) ++static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ -+ struct dentry *h_d; -+ struct mutex *h_mtx; -+ -+ au_dtime_revert(a->src_dt + AuPARENT); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) -+ au_dtime_revert(a->dst_dt + AuPARENT); ++ int err; + -+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { -+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->src_dt + AuCHILD); -+ mutex_unlock(h_mtx); ++ 
aufs_read_lock(dentry, AuLock_IR); ++ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); ++ aufs_read_unlock(dentry, AuLock_IR); + -+ if (au_ftest_ren(a->flags, DT_DSTDIR)) { -+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->dst_dt + AuCHILD); -+ mutex_unlock(h_mtx); -+ } -+ } ++ return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, -+ struct inode *_dst_dir, struct dentry *_dst_dentry) ++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + int err; -+ /* reduce stack space */ -+ struct au_ren_args *a; -+ -+ IMustLock(_src_dir); -+ IMustLock(_dst_dir); ++ char *buf; ++ mm_segment_t old_fs; + + err = -ENOMEM; -+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) ++ buf = __getname(); ++ if (unlikely(!buf)) + goto out; + -+ a->src_dir = _src_dir; -+ a->src_dentry = _src_dentry; -+ a->src_inode = a->src_dentry->d_inode; -+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */ -+ a->dst_dir = _dst_dir; -+ a->dst_dentry = _dst_dentry; -+ a->dst_inode = a->dst_dentry->d_inode; -+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ -+ if (a->dst_inode) { -+ IMustLock(a->dst_inode); -+ au_igrab(a->dst_inode); -+ } -+ -+ err = -ENOTDIR; -+ if (S_ISDIR(a->src_inode->i_mode)) { -+ au_fset_ren(a->flags, ISDIR); -+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) -+ goto out_free; -+ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ AuLock_DIR | AuLock_FLUSH); -+ } else -+ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ AuLock_FLUSH); -+ -+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ -+ di_write_lock_parent(a->dst_parent); -+ -+ /* which branch we process */ -+ err = au_ren_wbr(a); -+ if (unlikely(err < 0)) -+ goto 
out_unlock; -+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); -+ a->h_path.mnt = a->br->br_mnt; -+ -+ /* are they available to be renamed */ -+ err = au_ren_may_dir(a); -+ if (unlikely(err)) -+ goto out_unlock; ++ aufs_read_lock(dentry, AuLock_IR); ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf, ++ PATH_MAX); ++ set_fs(old_fs); ++ aufs_read_unlock(dentry, AuLock_IR); + -+ /* prepare the writable parent dir on the same branch */ -+ if (a->dst_bstart == a->btgt) { -+ au_fset_ren(a->flags, WHDST); -+ } else { -+ err = au_cpup_dirs(a->dst_dentry, a->btgt); -+ if (unlikely(err)) -+ goto out_children; ++ if (err >= 0) { ++ buf[err] = 0; ++ /* will be freed by put_link */ ++ nd_set_link(nd, buf); ++ return NULL; /* success */ + } ++ __putname(buf); + -+ if (a->src_dir != a->dst_dir) { -+ /* -+ * this temporary unlock is safe, -+ * because both dir->i_mutex are locked. -+ */ -+ di_write_unlock(a->dst_parent); -+ di_write_lock_parent(a->src_parent); -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ di_write_unlock(a->src_parent); -+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); -+ au_fclr_ren(a->flags, ISSAMEDIR); -+ } else -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ if (unlikely(err < 0)) -+ goto out_children; -+ if (err) -+ au_fset_ren(a->flags, WHSRC); ++ out: ++ path_put(&nd->path); ++ AuTraceErr(err); ++ return ERR_PTR(err); ++} + -+ /* lock them all */ -+ err = au_ren_lock(a); -+ if (unlikely(err)) -+ goto out_children; ++static void aufs_put_link(struct dentry *dentry __maybe_unused, ++ struct nameidata *nd, void *cookie __maybe_unused) ++{ ++ __putname(nd_get_link(nd)); ++} + -+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) { -+ err = au_may_ren(a); -+ if (unlikely(err)) -+ goto out_hdir; -+ } ++/* ---------------------------------------------------------------------- */ + -+ /* 
store timestamps to be revertible */ -+ au_ren_dt(a); ++static void aufs_truncate_range(struct inode *inode __maybe_unused, ++ loff_t start __maybe_unused, ++ loff_t end __maybe_unused) ++{ ++ AuUnsupport(); ++} + -+ /* here we go */ -+ err = do_rename(a); -+ if (unlikely(err)) -+ goto out_dt; ++/* ---------------------------------------------------------------------- */ + -+ /* update dir attributes */ -+ au_ren_refresh_dir(a); ++struct inode_operations aufs_symlink_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .readlink = aufs_readlink, ++ .follow_link = aufs_follow_link, ++ .put_link = aufs_put_link ++}; + -+ /* dput/iput all lower dentries */ -+ au_ren_refresh(a); ++struct inode_operations aufs_dir_iop = { ++ .create = aufs_create, ++ .lookup = aufs_lookup, ++ .link = aufs_link, ++ .unlink = aufs_unlink, ++ .symlink = aufs_symlink, ++ .mkdir = aufs_mkdir, ++ .rmdir = aufs_rmdir, ++ .mknod = aufs_mknod, ++ .rename = aufs_rename, + -+ goto out_hdir; /* success */ ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr ++}; + -+ out_dt: -+ au_ren_rev_dt(err, a); -+ out_hdir: -+ au_ren_unlock(a); -+ out_children: -+ if (a->whlist) -+ au_nhash_wh_free(a->whlist, /*bend*/0); -+ out_unlock: -+ if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) { -+ au_update_dbstart(a->dst_dentry); -+ d_drop(a->dst_dentry); -+ } -+ if (!err) { -+ d_move(a->src_dentry, a->dst_dentry); -+ if (a->dst_inode -+ && (a->dst_inode->i_nlink <= 1 -+ || au_ftest_ren(a->flags, ISDIR))) -+ a->dst_inode->i_flags |= S_DEAD; -+ } -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ di_write_unlock(a->dst_parent); -+ else -+ di_write_unlock2(a->src_parent, a->dst_parent); -+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); -+ out_free: -+ iput(a->dst_inode); -+ if (a->thargs) -+ au_whtmp_rmdir_free(a->thargs); -+ kfree(a); -+ out: -+ return err; -+} -diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c -new file mode 
100644 -index 0000000..b1cff91 ---- /dev/null -+++ b/fs/aufs/iinfo.c -@@ -0,0 +1,262 @@ ++struct inode_operations aufs_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ .getattr = aufs_getattr, ++ .truncate_range = aufs_truncate_range ++}; +diff -urN linux-2.6.30.org/fs/aufs/i_op_del.c linux-2.6.30/fs/aufs/i_op_del.c +--- linux-2.6.30.org/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/i_op_del.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,468 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -15036,267 +12393,471 @@ index 0000000..b1cff91 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * inode private data ++ * inode operations (del entry) + */ + +#include "aufs.h" + -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) ++/* ++ * decide if a new whiteout for @dentry is necessary or not. ++ * when it is necessary, prepare the parent dir for the upper branch whose ++ * branch index is @bcpup for creation. the actual creation of the whiteout will ++ * be done by caller. 
++ * return value: ++ * 0: wh is unnecessary ++ * plus: wh is necessary ++ * minus: error ++ */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) +{ -+ struct inode *h_inode; ++ int need_wh, err; ++ aufs_bindex_t bstart; ++ struct super_block *sb; + -+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ return h_inode; ++ sb = dentry->d_sb; ++ bstart = au_dbstart(dentry); ++ if (*bcpup < 0) { ++ *bcpup = bstart; ++ if (au_test_ro(sb, bstart, dentry->d_inode)) { ++ err = AuWbrCopyup(au_sbi(sb), dentry); ++ *bcpup = err; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else ++ AuDebugOn(bstart < *bcpup ++ || au_test_ro(sb, *bcpup, dentry->d_inode)); ++ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); ++ ++ if (*bcpup != bstart) { ++ err = au_cpup_dirs(dentry, *bcpup); ++ if (unlikely(err)) ++ goto out; ++ need_wh = 1; ++ } else { ++ aufs_bindex_t old_bend, new_bend, bdiropq = -1; ++ ++ old_bend = au_dbend(dentry); ++ if (isdir) { ++ bdiropq = au_dbdiropq(dentry); ++ au_set_dbdiropq(dentry, -1); ++ } ++ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0, ++ /*nd*/NULL); ++ err = need_wh; ++ if (isdir) ++ au_set_dbdiropq(dentry, bdiropq); ++ if (unlikely(err < 0)) ++ goto out; ++ new_bend = au_dbend(dentry); ++ if (!need_wh && old_bend != new_bend) { ++ au_set_h_dptr(dentry, new_bend, NULL); ++ au_set_dbend(dentry, old_bend); ++ } ++ } ++ AuDbg("need_wh %d\n", need_wh); ++ err = need_wh; ++ ++ out: ++ return err; +} + -+/* todo: hard/soft set? */ -+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++/* ++ * simple tests for the del-entry operations. ++ * following the checks in vfs, plus the parent-child relationship. 
++ */ ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) +{ -+ struct au_iinfo *iinfo = au_ii(inode); ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry, *h_latest; + struct inode *h_inode; + -+ iinfo->ii_bstart = bindex; -+ h_inode = iinfo->ii_hinode[bindex + 0].hi_inode; -+ if (h_inode) -+ au_cpup_igen(inode, h_inode); -+} ++ h_dentry = au_h_dptr(dentry, bindex); ++ h_inode = h_dentry->d_inode; ++ if (dentry->d_inode) { ++ err = -ENOENT; ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; + -+void au_hiput(struct au_hinode *hinode) -+{ -+ au_hin_free(hinode); -+ dput(hinode->hi_whdentry); -+ iput(hinode->hi_inode); ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(h_inode)) ++ goto out; ++ } ++ ++ err = -ENOENT; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ goto out; ++ err = 0; ++ ++ /* ++ * rmdir a dir may break the consistency on some filesystem. ++ * let's try heavy test. ++ */ ++ err = -EACCES; ++ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE))) ++ goto out; ++ ++ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent, ++ au_sbr(dentry->d_sb, bindex)); ++ err = -EIO; ++ if (IS_ERR(h_latest)) ++ goto out; ++ if (h_latest == h_dentry) ++ err = 0; ++ dput(h_latest); ++ ++ out: ++ return err; +} + -+unsigned int au_hi_flags(struct inode *inode, int isdir) ++/* ++ * decide the branch where we operate for @dentry. the branch index will be set ++ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent ++ * dir for reverting. ++ * when a new whiteout is necessary, create it. 
++ */ ++static struct dentry* ++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, ++ struct au_dtime *dt, struct au_pin *pin) +{ -+ unsigned int flags; -+ const unsigned int mnt_flags = au_mntflags(inode->i_sb); ++ struct dentry *wh_dentry; ++ struct super_block *sb; ++ struct path h_path; ++ int err, need_wh; ++ unsigned int udba; ++ aufs_bindex_t bcpup; + -+ flags = 0; -+ if (au_opt_test(mnt_flags, XINO)) -+ au_fset_hi(flags, XINO); -+ if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY)) -+ au_fset_hi(flags, HINOTIFY); -+ return flags; ++ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); ++ wh_dentry = ERR_PTR(need_wh); ++ if (unlikely(need_wh < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ bcpup = *rbcpup; ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_path.dentry = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbstart(dentry) == bcpup) { ++ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ } ++ ++ h_path.mnt = au_sbr_mnt(sb, bcpup); ++ au_dtime_store(dt, au_pinned_parent(pin), &h_path); ++ wh_dentry = NULL; ++ if (!need_wh) ++ goto out; /* success, no need to create whiteout */ ++ ++ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); ++ if (!IS_ERR(wh_dentry)) ++ goto out; /* success */ ++ /* returns with the parent is locked and wh_dentry is dget-ed */ ++ ++ out_unpin: ++ au_unpin(pin); ++ out: ++ return wh_dentry; +} + -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags) ++/* ++ * when removing a dir, rename it to a unique temporary whiteout-ed name first ++ * in order to be revertible and save time for removing many child whiteouts ++ * under the dir. 
++ * returns 1 when there are too many child whiteout and caller should remove ++ * them asynchronously. returns 0 when the number of children is enough small to ++ * remove now or the branch fs is a remote fs. ++ * otherwise return an error. ++ */ ++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, ++ struct au_nhash *whlist, struct inode *dir) +{ -+ struct au_hinode *hinode; -+ struct inode *hi; -+ struct au_iinfo *iinfo = au_ii(inode); ++ int rmdir_later, err, dirwh; ++ struct dentry *h_dentry; ++ struct super_block *sb; + -+ hinode = iinfo->ii_hinode + bindex; -+ hi = hinode->hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ AuDebugOn(h_inode && hi); ++ sb = dentry->d_sb; ++ SiMustAnyLock(sb); ++ h_dentry = au_h_dptr(dentry, bindex); ++ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); ++ if (unlikely(err)) ++ goto out; + -+ if (hi) -+ au_hiput(hinode); -+ hinode->hi_inode = h_inode; -+ if (h_inode) { -+ int err; -+ struct super_block *sb = inode->i_sb; -+ struct au_branch *br; ++ /* stop monitoring */ ++ au_hin_free(au_hi(dentry->d_inode, bindex)); + -+ if (bindex == iinfo->ii_bstart) -+ au_cpup_igen(inode, h_inode); -+ br = au_sbr(sb, bindex); -+ hinode->hi_id = br->br_id; -+ if (au_ftest_hi(flags, XINO)) { -+ err = au_xino_write(sb, bindex, h_inode->i_ino, -+ inode->i_ino); -+ if (unlikely(err)) -+ AuIOErr1("failed au_xino_write() %d\n", err); -+ } ++ if (!au_test_fs_remote(h_dentry->d_sb)) { ++ dirwh = au_sbi(sb)->si_dirwh; ++ rmdir_later = (dirwh <= 1); ++ if (!rmdir_later) ++ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, ++ dirwh); ++ if (rmdir_later) ++ return rmdir_later; ++ } + -+ if (au_ftest_hi(flags, HINOTIFY) -+ && au_br_hinotifyable(br->br_perm)) { -+ err = au_hin_alloc(hinode, inode, h_inode); -+ if (unlikely(err)) -+ AuIOErr1("au_hin_alloc() %d\n", err); -+ } ++ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); ++ if (unlikely(err)) { ++ AuIOErr("rmdir %.*s, b%d failed, %d. 
ignored\n", ++ AuDLNPair(h_dentry), bindex, err); ++ err = 0; + } ++ ++ out: ++ return err; +} + -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh) ++/* ++ * final procedure for deleting a entry. ++ * maintain dentry and iattr. ++ */ ++static void epilog(struct inode *dir, struct dentry *dentry, ++ aufs_bindex_t bindex) +{ -+ struct au_hinode *hinode; ++ struct inode *inode; + -+ hinode = au_ii(inode)->ii_hinode + bindex; -+ AuDebugOn(hinode->hi_whdentry); -+ hinode->hi_whdentry = h_wh; ++ inode = dentry->d_inode; ++ d_drop(dentry); ++ inode->i_ctime = dir->i_ctime; ++ ++ if (atomic_read(&dentry->d_count) == 1) { ++ au_set_h_dptr(dentry, au_dbstart(dentry), NULL); ++ au_update_dbstart(dentry); ++ } ++ if (au_ibstart(dir) == bindex) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; +} + -+void au_update_iigen(struct inode *inode) ++/* ++ * when an error happened, remove the created whiteout and revert everything. ++ */ ++static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh, ++ struct dentry *wh_dentry, struct dentry *dentry, ++ struct au_dtime *dt) +{ -+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb)); -+ /* smp_mb(); */ /* atomic_set */ ++ int rerr; ++ struct path h_path = { ++ .dentry = wh_dentry, ++ .mnt = au_sbr_mnt(dir->i_sb, bwh) ++ }; ++ ++ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry); ++ if (!rerr) { ++ au_set_dbwh(dentry, bwh); ++ au_dtime_revert(dt); ++ return 0; ++ } ++ ++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ AuDLNPair(dentry), err, rerr); ++ return -EIO; +} + -+/* it may be called at remount time, too */ -+void au_update_brange(struct inode *inode, int do_put_zero) ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_unlink(struct inode *dir, struct dentry *dentry) +{ -+ struct au_iinfo *iinfo; ++ int err; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; 
++ struct inode *inode, *h_dir; ++ struct dentry *parent, *wh_dentry; + -+ iinfo = au_ii(inode); -+ if (!iinfo || iinfo->ii_bstart < 0) -+ return; ++ IMustLock(dir); ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) ++ return -ENOENT; /* possible? */ ++ IMustLock(inode); + -+ if (do_put_zero) { -+ aufs_bindex_t bindex; ++ aufs_read_lock(dentry, AuLock_DW); ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); + -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++) { -+ struct inode *h_i; ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; + -+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; -+ if (h_i && !h_i->i_nlink) -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ } ++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); ++ h_path.dentry = au_h_dptr(dentry, bstart); ++ dget(h_path.dentry); ++ if (bindex == bstart) { ++ h_dir = au_pinned_h_dir(&pin); ++ err = vfsub_unlink(h_dir, &h_path, /*force*/0); ++ } else { ++ /* dir inode is locked */ ++ h_dir = wh_dentry->d_parent->d_inode; ++ IMustLock(h_dir); ++ err = 0; + } + -+ iinfo->ii_bstart = -1; -+ while (++iinfo->ii_bstart <= iinfo->ii_bend) -+ if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode) -+ break; -+ if (iinfo->ii_bstart > iinfo->ii_bend) { -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ return; ++ if (!err) { ++ drop_nlink(inode); ++ epilog(dir, dentry, bindex); ++ ++ /* update target timestamps */ ++ if (bindex == bstart) { ++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ ++ inode->i_ctime = h_path.dentry->d_inode->i_ctime; ++ } else ++ /* todo: this timestamp may be reverted later */ ++ inode->i_ctime = h_dir->i_ctime; ++ goto out_unlock; /* success */ + } + -+ iinfo->ii_bend++; -+ while (0 <= --iinfo->ii_bend) -+ if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode) -+ break; -+ 
AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0); -+} ++ /* revert */ ++ if (wh_dentry) { ++ int rerr; + -+/* ---------------------------------------------------------------------- */ ++ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; ++ } + -+int au_iinfo_init(struct inode *inode) ++ out_unlock: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_path.dentry); ++ out: ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AuLock_DW); ++ return err; ++} ++ ++int aufs_rmdir(struct inode *dir, struct dentry *dentry) +{ -+ struct au_iinfo *iinfo; -+ struct super_block *sb; -+ int nbr, i; ++ int err, rmdir_later; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct inode *inode; ++ struct dentry *parent, *wh_dentry, *h_dentry; ++ struct au_whtmp_rmdir *args; + -+ sb = inode->i_sb; -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ nbr = au_sbend(sb) + 1; -+ if (unlikely(nbr <= 0)) -+ nbr = 1; -+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); -+ if (iinfo->ii_hinode) { -+ for (i = 0; i < nbr; i++) -+ iinfo->ii_hinode[i].hi_id = -1; ++ IMustLock(dir); ++ inode = dentry->d_inode; ++ err = -ENOENT; /* possible? 
*/ ++ if (unlikely(!inode)) ++ goto out; ++ IMustLock(inode); + -+ atomic_set(&iinfo->ii_generation, au_sigen(sb)); -+ /* smp_mb(); */ /* atomic_set */ -+ init_rwsem(&iinfo->ii_rwsem); -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ iinfo->ii_vdir = NULL; -+ return 0; -+ } -+ return -ENOMEM; -+} ++ aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH); ++ err = -ENOMEM; ++ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); ++ if (unlikely(!args)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ err = au_test_empty(dentry, &args->whlist); ++ if (unlikely(err)) ++ goto out_args; ++ ++ bstart = au_dbstart(dentry); ++ bwh = au_dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_args; + -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr) -+{ -+ int err, sz; -+ struct au_hinode *hip; ++ h_dentry = au_h_dptr(dentry, bstart); ++ dget(h_dentry); ++ rmdir_later = 0; ++ if (bindex == bstart) { ++ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); ++ if (err > 0) { ++ rmdir_later = err; ++ err = 0; ++ } ++ } else { ++ /* stop monitoring */ ++ au_hin_free(au_hi(inode, bstart)); + -+ err = -ENOMEM; -+ sz = sizeof(*hip) * (iinfo->ii_bend + 1); -+ if (!sz) -+ sz = sizeof(*hip); -+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); -+ if (hip) { -+ iinfo->ii_hinode = hip; ++ /* dir inode is locked */ ++ IMustLock(wh_dentry->d_parent->d_inode); + err = 0; + } + -+ return err; -+} -+ -+static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode, -+ ino_t ino) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ unsigned char locked; -+ -+ err = 0; -+ locked = !!si_noflush_read_trylock(sb); -+ bindex = au_br_index(sb, hinode->hi_id); -+ if (bindex >= 0) -+ err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino); -+ /* error action? 
*/ -+ if (locked) -+ si_read_unlock(sb); -+ return err; -+} ++ if (!err) { ++ clear_nlink(inode); ++ au_set_dbdiropq(dentry, -1); ++ epilog(dir, dentry, bindex); + -+void au_iinfo_fin(struct inode *inode) -+{ -+ ino_t ino; -+ aufs_bindex_t bend; -+ unsigned char unlinked = !inode->i_nlink || IS_DEADDIR(inode); -+ struct au_iinfo *iinfo; -+ struct au_hinode *hi; -+ struct super_block *sb; ++ if (rmdir_later) { ++ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); ++ args = NULL; ++ } + -+ if (unlinked) { -+ int err = au_xigen_inc(inode); -+ if (unlikely(err)) -+ AuWarn1("failed resetting i_generation, %d\n", err); ++ goto out_unpin; /* success */ + } + -+ iinfo = au_ii(inode); -+ /* bad_inode case */ -+ if (!iinfo) -+ return; -+ -+ if (iinfo->ii_vdir) -+ au_vdir_free(iinfo->ii_vdir); ++ /* revert */ ++ AuLabel(revert); ++ if (wh_dentry) { ++ int rerr; + -+ if (iinfo->ii_bstart >= 0) { -+ sb = inode->i_sb; -+ ino = 0; -+ if (unlinked) -+ ino = inode->i_ino; -+ hi = iinfo->ii_hinode + iinfo->ii_bstart; -+ bend = iinfo->ii_bend; -+ while (iinfo->ii_bstart++ <= bend) { -+ if (hi->hi_inode) { -+ if (unlinked || !hi->hi_inode->i_nlink) { -+ au_iinfo_write0(sb, hi, ino); -+ /* ignore this error */ -+ ino = 0; -+ } -+ au_hiput(hi); -+ } -+ hi++; -+ } ++ rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt); ++ if (rerr) ++ err = rerr; + } + -+ kfree(iinfo->ii_hinode); -+ au_rwsem_destroy(&iinfo->ii_rwsem); -+} -diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c -new file mode 100644 -index 0000000..c4a962b ---- /dev/null -+++ b/fs/aufs/inode.c -@@ -0,0 +1,356 @@ ++ out_unpin: ++ au_unpin(&pin); ++ dput(wh_dentry); ++ dput(h_dentry); ++ out_args: ++ di_write_unlock(parent); ++ if (args) ++ au_whtmp_rmdir_free(args); ++ out_unlock: ++ aufs_read_unlock(dentry, AuLock_DW); ++ out: ++ return err; ++} +diff -urN linux-2.6.30.org/fs/aufs/i_op_ren.c linux-2.6.30/fs/aufs/i_op_ren.c +--- linux-2.6.30.org/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-2.6.30/fs/aufs/i_op_ren.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,945 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -15304,898 +12865,1084 @@ index 0000000..c4a962b + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* -+ * inode functions ++ * inode operation (rename entry) ++ * todo: this is crazy monster + */ + +#include "aufs.h" + -+static void au_refresh_hinode_attr(struct inode *inode, int do_version) -+{ -+ au_cpup_attr_all(inode, /*force*/0); -+ au_update_iigen(inode); -+ if (do_version) -+ inode->i_version++; -+} -+ -+int au_refresh_hinode_self(struct inode *inode, int do_attr) -+{ -+ int err; -+ aufs_bindex_t bindex, new_bindex; -+ unsigned char update; -+ struct inode *first; -+ struct au_hinode *p, *q, tmp; -+ struct super_block *sb; -+ struct au_iinfo *iinfo; -+ -+ update = 0; -+ sb = inode->i_sb; -+ iinfo = au_ii(inode); -+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); -+ if (unlikely(err)) -+ goto out; ++enum { AuSRC, AuDST, AuSrcDst }; ++enum { AuPARENT, AuCHILD, AuParentChild }; + -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ first = p->hi_inode; -+ err = 0; -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++, p++) { -+ if (!p->hi_inode) -+ continue; ++#define AuRen_ISDIR 1 ++#define AuRen_ISSAMEDIR (1 << 1) ++#define AuRen_WHSRC (1 << 2) ++#define AuRen_WHDST (1 << 3) ++#define 
AuRen_MNT_WRITE (1 << 4) ++#define AuRen_DT_DSTDIR (1 << 5) ++#define AuRen_DIROPQ (1 << 6) ++#define AuRen_CPUP (1 << 7) ++#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) ++#define au_fset_ren(flags, name) { (flags) |= AuRen_##name; } ++#define au_fclr_ren(flags, name) { (flags) &= ~AuRen_##name; } + -+ new_bindex = au_br_index(sb, p->hi_id); -+ if (new_bindex == bindex) -+ continue; ++struct au_ren_args { ++ struct { ++ struct dentry *dentry, *h_dentry, *parent, *h_parent, ++ *wh_dentry; ++ struct inode *dir, *inode; ++ struct au_hinode *hdir; ++ struct au_dtime dt[AuParentChild]; ++ aufs_bindex_t bstart; ++ } sd[AuSrcDst]; + -+ if (new_bindex < 0) { -+ update++; -+ au_hiput(p); -+ p->hi_inode = NULL; -+ continue; -+ } ++#define src_dentry sd[AuSRC].dentry ++#define src_dir sd[AuSRC].dir ++#define src_inode sd[AuSRC].inode ++#define src_h_dentry sd[AuSRC].h_dentry ++#define src_parent sd[AuSRC].parent ++#define src_h_parent sd[AuSRC].h_parent ++#define src_wh_dentry sd[AuSRC].wh_dentry ++#define src_hdir sd[AuSRC].hdir ++#define src_h_dir sd[AuSRC].hdir->hi_inode ++#define src_dt sd[AuSRC].dt ++#define src_bstart sd[AuSRC].bstart + -+ if (new_bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = new_bindex; -+ if (iinfo->ii_bend < new_bindex) -+ iinfo->ii_bend = new_bindex; -+ /* swap two lower inode, and loop again */ -+ q = iinfo->ii_hinode + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hi_inode) { -+ bindex--; -+ p--; -+ } -+ } -+ au_update_brange(inode, /*do_put_zero*/0); -+ if (do_attr) -+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); ++#define dst_dentry sd[AuDST].dentry ++#define dst_dir sd[AuDST].dir ++#define dst_inode sd[AuDST].inode ++#define dst_h_dentry sd[AuDST].h_dentry ++#define dst_parent sd[AuDST].parent ++#define dst_h_parent sd[AuDST].h_parent ++#define dst_wh_dentry sd[AuDST].wh_dentry ++#define dst_hdir sd[AuDST].hdir ++#define dst_h_dir sd[AuDST].hdir->hi_inode ++#define dst_dt sd[AuDST].dt ++#define 
dst_bstart sd[AuDST].bstart + -+ out: -+ return err; -+} ++ struct dentry *h_trap; ++ struct au_branch *br; ++ struct au_hinode *src_hinode; ++ struct path h_path; ++ struct au_nhash whlist; ++ aufs_bindex_t btgt; + -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry) -+{ -+ int err, update; + unsigned int flags; -+ aufs_bindex_t bindex, bend; -+ unsigned char isdir; -+ struct inode *first; -+ struct au_hinode *p; -+ struct au_iinfo *iinfo; -+ -+ err = au_refresh_hinode_self(inode, /*do_attr*/0); -+ if (unlikely(err)) -+ goto out; + -+ update = 0; -+ iinfo = au_ii(inode); -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ first = p->hi_inode; -+ isdir = S_ISDIR(inode->i_mode); -+ flags = au_hi_flags(inode, isdir); -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { -+ struct inode *h_i; -+ struct dentry *h_d; -+ -+ h_d = au_h_dptr(dentry, bindex); -+ if (!h_d || !h_d->d_inode) -+ continue; ++ struct au_whtmp_rmdir *thargs; ++ struct dentry *h_dst; ++}; + -+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { -+ h_i = au_h_iptr(inode, bindex); -+ if (h_i) { -+ if (h_i == h_d->d_inode) -+ continue; -+ err = -EIO; -+ break; -+ } -+ } -+ if (bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = bindex; -+ if (iinfo->ii_bend < bindex) -+ iinfo->ii_bend = bindex; -+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); -+ update = 1; -+ } -+ au_update_brange(inode, /*do_put_zero*/0); ++/* ---------------------------------------------------------------------- */ + -+ if (unlikely(err)) -+ goto out; ++/* ++ * functions for reverting. ++ * when an error happened in a single rename systemcall, we should revert ++ * everything as if nothing happend. ++ * we don't need to revert the copied-up/down the parent dir since they are ++ * harmless. ++ */ + -+ au_refresh_hinode_attr(inode, update && isdir); ++#define RevertFailure(fmt, args...) 
do { \ ++ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ ++ ##args, err, rerr); \ ++ err = -EIO; \ ++} while (0) + -+ out: -+ return err; ++static void au_ren_rev_diropq(int err, struct au_ren_args *a) ++{ ++ int rerr; ++ ++ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(a->src_dentry, a->btgt); ++ au_hin_imtx_unlock(a->src_hinode); ++ if (rerr) ++ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry)); +} + -+static int set_inode(struct inode *inode, struct dentry *dentry) -+{ -+ int err; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bstart, btail; -+ unsigned char isdir; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct au_iinfo *iinfo; + -+ err = 0; -+ isdir = 0; -+ bstart = au_dbstart(dentry); -+ h_inode = au_h_dptr(dentry, bstart)->d_inode; -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; -+ inode->i_fop = &aufs_file_fop; -+ inode->i_mapping->a_ops = &aufs_aop; -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ btail = au_dbtaildir(dentry); -+ inode->i_op = &aufs_dir_iop; -+ inode->i_fop = &aufs_dir_fop; -+ break; -+ case S_IFLNK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_symlink_iop; -+ break; -+ case S_IFBLK: -+ case S_IFCHR: -+ case S_IFIFO: -+ case S_IFSOCK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; -+ init_special_inode(inode, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown file type 0%o\n", mode); -+ err = -EIO; -+ goto out; -+ } ++static void au_ren_rev_rename(int err, struct au_ren_args *a) ++{ ++ int rerr; + -+ flags = au_hi_flags(inode, isdir); -+ iinfo = au_ii(inode); -+ iinfo->ii_bstart = bstart; -+ iinfo->ii_bend = btail; -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) -+ au_set_h_iptr(inode, bindex, -+ au_igrab(h_dentry->d_inode), flags); ++ a->h_path.dentry = 
au_lkup_one(&a->src_dentry->d_name, a->src_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry)); ++ return; + } -+ au_cpup_attr_all(inode, /*force*/1); + -+ out: -+ return err; ++ rerr = vfsub_rename(a->dst_h_dir, ++ au_h_dptr(a->src_dentry, a->btgt), ++ a->src_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ ++ if (rerr) ++ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry)); +} + -+/* successful returns with iinfo write_locked */ -+static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched) ++static void au_ren_rev_cpup(int err, struct au_ren_args *a) +{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ struct inode *h_inode, *h_dinode; ++ int rerr; + -+ *matched = 0; ++ a->h_path.dentry = a->dst_h_dentry; ++ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0); ++ au_set_h_dptr(a->src_dentry, a->btgt, NULL); ++ au_set_dbstart(a->src_dentry, a->src_bstart); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry)); ++} + -+ /* -+ * before this function, if aufs got any iinfo lock, it must be only -+ * one, the parent dir. -+ * it can happen by UDBA and the obsoleted inode number. 
-+ */ -+ err = -EIO; -+ if (unlikely(inode->i_ino == parent_ino(dentry))) -+ goto out; + -+ ii_write_lock_new_child(inode); -+ if (unlikely(IS_DEADDIR(inode))) -+ goto out_unlock; ++static void au_ren_rev_whtmp(int err, struct au_ren_args *a) ++{ ++ int rerr; + -+ err = 0; -+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode && h_inode == h_dinode) { -+ *matched = 1; -+ err = 0; -+ if (au_iigen(inode) != au_digen(dentry)) -+ err = au_refresh_hinode(inode, dentry); -+ break; -+ } ++ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent, ++ a->br, /*nd*/NULL); ++ rerr = PTR_ERR(a->h_path.dentry); ++ if (IS_ERR(a->h_path.dentry)) { ++ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry)); ++ return; ++ } ++ if (a->h_path.dentry->d_inode) { ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ return; + } + -+ out_unlock: -+ if (unlikely(err)) -+ ii_write_unlock(inode); -+ out: -+ return err; ++ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path); ++ d_drop(a->h_path.dentry); ++ dput(a->h_path.dentry); ++ if (!rerr) { ++ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); ++ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); ++ } else ++ RevertFailure("rename %.*s", AuDLNPair(a->h_dst)); +} + -+/* successful returns with iinfo write_locked */ -+/* todo: return with unlocked? 
*/ -+struct inode *au_new_inode(struct dentry *dentry, int must_new) ++static void au_ren_rev_whsrc(int err, struct au_ren_args *a) +{ -+ struct inode *inode; -+ struct dentry *h_dentry; -+ struct super_block *sb; -+ ino_t h_ino, ino; -+ int err, match; -+ aufs_bindex_t bstart; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ h_dentry = au_h_dptr(dentry, bstart); -+ h_ino = h_dentry->d_inode->i_ino; -+ err = au_xino_read(sb, bstart, h_ino, &ino); -+ inode = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ new_ino: -+ if (!ino) { -+ ino = au_xino_new_ino(sb); -+ if (unlikely(!ino)) { -+ inode = ERR_PTR(-EIO); -+ goto out; -+ } -+ } -+ -+ AuDbg("i%lu\n", (unsigned long)ino); -+ inode = au_iget_locked(sb, ino); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out; ++ int rerr; + -+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); -+ if (inode->i_state & I_NEW) { -+ ii_write_lock_new_child(inode); -+ err = set_inode(inode, dentry); -+ unlock_new_inode(inode); -+ if (!err) -+ goto out; /* success */ ++ a->h_path.dentry = a->src_wh_dentry; ++ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); ++ if (rerr) ++ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry)); ++} + -+ iget_failed(inode); -+ ii_write_unlock(inode); -+ goto out_iput; -+ } else if (!must_new) { -+ err = reval_inode(inode, dentry, &match); -+ if (!err) -+ goto out; /* success */ -+ else if (match) -+ goto out_iput; -+ } ++static void au_ren_rev_drop(struct au_ren_args *a) ++{ ++ struct dentry *d, *h_d; ++ int i; ++ aufs_bindex_t bend, bindex; + -+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) -+ AuWarn1("Un-notified UDBA or repeatedly renamed dir," -+ " b%d, %s, %.*s, hi%lu, i%lu.\n", -+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry), -+ (unsigned long)h_ino, (unsigned long)ino); -+ ino = 0; -+ err = au_xino_write0(sb, bstart, h_ino, 0); -+ if (!err) { -+ iput(inode); -+ goto new_ino; ++ for (i = 0; i < AuSrcDst; i++) { ++ d = 
a->sd[i].dentry; ++ d_drop(d); ++ bend = au_dbend(d); ++ for (bindex = au_dbstart(d); bindex <= bend; bindex++) { ++ h_d = au_h_dptr(d, bindex); ++ if (h_d) ++ d_drop(h_d); ++ } + } + -+ out_iput: -+ iput(inode); -+ inode = ERR_PTR(err); -+ out: -+ return inode; ++ au_update_dbstart(a->dst_dentry); ++ if (a->thargs) ++ d_drop(a->h_dst); +} ++#undef RevertFailure + +/* ---------------------------------------------------------------------- */ + -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode) ++/* ++ * when we have to copyup the renaming entry, do it with the rename-target name ++ * in order to minimize the cost (the later actual rename is unnecessary). ++ * otherwise rename it on the target branch. ++ */ ++static int au_ren_or_cpup(struct au_ren_args *a) +{ + int err; ++ struct dentry *d; + -+ err = au_br_rdonly(au_sbr(sb, bindex)); ++ d = a->src_dentry; ++ if (au_dbstart(d) == a->btgt) { ++ a->h_path.dentry = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, DIROPQ) ++ && au_dbdiropq(d) == a->btgt) ++ au_fclr_ren(a->flags, DIROPQ); ++ AuDebugOn(au_dbstart(d) != a->btgt); ++ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), ++ a->dst_h_dir, &a->h_path); ++ } else { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; + -+ /* pseudo-link after flushed may happen out of bounds */ -+ if (!err -+ && inode -+ && au_ibstart(inode) <= bindex -+ && bindex <= au_ibend(inode)) { -+ /* -+ * permission check is unnecessary since vfsub routine -+ * will be called later -+ */ -+ struct inode *hi = au_h_iptr(inode, bindex); -+ if (hi) -+ err = IS_IMMUTABLE(hi) ? 
-EROFS : 0; ++ au_fset_ren(a->flags, CPUP); ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_set_dbstart(d, a->btgt); ++ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry)); ++ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1, ++ !AuCpup_DTIME, a->dst_parent); ++ if (unlikely(err)) { ++ au_set_h_dptr(d, a->btgt, NULL); ++ au_set_dbstart(d, a->src_bstart); ++ } ++ mutex_unlock(h_mtx); + } + + return err; +} + -+int au_test_h_perm(struct inode *h_inode, int mask) ++/* cf. aufs_rmdir() */ ++static int au_ren_del_whtmp(struct au_ren_args *a) +{ -+ if (!current_fsuid()) -+ return 0; -+ return inode_permission(h_inode, mask); ++ int err; ++ struct inode *dir; ++ ++ dir = a->dst_dir; ++ SiMustAnyLock(dir->i_sb); ++ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, ++ au_sbi(dir->i_sb)->si_dirwh) ++ || au_test_fs_remote(a->h_dst->d_sb)) { ++ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); ++ if (unlikely(err)) ++ AuWarn("failed removing whtmp dir %.*s (%d), " ++ "ignored.\n", AuDLNPair(a->h_dst), err); ++ } else { ++ au_nhash_wh_free(&a->thargs->whlist); ++ a->thargs->whlist = a->whlist; ++ a->whlist.nh_num = 0; ++ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); ++ dput(a->h_dst); ++ a->thargs = NULL; ++ } ++ ++ return 0; +} + -+int au_test_h_perm_sio(struct inode *h_inode, int mask) ++/* make it 'opaque' dir. */ ++static int au_ren_diropq(struct au_ren_args *a) +{ -+ if (au_test_nfs(h_inode->i_sb) -+ && (mask & MAY_WRITE) -+ && S_ISDIR(h_inode->i_mode)) -+ mask |= MAY_READ; /* force permission check */ -+ return au_test_h_perm(h_inode, mask); -+} -diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h -new file mode 100644 -index 0000000..ffda951 ---- /dev/null -+++ b/fs/aufs/inode.h -@@ -0,0 +1,471 @@ -+/* -+ * Copyright (C) 2005-2009 Junjiro R. 
Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ ++ int err; ++ struct dentry *diropq; + -+/* -+ * inode operations -+ */ ++ err = 0; ++ a->src_hinode = au_hi(a->src_inode, a->btgt); ++ au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); ++ diropq = au_diropq_create(a->src_dentry, a->btgt); ++ au_hin_imtx_unlock(a->src_hinode); ++ if (IS_ERR(diropq)) ++ err = PTR_ERR(diropq); ++ dput(diropq); + -+#ifndef __AUFS_INODE_H__ -+#define __AUFS_INODE_H__ ++ return err; ++} + -+#ifdef __KERNEL__ ++static int do_rename(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d, *h_d; + -+#include -+#include -+#include -+#include -+#include "rwsem.h" ++ /* prepare workqueue args for asynchronous rmdir */ ++ h_d = a->dst_h_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { ++ err = -ENOMEM; ++ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); ++ if (unlikely(!a->thargs)) ++ goto out; ++ a->h_dst = dget(h_d); ++ } + -+struct au_hinotify { -+#ifdef CONFIG_AUFS_HINOTIFY -+ struct inotify_watch hin_watch; -+ struct inode *hin_aufs_inode; /* no get/put */ -+#endif -+}; ++ /* create whiteout for src_dentry */ ++ if (au_ftest_ren(a->flags, WHSRC)) { ++ a->src_wh_dentry ++ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); ++ err = PTR_ERR(a->src_wh_dentry); ++ if (IS_ERR(a->src_wh_dentry)) ++ goto out_thargs; ++ } + -+struct au_hinode { -+ struct inode *hi_inode; -+ aufs_bindex_t hi_id; -+#ifdef CONFIG_AUFS_HINOTIFY -+ struct au_hinotify *hi_notify; -+#endif ++ /* lookup whiteout for dentry */ ++ if (au_ftest_ren(a->flags, WHDST)) { ++ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, ++ a->br); ++ err = PTR_ERR(h_d); ++ if (IS_ERR(h_d)) ++ goto out_whsrc; ++ if (!h_d->d_inode) ++ dput(h_d); ++ else ++ 
a->dst_wh_dentry = h_d; ++ } + -+ /* reference to the copied-up whiteout with get/put */ -+ struct dentry *hi_whdentry; -+}; ++ /* rename dentry to tmpwh */ ++ if (a->thargs) { ++ err = au_whtmp_ren(a->dst_h_dentry, a->br); ++ if (unlikely(err)) ++ goto out_whdst; + -+struct au_vdir; -+struct au_iinfo { -+ atomic_t ii_generation; -+ struct super_block *ii_hsb1; /* no get/put */ ++ d = a->dst_dentry; ++ au_set_h_dptr(d, a->btgt, NULL); ++ err = au_lkup_neg(d, a->btgt); ++ if (unlikely(err)) ++ goto out_whtmp; ++ a->dst_h_dentry = au_h_dptr(d, a->btgt); ++ } + -+ struct rw_semaphore ii_rwsem; -+ aufs_bindex_t ii_bstart, ii_bend; -+ __u32 ii_higen; -+ struct au_hinode *ii_hinode; -+ struct au_vdir *ii_vdir; -+}; ++ /* cpup src */ ++ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) { ++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex; + -+struct au_icntnr { -+ struct au_iinfo iinfo; -+ struct inode vfs_inode; -+}; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1, ++ !AuCpup_DTIME); ++ mutex_unlock(h_mtx); ++ if (unlikely(err)) ++ goto out_whtmp; ++ } + -+/* au_pin flags */ -+#define AuPin_DI_LOCKED 1 -+#define AuPin_MNT_WRITE (1 << 1) -+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) -+#define au_fset_pin(flags, name) { (flags) |= AuPin_##name; } -+#define au_fclr_pin(flags, name) { (flags) &= ~AuPin_##name; } ++ /* rename by vfs_rename or cpup */ ++ d = a->dst_dentry; ++ if (au_ftest_ren(a->flags, ISDIR) ++ && (a->dst_wh_dentry ++ || au_dbdiropq(d) == a->btgt ++ /* hide the lower to keep xino */ ++ || a->btgt < au_dbend(d) ++ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) ++ au_fset_ren(a->flags, DIROPQ); ++ err = au_ren_or_cpup(a); ++ if (unlikely(err)) ++ /* leave the copied-up one */ ++ goto out_whtmp; + -+struct au_pin { -+ /* input */ -+ struct dentry *dentry; -+ unsigned int udba; -+ unsigned char lsc_di, lsc_hi, flags; -+ aufs_bindex_t bindex; ++ /* make dir opaque */ ++ if 
(au_ftest_ren(a->flags, DIROPQ)) { ++ err = au_ren_diropq(a); ++ if (unlikely(err)) ++ goto out_rename; ++ } + -+ /* output */ -+ struct dentry *parent; -+ struct au_hinode *hdir; -+ struct vfsmount *h_mnt; -+}; ++ /* update target timestamps */ ++ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); ++ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); ++ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ ++ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; ++ ++ /* remove whiteout for dentry */ ++ if (a->dst_wh_dentry) { ++ a->h_path.dentry = a->dst_wh_dentry; ++ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, ++ a->dst_dentry); ++ if (unlikely(err)) ++ goto out_diropq; ++ } + -+/* ---------------------------------------------------------------------- */ ++ /* remove whtmp */ ++ if (a->thargs) ++ au_ren_del_whtmp(a); /* ignore this error */ + -+static inline struct au_iinfo *au_ii(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; ++ err = 0; ++ goto out_success; + -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ if (iinfo->ii_hinode) -+ return iinfo; -+ return NULL; /* debugging bad_inode case */ ++ out_diropq: ++ if (au_ftest_ren(a->flags, DIROPQ)) ++ au_ren_rev_diropq(err, a); ++ out_rename: ++ if (!au_ftest_ren(a->flags, CPUP)) ++ au_ren_rev_rename(err, a); ++ else ++ au_ren_rev_cpup(err, a); ++ out_whtmp: ++ if (a->thargs) ++ au_ren_rev_whtmp(err, a); ++ out_whdst: ++ dput(a->dst_wh_dentry); ++ a->dst_wh_dentry = NULL; ++ out_whsrc: ++ if (a->src_wh_dentry) ++ au_ren_rev_whsrc(err, a); ++ au_ren_rev_drop(a); ++ out_success: ++ dput(a->src_wh_dentry); ++ dput(a->dst_wh_dentry); ++ out_thargs: ++ if (a->thargs) { ++ dput(a->h_dst); ++ au_whtmp_rmdir_free(a->thargs); ++ a->thargs = NULL; ++ } ++ out: ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+/* inode.c */ -+int au_refresh_hinode_self(struct inode *inode, int do_attr); -+int au_refresh_hinode(struct inode 
*inode, struct dentry *dentry); -+struct inode *au_new_inode(struct dentry *dentry, int must_new); -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode); -+int au_test_h_perm(struct inode *h_inode, int mask); -+int au_test_h_perm_sio(struct inode *h_inode, int mask); -+ -+/* i_op.c */ -+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; -+ -+/* au_wr_dir flags */ -+#define AuWrDir_ADD_ENTRY 1 -+#define AuWrDir_ISDIR (1 << 1) -+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) -+#define au_fset_wrdir(flags, name) { (flags) |= AuWrDir_##name; } -+#define au_fclr_wrdir(flags, name) { (flags) &= ~AuWrDir_##name; } -+ -+struct au_wr_dir_args { -+ aufs_bindex_t force_btgt; -+ unsigned char flags; -+}; -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args); -+ -+struct dentry *au_pinned_h_parent(struct au_pin *pin); -+void au_pin_init(struct au_pin *pin, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags); -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) __must_check; -+int au_do_pin(struct au_pin *pin) __must_check; -+void au_unpin(struct au_pin *pin); ++/* ++ * test if @dentry dir can be rename destination or not. ++ * success means, it is a logically empty dir. 
++ */ ++static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) ++{ ++ return au_test_empty(dentry, whlist); ++} + -+/* i_op_add.c */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); -+int aufs_create(struct inode *dir, struct dentry *dentry, int mode, -+ struct nameidata *nd); -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry); -+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode); ++/* ++ * test if @dentry dir can be rename source or not. ++ * if it can, return 0 and @children is filled. ++ * success means, ++ * - it is a logically empty dir. ++ * - or, it exists on writable branch and has no children including whiteouts ++ * on the lower branch. ++ */ ++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ int err; ++ aufs_bindex_t bstart; + -+/* i_op_del.c */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_unlink(struct inode *dir, struct dentry *dentry); -+int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ bstart = au_dbstart(dentry); ++ if (bstart != btgt) { ++ struct au_nhash whlist; + -+/* i_op_ren.c */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); -+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, -+ struct inode *dir, struct dentry *dentry); ++ SiMustAnyLock(dentry->d_sb); ++ err = au_nhash_alloc(&whlist, au_sbi(dentry->d_sb)->si_rdhash, ++ GFP_NOFS); ++ if (unlikely(err)) ++ goto out; ++ err = au_test_empty(dentry, &whlist); ++ au_nhash_wh_free(&whlist); ++ goto out; ++ } + -+/* iinfo.c */ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); -+void 
au_hiput(struct au_hinode *hinode); -+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex); -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh); -+unsigned int au_hi_flags(struct inode *inode, int isdir); ++ if (bstart == au_dbtaildir(dentry)) ++ return 0; /* success */ + -+/* hinode flags */ -+#define AuHi_XINO 1 -+#define AuHi_HINOTIFY (1 << 1) -+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) -+#define au_fset_hi(flags, name) { (flags) |= AuHi_##name; } -+#define au_fclr_hi(flags, name) { (flags) &= ~AuHi_##name; } ++ err = au_test_empty_lower(dentry); + -+#ifndef CONFIG_AUFS_HINOTIFY -+#undef AuHi_HINOTIFY -+#define AuHi_HINOTIFY 0 -+#endif ++ out: ++ if (err == -ENOTEMPTY) { ++ AuWarn1("renaming dir who has child(ren) on multiple branches," ++ " is not supported\n"); ++ err = -EXDEV; ++ } ++ return err; ++} + -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags); ++/* side effect: sets whlist and h_dentry */ ++static int au_ren_may_dir(struct au_ren_args *a) ++{ ++ int err; ++ struct dentry *d; + -+void au_update_iigen(struct inode *inode); -+void au_update_brange(struct inode *inode, int do_put_zero); ++ d = a->dst_dentry; ++ SiMustAnyLock(d->d_sb); ++ err = au_nhash_alloc(&a->whlist, au_sbi(d->d_sb)->si_rdhash, GFP_NOFS); ++ if (unlikely(err)) ++ goto out; + -+int au_iinfo_init(struct inode *inode); -+void au_iinfo_fin(struct inode *inode); -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr); ++ err = 0; ++ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { ++ au_set_dbstart(d, a->dst_bstart); ++ err = may_rename_dstdir(d, &a->whlist); ++ au_set_dbstart(d, a->btgt); ++ } ++ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (unlikely(err)) ++ goto out; + -+/* plink.c */ -+void au_plink_block_maintain(struct super_block *sb); -+#ifdef CONFIG_AUFS_DEBUG -+void au_plink_list(struct super_block *sb); -+#else -+static inline void au_plink_list(struct 
super_block *sb) -+{ -+ /* nothing */ ++ d = a->src_dentry; ++ a->src_h_dentry = au_h_dptr(d, au_dbstart(d)); ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ err = may_rename_srcdir(d, a->btgt); ++ if (unlikely(err)) { ++ au_nhash_wh_free(&a->whlist); ++ a->whlist.nh_num = 0; ++ } ++ } ++ out: ++ return err; +} -+#endif -+int au_plink_test(struct inode *inode); -+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); -+void au_plink_append(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+void au_plink_put(struct super_block *sb); -+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); + +/* ---------------------------------------------------------------------- */ + -+/* lock subclass for iinfo */ -+enum { -+ AuLsc_II_CHILD, /* child first */ -+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hinotify */ -+ AuLsc_II_CHILD3, /* copyup dirs */ -+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ -+ AuLsc_II_PARENT2, -+ AuLsc_II_PARENT3, /* copyup dirs */ -+ AuLsc_II_NEW_CHILD -+}; -+ +/* -+ * ii_read_lock_child, ii_write_lock_child, -+ * ii_read_lock_child2, ii_write_lock_child2, -+ * ii_read_lock_child3, ii_write_lock_child3, -+ * ii_read_lock_parent, ii_write_lock_parent, -+ * ii_read_lock_parent2, ii_write_lock_parent2, -+ * ii_read_lock_parent3, ii_write_lock_parent3, -+ * ii_read_lock_new_child, ii_write_lock_new_child, ++ * simple tests for rename. ++ * following the checks in vfs, plus the parent-child relationship. 
+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void ii_read_lock_##name(struct inode *i) \ -+{ \ -+ down_read_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} ++static int au_may_ren(struct au_ren_args *a) ++{ ++ int err, isdir; ++ struct inode *h_inode; ++ ++ if (a->src_bstart == a->btgt) { ++ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, ++ au_ftest_ren(a->flags, ISDIR)); ++ if (unlikely(err)) ++ goto out; ++ err = -EINVAL; ++ if (unlikely(a->src_h_dentry == a->h_trap)) ++ goto out; ++ } ++ ++ err = 0; ++ if (a->dst_bstart != a->btgt) ++ goto out; ++ ++ err = -EIO; ++ h_inode = a->dst_h_dentry->d_inode; ++ isdir = !!au_ftest_ren(a->flags, ISDIR); ++ if (!a->dst_dentry->d_inode) { ++ if (unlikely(h_inode)) ++ goto out; ++ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ } else { ++ if (unlikely(!h_inode || !h_inode->i_nlink)) ++ goto out; ++ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, ++ isdir); ++ if (unlikely(err)) ++ goto out; ++ err = -ENOTEMPTY; ++ if (unlikely(a->dst_h_dentry == a->h_trap)) ++ goto out; ++ err = 0; ++ } + -+#define AuWriteLockFunc(name, lsc) \ -+static inline void ii_write_lock_##name(struct inode *i) \ -+{ \ -+ down_write_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++ out: ++ if (unlikely(err == -ENOENT || err == -EEXIST)) ++ err = -EIO; ++ return err; +} + -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) -+ -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); -+AuRWLockFuncs(new_child, NEW_CHILD); -+ -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs ++/* ---------------------------------------------------------------------- */ + +/* -+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock ++ * locking order ++ * (VFS) ++ * - src_dir and dir by lock_rename() ++ * 
- inode if exitsts ++ * (aufs) ++ * - lock all ++ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, ++ * + si_read_lock ++ * + di_write_lock2_child() ++ * + di_write_lock_child() ++ * + ii_write_lock_child() ++ * + di_write_lock_child2() ++ * + ii_write_lock_child2() ++ * + src_parent and parent ++ * + di_write_lock_parent() ++ * + ii_write_lock_parent() ++ * + di_write_lock_parent2() ++ * + ii_write_lock_parent2() ++ * + lower src_dir and dir by vfsub_lock_rename() ++ * + verify the every relationships between child and parent. if any ++ * of them failed, unlock all and return -EBUSY. + */ -+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); -+ -+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline unsigned int au_iigen(struct inode *inode) -+{ -+ return atomic_read(&au_ii(inode)->ii_generation); -+} -+ -+/* tiny test for inode number */ -+/* tmpfs generation is too rough */ -+static inline int au_test_higen(struct inode *inode, struct inode *h_inode) ++static void au_ren_unlock(struct au_ren_args *a) +{ -+ struct au_iinfo *iinfo; ++ struct super_block *sb; + -+ iinfo = au_ii(inode); -+ return !(iinfo->ii_hsb1 == h_inode->i_sb -+ && iinfo->ii_higen == h_inode->i_generation); ++ sb = a->dst_dentry->d_sb; ++ if (au_ftest_ren(a->flags, MNT_WRITE)) ++ mnt_drop_write(a->br->br_mnt); ++ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); +} + -+static inline struct inode *au_igrab(struct inode *inode) ++static int au_ren_lock(struct au_ren_args *a) +{ -+ if (inode) { -+ AuDebugOn(!atomic_read(&inode->i_count)); -+ atomic_inc(&inode->i_count); ++ int err; ++ unsigned int udba; ++ ++ err = 0; ++ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); ++ a->src_hdir = au_hi(a->src_dir, a->btgt); ++ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); ++ a->dst_hdir = au_hi(a->dst_dir, a->btgt); 
++ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, ++ a->dst_h_parent, a->dst_hdir); ++ udba = au_opt_udba(a->src_dentry->d_sb); ++ if (au_dbstart(a->src_dentry) == a->btgt) ++ err = au_h_verify(a->src_h_dentry, udba, ++ a->src_h_parent->d_inode, a->src_h_parent, ++ a->br); ++ if (!err && au_dbstart(a->dst_dentry) == a->btgt) ++ err = au_h_verify(a->dst_h_dentry, udba, ++ a->dst_h_parent->d_inode, a->dst_h_parent, ++ a->br); ++ if (!err) { ++ err = mnt_want_write(a->br->br_mnt); ++ if (unlikely(err)) ++ goto out_unlock; ++ au_fset_ren(a->flags, MNT_WRITE); ++ goto out; /* success */ + } -+ return inode; ++ ++ err = au_busy_or_stale(); ++ ++ out_unlock: ++ au_ren_unlock(a); ++ out: ++ return err; +} + +/* ---------------------------------------------------------------------- */ + -+static inline aufs_bindex_t au_ii_br_id(struct inode *inode, -+ aufs_bindex_t bindex) ++static void au_ren_refresh_dir(struct au_ren_args *a) +{ -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; -+} ++ struct inode *dir; + -+static inline aufs_bindex_t au_ibstart(struct inode *inode) -+{ -+ return au_ii(inode)->ii_bstart; -+} ++ dir = a->dst_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) { ++ /* is this updating defined in POSIX? 
*/ ++ au_cpup_attr_timesizes(a->src_inode); ++ au_cpup_attr_nlink(dir, /*force*/1); ++ if (a->dst_inode) { ++ clear_nlink(a->dst_inode); ++ au_cpup_attr_timesizes(a->dst_inode); ++ } ++ } ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); + -+static inline aufs_bindex_t au_ibend(struct inode *inode) -+{ -+ return au_ii(inode)->ii_bend; -+} ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ return; + -+static inline struct au_vdir *au_ivdir(struct inode *inode) -+{ -+ return au_ii(inode)->ii_vdir; ++ dir = a->src_dir; ++ dir->i_version++; ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_cpup_attr_nlink(dir, /*force*/1); ++ if (au_ibstart(dir) == a->btgt) ++ au_cpup_attr_timesizes(dir); +} + -+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) ++static void au_ren_refresh(struct au_ren_args *a) +{ -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; -+} ++ aufs_bindex_t bend, bindex; ++ struct dentry *d, *h_d; ++ struct inode *i, *h_i; ++ struct super_block *sb; + -+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) -+{ -+ au_ii(inode)->ii_bend = bindex; -+} ++ d = a->src_dentry; ++ au_set_dbwh(d, -1); ++ bend = au_dbend(d); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_d = au_h_dptr(d, bindex); ++ if (h_d) ++ au_set_h_dptr(d, bindex, NULL); ++ } ++ au_set_dbend(d, a->btgt); + -+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) -+{ -+ au_ii(inode)->ii_vdir = vdir; -+} ++ sb = d->d_sb; ++ i = a->src_inode; ++ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) ++ return; /* success */ + -+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) -+{ -+ return au_ii(inode)->ii_hinode + bindex; ++ bend = au_ibend(i); ++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { ++ h_i = au_h_iptr(i, bindex); ++ if (h_i) { ++ au_xino_write0(sb, bindex, h_i->i_ino, 0); ++ /* ignore this error */ ++ au_set_h_iptr(i, bindex, NULL, 0); ++ } 
++ } ++ au_set_ibend(i, a->btgt); +} + +/* ---------------------------------------------------------------------- */ + -+static inline struct dentry *au_pinned_parent(struct au_pin *pin) ++/* mainly for link(2) and rename(2) */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) +{ -+ if (pin) -+ return pin->parent; -+ return NULL; ++ aufs_bindex_t bdiropq, bwh; ++ struct dentry *parent; ++ struct au_branch *br; ++ ++ parent = dentry->d_parent; ++ IMustLock(parent->d_inode); /* dir is locked */ ++ ++ bdiropq = au_dbdiropq(parent); ++ bwh = au_dbwh(dentry); ++ br = au_sbr(dentry->d_sb, btgt); ++ if (au_br_rdonly(br) ++ || (0 <= bdiropq && bdiropq < btgt) ++ || (0 <= bwh && bwh < btgt)) ++ btgt = -1; ++ ++ AuDbg("btgt %d\n", btgt); ++ return btgt; +} + -+static inline struct inode *au_pinned_h_dir(struct au_pin *pin) ++/* sets src_bstart, dst_bstart and btgt */ ++static int au_ren_wbr(struct au_ren_args *a) +{ -+ if (pin && pin->hdir) -+ return pin->hdir->hi_inode; -+ return NULL; ++ int err; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ a->src_bstart = au_dbstart(a->src_dentry); ++ a->dst_bstart = au_dbstart(a->dst_dentry); ++ if (au_ftest_ren(a->flags, ISDIR)) ++ au_fset_wrdir(wr_dir_args.flags, ISDIR); ++ wr_dir_args.force_btgt = a->src_bstart; ++ if (a->dst_inode && a->dst_bstart < a->src_bstart) ++ wr_dir_args.force_btgt = a->dst_bstart; ++ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); ++ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); ++ a->btgt = err; ++ ++ return err; +} + -+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++static void au_ren_dt(struct au_ren_args *a) +{ -+ if (pin) -+ return pin->hdir; -+ return NULL; ++ a->h_path.dentry = a->src_h_parent; ++ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { ++ a->h_path.dentry = a->dst_h_parent; ++ au_dtime_store(a->dst_dt + 
AuPARENT, a->dst_parent, &a->h_path); ++ } ++ ++ au_fclr_ren(a->flags, DT_DSTDIR); ++ if (!au_ftest_ren(a->flags, ISDIR)) ++ return; ++ ++ a->h_path.dentry = a->src_h_dentry; ++ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); ++ if (a->dst_h_dentry->d_inode) { ++ au_fset_ren(a->flags, DT_DSTDIR); ++ a->h_path.dentry = a->dst_h_dentry; ++ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); ++ } +} + -+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++static void au_ren_rev_dt(int err, struct au_ren_args *a) +{ -+ if (pin) -+ pin->dentry = dentry; -+} ++ struct dentry *h_d; ++ struct mutex *h_mtx; + -+static inline void au_pin_set_parent_lflag(struct au_pin *pin, -+ unsigned char lflag) -+{ -+ if (pin) { -+ /* dirty macros require brackets */ -+ if (lflag) { -+ au_fset_pin(pin->flags, DI_LOCKED); -+ } else { -+ au_fclr_pin(pin->flags, DI_LOCKED); ++ au_dtime_revert(a->src_dt + AuPARENT); ++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) ++ au_dtime_revert(a->dst_dt + AuPARENT); ++ ++ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { ++ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->src_dt + AuCHILD); ++ mutex_unlock(h_mtx); ++ ++ if (au_ftest_ren(a->flags, DT_DSTDIR)) { ++ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; ++ h_mtx = &h_d->d_inode->i_mutex; ++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); ++ au_dtime_revert(a->dst_dt + AuCHILD); ++ mutex_unlock(h_mtx); + } + } +} + -+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, ++ struct inode *_dst_dir, struct dentry *_dst_dentry) +{ -+ if (pin) { -+ dput(pin->parent); -+ pin->parent = dget(parent); ++ int err; ++ /* reduce stack space */ ++ struct au_ren_args *a; ++ ++ IMustLock(_src_dir); ++ 
IMustLock(_dst_dir); ++ ++ err = -ENOMEM; ++ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->src_dir = _src_dir; ++ a->src_dentry = _src_dentry; ++ a->src_inode = a->src_dentry->d_inode; ++ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */ ++ a->dst_dir = _dst_dir; ++ a->dst_dentry = _dst_dentry; ++ a->dst_inode = a->dst_dentry->d_inode; ++ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ ++ if (a->dst_inode) { ++ IMustLock(a->dst_inode); ++ au_igrab(a->dst_inode); + } -+} + -+/* ---------------------------------------------------------------------- */ ++ err = -ENOTDIR; ++ if (S_ISDIR(a->src_inode->i_mode)) { ++ au_fset_ren(a->flags, ISDIR); ++ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) ++ goto out_free; ++ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ AuLock_DIR | AuLock_FLUSH); ++ } else ++ aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, ++ AuLock_FLUSH); + -+#ifdef CONFIG_AUFS_HINOTIFY -+/* hinotify.c */ -+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode, -+ struct inode *h_inode); -+void au_hin_free(struct au_hinode *hinode); -+void au_hin_ctl(struct au_hinode *hinode, int do_set); -+void au_reset_hinotify(struct inode *inode, unsigned int flags); ++ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ ++ di_write_lock_parent(a->dst_parent); + -+int __init au_hinotify_init(void); -+void au_hinotify_fin(void); ++ /* which branch we process */ ++ err = au_ren_wbr(a); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); ++ a->h_path.mnt = a->br->br_mnt; + -+static inline -+void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val) -+{ -+ hinode->hi_notify = val; -+} ++ /* are they available to be renamed */ ++ err = au_ren_may_dir(a); ++ if (unlikely(err)) ++ goto out_unlock; + -+static inline void au_iigen_dec(struct inode *inode) -+{ -+ 
atomic_dec(&au_ii(inode)->ii_generation); -+} ++ /* prepare the writable parent dir on the same branch */ ++ if (a->dst_bstart == a->btgt) { ++ au_fset_ren(a->flags, WHDST); ++ } else { ++ err = au_cpup_dirs(a->dst_dentry, a->btgt); ++ if (unlikely(err)) ++ goto out_children; ++ } + -+#else -+static inline -+int au_hin_alloc(struct au_hinode *hinode __maybe_unused, -+ struct inode *inode __maybe_unused, -+ struct inode *h_inode __maybe_unused) -+{ -+ return -EOPNOTSUPP; -+} ++ if (a->src_dir != a->dst_dir) { ++ /* ++ * this temporary unlock is safe, ++ * because both dir->i_mutex are locked. ++ */ ++ di_write_unlock(a->dst_parent); ++ di_write_lock_parent(a->src_parent); ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ di_write_unlock(a->src_parent); ++ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); ++ au_fclr_ren(a->flags, ISSAMEDIR); ++ } else ++ err = au_wr_dir_need_wh(a->src_dentry, ++ au_ftest_ren(a->flags, ISDIR), ++ &a->btgt); ++ if (unlikely(err < 0)) ++ goto out_children; ++ if (err) ++ au_fset_ren(a->flags, WHSRC); + -+static inline void au_hin_free(struct au_hinode *hinode __maybe_unused) -+{ -+ /* nothing */ -+} ++ /* lock them all */ ++ err = au_ren_lock(a); ++ if (unlikely(err)) ++ goto out_children; + -+static inline void au_hin_ctl(struct au_hinode *hinode __maybe_unused, -+ int do_set __maybe_unused) -+{ -+ /* nothing */ -+} ++ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) { ++ err = au_may_ren(a); ++ if (unlikely(err)) ++ goto out_hdir; ++ } + -+static inline void au_reset_hinotify(struct inode *inode __maybe_unused, -+ unsigned int flags __maybe_unused) -+{ -+ /* nothing */ -+} ++ /* store timestamps to be revertible */ ++ au_ren_dt(a); + -+static inline int au_hinotify_init(void) -+{ -+ return 0; -+} ++ /* here we go */ ++ err = do_rename(a); ++ if (unlikely(err)) ++ goto out_dt; + -+#define au_hinotify_fin() do {} while (0) ++ /* update dir attributes */ ++ 
au_ren_refresh_dir(a); + -+static inline -+void au_hin_init(struct au_hinode *hinode __maybe_unused, -+ struct au_hinotify *val __maybe_unused) -+{ -+ /* empty */ -+} -+#endif /* CONFIG_AUFS_HINOTIFY */ ++ /* dput/iput all lower dentries */ ++ au_ren_refresh(a); + -+static inline void au_hin_suspend(struct au_hinode *hdir) -+{ -+ au_hin_ctl(hdir, /*do_set*/0); ++ goto out_hdir; /* success */ ++ ++ out_dt: ++ au_ren_rev_dt(err, a); ++ out_hdir: ++ au_ren_unlock(a); ++ out_children: ++ au_nhash_wh_free(&a->whlist); ++ out_unlock: ++ if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) { ++ au_update_dbstart(a->dst_dentry); ++ d_drop(a->dst_dentry); ++ } ++ if (!err) ++ d_move(a->src_dentry, a->dst_dentry); ++ if (au_ftest_ren(a->flags, ISSAMEDIR)) ++ di_write_unlock(a->dst_parent); ++ else ++ di_write_unlock2(a->src_parent, a->dst_parent); ++ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); ++ out_free: ++ iput(a->dst_inode); ++ if (a->thargs) ++ au_whtmp_rmdir_free(a->thargs); ++ kfree(a); ++ out: ++ return err; +} +diff -urN linux-2.6.30.org/fs/aufs/Kconfig linux-2.6.30/fs/aufs/Kconfig +--- linux-2.6.30.org/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/Kconfig 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,132 @@ ++config AUFS_FS ++ tristate "Aufs (Advanced multi layered unification filesystem) support" ++ depends on EXPERIMENTAL ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was entirely re-designed and ++ re-implemented Unionfs Version 1.x series. Introducing many ++ original ideas, approaches and improvements, it becomes totally ++ different from Unionfs while keeping the basic features. ++ ++if AUFS_FS ++choice ++ prompt "Maximum number of branches" ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. 
The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++endchoice + -+static inline void au_hin_resume(struct au_hinode *hdir) -+{ -+ au_hin_ctl(hdir, /*do_set*/1); -+} ++config AUFS_HINOTIFY ++ bool "Use inotify to detect actions on a branch" ++ depends on INOTIFY ++ help ++ If you want to modify files on branches directly, eg. bypassing aufs, ++ and want aufs to detect the changes of them fully, then enable this ++ option and use 'udba=inotify' mount option. ++ It will have a negative impact to the performance. ++ See detail in aufs.5. + -+static inline void au_hin_imtx_lock(struct au_hinode *hdir) -+{ -+ mutex_lock(&hdir->hi_inode->i_mutex); -+ au_hin_suspend(hdir); -+} ++config AUFS_EXPORT ++ bool "NFS-exportable aufs" ++ depends on (AUFS_FS = y && EXPORTFS = y) || (AUFS_FS = m && EXPORTFS) ++ help ++ If you want to export your mounted aufs via NFS, then enable this ++ option. There are several requirements for this configuration. ++ See detail in aufs.5. 
+ -+static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir, -+ unsigned int sc __maybe_unused) -+{ -+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); -+ au_hin_suspend(hdir); -+} ++config AUFS_SHWH ++ bool "Show whiteouts" ++ help ++ If you want to make the whiteouts in aufs visible, then enable ++ this option and specify 'shwh' mount option. Although it may ++ sounds like philosophy or something, but in technically it ++ simply shows the name of whiteout with keeping its behaviour. + -+static inline void au_hin_imtx_unlock(struct au_hinode *hdir) -+{ -+ au_hin_resume(hdir); -+ mutex_unlock(&hdir->hi_inode->i_mutex); -+} ++config AUFS_BR_RAMFS ++ bool "Ramfs (initramfs/rootfs) as an aufs branch" ++ help ++ If you want to use ramfs as an aufs branch fs, then enable this ++ option. Generally tmpfs is recommended. ++ Aufs prohibited them to be a branch fs by default, because ++ initramfs becomes unusable after switch_root or something ++ generally. If you sets initramfs as an aufs branch and boot your ++ system by switch_root, you will meet a problem easily since the ++ files in initramfs may be inaccessible. ++ Unless you are going to use ramfs as an aufs branch fs without ++ switch_root or something, leave it N. + -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_INODE_H__ */ -diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c -new file mode 100644 -index 0000000..ff17d77 ---- /dev/null -+++ b/fs/aufs/ioctl.c -@@ -0,0 +1,54 @@ -+/* -+ * Copyright (C) 2005-2009 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ */ ++config AUFS_BR_FUSE ++ bool "Fuse fs as an aufs branch" ++ depends on FUSE_FS ++ select AUFS_POLL ++ help ++ If you want to use fuse-based userspace filesystem as an aufs ++ branch fs, then enable this option. 
++ It implements the internal poll(2) operation which is ++ implemented by fuse only (curretnly). + -+#include -+#include "aufs.h" ++config AUFS_DEBUG ++ bool "Debug aufs" ++ help ++ Enable this to compile aufs internal debug code. ++ It will have a negative impact to the performance. + -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg __maybe_unused) -+{ -+ long err; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; ++config AUFS_MAGIC_SYSRQ ++ bool ++ depends on AUFS_DEBUG && MAGIC_SYSRQ ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq, enabled automatically. + -+ err = -EACCES; -+ if (!capable(CAP_SYS_ADMIN)) -+ goto out; ++config AUFS_BDEV_LOOP ++ bool ++ depends on BLK_DEV_LOOP ++ default y ++ help ++ Automatic configuration for internal use. ++ Convert =[ym] into =y. + -+ err = 0; -+ sb = file->f_dentry->d_sb; -+ sbinfo = au_sbi(sb); -+ switch (cmd) { -+ case AUFS_CTL_PLINK_MAINT: -+ /* -+ * pseudo-link maintenance mode, -+ * cleared by aufs_release_dir() -+ */ -+ si_write_lock(sb); -+ if (!au_ftest_si(sbinfo, MAINTAIN_PLINK)) { -+ au_fset_si(sbinfo, MAINTAIN_PLINK); -+ au_fi(file)->fi_maintain_plink = 1; -+ } else -+ err = -EBUSY; -+ si_write_unlock(sb); -+ break; -+ case AUFS_CTL_PLINK_CLEAN: -+ if (au_opt_test(sbinfo->si_mntflags, PLINK)) { -+ aufs_write_lock(sb->s_root); -+ au_plink_put(sb); -+ aufs_write_unlock(sb->s_root); -+ } -+ break; -+ default: -+ err = -EINVAL; -+ } ++config AUFS_INO_T_64 ++ bool ++ depends on AUFS_EXPORT ++ depends on 64BIT && !(ALPHA || S390) ++ default y ++ help ++ Automatic configuration for internal use. ++ /* typedef unsigned long/int __kernel_ino_t */ ++ /* alpha and s390x are int */ + -+ out: -+ return err; -+} -diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c -new file mode 100644 -index 0000000..84a940f ---- /dev/null -+++ b/fs/aufs/loop.c -@@ -0,0 +1,46 @@ ++config AUFS_POLL ++ bool ++ help ++ Automatic configuration for internal use. 
++endif +diff -urN linux-2.6.30.org/fs/aufs/loop.c linux-2.6.30/fs/aufs/loop.c +--- linux-2.6.30.org/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/loop.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,55 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -16203,6 +13950,15 @@ index 0000000..84a940f + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -16242,12 +13998,10 @@ index 0000000..84a940f + && '0' <= c && c <= '9' + && strncmp(current->comm, "loop", 4) == 0; +} -diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h -new file mode 100644 -index 0000000..1c0ee40 ---- /dev/null -+++ b/fs/aufs/loop.h -@@ -0,0 +1,41 @@ +diff -urN linux-2.6.30.org/fs/aufs/loop.h linux-2.6.30/fs/aufs/loop.h +--- linux-2.6.30.org/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/loop.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,51 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -16255,6 +14009,15 @@ index 0000000..1c0ee40 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -16266,7 +14029,8 @@ index 0000000..1c0ee40 + +#ifdef __KERNEL__ + -+#include ++struct dentry; ++struct super_block; + +#ifdef CONFIG_AUFS_BDEV_LOOP +/* loop.c */ @@ -16289,12 +14053,10 @@ index 0000000..1c0ee40 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_LOOP_H__ */ -diff --git a/fs/aufs/magic.mk b/fs/aufs/magic.mk -new file mode 100644 -index 0000000..6258f57 ---- /dev/null -+++ b/fs/aufs/magic.mk -@@ -0,0 +1,58 @@ +diff -urN linux-2.6.30.org/fs/aufs/magic.mk linux-2.6.30/fs/aufs/magic.mk +--- linux-2.6.30.org/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/magic.mk 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,52 @@ + +# defined in ${srctree}/fs/fuse/inode.c +# tristate @@ -16347,18 +14109,37 @@ index 0000000..6258f57 +ifdef CONFIG_UBIFS_FS +ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905 +endif +diff -urN linux-2.6.30.org/fs/aufs/Makefile linux-2.6.30/fs/aufs/Makefile +--- linux-2.6.30.org/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/Makefile 2009-07-21 08:54:18.000000000 +0200 +@@ -0,0 +1,23 @@ + -+# defined in ${srctree}/fs/debugfs/inode.c -+# boolean -+ifdef CONFIG_DEBUG_FS -+ccflags-y += -DDEBUGFS_MAGIC=0x64626720 -+endif -diff --git a/fs/aufs/module.c b/fs/aufs/module.c -new file mode 100644 -index 0000000..c2219d3 ---- /dev/null -+++ b/fs/aufs/module.c -@@ -0,0 +1,164 @@ ++include ${src}/magic.mk ++-include ${src}/priv_def.mk ++ ++obj-$(CONFIG_AUFS_FS) += aufs.o ++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ ++ wkq.o vfsub.o dcsub.o \ ++ cpup.o whout.o plink.o wbr_policy.o \ ++ dinfo.o dentry.o \ ++ finfo.o file.o f_op.o \ ++ dir.o vdir.o \ ++ iinfo.o inode.o i_op.o i_op_add.o 
i_op_del.o i_op_ren.o \ ++ ioctl.o ++ ++# all are boolean ++aufs-$(CONFIG_SYSFS) += sysfs.o ++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o ++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o ++aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++aufs-$(CONFIG_AUFS_POLL) += poll.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o ++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o +diff -urN linux-2.6.30.org/fs/aufs/module.c linux-2.6.30/fs/aufs/module.c +--- linux-2.6.30.org/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/module.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,173 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -16366,6 +14147,15 @@ index 0000000..c2219d3 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -16429,7 +14219,7 @@ index 0000000..c2219d3 + */ +MODULE_LICENSE("GPL"); +/* MODULE_LICENSE("GPL v2"); */ -+MODULE_AUTHOR("Junjiro R. Okajima"); ++MODULE_AUTHOR("Junjiro R. 
Okajima "); +MODULE_DESCRIPTION(AUFS_NAME + " -- Advanced multi layered unification filesystem"); +MODULE_VERSION(AUFS_VERSION); @@ -16523,12 +14313,10 @@ index 0000000..c2219d3 + +module_init(aufs_init); +module_exit(aufs_exit); -diff --git a/fs/aufs/module.h b/fs/aufs/module.h -new file mode 100644 -index 0000000..85e397a ---- /dev/null -+++ b/fs/aufs/module.h -@@ -0,0 +1,66 @@ +diff -urN linux-2.6.30.org/fs/aufs/module.h linux-2.6.30/fs/aufs/module.h +--- linux-2.6.30.org/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/module.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,78 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -16536,6 +14324,15 @@ index 0000000..85e397a + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -16549,6 +14346,9 @@ index 0000000..85e397a + +#include + ++struct path; ++struct seq_file; ++ +/* module parameters */ +extern short aufs_nwkq; +extern int sysaufs_brs; @@ -16595,12 +14395,10 @@ index 0000000..85e397a + +#endif /* __KERNEL__ */ +#endif /* __AUFS_MODULE_H__ */ -diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c -new file mode 100644 -index 0000000..630db0e ---- /dev/null -+++ b/fs/aufs/opts.c -@@ -0,0 +1,1478 @@ +diff -urN linux-2.6.30.org/fs/aufs/opts.c linux-2.6.30/fs/aufs/opts.c +--- linux-2.6.30.org/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/opts.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,1543 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -16608,12 +14406,23 @@ index 0000000..630db0e + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * mount options/flags + */ + ++#include ++#include +#include /* a distribution requires */ +#include +#include "aufs.h" @@ -16625,10 +14434,12 @@ index 0000000..630db0e + Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend, + Opt_idel, Opt_imod, Opt_ireorder, + Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir, ++ Opt_rdblk_def, Opt_rdhash_def, + Opt_xino, Opt_zxino, Opt_noxino, + Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino, + Opt_trunc_xino_path, Opt_itrunc_xino, + Opt_trunc_xib, Opt_notrunc_xib, ++ Opt_shwh, Opt_noshwh, + Opt_plink, Opt_noplink, Opt_list_plink, + Opt_udba, + /* Opt_lock, Opt_unlock, */ @@ -16695,9 +14506,13 @@ index 0000000..630db0e + {Opt_ignore_silent, "coo=%s"}, + {Opt_ignore_silent, "nodlgt"}, + {Opt_ignore_silent, "nodirperm1"}, -+ {Opt_ignore_silent, "noshwh"}, + {Opt_ignore_silent, "clean_plink"}, + ++#ifdef CONFIG_AUFS_SHWH ++ {Opt_shwh, "shwh"}, ++#endif ++ {Opt_noshwh, "noshwh"}, ++ + {Opt_rendir, "rendir=%d"}, + + {Opt_refrof, "refrof"}, @@ -16716,7 +14531,9 @@ index 0000000..630db0e + + {Opt_rdcache, "rdcache=%d"}, + {Opt_rdblk, "rdblk=%d"}, ++ {Opt_rdblk_def, "rdblk=def"}, + {Opt_rdhash, "rdhash=%d"}, ++ {Opt_rdhash_def, "rdhash=def"}, + + {Opt_wbr_create, "create=%s"}, + {Opt_wbr_create, "create_policy=%s"}, @@ -16821,7 +14638,11 @@ index 0000000..630db0e + {-1, NULL} +}; + -+/* cf. linux/lib/parser.c */ ++/* ++ * cf. linux/lib/parser.c and cmdline.c ++ * gave up calling memparse() since it uses simple_strtoull() instead of ++ * strict_...(). 
++ */ +static int au_match_ull(substring_t *s, unsigned long long *result) +{ + int err; @@ -16995,9 +14816,15 @@ index 0000000..630db0e + case Opt_rdblk: + AuDbg("rdblk %u\n", opt->rdblk); + break; ++ case Opt_rdblk_def: ++ AuDbg("rdblk_def\n"); ++ break; + case Opt_rdhash: + AuDbg("rdhash %u\n", opt->rdhash); + break; ++ case Opt_rdhash_def: ++ AuDbg("rdhash_def\n"); ++ break; + case Opt_xino: + u.xino = &opt->xino; + AuDbg("xino {%s %.*s}\n", @@ -17025,6 +14852,12 @@ index 0000000..630db0e + case Opt_notrunc_xib: + AuLabel(notrunc_xib); + break; ++ case Opt_shwh: ++ AuLabel(shwh); ++ break; ++ case Opt_noshwh: ++ AuLabel(noshwh); ++ break; + case Opt_plink: + AuLabel(plink); + break; @@ -17510,6 +15343,8 @@ index 0000000..630db0e + case Opt_noxino: + case Opt_trunc_xib: + case Opt_notrunc_xib: ++ case Opt_shwh: ++ case Opt_noshwh: + case Opt_plink: + case Opt_noplink: + case Opt_list_plink: @@ -17524,6 +15359,8 @@ index 0000000..630db0e + case Opt_sum: + case Opt_nosum: + case Opt_wsum: ++ case Opt_rdblk_def: ++ case Opt_rdhash_def: + err = 0; + opt->type = token; + break; @@ -17594,6 +15431,8 @@ index 0000000..630db0e + int err; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + err = 1; /* handled */ + sbinfo = au_sbi(sb); + if (sbinfo->si_wbr_create_ops->fin) { @@ -17634,6 +15473,8 @@ index 0000000..630db0e + int err; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + err = 1; /* handled */ + sbinfo = au_sbi(sb); + switch (opt->type) { @@ -17713,9 +15554,22 @@ index 0000000..630db0e + case Opt_rdblk: + sbinfo->si_rdblk = opt->rdblk; + break; ++ case Opt_rdblk_def: ++ sbinfo->si_rdblk = AUFS_RDBLK_DEF; ++ break; + case Opt_rdhash: + sbinfo->si_rdhash = opt->rdhash; + break; ++ case Opt_rdhash_def: ++ sbinfo->si_rdhash = AUFS_RDHASH_DEF; ++ break; ++ ++ case Opt_shwh: ++ au_opt_set(sbinfo->si_mntflags, SHWH); ++ break; ++ case Opt_noshwh: ++ au_opt_clr(sbinfo->si_mntflags, SHWH); ++ break; + + case Opt_trunc_xino: + 
au_opt_set(sbinfo->si_mntflags, TRUNC_XINO); @@ -17863,12 +15717,17 @@ index 0000000..630db0e + struct au_sbinfo *sbinfo; + struct au_hinode *hdir; + ++ SiMustAnyLock(sb); ++ + sbinfo = au_sbi(sb); + AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA)); + -+ if (unlikely(!(sb_flags & MS_RDONLY) -+ && !au_br_writable(au_sbr_perm(sb, 0)))) -+ AuWarn("first branch should be rw\n"); ++ if (!(sb_flags & MS_RDONLY)) { ++ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0)))) ++ AuWarn("first branch should be rw\n"); ++ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH))) ++ AuWarn("shwh should be used with ro\n"); ++ } + + if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HINOTIFY) + && !au_opt_test(sbinfo->si_mntflags, XINO)) @@ -17959,6 +15818,8 @@ index 0000000..630db0e + struct au_opt_xino *opt_xino, xino; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + err = 0; + opt_xino = NULL; + opt = opts->opt; @@ -18036,6 +15897,8 @@ index 0000000..630db0e + struct au_opt *opt; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + dir = sb->s_root->d_inode; + sbinfo = au_sbi(sb); + err = 0; @@ -18079,12 +15942,10 @@ index 0000000..630db0e +{ + return au_mntflags(sb) & AuOptMask_UDBA; +} -diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h -new file mode 100644 -index 0000000..275702c ---- /dev/null -+++ b/fs/aufs/opts.h -@@ -0,0 +1,180 @@ +diff -urN linux-2.6.30.org/fs/aufs/opts.h linux-2.6.30/fs/aufs/opts.h +--- linux-2.6.30.org/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/opts.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,196 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18092,6 +15953,15 @@ index 0000000..275702c + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18103,10 +15973,12 @@ index 0000000..275702c + +#ifdef __KERNEL__ + -+#include -+#include ++#include +#include + ++struct file; ++struct super_block; ++ +/* ---------------------------------------------------------------------- */ + +/* mount flags */ @@ -18116,19 +15988,24 @@ index 0000000..275702c +#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */ +#define AuOpt_UDBA_REVAL (1 << 3) +#define AuOpt_UDBA_HINOTIFY (1 << 4) -+#define AuOpt_PLINK (1 << 5) /* pseudo-link */ -+#define AuOpt_DIRPERM1 (1 << 6) /* unimplemented */ -+#define AuOpt_REFROF (1 << 7) /* unimplemented */ -+#define AuOpt_ALWAYS_DIROPQ (1 << 8) /* policy to creating diropq */ -+#define AuOpt_SUM (1 << 9) /* summation for statfs(2) */ -+#define AuOpt_SUM_W (1 << 10) /* unimplemented */ -+#define AuOpt_WARN_PERM (1 << 11) /* warn when add-branch */ -+#define AuOpt_VERBOSE (1 << 12) /* busy inode when del-branch */ ++#define AuOpt_SHWH (1 << 5) /* show whiteout */ ++#define AuOpt_PLINK (1 << 6) /* pseudo-link */ ++#define AuOpt_DIRPERM1 (1 << 7) /* unimplemented */ ++#define AuOpt_REFROF (1 << 8) /* unimplemented */ ++#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */ ++#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */ ++#define AuOpt_SUM_W (1 << 11) /* unimplemented */ ++#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */ ++#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */ + +#ifndef CONFIG_AUFS_HINOTIFY +#undef AuOpt_UDBA_HINOTIFY +#define 
AuOpt_UDBA_HINOTIFY 0 +#endif ++#ifndef CONFIG_AUFS_SHWH ++#undef AuOpt_SHWH ++#define AuOpt_SHWH 0 ++#endif + +#define AuOpt_Def (AuOpt_XINO \ + | AuOpt_UDBA_REVAL \ @@ -18265,12 +16142,10 @@ index 0000000..275702c + +#endif /* __KERNEL__ */ +#endif /* __AUFS_OPTS_H__ */ -diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c -new file mode 100644 -index 0000000..62d258f ---- /dev/null -+++ b/fs/aufs/plink.c -@@ -0,0 +1,335 @@ +diff -urN linux-2.6.30.org/fs/aufs/plink.c linux-2.6.30/fs/aufs/plink.c +--- linux-2.6.30.org/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/plink.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,354 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18278,6 +16153,15 @@ index 0000000..62d258f + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18293,6 +16177,9 @@ index 0000000..62d258f +void au_plink_block_maintain(struct super_block *sb) +{ + struct au_sbinfo *sbi = au_sbi(sb); ++ ++ SiMustAnyLock(sb); ++ + /* gave up wake_up_bit() */ + wait_event(sbi->si_plink_wq, !au_ftest_si(sbi, MAINTAIN_PLINK)); +} @@ -18311,6 +16198,8 @@ index 0000000..62d258f + struct list_head *plink_list; + struct pseudo_link *plink; + ++ SiMustAnyLock(sb); ++ + sbinfo = au_sbi(sb); + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + @@ -18331,6 +16220,7 @@ index 0000000..62d258f + struct pseudo_link *plink; + + sbinfo = au_sbi(inode->i_sb); ++ AuRwMustAnyLock(&sbinfo->si_rwsem); + AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK)); + + found = 0; @@ -18550,6 +16440,8 @@ index 0000000..62d258f + struct list_head *plink_list; + struct pseudo_link *plink, *tmp; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + @@ -18570,6 +16462,8 @@ index 0000000..62d258f + aufs_bindex_t bstart, bend, bindex; + unsigned char do_put; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); + @@ -18593,25 +16487,83 @@ index 0000000..62d258f + } else + do_put_plink(plink, 1); + -+ if (do_put) { -+ for (bindex = bstart; bindex <= bend; bindex++) -+ if (au_h_iptr(inode, bindex)) { -+ do_put = 0; -+ break; -+ } -+ if (do_put) -+ do_put_plink(plink, 1); -+ } -+ ii_write_unlock(inode); -+ iput(inode); -+ } ++ if (do_put) { ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr(inode, bindex)) { ++ do_put = 0; ++ break; ++ } ++ if (do_put) ++ do_put_plink(plink, 1); ++ } ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++} +diff -urN linux-2.6.30.org/fs/aufs/poll.c linux-2.6.30/fs/aufs/poll.c +--- 
linux-2.6.30.org/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/poll.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (C) 2005-2009 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* ++ * poll operation ++ * There is only one filesystem which implements ->poll operation, currently. ++ */ ++ ++#include "aufs.h" ++ ++unsigned int aufs_poll(struct file *file, poll_table *wait) ++{ ++ unsigned int mask; ++ int err; ++ struct file *h_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ /* We should pretend an error happened. 
*/ ++ mask = POLLERR /* | POLLIN | POLLOUT */; ++ dentry = file->f_dentry; ++ sb = dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ /* it is not an error if h_file has no operation */ ++ mask = DEFAULT_POLLMASK; ++ h_file = au_h_fptr(file, au_fbstart(file)); ++ if (h_file->f_op && h_file->f_op->poll) ++ mask = h_file->f_op->poll(h_file, wait); ++ ++ di_read_unlock(dentry, AuLock_IR); ++ fi_read_unlock(file); ++ ++ out: ++ si_read_unlock(sb); ++ AuTraceErr((int)mask); ++ return mask; +} -diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h -new file mode 100644 -index 0000000..d569243 ---- /dev/null -+++ b/fs/aufs/rwsem.h -@@ -0,0 +1,52 @@ +diff -urN linux-2.6.30.org/fs/aufs/rwsem.h linux-2.6.30/fs/aufs/rwsem.h +--- linux-2.6.30.org/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/rwsem.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,186 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18619,6 +16571,15 @@ index 0000000..d569243 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18630,33 +16591,158 @@ index 0000000..d569243 + +#ifdef __KERNEL__ + -+#include ++#include ++ ++struct au_rwsem { ++ struct rw_semaphore rwsem; ++#ifdef CONFIG_AUFS_DEBUG ++ /* just for debugging, not almighty counter */ ++ atomic_t rcnt, wcnt; ++#endif ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define AuDbgCntInit(rw) do { \ ++ atomic_set(&(rw)->rcnt, 0); \ ++ atomic_set(&(rw)->wcnt, 0); \ ++ smp_mb(); /* atomic set */ \ ++} while (0) ++ ++#define AuDbgRcntInc(rw) atomic_inc_return(&(rw)->rcnt) ++#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) ++#define AuDbgWcntInc(rw) WARN_ON(atomic_inc_return(&(rw)->wcnt) > 1) ++#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0) ++#else ++#define AuDbgCntInit(rw) do {} while (0) ++#define AuDbgRcntInc(rw) do {} while (0) ++#define AuDbgRcntDec(rw) do {} while (0) ++#define AuDbgWcntInc(rw) do {} while (0) ++#define AuDbgWcntDec(rw) do {} while (0) ++#endif /* CONFIG_AUFS_DEBUG */ ++ ++/* to debug easier, do not make them inlined functions */ ++#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list)) ++/* rwsem_is_locked() is unusable */ ++#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0) ++#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \ ++ && atomic_read(&(rw)->wcnt) <= 0) ++#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \ ++ || atomic_read(&(rw)->wcnt)) ++ ++static inline void au_rw_init(struct au_rwsem *rw) ++{ ++ AuDbgCntInit(rw); ++ init_rwsem(&rw->rwsem); ++} ++ ++static inline void au_rw_init_wlock(struct au_rwsem *rw) ++{ ++ au_rw_init(rw); ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline 
void au_rw_init_wlock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ au_rw_init(rw); ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_read_lock(struct au_rwsem *rw) ++{ ++ down_read(&rw->rwsem); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc) ++{ ++ down_read_nested(&rw->rwsem, lsc); ++ AuDbgRcntInc(rw); ++} ++ ++static inline void au_rw_read_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustReadLock(rw); ++ AuDbgRcntDec(rw); ++ up_read(&rw->rwsem); ++} ++ ++static inline void au_rw_dgrade_lock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgRcntInc(rw); ++ AuDbgWcntDec(rw); ++ downgrade_write(&rw->rwsem); ++} ++ ++static inline void au_rw_write_lock(struct au_rwsem *rw) ++{ ++ down_write(&rw->rwsem); ++ AuDbgWcntInc(rw); ++} ++ ++static inline void au_rw_write_lock_nested(struct au_rwsem *rw, ++ unsigned int lsc) ++{ ++ down_write_nested(&rw->rwsem, lsc); ++ AuDbgWcntInc(rw); ++} + -+#define au_rwsem_destroy(rw) AuDebugOn(rwsem_is_locked(rw)) -+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->wait_list)) ++static inline void au_rw_write_unlock(struct au_rwsem *rw) ++{ ++ AuRwMustWriteLock(rw); ++ AuDbgWcntDec(rw); ++ up_write(&rw->rwsem); ++} ++ ++/* why is not _nested version defined */ ++static inline int au_rw_read_trylock(struct au_rwsem *rw) ++{ ++ int ret = down_read_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgRcntInc(rw); ++ return ret; ++} ++ ++static inline int au_rw_write_trylock(struct au_rwsem *rw) ++{ ++ int ret = down_write_trylock(&rw->rwsem); ++ if (ret) ++ AuDbgWcntInc(rw); ++ return ret; ++} ++ ++#undef AuDbgCntInit ++#undef AuDbgRcntInc ++#undef AuDbgRcntDec ++#undef AuDbgWcntInc ++#undef AuDbgWcntDec + +#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ +static inline void prefix##_read_lock(param) \ -+{ down_read(rwsem); } \ ++{ au_rw_read_lock(rwsem); } \ +static inline void prefix##_write_lock(param) \ -+{ 
down_write(rwsem); } \ ++{ au_rw_write_lock(rwsem); } \ +static inline int prefix##_read_trylock(param) \ -+{ return down_read_trylock(rwsem); } \ ++{ return au_rw_read_trylock(rwsem); } \ +static inline int prefix##_write_trylock(param) \ -+{ return down_write_trylock(rwsem); } ++{ return au_rw_write_trylock(rwsem); } +/* why is not _nested version defined */ +/* static inline void prefix##_read_trylock_nested(param, lsc) -+{ down_write_trylock_nested(rwsem, lsc)); } ++{ au_rw_read_trylock_nested(rwsem, lsc)); } +static inline void prefix##_write_trylock_nestd(param, lsc) -+{ down_write_trylock_nested(rwsem, lsc); } */ ++{ au_rw_write_trylock_nested(rwsem, lsc); } */ + +#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \ +static inline void prefix##_read_unlock(param) \ -+{ up_read(rwsem); } \ ++{ au_rw_read_unlock(rwsem); } \ +static inline void prefix##_write_unlock(param) \ -+{ up_write(rwsem); } \ ++{ au_rw_write_unlock(rwsem); } \ +static inline void prefix##_downgrade_lock(param) \ -+{ downgrade_write(rwsem); } ++{ au_rw_dgrade_lock(rwsem); } + +#define AuSimpleRwsemFuncs(prefix, param, rwsem) \ + AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ @@ -18664,12 +16750,10 @@ index 0000000..d569243 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_RWSEM_H__ */ -diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c -new file mode 100644 -index 0000000..37e0649 ---- /dev/null -+++ b/fs/aufs/sbinfo.c -@@ -0,0 +1,194 @@ +diff -urN linux-2.6.30.org/fs/aufs/sbinfo.c linux-2.6.30/fs/aufs/sbinfo.c +--- linux-2.6.30.org/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/sbinfo.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,208 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18677,6 +16761,15 @@ index 0000000..37e0649 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18703,7 +16796,7 @@ index 0000000..37e0649 + kfree(sbinfo->si_branch); + mutex_destroy(&sbinfo->si_xib_mtx); + si_write_unlock(sb); -+ au_rwsem_destroy(&sbinfo->si_rwsem); ++ AuRwDestroy(&sbinfo->si_rwsem); + + kfree(sbinfo); +} @@ -18729,8 +16822,7 @@ index 0000000..37e0649 + goto out_br; + + au_nwt_init(&sbinfo->si_nowait); -+ init_rwsem(&sbinfo->si_rwsem); -+ down_write(&sbinfo->si_rwsem); ++ au_rw_init_wlock(&sbinfo->si_rwsem); + sbinfo->si_generation = 0; + sbinfo->au_si_status = 0; + sbinfo->si_bend = -1; @@ -18778,6 +16870,8 @@ index 0000000..37e0649 + int err, sz; + struct au_branch **brp; + ++ AuRwMustWriteLock(&sbinfo->si_rwsem); ++ + err = -ENOMEM; + sz = sizeof(*brp) * (sbinfo->si_bend + 1); + if (unlikely(!sz)) @@ -18797,6 +16891,8 @@ index 0000000..37e0649 +{ + unsigned int gen; + ++ SiMustWriteLock(sb); ++ + gen = ++au_sbi(sb)->si_generation; + au_update_digen(sb->s_root); + au_update_iigen(sb->s_root->d_inode); @@ -18810,6 +16906,8 @@ index 0000000..37e0649 + int i; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + for (i = 0; i <= AUFS_BRANCH_MAX; i++) { + br_id = ++sbinfo->si_last_br_id; @@ -18864,12 +16962,10 @@ index 0000000..37e0649 + di_write_unlock2(d1, d2); + si_read_unlock(d1->d_sb); +} -diff --git a/fs/aufs/spl.h b/fs/aufs/spl.h -new file mode 100644 -index 0000000..89ea7a2 ---- /dev/null -+++ b/fs/aufs/spl.h -@@ -0,0 +1,47 @@ +diff -urN linux-2.6.30.org/fs/aufs/spl.h linux-2.6.30/fs/aufs/spl.h +--- 
linux-2.6.30.org/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/spl.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,57 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18877,6 +16973,15 @@ index 0000000..89ea7a2 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18888,7 +16993,8 @@ index 0000000..89ea7a2 + +#ifdef __KERNEL__ + -+#include ++#include ++#include + +struct au_splhead { + spinlock_t spin; @@ -18917,12 +17023,10 @@ index 0000000..89ea7a2 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_SPL_H__ */ -diff --git a/fs/aufs/super.c b/fs/aufs/super.c -new file mode 100644 -index 0000000..0f6ad13 ---- /dev/null -+++ b/fs/aufs/super.c -@@ -0,0 +1,859 @@ +diff -urN linux-2.6.30.org/fs/aufs/super.c linux-2.6.30/fs/aufs/super.c +--- linux-2.6.30.org/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/super.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,874 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -18930,6 +17034,15 @@ index 0000000..0f6ad13 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -18937,6 +17050,7 @@ index 0000000..0f6ad13 + */ + +#include ++#include +#include +#include +#include "aufs.h" @@ -19026,6 +17140,8 @@ index 0000000..0f6ad13 +{ + const char *pat; + ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ + seq_printf(m, ",create="); + pat = au_optstr_wbr_create(v); + switch (v) { @@ -19068,6 +17184,8 @@ index 0000000..0f6ad13 + struct file *f; + struct dentry *d, *h_root; + ++ AuRwMustAnyLock(&sbinfo->si_rwsem); ++ + err = 0; + sb = mnt->mnt_sb; + f = au_sbi(sb)->si_xib; @@ -19138,6 +17256,7 @@ index 0000000..0f6ad13 + + AuBool(TRUNC_XINO, trunc_xino); + AuStr(UDBA, udba); ++ AuBool(SHWH, shwh); + AuBool(PLINK, plink); + /* AuBool(DIRPERM1, dirperm1); */ + /* AuBool(REFROF, refrof); */ @@ -19782,12 +17901,10 @@ index 0000000..0f6ad13 + /* no need to __module_get() and module_put(). */ + .owner = THIS_MODULE, +}; -diff --git a/fs/aufs/super.h b/fs/aufs/super.h -new file mode 100644 -index 0000000..9897dba ---- /dev/null -+++ b/fs/aufs/super.h -@@ -0,0 +1,351 @@ +diff -urN linux-2.6.30.org/fs/aufs/super.h linux-2.6.30/fs/aufs/super.h +--- linux-2.6.30.org/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/super.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,384 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -19795,6 +17912,15 @@ index 0000000..9897dba + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -19807,7 +17933,6 @@ index 0000000..9897dba +#ifdef __KERNEL__ + +#include -+#include +#include +#include "rwsem.h" +#include "spl.h" @@ -19838,26 +17963,12 @@ index 0000000..9897dba + unsigned long long mfsrr_watermark; +}; + -+/* sbinfo status flags */ -+/* -+ * set true when refresh_dirs() failed at remount time. -+ * then try refreshing dirs at access time again. -+ * if it is false, refreshing dirs at access time is unnecesary -+ */ -+#define AuSi_FAILED_REFRESH_DIRS 1 -+#define AuSi_MAINTAIN_PLINK (1 << 1) /* ioctl */ -+#define au_ftest_si(sbinfo, name) ((sbinfo)->au_si_status & AuSi_##name) -+#define au_fset_si(sbinfo, name) \ -+ { (sbinfo)->au_si_status |= AuSi_##name; } -+#define au_fclr_si(sbinfo, name) \ -+ { (sbinfo)->au_si_status &= ~AuSi_##name; } -+ +struct au_branch; +struct au_sbinfo { + /* nowait tasks in the system-wide workqueue */ + struct au_nowait_tasks si_nowait; + -+ struct rw_semaphore si_rwsem; ++ struct au_rwsem si_rwsem; + + /* branch management */ + unsigned int si_generation; @@ -19944,6 +18055,30 @@ index 0000000..9897dba + struct super_block *si_sb; +}; + ++/* sbinfo status flags */ ++/* ++ * set true when refresh_dirs() failed at remount time. ++ * then try refreshing dirs at access time again. 
++ * if it is false, refreshing dirs at access time is unnecessary ++ */ ++#define AuSi_FAILED_REFRESH_DIRS 1 ++#define AuSi_MAINTAIN_PLINK (1 << 1) /* ioctl */ ++static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi, ++ unsigned int flag) ++{ ++ AuRwMustAnyLock(&sbi->si_rwsem); ++ return sbi->au_si_status & flag; ++} ++#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name) ++#define au_fset_si(sbinfo, name) do { \ ++ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \ ++ (sbinfo)->au_si_status |= AuSi_##name; \ ++} while (0) ++#define au_fclr_si(sbinfo, name) do { \ ++ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \ ++ (sbinfo)->au_si_status &= ~AuSi_##name; \ ++} while (0) ++ +/* ---------------------------------------------------------------------- */ + +/* policy to select one among writable branches */ @@ -20057,6 +18192,11 @@ index 0000000..9897dba + +static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo) +{ ++ /* ++ * This function is a dynamic '__init' function actually, ++ * so the tiny check for si_rwsem is unnecessary. 
++ */ ++ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ +#ifdef CONFIG_DEBUG_FS + sbinfo->si_dbgaufs = NULL; + sbinfo->si_dbgaufs_xib = NULL; @@ -20077,6 +18217,10 @@ index 0000000..9897dba + &au_sbi(sb)->si_rwsem); +AuSimpleUnlockRwsemFuncs(si, struct super_block *sb, &au_sbi(sb)->si_rwsem); + ++#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem) ++#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem) ++#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem) ++ +static inline void si_read_lock(struct super_block *sb, int flags) +{ + if (au_ftest_lock(flags, FLUSH)) @@ -20108,43 +18252,47 @@ index 0000000..9897dba + +static inline aufs_bindex_t au_sbend(struct super_block *sb) +{ ++ SiMustAnyLock(sb); + return au_sbi(sb)->si_bend; +} + +static inline unsigned int au_mntflags(struct super_block *sb) +{ ++ SiMustAnyLock(sb); + return au_sbi(sb)->si_mntflags; +} + +static inline unsigned int au_sigen(struct super_block *sb) +{ ++ SiMustAnyLock(sb); + return au_sbi(sb)->si_generation; +} + +static inline struct au_branch *au_sbr(struct super_block *sb, + aufs_bindex_t bindex) +{ ++ SiMustAnyLock(sb); + return au_sbi(sb)->si_branch[0 + bindex]; +} + +static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid) +{ ++ SiMustWriteLock(sb); + au_sbi(sb)->si_xino_brid = brid; +} + +static inline aufs_bindex_t au_xino_brid(struct super_block *sb) +{ ++ SiMustAnyLock(sb); + return au_sbi(sb)->si_xino_brid; +} + +#endif /* __KERNEL__ */ +#endif /* __AUFS_SUPER_H__ */ -diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c -new file mode 100644 -index 0000000..623f2e6 ---- /dev/null -+++ b/fs/aufs/sysaufs.c -@@ -0,0 +1,95 @@ +diff -urN linux-2.6.30.org/fs/aufs/sysaufs.c linux-2.6.30/fs/aufs/sysaufs.c +--- linux-2.6.30.org/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/sysaufs.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. 
Okajima + * @@ -20152,6 +18300,15 @@ index 0000000..623f2e6 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -20240,12 +18397,10 @@ index 0000000..623f2e6 + out: + return err; +} -diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h -new file mode 100644 -index 0000000..c1202fa ---- /dev/null -+++ b/fs/aufs/sysaufs.h -@@ -0,0 +1,109 @@ +diff -urN linux-2.6.30.org/fs/aufs/sysaufs.h linux-2.6.30/fs/aufs/sysaufs.h +--- linux-2.6.30.org/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/sysaufs.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,120 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -20253,6 +18408,15 @@ index 0000000..c1202fa + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -20264,11 +18428,13 @@ index 0000000..c1202fa + +#ifdef __KERNEL__ + -+#include +#include +#include +#include "module.h" + ++struct super_block; ++struct au_sbinfo; ++ +struct sysaufs_si_attr { + struct attribute attr; + int (*show)(struct seq_file *seq, struct super_block *sb); @@ -20355,12 +18521,10 @@ index 0000000..c1202fa + +#endif /* __KERNEL__ */ +#endif /* __SYSAUFS_H__ */ -diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c -new file mode 100644 -index 0000000..496a2fc ---- /dev/null -+++ b/fs/aufs/sysfs.c -@@ -0,0 +1,198 @@ +diff -urN linux-2.6.30.org/fs/aufs/sysfs.c linux-2.6.30/fs/aufs/sysfs.c +--- linux-2.6.30.org/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/sysfs.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,210 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -20368,6 +18532,15 @@ index 0000000..496a2fc + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -20375,6 +18548,7 @@ index 0000000..496a2fc + */ + +#include ++#include +#include +#include +#include "aufs.h" @@ -20395,6 +18569,8 @@ index 0000000..496a2fc +{ + int err; + ++ SiMustAnyLock(sb); ++ + err = 0; + if (au_opt_test(au_mntflags(sb), XINO)) { + err = au_xino_path(seq, au_sbi(sb)->si_xib); @@ -20559,12 +18735,10 @@ index 0000000..496a2fc + AuWarn("failed %s under sysfs(%d)\n", br->br_name, err); + } +} -diff --git a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c -new file mode 100644 -index 0000000..419ff2c ---- /dev/null -+++ b/fs/aufs/sysrq.c -@@ -0,0 +1,106 @@ +diff -urN linux-2.6.30.org/fs/aufs/sysrq.c linux-2.6.30/fs/aufs/sysrq.c +--- linux-2.6.30.org/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/sysrq.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,115 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -20572,6 +18746,15 @@ index 0000000..419ff2c + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -20671,12 +18854,10 @@ index 0000000..419ff2c + if (unlikely(err)) + AuErr("err %d (ignored)\n", err); +} -diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c -new file mode 100644 -index 0000000..9c7b64a ---- /dev/null -+++ b/fs/aufs/vdir.c -@@ -0,0 +1,796 @@ +diff -urN linux-2.6.30.org/fs/aufs/vdir.c linux-2.6.30/fs/aufs/vdir.c +--- linux-2.6.30.org/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/vdir.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,882 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -20684,29 +18865,28 @@ index 0000000..9c7b64a + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * virtual or vertical directory + */ + ++#include +#include "aufs.h" + -+static unsigned int calc_size(int namelen) ++static unsigned int calc_size(int nlen) +{ -+ int sz; -+ const int mask = sizeof(ino_t) - 1; -+ + BUILD_BUG_ON(sizeof(ino_t) != sizeof(long)); -+ -+ sz = sizeof(struct au_vdir_de) + namelen; -+ if (sz & mask) { -+ sz += sizeof(ino_t); -+ sz &= ~mask; -+ } -+ -+ AuDebugOn(sz % sizeof(ino_t)); -+ return sz; ++ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t)); +} + +static int set_deblk_end(union au_vdir_deblk_p *p, @@ -20738,98 +18918,83 @@ index 0000000..9c7b64a + +/* + * the allocated memory has to be freed by -+ * au_nhash_wh_free() or void au_nhash_de_free(). ++ * au_nhash_wh_free() or au_nhash_de_free(). + */ -+struct au_nhash *au_nhash_alloc(struct super_block *sb, aufs_bindex_t bend, -+ gfp_t gfp) ++int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp) +{ -+ struct au_nhash *nhash; + struct hlist_head *head; -+ unsigned int u, n; -+ aufs_bindex_t bindex; -+ -+ nhash = kmalloc(sizeof(*nhash) * (bend + 1), gfp); -+ if (unlikely(!nhash)) -+ goto out; ++ unsigned int u; + -+ n = au_sbi(sb)->si_rdhash; -+ for (bindex = 0; bindex <= bend; bindex++) { -+ head = kmalloc(sizeof(*nhash->nh_head) * n, gfp); -+ if (unlikely(!head)) -+ goto out_free; -+ nhash[bindex].nh_num = n; -+ nhash[bindex].nh_head = head; -+ for (u = 0; u < n; u++) ++ head = kmalloc(sizeof(*nhash->nh_head) * num_hash, gfp); ++ if (head) { ++ nhash->nh_num = num_hash; ++ nhash->nh_head = head; ++ for (u = 0; u < num_hash; u++) + INIT_HLIST_HEAD(head++); ++ return 0; /* success */ + } -+ return nhash; /* success */ + -+ out_free: -+ for (bindex--; bindex >= 0; bindex--) -+ kfree(nhash[bindex].nh_head); -+ kfree(nhash); -+ 
out: -+ return ERR_PTR(-ENOMEM); ++ return -ENOMEM; +} + -+static void au_nhash_wh_do_free(struct au_nhash *whlist) ++static void nhash_count(struct hlist_head *head) ++{ ++#if 0 ++ unsigned long n; ++ struct hlist_node *pos; ++ ++ n = 0; ++ hlist_for_each(pos, head) ++ n++; ++ AuInfo("%lu\n", n); ++#endif ++} ++ ++static void au_nhash_wh_do_free(struct hlist_head *head) +{ -+ unsigned int u, n; -+ struct hlist_head *head; + struct au_vdir_wh *tpos; + struct hlist_node *pos, *node; + -+ n = whlist->nh_num; -+ head = whlist->nh_head; -+ for (u = 0; u < n; u++) { -+ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) { -+ /* hlist_del(pos); */ -+ kfree(tpos); -+ } -+ head++; ++ hlist_for_each_entry_safe(tpos, pos, node, head, wh_hash) { ++ /* hlist_del(pos); */ ++ kfree(tpos); + } +} + -+void au_nhash_wh_free(struct au_nhash *whlist, aufs_bindex_t bend) ++static void au_nhash_de_do_free(struct hlist_head *head) +{ -+ aufs_bindex_t bindex; ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos, *node; + -+ for (bindex = 0; bindex <= bend; bindex++) { -+ au_nhash_wh_do_free(whlist + bindex); -+ kfree(whlist[bindex].nh_head); ++ hlist_for_each_entry_safe(tpos, pos, node, head, hash) { ++ /* hlist_del(pos); */ ++ au_cache_free_dehstr(tpos); + } -+ -+ kfree(whlist); +} + -+static void au_nhash_de_do_free(struct au_nhash *delist) ++static void au_nhash_do_free(struct au_nhash *nhash, ++ void (*free)(struct hlist_head *head)) +{ + unsigned int u, n; + struct hlist_head *head; -+ struct au_vdir_dehstr *tpos; -+ struct hlist_node *pos, *node; + -+ n = delist->nh_num; -+ head = delist->nh_head; ++ n = nhash->nh_num; ++ head = nhash->nh_head; + for (u = 0; u < n; u++) { -+ hlist_for_each_entry_safe(tpos, pos, node, head, hash) { -+ /* hlist_del(pos); */ -+ au_cache_free_dehstr(tpos); -+ } -+ head++; ++ nhash_count(head); ++ free(head++); + } ++ kfree(nhash->nh_head); +} + -+static void au_nhash_de_free(struct au_nhash *delist, aufs_bindex_t bend) ++void 
au_nhash_wh_free(struct au_nhash *whlist) +{ -+ aufs_bindex_t bindex; -+ -+ for (bindex = 0; bindex <= bend; bindex++) { -+ au_nhash_de_do_free(delist + bindex); -+ kfree(delist[bindex].nh_head); -+ } ++ au_nhash_do_free(whlist, au_nhash_wh_do_free); ++} + -+ kfree(delist); ++static void au_nhash_de_free(struct au_nhash *delist) ++{ ++ au_nhash_do_free(delist, au_nhash_de_do_free); +} + +/* ---------------------------------------------------------------------- */ @@ -20856,78 +19021,112 @@ index 0000000..9c7b64a +} + +static struct hlist_head *au_name_hash(struct au_nhash *nhash, -+ const unsigned char *name, ++ unsigned char *name, + unsigned int len) +{ -+ return nhash->nh_head + full_name_hash(name, len) % nhash->nh_num; ++ unsigned int v; ++ /* const unsigned int magic_bit = 12; */ ++ ++ v = 0; ++ while (len--) ++ v += *name++; ++ /* v = hash_long(v, magic_bit); */ ++ v %= nhash->nh_num; ++ return nhash->nh_head + v; ++} ++ ++static int au_nhash_test_name(struct au_vdir_destr *str, const char *name, ++ int nlen) ++{ ++ return str->len == nlen && !memcmp(str->name, name, nlen); +} + +/* returns found or not */ -+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int namelen) ++int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen) +{ + struct hlist_head *head; + struct au_vdir_wh *tpos; + struct hlist_node *pos; + struct au_vdir_destr *str; + -+ head = au_name_hash(whlist, name, namelen); ++ head = au_name_hash(whlist, name, nlen); + hlist_for_each_entry(tpos, pos, head, wh_hash) { + str = &tpos->wh_str; + AuDbg("%.*s\n", str->len, str->name); -+ if (str->len == namelen && !memcmp(str->name, name, namelen)) ++ if (au_nhash_test_name(str, name, nlen)) ++ return 1; ++ } ++ return 0; ++} ++ ++/* returns found(true) or not */ ++static int test_known(struct au_nhash *delist, char *name, int nlen) ++{ ++ struct hlist_head *head; ++ struct au_vdir_dehstr *tpos; ++ struct hlist_node *pos; ++ struct au_vdir_destr *str; ++ ++ head = 
au_name_hash(delist, name, nlen); ++ hlist_for_each_entry(tpos, pos, head, hash) { ++ str = tpos->str; ++ AuDbg("%.*s\n", str->len, str->name); ++ if (au_nhash_test_name(str, name, nlen)) + return 1; + } + return 0; +} + -+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int namelen, -+ aufs_bindex_t bindex) ++static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino, ++ unsigned char d_type) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ wh->wh_ino = ino; ++ wh->wh_type = d_type; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, ++ unsigned int d_type, aufs_bindex_t bindex, ++ unsigned char shwh) +{ + int err; + struct au_vdir_destr *str; + struct au_vdir_wh *wh; + ++ AuDbg("%.*s\n", nlen, name); + err = -ENOMEM; -+ wh = kmalloc(sizeof(*wh) + namelen, GFP_NOFS); ++ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); + if (unlikely(!wh)) + goto out; + + err = 0; + wh->wh_bindex = bindex; ++ if (shwh) ++ au_shwh_init_wh(wh, ino, d_type); + str = &wh->wh_str; -+ str->len = namelen; -+ memcpy(str->name, name, namelen); -+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, namelen)); ++ str->len = nlen; ++ memcpy(str->name, name, nlen); ++ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen)); + /* smp_mb(); */ + + out: + return err; +} + -+/* ---------------------------------------------------------------------- */ -+ -+void au_vdir_free(struct au_vdir *vdir) -+{ -+ unsigned char **deblk; -+ -+ deblk = vdir->vd_deblk; -+ while (vdir->vd_nblk--) -+ kfree(*deblk++); -+ kfree(vdir->vd_deblk); -+ au_cache_free_vdir(vdir); -+} -+ +static int append_deblk(struct au_vdir *vdir) +{ + int err; -+ unsigned long sz, ul; ++ unsigned long ul; + const unsigned int deblk_sz = vdir->vd_deblk_sz; + union au_vdir_deblk_p p, deblk_end; + unsigned char **o; + + err = -ENOMEM; -+ sz = sizeof(*o) * vdir->vd_nblk; -+ o = au_kzrealloc(vdir->vd_deblk, sz, sz 
+ sizeof(*o), GFP_NOFS); ++ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), ++ GFP_NOFS); + if (unlikely(!o)) + goto out; + @@ -20946,11 +19145,75 @@ index 0000000..9c7b64a + return err; +} + ++static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino, ++ unsigned int d_type, struct au_nhash *delist) ++{ ++ int err; ++ unsigned int sz; ++ const unsigned int deblk_sz = vdir->vd_deblk_sz; ++ union au_vdir_deblk_p p, *room, deblk_end; ++ struct au_vdir_dehstr *dehstr; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ room = &vdir->vd_last.p; ++ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk ++ || !is_deblk_end(room, &deblk_end)); ++ ++ sz = calc_size(nlen); ++ if (unlikely(sz > deblk_end.deblk - room->deblk)) { ++ err = append_deblk(vdir); ++ if (unlikely(err)) ++ goto out; ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + deblk_sz; ++ /* smp_mb(); */ ++ AuDebugOn(room->deblk != p.deblk); ++ } ++ ++ err = -ENOMEM; ++ dehstr = au_cache_alloc_dehstr(); ++ if (unlikely(!dehstr)) ++ goto out; ++ ++ dehstr->str = &room->de->de_str; ++ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen)); ++ room->de->de_ino = ino; ++ room->de->de_type = d_type; ++ room->de->de_str.len = nlen; ++ memcpy(room->de->de_str.name, name, nlen); ++ ++ err = 0; ++ room->deblk += sz; ++ if (unlikely(set_deblk_end(room, &deblk_end))) ++ err = append_deblk(vdir); ++ /* smp_mb(); */ ++ ++ out: ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_vdir_free(struct au_vdir *vdir) ++{ ++ unsigned char **deblk; ++ ++ deblk = vdir->vd_deblk; ++ while (vdir->vd_nblk--) ++ kfree(*deblk++); ++ kfree(vdir->vd_deblk); ++ au_cache_free_vdir(vdir); ++} ++ +static struct au_vdir *alloc_vdir(struct super_block *sb) +{ + struct au_vdir *vdir; + int err; + ++ SiMustAnyLock(sb); ++ + err = -ENOMEM; + vdir = au_cache_alloc_vdir(); + if (unlikely(!vdir)) @@ 
-21001,76 +19264,6 @@ index 0000000..9c7b64a + +/* ---------------------------------------------------------------------- */ + -+/* returns found(true) or not */ -+static int test_known(struct au_nhash *delist, char *name, int namelen) -+{ -+ struct hlist_head *head; -+ struct au_vdir_dehstr *tpos; -+ struct hlist_node *pos; -+ struct au_vdir_destr *str; -+ -+ head = au_name_hash(delist, name, namelen); -+ hlist_for_each_entry(tpos, pos, head, hash) { -+ str = tpos->str; -+ AuDbg("%.*s\n", str->len, str->name); -+ if (str->len == namelen && !memcmp(str->name, name, namelen)) -+ return 1; -+ } -+ return 0; -+ -+} -+ -+static int append_de(struct au_vdir *vdir, char *name, int namelen, ino_t ino, -+ unsigned int d_type, struct au_nhash *delist) -+{ -+ int err; -+ unsigned int sz; -+ const unsigned int deblk_sz = vdir->vd_deblk_sz; -+ union au_vdir_deblk_p p, *room, deblk_end; -+ struct au_vdir_dehstr *dehstr; -+ -+ p.deblk = last_deblk(vdir); -+ deblk_end.deblk = p.deblk + deblk_sz; -+ room = &vdir->vd_last.p; -+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk -+ || !is_deblk_end(room, &deblk_end)); -+ -+ sz = calc_size(namelen); -+ if (unlikely(sz > deblk_end.deblk - room->deblk)) { -+ err = append_deblk(vdir); -+ if (unlikely(err)) -+ goto out; -+ -+ p.deblk = last_deblk(vdir); -+ deblk_end.deblk = p.deblk + deblk_sz; -+ /* smp_mb(); */ -+ AuDebugOn(room->deblk != p.deblk); -+ } -+ -+ err = -ENOMEM; -+ dehstr = au_cache_alloc_dehstr(); -+ if (unlikely(!dehstr)) -+ goto out; -+ -+ dehstr->str = &room->de->de_str; -+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, namelen)); -+ room->de->de_ino = ino; -+ room->de->de_type = d_type; -+ room->de->de_str.len = namelen; -+ memcpy(room->de->de_str.name, name, namelen); -+ -+ err = 0; -+ room->deblk += sz; -+ if (unlikely(set_deblk_end(room, &deblk_end))) -+ err = append_deblk(vdir); -+ /* smp_mb(); */ -+ -+ out: -+ return err; -+} -+ -+/* 
---------------------------------------------------------------------- */ -+ +static int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, + unsigned int d_type, ino_t *ino) +{ @@ -21104,63 +19297,76 @@ index 0000000..9c7b64a + return err; +} + ++static int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ return au_ino(sb, bindex, h_ino, d_type, ino); ++#else ++ return 0; ++#endif ++} ++ +#define AuFillVdir_CALLED 1 +#define AuFillVdir_WHABLE (1 << 1) ++#define AuFillVdir_SHWH (1 << 2) +#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name) +#define au_fset_fillvdir(flags, name) { (flags) |= AuFillVdir_##name; } +#define au_fclr_fillvdir(flags, name) { (flags) &= ~AuFillVdir_##name; } + ++#ifndef CONFIG_AUFS_SHWH ++#undef AuFillVdir_SHWH ++#define AuFillVdir_SHWH 0 ++#endif ++ +struct fillvdir_arg { + struct file *file; + struct au_vdir *vdir; -+ struct au_nhash *delist; -+ struct au_nhash *whlist; ++ struct au_nhash delist; ++ struct au_nhash whlist; + aufs_bindex_t bindex; + unsigned int flags; + int err; +}; + -+static int fillvdir(void *__arg, const char *__name, int namelen, ++static int fillvdir(void *__arg, const char *__name, int nlen, + loff_t offset __maybe_unused, u64 h_ino, + unsigned int d_type) +{ + struct fillvdir_arg *arg = __arg; + char *name = (void *)__name; + struct super_block *sb; -+ struct au_nhash *delist, *whlist; + ino_t ino; -+ aufs_bindex_t bindex, bend; ++ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH); + -+ bend = arg->bindex; + arg->err = 0; ++ sb = arg->file->f_dentry->d_sb; + au_fset_fillvdir(arg->flags, CALLED); + /* smp_mb(); */ -+ if (namelen <= AUFS_WH_PFX_LEN ++ if (nlen <= AUFS_WH_PFX_LEN + || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ delist = arg->delist; -+ whlist = arg->whlist; -+ for (bindex = 0; bindex < bend; bindex++) -+ if (test_known(delist++, name, namelen) -+ || 
au_nhash_test_known_wh(whlist + bindex, name, -+ namelen)) -+ goto out; /* already exists or whiteouted */ ++ if (test_known(&arg->delist, name, nlen) ++ || au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already exists or whiteouted */ + + sb = arg->file->f_dentry->d_sb; -+ arg->err = au_ino(sb, bend, h_ino, d_type, &ino); ++ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino); + if (!arg->err) -+ arg->err = append_de(arg->vdir, name, namelen, ino, -+ d_type, arg->delist + bend); ++ arg->err = append_de(arg->vdir, name, nlen, ino, ++ d_type, &arg->delist); + } else if (au_ftest_fillvdir(arg->flags, WHABLE)) { + name += AUFS_WH_PFX_LEN; -+ namelen -= AUFS_WH_PFX_LEN; -+ whlist = arg->whlist; -+ for (bindex = 0; bindex < bend; bindex++) -+ if (au_nhash_test_known_wh(whlist++, name, namelen)) -+ goto out; /* already whiteouted */ ++ nlen -= AUFS_WH_PFX_LEN; ++ if (au_nhash_test_known_wh(&arg->whlist, name, nlen)) ++ goto out; /* already whiteouted */ + ++ if (shwh) ++ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type, ++ &ino); + if (!arg->err) + arg->err = au_nhash_append_wh -+ (arg->whlist + bend, name, namelen, bend); ++ (&arg->whlist, name, nlen, ino, d_type, ++ arg->bindex, shwh); + } + + out: @@ -21171,28 +19377,83 @@ index 0000000..9c7b64a + return arg->err; +} + ++static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir, ++ struct au_nhash *whlist, struct au_nhash *delist) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ int err; ++ unsigned int nh, u; ++ struct hlist_head *head; ++ struct au_vdir_wh *tpos; ++ struct hlist_node *pos, *n; ++ char *p, *o; ++ struct au_vdir_destr *destr; ++ ++ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH)); ++ ++ err = -ENOMEM; ++ o = p = __getname(); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = 0; ++ nh = whlist->nh_num; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ for (u = 0; u < nh; u++) { ++ head = whlist->nh_head + u; ++ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) 
{ ++ destr = &tpos->wh_str; ++ memcpy(p, destr->name, destr->len); ++ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN, ++ tpos->wh_ino, tpos->wh_type, delist); ++ if (unlikely(err)) ++ break; ++ } ++ } ++ ++ __putname(o); ++ ++ out: ++ AuTraceErr(err); ++ return err; ++#else ++ return 0; ++#endif ++} ++ +static int au_do_read_vdir(struct fillvdir_arg *arg) +{ + int err; ++ unsigned int rdhash; + loff_t offset; -+ aufs_bindex_t bend, bindex; ++ aufs_bindex_t bend, bindex, bstart; ++ unsigned char shwh; + struct file *hf, *file; + struct super_block *sb; + -+ err = -ENOMEM; + file = arg->file; + sb = file->f_dentry->d_sb; -+ bend = au_fbend(file); -+ arg->delist = au_nhash_alloc(sb, bend, GFP_NOFS); -+ if (unlikely(!arg->delist)) ++ SiMustAnyLock(sb); ++ ++ rdhash = au_sbi(sb)->si_rdhash; ++ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS); ++ if (unlikely(err)) + goto out; -+ arg->whlist = au_nhash_alloc(sb, bend, GFP_NOFS); -+ if (unlikely(!arg->whlist)) ++ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS); ++ if (unlikely(err)) + goto out_delist; + + err = 0; + arg->flags = 0; -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { ++ shwh = 0; ++ if (au_opt_test(au_mntflags(sb), SHWH)) { ++ shwh = 1; ++ au_fset_fillvdir(arg->flags, SHWH); ++ } ++ bstart = au_fbstart(file); ++ bend = au_fbend(file); ++ for (bindex = bstart; !err && bindex <= bend; bindex++) { + hf = au_h_fptr(file, bindex); + if (!hf) + continue; @@ -21204,8 +19465,9 @@ index 0000000..9c7b64a + + arg->bindex = bindex; + au_fclr_fillvdir(arg->flags, WHABLE); -+ if (bindex != bend -+ && au_br_whable(au_sbr_perm(sb, bindex))) ++ if (shwh ++ || (bindex != bend ++ && au_br_whable(au_sbr_perm(sb, bindex)))) + au_fset_fillvdir(arg->flags, WHABLE); + do { + arg->err = 0; @@ -21216,10 +19478,14 @@ index 0000000..9c7b64a + err = arg->err; + } while (!err && au_ftest_fillvdir(arg->flags, CALLED)); + } -+ au_nhash_wh_free(arg->whlist, bend); ++ ++ if (!err && shwh) ++ err = 
au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist); ++ ++ au_nhash_wh_free(&arg->whlist); + + out_delist: -+ au_nhash_de_free(arg->delist, bend); ++ au_nhash_de_free(&arg->delist); + out: + return err; +} @@ -21236,6 +19502,8 @@ index 0000000..9c7b64a + err = 0; + inode = file->f_dentry->d_inode; + IMustLock(inode); ++ SiMustAnyLock(inode->i_sb); ++ + allocated = NULL; + do_read = 0; + expire = au_sbi(inode->i_sb)->si_rdcache; @@ -21289,8 +19557,8 @@ index 0000000..9c7b64a + if (tgt->vd_nblk < src->vd_nblk) { + unsigned char **p; + -+ p = au_kzrealloc(tgt->vd_deblk, sizeof(*p) * tgt->vd_nblk, -+ sizeof(*p) * src->vd_nblk, GFP_NOFS); ++ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, ++ GFP_NOFS); + if (unlikely(!p)) + goto out; + tgt->vd_deblk = p; @@ -21306,10 +19574,9 @@ index 0000000..9c7b64a + + n = src->vd_nblk; + for (ul = 1; ul < n; ul++) { -+ tgt->vd_deblk[ul] = kmalloc(deblk_sz, GFP_NOFS); -+ if (tgt->vd_deblk[ul]) -+ memcpy(tgt->vd_deblk[ul], src->vd_deblk[ul], deblk_sz); -+ else ++ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz, ++ GFP_NOFS); ++ if (unlikely(!tgt->vd_deblk[ul])) + goto out; + } + /* smp_mb(); */ @@ -21473,12 +19740,10 @@ index 0000000..9c7b64a + /* smp_mb(); */ + return 0; +} -diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c -new file mode 100644 -index 0000000..f58d07d ---- /dev/null -+++ b/fs/aufs/vfsub.c -@@ -0,0 +1,724 @@ +diff -urN linux-2.6.30.org/fs/aufs/vfsub.c linux-2.6.30/fs/aufs/vfsub.c +--- linux-2.6.30.org/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/vfsub.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,736 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -21486,12 +19751,24 @@ index 0000000..f58d07d + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * sub-routines for VFS + */ + ++#include ++#include ++#include +#include +#include "aufs.h" + @@ -22203,12 +20480,10 @@ index 0000000..f58d07d + + return err; +} -diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h -new file mode 100644 -index 0000000..d0172c7 ---- /dev/null -+++ b/fs/aufs/vfsub.h -@@ -0,0 +1,165 @@ +diff -urN linux-2.6.30.org/fs/aufs/vfsub.h linux-2.6.30/fs/aufs/vfsub.h +--- linux-2.6.30.org/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/vfsub.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,172 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -22216,6 +20491,15 @@ index 0000000..d0172c7 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -22229,9 +20513,6 @@ index 0000000..d0172c7 + +#include +#include -+#include -+#include -+#include + +/* ---------------------------------------------------------------------- */ + @@ -22242,6 +20523,7 @@ index 0000000..d0172c7 + AuLsc_I_Begin = I_MUTEX_QUOTA, /* 4 */ + AuLsc_I_PARENT, /* lower inode, parent first */ + AuLsc_I_PARENT2, /* copyup dirs */ ++ AuLsc_I_PARENT3, /* copyup wh */ + AuLsc_I_CHILD, + AuLsc_I_CHILD2, + AuLsc_I_End @@ -22374,12 +20656,10 @@ index 0000000..d0172c7 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_VFSUB_H__ */ -diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c -new file mode 100644 -index 0000000..436ebfc ---- /dev/null -+++ b/fs/aufs/wbr_policy.c -@@ -0,0 +1,628 @@ +diff -urN linux-2.6.30.org/fs/aufs/wbr_policy.c linux-2.6.30/fs/aufs/wbr_policy.c +--- linux-2.6.30.org/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/wbr_policy.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,641 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -22387,6 +20667,15 @@ index 0000000..436ebfc + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -22681,6 +20970,7 @@ index 0000000..436ebfc + + err = au_wbr_bu(sb, au_sbend(sb)); + atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */ ++ /* smp_mb(); */ + + AuDbg("b%d\n", err); + return err; @@ -22747,6 +21037,7 @@ index 0000000..436ebfc + bavail = 0; + sb = dentry->d_sb; + mfs = &au_sbi(sb)->si_wbr_mfs; ++ MtxMustLock(&mfs->mfs_lock); + mfs->mfs_bindex = -EROFS; + mfs->mfsrr_bytes = 0; + bend = au_sbend(sb); @@ -22833,8 +21124,10 @@ index 0000000..436ebfc + err = au_wbr_create_mfs(dentry, isdir); + if (err >= 0) { + mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs; ++ mutex_lock(&mfs->mfs_lock); + if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) + err = au_wbr_create_rr(dentry, isdir); ++ mutex_unlock(&mfs->mfs_lock); + } + + AuDbg("b%d\n", err); @@ -23008,12 +21301,10 @@ index 0000000..436ebfc + .fin = au_wbr_create_fin_mfs + } +}; -diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c -new file mode 100644 -index 0000000..a45e126 ---- /dev/null -+++ b/fs/aufs/whout.c -@@ -0,0 +1,1030 @@ +diff -urN linux-2.6.30.org/fs/aufs/whout.c linux-2.6.30/fs/aufs/whout.c +--- linux-2.6.30.org/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/whout.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,1044 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -23021,6 +21312,15 @@ index 0000000..a45e126 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -23876,7 +22176,7 @@ index 0000000..a45e126 +struct del_wh_children_args { + int *errp; + struct dentry *h_dentry; -+ struct au_nhash *whlist; ++ struct au_nhash whlist; + aufs_bindex_t bindex; + struct au_branch *br; +}; @@ -23884,7 +22184,7 @@ index 0000000..a45e126 +static void call_del_wh_children(void *args) +{ + struct del_wh_children_args *a = args; -+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); ++ *a->errp = del_wh_children(a->h_dentry, &a->whlist, a->bindex, a->br); +} + +/* ---------------------------------------------------------------------- */ @@ -23892,29 +22192,34 @@ index 0000000..a45e126 +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp) +{ + struct au_whtmp_rmdir *whtmp; ++ int err; ++ ++ SiMustAnyLock(sb); + + whtmp = kmalloc(sizeof(*whtmp), gfp); -+ if (unlikely(!whtmp)) ++ if (unlikely(!whtmp)) { ++ whtmp = ERR_PTR(-ENOMEM); + goto out; ++ } + + whtmp->dir = NULL; + whtmp->wh_dentry = NULL; -+ whtmp->whlist = au_nhash_alloc(sb, /*bend*/0, gfp); -+ if (unlikely(!whtmp->whlist)) -+ goto out_whtmp; -+ return whtmp; /* success */ ++ err = au_nhash_alloc(&whtmp->whlist, au_sbi(sb)->si_rdhash, gfp); ++ if (!err) ++ return whtmp; /* success */ + -+ out_whtmp: + kfree(whtmp); ++ whtmp = ERR_PTR(err); ++ + out: -+ return ERR_PTR(-ENOMEM); ++ return whtmp; +} + +void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp) +{ + dput(whtmp->wh_dentry); + iput(whtmp->dir); -+ au_nhash_wh_free(whtmp->whlist, /*bend*/0); ++ au_nhash_wh_free(&whtmp->whlist); + kfree(whtmp); +} + @@ -23948,7 +22253,7 @@ index 0000000..a45e126 + struct del_wh_children_args args = { + .errp = &err, + .h_dentry = wh_dentry, -+ .whlist = whlist, ++ 
.whlist = *whlist, + .bindex = bindex, + .br = br + }; @@ -24009,7 +22314,7 @@ index 0000000..a45e126 + err = mnt_want_write(br->br_mnt); + if (!err) { + err = au_whtmp_rmdir(a->dir, a->bindex, a->wh_dentry, -+ a->whlist); ++ &a->whlist); + mnt_drop_write(br->br_mnt); + } + } @@ -24044,12 +22349,10 @@ index 0000000..a45e126 + au_whtmp_rmdir_free(args); + } +} -diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h -new file mode 100644 -index 0000000..3247a4f ---- /dev/null -+++ b/fs/aufs/whout.h -@@ -0,0 +1,79 @@ +diff -urN linux-2.6.30.org/fs/aufs/whout.h linux-2.6.30/fs/aufs/whout.h +--- linux-2.6.30.org/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/whout.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,87 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -24057,6 +22360,15 @@ index 0000000..3247a4f + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -24068,7 +22380,6 @@ index 0000000..3247a4f + +#ifdef __KERNEL__ + -+#include +#include +#include "dir.h" + @@ -24104,7 +22415,7 @@ index 0000000..3247a4f + struct inode *dir; + aufs_bindex_t bindex; + struct dentry *wh_dentry; -+ struct au_nhash *whlist; ++ struct au_nhash whlist; +}; + +struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp); @@ -24129,12 +22440,10 @@ index 0000000..3247a4f + +#endif /* __KERNEL__ */ +#endif /* __AUFS_WHOUT_H__ */ -diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c -new file mode 100644 -index 0000000..41e30be ---- /dev/null -+++ b/fs/aufs/wkq.c -@@ -0,0 +1,249 @@ +diff -urN linux-2.6.30.org/fs/aufs/wkq.c linux-2.6.30/fs/aufs/wkq.c +--- linux-2.6.30.org/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/wkq.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,259 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -24142,6 +22451,15 @@ index 0000000..41e30be + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -24149,6 +22467,7 @@ index 0000000..41e30be + * todo: try new dredential scheme + */ + ++#include +#include "aufs.h" + +/* internal workqueue named AUFS_WKQ_NAME */ @@ -24221,7 +22540,7 @@ index 0000000..41e30be + struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk); + + wkinfo->func(wkinfo->args); -+ atomic_dec(wkinfo->busyp); ++ atomic_dec_return(wkinfo->busyp); + if (au_ftest_wkq(wkinfo->flags, WAIT)) + complete(wkinfo->comp); + else { @@ -24384,12 +22703,10 @@ index 0000000..41e30be + out: + return err; +} -diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h -new file mode 100644 -index 0000000..17d24d8 ---- /dev/null -+++ b/fs/aufs/wkq.h -@@ -0,0 +1,72 @@ +diff -urN linux-2.6.30.org/fs/aufs/wkq.h linux-2.6.30/fs/aufs/wkq.h +--- linux-2.6.30.org/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/wkq.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,82 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -24397,6 +22714,15 @@ index 0000000..17d24d8 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* @@ -24409,11 +22735,12 @@ index 0000000..17d24d8 + +#ifdef __KERNEL__ + -+#include +#include -+#include ++#include +#include + ++struct super_block; ++ +/* ---------------------------------------------------------------------- */ + +/* @@ -24462,12 +22789,10 @@ index 0000000..17d24d8 + +#endif /* __KERNEL__ */ +#endif /* __AUFS_WKQ_H__ */ -diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c -new file mode 100644 -index 0000000..7d11111 ---- /dev/null -+++ b/fs/aufs/xino.c -@@ -0,0 +1,1181 @@ +diff -urN linux-2.6.30.org/fs/aufs/xino.c linux-2.6.30/fs/aufs/xino.c +--- linux-2.6.30.org/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/fs/aufs/xino.c 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,1200 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -24475,12 +22800,22 @@ index 0000000..7d11111 + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * external inode number translation table and bitmap + */ + ++#include +#include +#include +#include "aufs.h" @@ -24757,14 +23092,11 @@ index 0000000..7d11111 + ii_read_lock_parent(dir); + bindex = au_br_index(sb, br->br_id); + err = au_xino_trunc(sb, bindex); -+ if (unlikely(err)) -+ goto out; -+ -+ if (br->br_xino.xi_file->f_dentry->d_inode->i_blocks ++ if (!err ++ && br->br_xino.xi_file->f_dentry->d_inode->i_blocks + >= br->br_xino_upper) + br->br_xino_upper += AUFS_XINO_TRUNC_STEP; + -+ out: + ii_read_unlock(dir); + if (unlikely(err)) + AuWarn("err b%d, (%d)\n", bindex, err); @@ -24794,7 +23126,7 @@ index 0000000..7d11111 + goto out_args; + } + -+ atomic_inc(&br->br_count); ++ atomic_inc_return(&br->br_count); + args->sb = sb; + args->br = br; + wkq_err = au_wkq_nowait(xino_do_trunc, args, sb); @@ -24802,12 +23134,12 @@ index 0000000..7d11111 + return; /* success */ + + AuErr("wkq %d\n", wkq_err); -+ atomic_dec(&br->br_count); ++ atomic_dec_return(&br->br_count); + + out_args: + kfree(args); + out: -+ atomic_dec(&br->br_xino_running); ++ atomic_dec_return(&br->br_xino_running); +} + +/* ---------------------------------------------------------------------- */ @@ -24848,6 +23180,7 @@ index 0000000..7d11111 + + BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max) + || ((loff_t)-1) > 0); ++ SiMustAnyLock(sb); + + mnt_flags = au_mntflags(sb); + if (!au_opt_test(mnt_flags, XINO)) @@ -25222,6 +23555,7 @@ index 0000000..7d11111 + + err = 0; + sbinfo = au_sbi(sb); ++ MtxMustLock(&sbinfo->si_xib_mtx); + p = sbinfo->si_xib_buf; + func = sbinfo->si_xread; + pend = i_size_read(file->f_dentry->d_inode); @@ -25286,6 +23620,8 @@ index 0000000..7d11111 + unsigned long *p; + struct file *file; + ++ SiMustWriteLock(sb); ++ + err = 0; + sbinfo = 
au_sbi(sb); + if (!au_opt_test(sbinfo->si_mntflags, XINO)) @@ -25362,6 +23698,8 @@ index 0000000..7d11111 +{ + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + sbinfo->si_xread = NULL; + sbinfo->si_xwrite = NULL; @@ -25379,6 +23717,8 @@ index 0000000..7d11111 + struct au_sbinfo *sbinfo; + struct file *file; + ++ SiMustWriteLock(sb); ++ + sbinfo = au_sbi(sb); + file = au_xino_create2(base, sbinfo->si_xib); + err = PTR_ERR(file); @@ -25451,6 +23791,8 @@ index 0000000..7d11111 + struct inode *inode; + au_writef_t writef; + ++ SiMustWriteLock(sb); ++ + err = -ENOMEM; + bend = au_sbend(sb); + fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS); @@ -25526,6 +23868,8 @@ index 0000000..7d11111 + struct inode *dir; + struct au_sbinfo *sbinfo; + ++ SiMustWriteLock(sb); ++ + err = 0; + sbinfo = au_sbi(sb); + parent = dget_parent(xino->file->f_dentry); @@ -25649,11 +23993,37 @@ index 0000000..7d11111 + out: + return err; +} -diff --git a/fs/namei.c b/fs/namei.c -index bbc15c2..db581b4 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -1196,7 +1196,7 @@ out: +diff -urN linux-2.6.30.org/fs/Kconfig linux-2.6.30/fs/Kconfig +--- linux-2.6.30.org/fs/Kconfig 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/fs/Kconfig 2009-07-21 09:13:25.176633466 +0200 +@@ -176,6 +176,7 @@ + source "fs/sysv/Kconfig" + source "fs/ufs/Kconfig" + source "fs/exofs/Kconfig" ++source "fs/aufs/Kconfig" + + config NILFS2_FS + tristate "NILFS2 file system support (EXPERIMENTAL)" +diff -urN linux-2.6.30.org/fs/Makefile linux-2.6.30/fs/Makefile +--- linux-2.6.30.org/fs/Makefile 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/fs/Makefile 2009-07-21 09:13:25.184549275 +0200 +@@ -124,3 +124,4 @@ + obj-$(CONFIG_BTRFS_FS) += btrfs/ + obj-$(CONFIG_GFS2_FS) += gfs2/ + obj-$(CONFIG_EXOFS_FS) += exofs/ ++obj-$(CONFIG_AUFS_FS) += aufs/ +diff -urN linux-2.6.30.org/fs/namei.c linux-2.6.30/fs/namei.c +--- linux-2.6.30.org/fs/namei.c 2009-06-10 05:05:27.000000000 +0200 ++++ 
linux-2.6.30/fs/namei.c 2009-07-21 09:13:29.146633618 +0200 +@@ -337,6 +337,7 @@ + + return 0; + } ++EXPORT_SYMBOL(deny_write_access); + + /** + * path_get - get a reference to a path +@@ -1200,7 +1201,7 @@ * needs parent already locked. Doesn't follow mounts. * SMP-safe. */ @@ -25662,20 +24032,51 @@ index bbc15c2..db581b4 100644 { int err; -@@ -1206,7 +1206,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) +@@ -1209,8 +1210,9 @@ + return ERR_PTR(err); return __lookup_hash(&nd->last, nd->path.dentry, nd); } ++EXPORT_SYMBOL(lookup_hash); -static int __lookup_one_len(const char *name, struct qstr *this, +int __lookup_one_len(const char *name, struct qstr *this, struct dentry *base, int len) { unsigned long hash; -diff --git a/fs/splice.c b/fs/splice.c -index 4ed0ba4..2fb3d17 100644 ---- a/fs/splice.c -+++ b/fs/splice.c -@@ -888,8 +888,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); +@@ -1231,6 +1233,7 @@ + this->hash = end_name_hash(hash); + return 0; + } ++EXPORT_SYMBOL(__lookup_one_len); + + /** + * lookup_one_len - filesystem helper to lookup single pathname component +diff -urN linux-2.6.30.org/fs/namespace.c linux-2.6.30/fs/namespace.c +--- linux-2.6.30.org/fs/namespace.c 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/fs/namespace.c 2009-07-21 09:13:29.166633410 +0200 +@@ -38,6 +38,7 @@ + + /* spinlock for vfsmount related operations, inplace of dcache_lock */ + __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); ++EXPORT_SYMBOL(vfsmount_lock); + + static int event; + static DEFINE_IDA(mnt_id_ida); +diff -urN linux-2.6.30.org/fs/open.c linux-2.6.30/fs/open.c +--- linux-2.6.30.org/fs/open.c 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/fs/open.c 2009-07-21 09:13:29.183798348 +0200 +@@ -221,6 +221,7 @@ + mutex_unlock(&dentry->d_inode->i_mutex); + return err; + } ++EXPORT_SYMBOL(do_truncate); + + static long do_sys_truncate(const char __user *pathname, loff_t length) + { +diff -urN linux-2.6.30.org/fs/splice.c linux-2.6.30/fs/splice.c 
+--- linux-2.6.30.org/fs/splice.c 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/fs/splice.c 2009-07-21 09:13:29.216632034 +0200 +@@ -905,8 +905,8 @@ /* * Attempt to initiate a splice from pipe to file. */ @@ -25686,7 +24087,12 @@ index 4ed0ba4..2fb3d17 100644 { int ret; -@@ -912,9 +912,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +@@ -925,13 +925,14 @@ + + return out->f_op->splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL(do_splice_from); + /* * Attempt to initiate a splice from a file to a pipe. */ @@ -25699,24 +24105,18 @@ index 4ed0ba4..2fb3d17 100644 { int ret; -diff --git a/include/linux/Kbuild b/include/linux/Kbuild -index 106c3ba..d0c7262 100644 ---- a/include/linux/Kbuild -+++ b/include/linux/Kbuild -@@ -34,6 +34,7 @@ header-y += atmppp.h - header-y += atmsap.h - header-y += atmsvc.h - header-y += atm_zatm.h -+header-y += aufs_type.h - header-y += auto_fs4.h - header-y += ax25.h - header-y += b1lli.h -diff --git a/include/linux/aufs_type.h b/include/linux/aufs_type.h -new file mode 100644 -index 0000000..5c98a5f ---- /dev/null -+++ b/include/linux/aufs_type.h -@@ -0,0 +1,100 @@ +@@ -947,6 +948,7 @@ + + return in->f_op->splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL(do_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +diff -urN linux-2.6.30.org/include/linux/aufs_type.h linux-2.6.30/include/linux/aufs_type.h +--- linux-2.6.30.org/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30/include/linux/aufs_type.h 2009-07-21 08:54:19.000000000 +0200 +@@ -0,0 +1,109 @@ +/* + * Copyright (C) 2005-2009 Junjiro R. Okajima + * @@ -25724,6 +24124,15 @@ index 0000000..5c98a5f + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __AUFS_TYPE_H__ @@ -25731,7 +24140,7 @@ index 0000000..5c98a5f + +#include + -+#define AUFS_VERSION "2-29" ++#define AUFS_VERSION "2-standalone.tree-30-20090720" + +/* todo? move this to linux-2.6.19/include/magic.h */ +#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') @@ -25817,11 +24226,21 @@ index 0000000..5c98a5f +#define AUFS_CTL_PLINK_CLEAN _IO(AuCtlType, AuCtl_PLINK_CLEAN) + +#endif /* __AUFS_TYPE_H__ */ -diff --git a/include/linux/namei.h b/include/linux/namei.h -index fc2e035..182d43b 100644 ---- a/include/linux/namei.h -+++ b/include/linux/namei.h -@@ -75,6 +75,9 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry +diff -urN linux-2.6.30.org/include/linux/Kbuild linux-2.6.30/include/linux/Kbuild +--- linux-2.6.30.org/include/linux/Kbuild 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/include/linux/Kbuild 2009-07-21 09:13:25.239966985 +0200 +@@ -34,6 +34,7 @@ + header-y += atmsap.h + header-y += atmsvc.h + header-y += atm_zatm.h ++header-y += aufs_type.h + header-y += auto_fs4.h + header-y += ax25.h + header-y += b1lli.h +diff -urN linux-2.6.30.org/include/linux/namei.h linux-2.6.30/include/linux/namei.h +--- linux-2.6.30.org/include/linux/namei.h 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/include/linux/namei.h 2009-07-21 09:13:29.216632034 +0200 +@@ -74,6 +74,9 @@ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); extern void release_open_intent(struct nameidata *); @@ -25831,11 
+24250,10 @@ index fc2e035..182d43b 100644 extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); -diff --git a/include/linux/splice.h b/include/linux/splice.h -index 528dcb9..5123bc6 100644 ---- a/include/linux/splice.h -+++ b/include/linux/splice.h -@@ -71,4 +71,10 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *, +diff -urN linux-2.6.30.org/include/linux/splice.h linux-2.6.30/include/linux/splice.h +--- linux-2.6.30.org/include/linux/splice.h 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/include/linux/splice.h 2009-07-21 09:13:29.234192642 +0200 +@@ -83,4 +83,10 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, splice_direct_actor *); @@ -25846,3 +24264,97 @@ index 528dcb9..5123bc6 100644 + unsigned int flags); + #endif +diff -urN linux-2.6.30.org/security/device_cgroup.c linux-2.6.30/security/device_cgroup.c +--- linux-2.6.30.org/security/device_cgroup.c 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/security/device_cgroup.c 2009-07-21 09:13:29.234192642 +0200 +@@ -512,6 +512,7 @@ + + return -EPERM; + } ++EXPORT_SYMBOL(devcgroup_inode_permission); + + int devcgroup_inode_mknod(int mode, dev_t dev) + { +diff -urN linux-2.6.30.org/security/security.c linux-2.6.30/security/security.c +--- linux-2.6.30.org/security/security.c 2009-06-10 05:05:27.000000000 +0200 ++++ linux-2.6.30/security/security.c 2009-07-21 09:13:29.234192642 +0200 +@@ -389,6 +389,7 @@ + return 0; + return security_ops->path_mkdir(path, dentry, mode); + } ++EXPORT_SYMBOL(security_path_mkdir); + + int security_path_rmdir(struct path *path, struct dentry *dentry) + { +@@ -396,6 +397,7 @@ + return 0; + return security_ops->path_rmdir(path, dentry); + } ++EXPORT_SYMBOL(security_path_rmdir); + + int security_path_unlink(struct path *path, struct dentry *dentry) + { +@@ -403,6 +405,7 @@ + return 0; + return security_ops->path_unlink(path, dentry); + } 
++EXPORT_SYMBOL(security_path_unlink); + + int security_path_symlink(struct path *path, struct dentry *dentry, + const char *old_name) +@@ -411,6 +414,7 @@ + return 0; + return security_ops->path_symlink(path, dentry, old_name); + } ++EXPORT_SYMBOL(security_path_symlink); + + int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry) +@@ -419,6 +423,7 @@ + return 0; + return security_ops->path_link(old_dentry, new_dir, new_dentry); + } ++EXPORT_SYMBOL(security_path_link); + + int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry) +@@ -429,6 +434,7 @@ + return security_ops->path_rename(old_dir, old_dentry, new_dir, + new_dentry); + } ++EXPORT_SYMBOL(security_path_rename); + + int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs) +@@ -437,6 +443,7 @@ + return 0; + return security_ops->path_truncate(path, length, time_attrs); + } ++EXPORT_SYMBOL(security_path_truncate); + #endif + + int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) +@@ -508,6 +515,7 @@ + return 0; + return security_ops->inode_readlink(dentry); + } ++EXPORT_SYMBOL(security_inode_readlink); + + int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) + { +@@ -522,6 +530,7 @@ + return 0; + return security_ops->inode_permission(inode, mask); + } ++EXPORT_SYMBOL(security_inode_permission); + + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) + { +@@ -622,6 +631,7 @@ + { + return security_ops->file_permission(file, mask); + } ++EXPORT_SYMBOL(security_file_permission); + + int security_file_alloc(struct file *file) + { diff --git a/kernel-inittmpfs.patch b/kernel-inittmpfs.patch index 70fdf2a5..3bb4c247 100644 --- a/kernel-inittmpfs.patch +++ b/kernel-inittmpfs.patch @@ -82,16 +82,16 @@ diff -ur linux-2.6.14.orig/init/initramfs.c linux-2.6.14/init/initramfs.c extern char __initramfs_start[], 
__initramfs_end[]; #ifdef CONFIG_BLK_DEV_INITRD #include -@@ -482,6 +526,9 @@ - initrd_end - initrd_start, 1); - if (!err) { - printk(" it is\n"); +@@ -619,6 +619,9 @@ + #ifdef CONFIG_BLK_DEV_RAM + int fd; + printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n"); +#ifdef CONFIG_EARLYUSERSPACE_ON_TMPFS -+ overmount_rootfs(); ++ overmount_rootfs(); +#endif /* CONFIG_EARLYUSERSPACE_ON_TMPFS */ - unpack_to_rootfs((char *)initrd_start, - initrd_end - initrd_start, 0); - free_initrd_mem(initrd_start, initrd_end); + err = unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start); + if (!err) { diff -ur linux-2.6.14.orig/init/main.c linux-2.6.14/init/main.c --- linux-2.6.14.orig/init/main.c 2005-09-11 03:19:07.000000000 +0000 +++ linux-2.6.14/init/main.c 2005-09-11 03:22:43.000000000 +0000 diff --git a/kernel-patches.config b/kernel-patches.config index 06dbcce4..50595067 100644 --- a/kernel-patches.config +++ b/kernel-patches.config @@ -40,6 +40,7 @@ AUFS_RR_SQUASHFS all=y AUFS_BR_NFS all=y AUFS_BR_NFS_V4 all=y AUFS_BR_XFS all=y +AUFS_BR_FUSE all=y AUFS_GETATTR all=y AUFS_HIN_OR_DLGT all=y AUFS_INO_T_64 all=y -- 2.44.0