]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-unionfs.patch
- up for 3.2
[packages/kernel.git] / kernel-unionfs.patch
CommitLineData
0c5527e5 1diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
82260373 2index 8c624a1..4aa288b 100644
0c5527e5
AM
3--- a/Documentation/filesystems/00-INDEX
4+++ b/Documentation/filesystems/00-INDEX
82260373 5@@ -110,6 +110,8 @@ udf.txt
2380c486
JR
6 - info and mount options for the UDF filesystem.
7 ufs.txt
8 - info on the ufs filesystem.
9+unionfs/
10+ - info on the unionfs filesystem
11 vfat.txt
12 - info on using the VFAT filesystem used in Windows NT and Windows 95
13 vfs.txt
0c5527e5
AM
14diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
15new file mode 100644
16index 0000000..96fdf67
17--- /dev/null
18+++ b/Documentation/filesystems/unionfs/00-INDEX
2380c486
JR
19@@ -0,0 +1,10 @@
20+00-INDEX
21+ - this file.
22+concepts.txt
23+ - A brief introduction of concepts.
24+issues.txt
25+ - A summary of known issues with unionfs.
26+rename.txt
27+ - Information regarding rename operations.
28+usage.txt
29+ - Usage information and examples.
0c5527e5
AM
30diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
31new file mode 100644
32index 0000000..b853788
33--- /dev/null
34+++ b/Documentation/filesystems/unionfs/concepts.txt
2380c486
JR
35@@ -0,0 +1,287 @@
36+Unionfs 2.x CONCEPTS:
37+=====================
38+
39+This file describes the concepts needed by a namespace unification file
40+system.
41+
42+
43+Branch Priority:
44+================
45+
46+Each branch is assigned a unique priority - starting from 0 (highest
47+priority). No two branches can have the same priority.
48+
49+
50+Branch Mode:
51+============
52+
53+Each branch is assigned a mode - read-write or read-only. This allows
54+directories on media mounted read-write to be used in a read-only manner.
55+
56+
57+Whiteouts:
58+==========
59+
60+A whiteout removes a file name from the namespace. Whiteouts are needed when
61+one attempts to remove a file on a read-only branch.
62+
63+Suppose we have a two-branch union, where branch 0 is read-write and branch
64+1 is read-only. And a file 'foo' on branch 1:
65+
66+./b0/
67+./b1/
68+./b1/foo
69+
70+The unified view would simply be:
71+
72+./union/
73+./union/foo
74+
75+Since 'foo' is stored on a read-only branch, it cannot be removed. A
76+whiteout is used to remove the name 'foo' from the unified namespace. Again,
77+since branch 1 is read-only, the whiteout cannot be created there. So, we
78+try on a higher priority (lower numerically) branch and create the whiteout
79+there.
80+
81+./b0/
82+./b0/.wh.foo
83+./b1/
84+./b1/foo
85+
86+Later, when Unionfs traverses branches (due to lookup or readdir), it
87+eliminates 'foo' from the namespace (as well as the whiteout itself.)
88+
89+
90+Opaque Directories:
91+===================
92+
93+Assume we have a unionfs mount comprising two branches. Branch 0 is
94+empty; branch 1 has the directory /a and file /a/f. Let's say we mount a
95+union of branch 0 as read-write and branch 1 as read-only. Now, let's say
96+we try to perform the following operation in the union:
97+
98+ rm -fr a
99+
100+Because branch 1 is not writable, we cannot physically remove the file /a/f
101+or the directory /a. So instead, we will create a whiteout in branch 0
102+named /.wh.a, masking out the name "a" from branch 1. Next, let's say we
103+try to create a directory named "a" as follows:
104+
105+ mkdir a
106+
107+Because we have a whiteout for "a" already, Unionfs behaves as if "a"
108+doesn't exist, and thus will delete the whiteout and replace it with an
109+actual directory named "a".
110+
111+The problem now is that if you try to "ls" in the union, Unionfs will
112+perform its normal directory name unification, for *all* directories named
113+"a" in all branches. This will cause the file /a/f from branch 1 to
114+re-appear in the union's namespace, which violates Unix semantics.
115+
116+To avoid this problem, we have a different form of whiteouts for
117+directories, called "opaque directories" (same as BSD Union Mount does).
118+Whenever we replace a whiteout with a directory, that directory is marked as
119+opaque. In Unionfs 2.x, it means that we create a file named
120+/a/.wh.__dir_opaque in branch 0, after having created directory /a there.
121+When unionfs notices that a directory is opaque, it stops all namespace
122+operations (including merging readdir contents) at that opaque directory.
123+This prevents re-exposing names from masked out directories.
124+
125+
126+Duplicate Elimination:
127+======================
128+
129+It is possible for files on different branches to have the same name.
130+Unionfs then has to select which instance of the file to show to the user.
131+Given the fact that each branch has a priority associated with it, the
132+simplest solution is to take the instance from the highest priority
133+(numerically lowest value) and "hide" the others.
134+
135+
136+Unlinking:
137+==========
138+
139+Unlink operation on non-directory instances is optimized to remove the
140+maximum possible objects in case multiple underlying branches have the same
141+file name. The unlink operation will first try to delete file instances
142+from highest priority branch and then move further to delete from remaining
143+branches in order of their decreasing priority. Consider a case (F..D..F),
144+where F is a file and D is a directory of the same name; here, some
145+intermediate branch could have an empty directory instance with the same
146+name, so this operation also tries to delete this directory instance and
147+proceed further to delete from next possible lower priority branch. The
148+unionfs unlink operation will smoothly delete the files with same name from
149+all possible underlying branches. If some error occurs, it creates a
150+whiteout in highest priority branch that will hide file instance in rest of
151+the branches. An error could occur either if an unlink operation in any of
152+the underlying branches failed or if a branch has no write permission.
153+
154+This unlinking policy is known as "delete all" and it has the benefit of
155+overall reducing the number of inodes used by duplicate files, and further
156+reducing the total number of inodes consumed by whiteouts. The cost is of
157+extra processing, but testing shows this extra processing is well worth the
158+savings.
159+
160+
161+Copyup:
162+=======
163+
164+When a change is made to the contents of a file's data or meta-data, they
165+have to be stored somewhere. The best way is to create a copy of the
166+original file on a branch that is writable, and then redirect the write
167+through to this copy. The copy must be made on a higher priority branch so
168+that lookup and readdir return this newer "version" of the file rather than
169+the original (see duplicate elimination).
170+
171+An entire unionfs mount can be read-only or read-write. If it's read-only,
172+then none of the branches will be written to, even if some of the branches
173+are physically writeable. If the unionfs mount is read-write, then the
174+leftmost (highest priority) branch must be writeable (for copyup to take
175+place); the remaining branches can be any mix of read-write and read-only.
176+
177+In a writeable mount, unionfs will create new files/dir in the leftmost
178+branch. If one tries to modify a file in a read-only branch/media, unionfs
179+will copyup the file to the leftmost branch and modify it there. If you try
180+to modify a file from a writeable branch which is not the leftmost branch,
181+then unionfs will modify it in that branch; this is useful if you, say,
182+unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
183+changes to specific package files to remain logically in the directory where
184+they came from.
185+
186+Cache Coherency:
187+================
188+
189+Unionfs users often want to be able to modify files and directories directly
190+on the lower branches, and have those changes be visible at the Unionfs
191+level. This means that data (e.g., pages) and meta-data (dentries, inodes,
192+open files, etc.) have to be synchronized between the upper and lower
193+layers. In other words, the newest changes from a layer below have to be
194+propagated to the Unionfs layer above. If the two layers are not in sync, a
195+cache incoherency ensues, which could lead to application failures and even
196+oopses. The Linux kernel, however, has a rather limited set of mechanisms
197+to ensure this inter-layer cache coherency---so Unionfs has to do most of
198+the hard work on its own.
199+
200+Maintaining Invariants:
201+
202+The way Unionfs ensures cache coherency is as follows. At each entry point
203+to a Unionfs file system method, we call a utility function to validate the
204+primary objects of this method. Generally, we call unionfs_file_revalidate
205+on open files, and __unionfs_d_revalidate_chain on dentries (which also
206+validates inodes). These utility functions check to see whether the upper
207+Unionfs object is in sync with any of the lower objects that it represents.
208+The checks we perform include whether the Unionfs superblock has a newer
209+generation number, or if any of the lower objects mtime's or ctime's are
210+newer. (Note: generation numbers change when branch-management commands are
211+issued, so in a way, maintaining cache coherency is also very important for
212+branch-management.) If indeed we determine that any Unionfs object is no
213+longer in sync with its lower counterparts, then we rebuild that object
214+similarly to how we do so for branch-management.
215+
216+While rebuilding Unionfs's objects, we also purge any page mappings and
217+truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to
218+ensure that Unionfs will re-get the newer data from the lower branches. We
219+perform this purging only if the Unionfs operation in question is a reading
220+operation; if Unionfs is performing a data writing operation (e.g., ->write,
221+->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
222+because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
223+considered more authoritative anyway, as they are newer and will overwrite
224+any lower pages.
225+
226+Unionfs maintains the following important invariant regarding mtime's,
227+ctime's, and atime's: the upper inode object's times are the max() of all of
228+the lower ones. For non-directory objects, there's only one object below,
229+so the mapping is simple; for directory objects, there could be multiple
230+lower objects and we have to sync up with the newest one of all the lower
231+ones. This invariant is important to maintain, especially for directories
232+(besides, we need this to be POSIX compliant). A union could comprise
233+multiple writable branches, each of which could change. If we don't reflect
234+the newest possible mtime/ctime, some applications could fail. For example,
235+NFSv2/v3 exports check for newer directory mtimes on the server to determine
236+if the client-side attribute cache should be purged.
237+
238+To maintain these important invariants, of course, Unionfs carefully
239+synchronizes upper and lower times in various places. For example, if we
240+copy-up a file to a top-level branch, the parent directory where the file
241+was copied up to will now have a new mtime: so after a successful copy-up,
242+we sync up with the new top-level branch's parent directory mtime.
243+
244+Implementation:
245+
246+This cache-coherency implementation is efficient because it defers any
247+synchronizing between the upper and lower layers until absolutely needed.
248+Consider the example of a common situation where users perform a lot of lower
249+changes, such as untarring a whole package. While these take place,
250+typically the user doesn't access the files via Unionfs; only after the
251+lower changes are done, does the user try to access the lower files. With
252+our cache-coherency implementation, the entirety of the changes to the lower
253+branches will not result in a single CPU cycle spent at the Unionfs level
254+until the user invokes a system call that goes through Unionfs.
255+
256+We have considered two alternate cache-coherency designs. (1) Using the
257+dentry/inode notify functionality to register interest in finding out about
258+any lower changes. This is a somewhat limited and also a heavy-handed
259+approach which could result in many notifications to the Unionfs layer upon
260+each small change at the lower layer (imagine a file being modified multiple
261+times in rapid succession). (2) Rewriting the VFS to support explicit
262+callbacks from lower objects to upper objects. We began exploring such an
263+implementation, but found it to be very complicated--it would have resulted
264+in massive VFS/MM changes which are unlikely to be accepted by the LKML
265+community. We therefore believe that our current cache-coherency design and
266+implementation represent the best approach at this time.
267+
268+Limitations:
269+
270+Our implementation works in that as long as a user process will have caused
271+Unionfs to be called, directly or indirectly, even to just do
272+->d_revalidate; then we will have purged the current Unionfs data and the
273+process will see the new data. For example, a process that continually
274+re-reads the same file's data will see the NEW data as soon as the lower
275+file had changed, upon the next read(2) syscall (even if the file is still
276+open!) However, this doesn't work when the process re-reads the open file's
277+data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
278+it). Once we respond to ->readpage(s), then the kernel maps the page into
279+the process's address space and there doesn't appear to be a way to force
280+the kernel to invalidate those pages/mappings, and force the process to
281+re-issue ->readpage. If there's a way to invalidate active mappings and
282+force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
283+the trick).
284+
285+Our current Unionfs code has to perform many file-revalidation calls. It
286+would be really nice if the VFS would export an optional file system hook
287+->file_revalidate (similarly to dentry->d_revalidate) that will be called
288+before each VFS op that has a "struct file" in it.
289+
290+Certain file systems have micro-second granularity (or better) for inode
291+times, and asynchronous actions could cause those times to change with some
292+small delay. In such cases, Unionfs may see a changed inode time that only
293+differs by a tiny fraction of a second: such a change may be a false
294+positive indication that the lower object has changed, whereas if unionfs
295+waits a little longer, that false indication will not be seen. (These false
296+positives are harmless, because they would at most cause unionfs to
297+re-validate an object that may need no revalidation, and print a debugging
298+message that clutters the console/logs.) Therefore, to minimize the chances
299+of these situations, we delay the detection of changed times by a small
300+factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
301+seconds, as does NFS). This means that we will detect the change, only a
302+couple of seconds later, if indeed the time change persists in the lower
303+file object. This delayed detection has an added performance benefit: we
304+reduce the number of times that unionfs has to revalidate objects, in case
305+there's a lot of concurrent activity on both the upper and lower objects,
306+for the same file(s). Lastly, this delayed time attribute detection is
307+similar to how NFS clients operate (e.g., acregmin).
308+
309+Finally, there is no way currently in Linux to prevent lower directories
310+from being moved around (i.e., topology changes); there's no way to prevent
311+modifications to directory sub-trees of whole file systems which are mounted
312+read-write. It is therefore possible for in-flight operations in unionfs to
313+take place, while a lower directory is being moved around. Therefore, if
314+you try to, say, create a new file in a directory through unionfs, while the
315+directory is being moved around directly, then the new file may get created
316+in the new location where that directory was moved to. This is a somewhat
317+similar behaviour in NFS: an NFS client could be creating a new file while
318+the NFS server is moving the directory around; the file will get successfully
319+created in the new location. (The one exception in unionfs is that if the
320+branch is marked read-only by unionfs, then a copyup will take place.)
321+
322+For more information, see <http://unionfs.filesystems.org/>.
0c5527e5
AM
323diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
324new file mode 100644
325index 0000000..f4b7e7e
326--- /dev/null
327+++ b/Documentation/filesystems/unionfs/issues.txt
2380c486
JR
328@@ -0,0 +1,28 @@
329+KNOWN Unionfs 2.x ISSUES:
330+=========================
331+
332+1. Unionfs should not use lookup_one_len() on the underlying f/s as it
333+ confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the
334+ lower file-system, this eliminates part of the problem. The remaining
335+ calls to lookup_one_len may need to be changed to pass an intent. We are
336+ currently introducing VFS changes to fs/namei.c's do_path_lookup() to
337+ allow proper file lookup and opening in stackable file systems.
338+
339+2. Lockdep (a debugging feature) isn't aware of stacking, and so it
340+ incorrectly complains about locking problems. The problem boils down to
341+ this: Lockdep considers all objects of a certain type to be in the same
342+ class, for example, all inodes. Lockdep doesn't like to see a lock held
343+ on two inodes within the same task, and warns that it could lead to a
344+ deadlock. However, stackable file systems do precisely that: they lock
345+ an upper object, and then a lower object, in a strict order to avoid
346+ locking problems; in addition, Unionfs, as a fan-out file system, may
347+ have to lock several lower inodes. We are currently looking into Lockdep
348+ to see how to make it aware of stackable file systems. For now, we
349+ temporarily disable lockdep when calling vfs methods on lower objects,
350+ but only for those places where lockdep complained. While this solution
351+ may seem unclean, it is not without precedent: other places in the kernel
352+ also do similar temporary disabling, of course after carefully having
353+ checked that it is the right thing to do. Anyway, if you get any warnings
354+ from Lockdep, please report them to the Unionfs maintainers.
355+
356+For more information, see <http://unionfs.filesystems.org/>.
0c5527e5
AM
357diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
358new file mode 100644
359index 0000000..e20bb82
360--- /dev/null
361+++ b/Documentation/filesystems/unionfs/rename.txt
2380c486
JR
362@@ -0,0 +1,31 @@
363+Rename is a complex beast. The following table shows which rename(2) operations
364+should succeed and which should fail.
365+
366+o: success
367+E: error (either unionfs or vfs)
368+X: EXDEV
369+
370+none = file does not exist
371+file = file is a file
372+dir = file is an empty directory
373+child= file is a non-empty directory
374+wh = file is a directory containing only whiteouts; this makes it logically
375+ empty
376+
377+ none file dir child wh
378+file o o E E E
379+dir o E o E o
380+child X E X E X
381+wh o E o E o
382+
383+
384+Renaming directories:
385+=====================
386+
387+Whenever an empty (either physically or logically) directory is being renamed,
388+the following sequence of events should take place:
389+
390+1) Remove whiteouts from both source and destination directory
391+2) Rename source to destination
392+3) Make destination opaque to prevent anything under it from showing up
393+
0c5527e5
AM
394diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
395new file mode 100644
396index 0000000..1adde69
397--- /dev/null
398+++ b/Documentation/filesystems/unionfs/usage.txt
2380c486
JR
399@@ -0,0 +1,134 @@
400+Unionfs is a stackable unification file system, which can appear to merge
401+the contents of several directories (branches), while keeping their physical
402+content separate. Unionfs is useful for unified source tree management,
403+merged contents of split CD-ROM, merged separate software package
404+directories, data grids, and more. Unionfs allows any mix of read-only and
405+read-write branches, as well as insertion and deletion of branches anywhere
406+in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
407+duplicates, partial-error conditions, and more.
408+
409+GENERAL SYNTAX
410+==============
411+
412+# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
413+
414+OPTIONS can be any legal combination of:
415+
416+- ro # mount file system read-only
417+- rw # mount file system read-write
418+- remount # remount the file system (see Branch Management below)
419+- incgen # increment generation no. (see Cache Consistency below)
420+
421+BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
422+option, or (2) a list of individual branch manipulation commands, combined
423+with the "remount" option, and is further described in the "Branch
424+Management" section below.
425+
426+The syntax for the "dirs=" mount option is:
427+
428+ dirs=branch[=ro|=rw][:...]
429+
430+The "dirs=" option takes a colon-delimited list of directories to compose
431+the union, with an optional branch mode for each of those directories.
432+Directories that come earlier (specified first, on the left) in the list
433+have a higher precedence than those which come later. Additionally,
434+read-only or read-write permissions of the branch can be specified by
435+appending =ro or =rw (default) to each directory. See the Copyup section in
436+concepts.txt, for a description of Unionfs's behavior when mixing read-only
437+and read-write branches and mounts.
438+
439+Syntax:
440+
441+ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
442+
443+Example:
444+
445+ dirs=/writable_branch=rw:/read-only_branch=ro
446+
447+
448+BRANCH MANAGEMENT
449+=================
450+
451+Once you mount your union for the first time, using the "dirs=" option, you
452+can then change the union's overall mode or reconfigure the branches, using
453+the remount option, as follows.
454+
455+To downgrade a union from read-write to read-only:
456+
457+# mount -t unionfs -o remount,ro none MOUNTPOINT
458+
459+To upgrade a union from read-only to read-write:
460+
461+# mount -t unionfs -o remount,rw none MOUNTPOINT
462+
463+To delete a branch /foo, regardless where it is in the current union:
464+
465+# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
466+
467+To insert (add) a branch /foo before /bar:
468+
469+# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
470+
471+To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
472+
473+# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
474+
475+To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
476+new highest-priority branch), you can use the above syntax, or use a short
477+hand version as follows:
478+
479+# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
480+
481+To append a branch to the very end (new lowest-priority branch):
482+
483+# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
484+
485+To append a branch to the very end (new lowest-priority branch), in
486+read-only mode:
487+
488+# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
489+
490+Finally, to change the mode of one existing branch, say /foo, from read-only
491+to read-write, and change /bar from read-write to read-only:
492+
493+# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
494+
495+Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
496+then Unionfs won't have any writable place for copyups to take place.
497+Moreover, the VFS can get confused when it tries to modify something in a
498+file system mounted read-write, but isn't permitted to write to it.
499+Instead, you should set the whole union as readonly, as described above.
500+If, however, you must set the leftmost branch as readonly, perhaps so you
501+can get a snapshot of it at a point in time, then you should insert a new
502+writable top-level branch, and mark the one you want as readonly. This can
503+be accomplished as follows, assuming that /foo is your current leftmost
504+branch:
505+
506+# mount -t tmpfs -o size=NNN /new
507+# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
508+<do what you want safely in /foo>
509+# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
510+<check if there's anything in /new you want to preserve>
511+# umount /new
512+
513+CACHE CONSISTENCY
514+=================
515+
516+If you modify any file on any of the lower branches directly, while there is
517+a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
518+to purge its caches and re-get the objects. To do that, you have to
519+increment the generation number of the superblock using the following
520+command:
521+
522+# mount -t unionfs -o remount,incgen none MOUNTPOINT
523+
524+Note that the older way of incrementing the generation number using an
525+ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general
526+are not encouraged. Plus, an ioctl is a per-file concept, whereas the
527+generation number is a per-file-system concept. Worse, such an ioctl
528+requires an open file, which then has to be invalidated by the very nature
529+of the generation number increase (read: the old generation increase ioctl
530+was pretty racy).
531+
532+
533+For more information, see <http://unionfs.filesystems.org/>.
0c5527e5 534diff --git a/MAINTAINERS b/MAINTAINERS
6b53c3da 535index 28f65c2..512f312 100644
0c5527e5
AM
536--- a/MAINTAINERS
537+++ b/MAINTAINERS
6b53c3da 538@@ -6612,6 +6612,14 @@ F: Documentation/cdrom/
0c5527e5
AM
539 F: drivers/cdrom/cdrom.c
540 F: include/linux/cdrom.h
541
542+UNIONFS
543+P: Erez Zadok
544+M: ezk@cs.sunysb.edu
545+L: unionfs@filesystems.org
546+W: http://unionfs.filesystems.org/
547+T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git
548+S: Maintained
549+
550 UNSORTED BLOCK IMAGES (UBI)
551 M: Artem Bityutskiy <dedekind1@gmail.com>
552 W: http://www.linux-mtd.infradead.org/
553diff --git a/fs/Kconfig b/fs/Kconfig
6b53c3da 554index 9fe0b34..e863a66 100644
0c5527e5
AM
555--- a/fs/Kconfig
556+++ b/fs/Kconfig
6b53c3da 557@@ -194,6 +194,7 @@ if MISC_FILESYSTEMS
2380c486
JR
558 source "fs/adfs/Kconfig"
559 source "fs/affs/Kconfig"
560 source "fs/ecryptfs/Kconfig"
561+source "fs/unionfs/Kconfig"
562 source "fs/hfs/Kconfig"
563 source "fs/hfsplus/Kconfig"
564 source "fs/befs/Kconfig"
0c5527e5 565diff --git a/fs/Makefile b/fs/Makefile
6b53c3da 566index afc1096..2c0627b 100644
0c5527e5
AM
567--- a/fs/Makefile
568+++ b/fs/Makefile
6b53c3da 569@@ -82,6 +82,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/
2380c486
JR
570 obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+
571 obj-$(CONFIG_HFS_FS) += hfs/
572 obj-$(CONFIG_ECRYPT_FS) += ecryptfs/
573+obj-$(CONFIG_UNION_FS) += unionfs/
574 obj-$(CONFIG_VXFS_FS) += freevxfs/
575 obj-$(CONFIG_NFS_FS) += nfs/
576 obj-$(CONFIG_EXPORTFS) += exportfs/
0c5527e5 577diff --git a/fs/namei.c b/fs/namei.c
6b53c3da 578index 2826db3..38628ef 100644
0c5527e5
AM
579--- a/fs/namei.c
580+++ b/fs/namei.c
6b53c3da 581@@ -490,6 +490,7 @@ void release_open_intent(struct nameidata *nd)
82260373
AM
582 fput(file);
583 }
2380c486
JR
584 }
585+EXPORT_SYMBOL_GPL(release_open_intent);
586
82260373
AM
587 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
588 {
6b53c3da 589@@ -1807,6 +1808,42 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
63b09289
JR
590 return __lookup_hash(&this, base, NULL);
591 }
592
593+/* pass nameidata from caller (useful for NFS) */
594+struct dentry *lookup_one_len_nd(const char *name, struct dentry *base,
595+ int len, struct nameidata *nd)
596+{
597+ struct qstr this;
598+ unsigned long hash;
599+ unsigned int c;
600+
601+ WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
602+
603+ this.name = name;
604+ this.len = len;
605+ if (!len)
606+ return ERR_PTR(-EACCES);
607+
608+ hash = init_name_hash();
609+ while (len--) {
610+ c = *(const unsigned char *)name++;
611+ if (c == '/' || c == '\0')
612+ return ERR_PTR(-EACCES);
613+ hash = partial_name_hash(c, hash);
614+ }
615+ this.hash = end_name_hash(hash);
616+ /*
617+ * See if the low-level filesystem might want
618+ * to use its own hash..
619+ */
620+ if (base->d_flags & DCACHE_OP_HASH) {
621+ int err = base->d_op->d_hash(base, base->d_inode, &this);
622+ if (err < 0)
623+ return ERR_PTR(err);
624+ }
625+
626+ return __lookup_hash(&this, base, nd);
627+}
628+
629 int user_path_at(int dfd, const char __user *name, unsigned flags,
630 struct path *path)
631 {
6b53c3da 632@@ -3369,6 +3406,7 @@ EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
63b09289
JR
633 EXPORT_SYMBOL(getname);
634 EXPORT_SYMBOL(lock_rename);
635 EXPORT_SYMBOL(lookup_one_len);
636+EXPORT_SYMBOL(lookup_one_len_nd);
637 EXPORT_SYMBOL(page_follow_link_light);
638 EXPORT_SYMBOL(page_put_link);
639 EXPORT_SYMBOL(page_readlink);
0c5527e5 640diff --git a/fs/splice.c b/fs/splice.c
6b53c3da 641index fa2defa..ba17a96 100644
0c5527e5
AM
642--- a/fs/splice.c
643+++ b/fs/splice.c
63b09289 644@@ -1085,8 +1085,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
2380c486
JR
645 /*
646 * Attempt to initiate a splice from pipe to file.
647 */
648-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
649- loff_t *ppos, size_t len, unsigned int flags)
650+long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
651+ loff_t *ppos, size_t len, unsigned int flags)
652 {
4ae1df7a
JR
653 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
654 loff_t *, size_t, unsigned int);
63b09289 655@@ -1109,13 +1109,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
2380c486 656
4ae1df7a 657 return splice_write(pipe, out, ppos, len, flags);
2380c486
JR
658 }
659+EXPORT_SYMBOL_GPL(vfs_splice_from);
660
661 /*
662 * Attempt to initiate a splice from a file to a pipe.
663 */
664-static long do_splice_to(struct file *in, loff_t *ppos,
665- struct pipe_inode_info *pipe, size_t len,
666- unsigned int flags)
667+long vfs_splice_to(struct file *in, loff_t *ppos,
668+ struct pipe_inode_info *pipe, size_t len,
669+ unsigned int flags)
670 {
4ae1df7a
JR
671 ssize_t (*splice_read)(struct file *, loff_t *,
672 struct pipe_inode_info *, size_t, unsigned int);
63b09289 673@@ -1135,6 +1136,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
2380c486 674
4ae1df7a 675 return splice_read(in, ppos, pipe, len, flags);
2380c486
JR
676 }
677+EXPORT_SYMBOL_GPL(vfs_splice_to);
678
679 /**
680 * splice_direct_to_actor - splices data directly between two non-pipes
63b09289 681@@ -1204,7 +1206,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
2380c486
JR
682 size_t read_len;
683 loff_t pos = sd->pos, prev_pos = pos;
684
685- ret = do_splice_to(in, &pos, pipe, len, flags);
686+ ret = vfs_splice_to(in, &pos, pipe, len, flags);
687 if (unlikely(ret <= 0))
688 goto out_release;
689
63b09289 690@@ -1263,8 +1265,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
2380c486
JR
691 {
692 struct file *file = sd->u.file;
693
76514441 694- return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
0c5527e5 695- sd->flags);
76514441 696+ return vfs_splice_from(pipe, file, &file->f_pos, sd->total_len,
0c5527e5 697+ sd->flags);
2380c486
JR
698 }
699
0c5527e5 700 /**
63b09289 701@@ -1349,7 +1351,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
2380c486
JR
702 } else
703 off = &out->f_pos;
704
13e5c3b1
AM
705- ret = do_splice_from(ipipe, out, off, len, flags);
706+ ret = vfs_splice_from(ipipe, out, off, len, flags);
2380c486
JR
707
708 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
709 ret = -EFAULT;
63b09289 710@@ -1369,7 +1371,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
2380c486
JR
711 } else
712 off = &in->f_pos;
713
13e5c3b1
AM
714- ret = do_splice_to(in, off, opipe, len, flags);
715+ ret = vfs_splice_to(in, off, opipe, len, flags);
2380c486
JR
716
717 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
718 ret = -EFAULT;
0c5527e5 719diff --git a/fs/stack.c b/fs/stack.c
6b53c3da 720index b4f2ab4..7c61d05 100644
0c5527e5
AM
721--- a/fs/stack.c
722+++ b/fs/stack.c
7670a7fc
AM
723@@ -1,8 +1,20 @@
724+/*
725+ * Copyright (c) 2006-2009 Erez Zadok
726+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
727+ * Copyright (c) 2006-2009 Stony Brook University
728+ * Copyright (c) 2006-2009 The Research Foundation of SUNY
729+ *
730+ * This program is free software; you can redistribute it and/or modify
731+ * it under the terms of the GNU General Public License version 2 as
732+ * published by the Free Software Foundation.
733+ */
734+
735 #include <linux/module.h>
736 #include <linux/fs.h>
737 #include <linux/fs_stack.h>
738
739-/* does _NOT_ require i_mutex to be held.
740+/*
741+ * does _NOT_ require i_mutex to be held.
742 *
743 * This function cannot be inlined since i_size_{read,write} is rather
744 * heavy-weight on 32-bit systems
0c5527e5
AM
745diff --git a/fs/unionfs/Kconfig b/fs/unionfs/Kconfig
746new file mode 100644
747index 0000000..f3c1ac4
748--- /dev/null
749+++ b/fs/unionfs/Kconfig
750@@ -0,0 +1,24 @@
751+config UNION_FS
752+ tristate "Union file system (EXPERIMENTAL)"
753+ depends on EXPERIMENTAL
754+ help
755+ Unionfs is a stackable unification file system, which appears to
756+ merge the contents of several directories (branches), while keeping
757+ their physical content separate.
758+
759+ See <http://unionfs.filesystems.org> for details
760+
761+config UNION_FS_XATTR
762+ bool "Unionfs extended attributes"
763+ depends on UNION_FS
764+ help
765+ Extended attributes are name:value pairs associated with inodes by
766+ the kernel or by users (see the attr(5) manual page).
767+
768+ If unsure, say N.
769+
770+config UNION_FS_DEBUG
771+ bool "Debug Unionfs"
772+ depends on UNION_FS
773+ help
774+ If you say Y here, you can turn on debugging output from Unionfs.
775diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
776new file mode 100644
6b53c3da 777index 0000000..623e2b5
0c5527e5
AM
778--- /dev/null
779+++ b/fs/unionfs/Makefile
780@@ -0,0 +1,17 @@
6b53c3da 781+UNIONFS_VERSION="2.5.10 (for 3.1.0-rc4)"
0c5527e5
AM
782+
783+EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
784+
785+obj-$(CONFIG_UNION_FS) += unionfs.o
786+
787+unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
788+ rdstate.o copyup.o dirhelper.o rename.o unlink.o \
789+ lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o
790+
791+unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
792+
793+unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
794+
795+ifeq ($(CONFIG_UNION_FS_DEBUG),y)
796+EXTRA_CFLAGS += -DDEBUG
797+endif
798diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
799new file mode 100644
6b53c3da 800index 0000000..71cacfe
0c5527e5
AM
801--- /dev/null
802+++ b/fs/unionfs/commonfops.c
6b53c3da 803@@ -0,0 +1,901 @@
2380c486 804+/*
63b09289 805+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
806+ * Copyright (c) 2003-2006 Charles P. Wright
807+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
808+ * Copyright (c) 2005-2006 Junjiro Okajima
809+ * Copyright (c) 2005 Arun M. Krishnakumar
810+ * Copyright (c) 2004-2006 David P. Quigley
811+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
812+ * Copyright (c) 2003 Puja Gupta
813+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
814+ * Copyright (c) 2003-2011 Stony Brook University
815+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
816+ *
817+ * This program is free software; you can redistribute it and/or modify
818+ * it under the terms of the GNU General Public License version 2 as
819+ * published by the Free Software Foundation.
820+ */
821+
822+#include "union.h"
823+
824+/*
825+ * 1) Copyup the file
826+ * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously
827+ * stolen from NFS's silly rename
828+ */
829+static int copyup_deleted_file(struct file *file, struct dentry *dentry,
830+ struct dentry *parent, int bstart, int bindex)
831+{
832+ static unsigned int counter;
833+ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
834+ const int countersize = sizeof(counter) * 2;
835+ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
836+ char name[nlen + 1];
837+ int err;
838+ struct dentry *tmp_dentry = NULL;
839+ struct dentry *lower_dentry;
840+ struct dentry *lower_dir_dentry = NULL;
841+
842+ lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
843+
844+ sprintf(name, ".unionfs%*.*lx",
845+ i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
846+
847+ /*
848+ * Loop, looking for an unused temp name to copyup to.
849+ *
850+ * It's somewhat silly that we look for a free temp name in the
851+ * source branch (bstart) instead of the dest branch (bindex), where
852+ * the final name will be created. We _will_ catch it if somehow
853+ * the name exists in the dest branch, but it'd be nice to catch it
854+ * sooner than later.
855+ */
856+retry:
857+ tmp_dentry = NULL;
858+ do {
859+ char *suffix = name + nlen - countersize;
860+
861+ dput(tmp_dentry);
862+ counter++;
863+ sprintf(suffix, "%*.*x", countersize, countersize, counter);
864+
865+ pr_debug("unionfs: trying to rename %s to %s\n",
866+ dentry->d_name.name, name);
867+
4ae1df7a 868+ tmp_dentry = lookup_lck_len(name, lower_dentry->d_parent,
2380c486
JR
869+ nlen);
870+ if (IS_ERR(tmp_dentry)) {
871+ err = PTR_ERR(tmp_dentry);
872+ goto out;
873+ }
874+ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */
875+ dput(tmp_dentry);
876+
877+ err = copyup_named_file(parent->d_inode, file, name, bstart, bindex,
878+ i_size_read(file->f_path.dentry->d_inode));
879+ if (err) {
880+ if (unlikely(err == -EEXIST))
881+ goto retry;
882+ goto out;
883+ }
884+
885+ /* bring it to the same state as an unlinked file */
886+ lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
887+ if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
888+ atomic_inc(&lower_dentry->d_inode->i_count);
889+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
890+ lower_dentry->d_inode);
891+ }
892+ lower_dir_dentry = lock_parent(lower_dentry);
893+ err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
894+ unlock_dir(lower_dir_dentry);
895+
896+out:
897+ if (!err)
898+ unionfs_check_dentry(dentry);
899+ return err;
900+}
901+
902+/*
903+ * put all references held by upper struct file and free lower file pointer
904+ * array
905+ */
906+static void cleanup_file(struct file *file)
907+{
908+ int bindex, bstart, bend;
909+ struct file **lower_files;
910+ struct file *lower_file;
911+ struct super_block *sb = file->f_path.dentry->d_sb;
912+
913+ lower_files = UNIONFS_F(file)->lower_files;
914+ bstart = fbstart(file);
915+ bend = fbend(file);
916+
917+ for (bindex = bstart; bindex <= bend; bindex++) {
918+ int i; /* holds (possibly) updated branch index */
919+ int old_bid;
920+
921+ lower_file = unionfs_lower_file_idx(file, bindex);
922+ if (!lower_file)
923+ continue;
924+
925+ /*
926+ * Find new index of matching branch with an open
927+ * file, since branches could have been added or
928+ * deleted causing the one with open files to shift.
929+ */
930+ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
931+ i = branch_id_to_idx(sb, old_bid);
932+ if (unlikely(i < 0)) {
933+ printk(KERN_ERR "unionfs: no superblock for "
934+ "file %p\n", file);
935+ continue;
936+ }
937+
938+ /* decrement count of open files */
939+ branchput(sb, i);
940+ /*
941+ * fput will perform an mntput for us on the correct branch.
942+ * Although we're using the file's old branch configuration,
943+ * bindex, which is the old index, correctly points to the
944+ * right branch in the file's branch list. In other words,
945+ * we're going to mntput the correct branch even if branches
946+ * have been added/removed.
947+ */
948+ fput(lower_file);
949+ UNIONFS_F(file)->lower_files[bindex] = NULL;
950+ UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
951+ }
952+
953+ UNIONFS_F(file)->lower_files = NULL;
954+ kfree(lower_files);
955+ kfree(UNIONFS_F(file)->saved_branch_ids);
956+ /* set to NULL because caller needs to know whether to kfree on error */
957+ UNIONFS_F(file)->saved_branch_ids = NULL;
958+}
959+
960+/* open all lower files for a given file */
961+static int open_all_files(struct file *file)
962+{
963+ int bindex, bstart, bend, err = 0;
964+ struct file *lower_file;
965+ struct dentry *lower_dentry;
966+ struct dentry *dentry = file->f_path.dentry;
967+ struct super_block *sb = dentry->d_sb;
968+
969+ bstart = dbstart(dentry);
970+ bend = dbend(dentry);
971+
972+ for (bindex = bstart; bindex <= bend; bindex++) {
973+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
974+ if (!lower_dentry)
975+ continue;
976+
977+ dget(lower_dentry);
978+ unionfs_mntget(dentry, bindex);
979+ branchget(sb, bindex);
980+
981+ lower_file =
982+ dentry_open(lower_dentry,
983+ unionfs_lower_mnt_idx(dentry, bindex),
984+ file->f_flags, current_cred());
985+ if (IS_ERR(lower_file)) {
986+ branchput(sb, bindex);
987+ err = PTR_ERR(lower_file);
988+ goto out;
989+ } else {
990+ unionfs_set_lower_file_idx(file, bindex, lower_file);
991+ }
992+ }
993+out:
994+ return err;
995+}
996+
997+/* open the highest priority file for a given upper file */
998+static int open_highest_file(struct file *file, bool willwrite)
999+{
1000+ int bindex, bstart, bend, err = 0;
1001+ struct file *lower_file;
1002+ struct dentry *lower_dentry;
1003+ struct dentry *dentry = file->f_path.dentry;
1004+ struct dentry *parent = dget_parent(dentry);
1005+ struct inode *parent_inode = parent->d_inode;
1006+ struct super_block *sb = dentry->d_sb;
1007+
1008+ bstart = dbstart(dentry);
1009+ bend = dbend(dentry);
1010+
1011+ lower_dentry = unionfs_lower_dentry(dentry);
1012+ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
1013+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1014+ err = copyup_file(parent_inode, file, bstart, bindex,
1015+ i_size_read(dentry->d_inode));
1016+ if (!err)
1017+ break;
1018+ }
1019+ atomic_set(&UNIONFS_F(file)->generation,
1020+ atomic_read(&UNIONFS_I(dentry->d_inode)->
1021+ generation));
1022+ goto out;
1023+ }
1024+
1025+ dget(lower_dentry);
1026+ unionfs_mntget(dentry, bstart);
1027+ lower_file = dentry_open(lower_dentry,
1028+ unionfs_lower_mnt_idx(dentry, bstart),
1029+ file->f_flags, current_cred());
1030+ if (IS_ERR(lower_file)) {
1031+ err = PTR_ERR(lower_file);
1032+ goto out;
1033+ }
1034+ branchget(sb, bstart);
1035+ unionfs_set_lower_file(file, lower_file);
1036+ /* Fix up the position. */
1037+ lower_file->f_pos = file->f_pos;
1038+
1039+ memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
1040+out:
1041+ dput(parent);
1042+ return err;
1043+}
1044+
1045+/* perform a delayed copyup of a read-write file on a read-only branch */
1046+static int do_delayed_copyup(struct file *file, struct dentry *parent)
1047+{
1048+ int bindex, bstart, bend, err = 0;
1049+ struct dentry *dentry = file->f_path.dentry;
1050+ struct inode *parent_inode = parent->d_inode;
1051+
1052+ bstart = fbstart(file);
1053+ bend = fbend(file);
1054+
1055+ BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
1056+
1057+ unionfs_check_file(file);
1058+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1059+ if (!d_deleted(dentry))
1060+ err = copyup_file(parent_inode, file, bstart,
1061+ bindex,
1062+ i_size_read(dentry->d_inode));
1063+ else
1064+ err = copyup_deleted_file(file, dentry, parent,
1065+ bstart, bindex);
1066+ /* if succeeded, set lower open-file flags and break */
1067+ if (!err) {
1068+ struct file *lower_file;
1069+ lower_file = unionfs_lower_file_idx(file, bindex);
1070+ lower_file->f_flags = file->f_flags;
1071+ break;
1072+ }
1073+ }
1074+ if (err || (bstart <= fbstart(file)))
1075+ goto out;
1076+ bend = fbend(file);
1077+ for (bindex = bstart; bindex <= bend; bindex++) {
1078+ if (unionfs_lower_file_idx(file, bindex)) {
1079+ branchput(dentry->d_sb, bindex);
1080+ fput(unionfs_lower_file_idx(file, bindex));
1081+ unionfs_set_lower_file_idx(file, bindex, NULL);
1082+ }
1083+ }
1084+ path_put_lowers(dentry, bstart, bend, false);
1085+ iput_lowers(dentry->d_inode, bstart, bend, false);
1086+ /* for reg file, we only open it "once" */
1087+ fbend(file) = fbstart(file);
1088+ dbend(dentry) = dbstart(dentry);
1089+ ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1090+
1091+out:
1092+ unionfs_check_file(file);
1093+ return err;
1094+}
1095+
1096+/*
1097+ * Helper function for unionfs_file_revalidate/locked.
1098+ * Expects dentry/parent to be locked already, and revalidated.
1099+ */
1100+static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry,
1101+ struct dentry *parent,
1102+ struct super_block *sb, int sbgen,
1103+ int dgen, bool willwrite)
1104+{
1105+ int fgen;
1106+ int bstart, bend, orig_brid;
1107+ int size;
1108+ int err = 0;
1109+
1110+ fgen = atomic_read(&UNIONFS_F(file)->generation);
1111+
1112+ /*
1113+ * There are two cases we are interested in. The first is if the
1114+ * generation is lower than the super-block. The second is if
1115+ * someone has copied up this file from underneath us, we also need
1116+ * to refresh things.
1117+ */
6b53c3da 1118+ if (d_deleted(dentry) ||
2380c486
JR
1119+ (sbgen <= fgen &&
1120+ dbstart(dentry) == fbstart(file) &&
1121+ unionfs_lower_file(file)))
1122+ goto out_may_copyup;
1123+
1124+ /* save orig branch ID */
1125+ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1126+
1127+ /* First we throw out the existing files. */
1128+ cleanup_file(file);
1129+
1130+ /* Now we reopen the file(s) as in unionfs_open. */
1131+ bstart = fbstart(file) = dbstart(dentry);
1132+ bend = fbend(file) = dbend(dentry);
1133+
1134+ size = sizeof(struct file *) * sbmax(sb);
1135+ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1136+ if (unlikely(!UNIONFS_F(file)->lower_files)) {
1137+ err = -ENOMEM;
1138+ goto out;
1139+ }
1140+ size = sizeof(int) * sbmax(sb);
1141+ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1142+ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1143+ err = -ENOMEM;
1144+ goto out;
1145+ }
1146+
1147+ if (S_ISDIR(dentry->d_inode->i_mode)) {
1148+ /* We need to open all the files. */
1149+ err = open_all_files(file);
1150+ if (err)
1151+ goto out;
1152+ } else {
1153+ int new_brid;
1154+ /* We only open the highest priority branch. */
1155+ err = open_highest_file(file, willwrite);
1156+ if (err)
1157+ goto out;
1158+ new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1159+ if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1160+ /*
1161+ * If we re-opened the file on a different branch
1162+ * than the original one, and this was due to a new
1163+ * branch inserted, then update the mnt counts of
1164+ * the old and new branches accordingly.
1165+ */
1166+ unionfs_mntget(dentry, bstart);
1167+ unionfs_mntput(sb->s_root,
1168+ branch_id_to_idx(sb, orig_brid));
1169+ }
1170+ /* regular files have only one open lower file */
1171+ fbend(file) = fbstart(file);
1172+ }
1173+ atomic_set(&UNIONFS_F(file)->generation,
1174+ atomic_read(&UNIONFS_I(dentry->d_inode)->generation));
1175+
1176+out_may_copyup:
1177+ /* Copyup on the first write to a file on a readonly branch. */
1178+ if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1179+ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1180+ is_robranch(dentry)) {
1181+ pr_debug("unionfs: do delay copyup of \"%s\"\n",
1182+ dentry->d_name.name);
1183+ err = do_delayed_copyup(file, parent);
1184+ /* regular files have only one open lower file */
1185+ if (!err && !S_ISDIR(dentry->d_inode->i_mode))
1186+ fbend(file) = fbstart(file);
1187+ }
1188+
1189+out:
1190+ if (err) {
1191+ kfree(UNIONFS_F(file)->lower_files);
1192+ kfree(UNIONFS_F(file)->saved_branch_ids);
1193+ }
1194+ return err;
1195+}
1196+
1197+/*
1198+ * Revalidate the struct file
1199+ * @file: file to revalidate
1200+ * @parent: parent dentry (locked by caller)
1201+ * @willwrite: true if caller may cause changes to the file; false otherwise.
1202+ * Caller must lock/unlock dentry's branch configuration.
1203+ */
1204+int unionfs_file_revalidate(struct file *file, struct dentry *parent,
1205+ bool willwrite)
1206+{
1207+ struct super_block *sb;
1208+ struct dentry *dentry;
1209+ int sbgen, dgen;
1210+ int err = 0;
1211+
1212+ dentry = file->f_path.dentry;
1213+ sb = dentry->d_sb;
1214+ verify_locked(dentry);
1215+ verify_locked(parent);
1216+
1217+ /*
1218+ * First revalidate the dentry inside struct file,
1219+ * but not unhashed dentries.
1220+ */
1221+ if (!d_deleted(dentry) &&
1222+ !__unionfs_d_revalidate(dentry, parent, willwrite)) {
1223+ err = -ESTALE;
1224+ goto out;
1225+ }
1226+
1227+ sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1228+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1229+
1230+ if (unlikely(sbgen > dgen)) { /* XXX: should never happen */
1231+ pr_debug("unionfs: failed to revalidate dentry (%s)\n",
1232+ dentry->d_name.name);
1233+ err = -ESTALE;
1234+ goto out;
1235+ }
1236+
1237+ err = __unionfs_file_revalidate(file, dentry, parent, sb,
1238+ sbgen, dgen, willwrite);
1239+out:
1240+ return err;
1241+}
1242+
1243+/* unionfs_open helper function: open a directory */
6b53c3da
AM
1244+static int __open_dir(struct inode *inode, struct file *file,
1245+ struct dentry *parent)
2380c486
JR
1246+{
1247+ struct dentry *lower_dentry;
1248+ struct file *lower_file;
1249+ int bindex, bstart, bend;
6b53c3da
AM
1250+ struct vfsmount *lower_mnt;
1251+ struct dentry *dentry = file->f_path.dentry;
2380c486 1252+
6b53c3da
AM
1253+ bstart = fbstart(file) = dbstart(dentry);
1254+ bend = fbend(file) = dbend(dentry);
2380c486
JR
1255+
1256+ for (bindex = bstart; bindex <= bend; bindex++) {
1257+ lower_dentry =
6b53c3da 1258+ unionfs_lower_dentry_idx(dentry, bindex);
2380c486
JR
1259+ if (!lower_dentry)
1260+ continue;
1261+
1262+ dget(lower_dentry);
6b53c3da
AM
1263+ lower_mnt = unionfs_mntget(dentry, bindex);
1264+ if (!lower_mnt)
1265+ lower_mnt = unionfs_mntget(parent, bindex);
1266+ lower_file = dentry_open(lower_dentry, lower_mnt, file->f_flags,
2380c486
JR
1267+ current_cred());
1268+ if (IS_ERR(lower_file))
1269+ return PTR_ERR(lower_file);
1270+
1271+ unionfs_set_lower_file_idx(file, bindex, lower_file);
6b53c3da
AM
1272+ if (!unionfs_lower_mnt_idx(dentry, bindex))
1273+ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);
2380c486
JR
1274+
1275+ /*
1276+ * The branchget goes after the open, because otherwise
1277+ * we would miss the reference on release.
1278+ */
1279+ branchget(inode->i_sb, bindex);
1280+ }
1281+
1282+ return 0;
1283+}
1284+
1285+/* unionfs_open helper function: open a file */
1286+static int __open_file(struct inode *inode, struct file *file,
1287+ struct dentry *parent)
1288+{
1289+ struct dentry *lower_dentry;
1290+ struct file *lower_file;
1291+ int lower_flags;
1292+ int bindex, bstart, bend;
6b53c3da
AM
1293+ struct dentry *dentry = file->f_path.dentry;
1294+ struct vfsmount *lower_mnt;
2380c486 1295+
6b53c3da 1296+ lower_dentry = unionfs_lower_dentry(dentry);
2380c486
JR
1297+ lower_flags = file->f_flags;
1298+
6b53c3da
AM
1299+ bstart = fbstart(file) = dbstart(dentry);
1300+ bend = fbend(file) = dbend(dentry);
2380c486
JR
1301+
1302+ /*
1303+ * check for the permission for lower file. If the error is
1304+ * COPYUP_ERR, copyup the file.
1305+ */
6b53c3da 1306+ if (lower_dentry->d_inode && is_robranch(dentry)) {
2380c486
JR
1307+ /*
1308+ * if the open will change the file, copy it up; otherwise
1309+ * defer it.
1310+ */
1311+ if (lower_flags & O_TRUNC) {
1312+ int size = 0;
1313+ int err = -EROFS;
1314+
1315+ /* copyup the file */
1316+ for (bindex = bstart - 1; bindex >= 0; bindex--) {
1317+ err = copyup_file(parent->d_inode, file,
1318+ bstart, bindex, size);
63b09289
JR
1319+ if (!err) {
1320+ /* only one regular file open */
1321+ fbend(file) = fbstart(file);
2380c486 1322+ break;
63b09289 1323+ }
2380c486
JR
1324+ }
1325+ return err;
1326+ } else {
1327+ /*
1328+ * turn off writeable flags, to force delayed copyup
1329+ * by caller.
1330+ */
1331+ lower_flags &= ~(OPEN_WRITE_FLAGS);
1332+ }
1333+ }
1334+
1335+ dget(lower_dentry);
1336+
1337+ /*
1338+ * dentry_open will decrement mnt refcnt if err.
1339+ * otherwise fput() will do an mntput() for us upon file close.
1340+ */
6b53c3da
AM
1341+ lower_mnt = unionfs_mntget(dentry, bstart);
1342+ lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags,
1343+ current_cred());
2380c486
JR
1344+ if (IS_ERR(lower_file))
1345+ return PTR_ERR(lower_file);
1346+
1347+ unionfs_set_lower_file(file, lower_file);
1348+ branchget(inode->i_sb, bstart);
1349+
1350+ return 0;
1351+}
1352+
1353+int unionfs_open(struct inode *inode, struct file *file)
1354+{
1355+ int err = 0;
1356+ struct file *lower_file = NULL;
1357+ struct dentry *dentry = file->f_path.dentry;
1358+ struct dentry *parent;
1359+ int bindex = 0, bstart = 0, bend = 0;
1360+ int size;
1361+ int valid = 0;
1362+
1363+ unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1364+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1365+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1366+
1367+ /* don't open unhashed/deleted files */
1368+ if (d_deleted(dentry)) {
1369+ err = -ENOENT;
1370+ goto out_nofree;
1371+ }
1372+
1373+ /* XXX: should I change 'false' below to the 'willwrite' flag? */
1374+ valid = __unionfs_d_revalidate(dentry, parent, false);
1375+ if (unlikely(!valid)) {
1376+ err = -ESTALE;
1377+ goto out_nofree;
1378+ }
1379+
1380+ file->private_data =
1381+ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1382+ if (unlikely(!UNIONFS_F(file))) {
1383+ err = -ENOMEM;
1384+ goto out_nofree;
1385+ }
1386+ fbstart(file) = -1;
1387+ fbend(file) = -1;
1388+ atomic_set(&UNIONFS_F(file)->generation,
1389+ atomic_read(&UNIONFS_I(inode)->generation));
1390+
1391+ size = sizeof(struct file *) * sbmax(inode->i_sb);
1392+ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1393+ if (unlikely(!UNIONFS_F(file)->lower_files)) {
1394+ err = -ENOMEM;
1395+ goto out;
1396+ }
1397+ size = sizeof(int) * sbmax(inode->i_sb);
1398+ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1399+ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1400+ err = -ENOMEM;
1401+ goto out;
1402+ }
1403+
1404+ bstart = fbstart(file) = dbstart(dentry);
1405+ bend = fbend(file) = dbend(dentry);
1406+
1407+ /*
1408+ * open all directories and make the unionfs file struct point to
1409+ * these lower file structs
1410+ */
1411+ if (S_ISDIR(inode->i_mode))
6b53c3da 1412+ err = __open_dir(inode, file, parent); /* open a dir */
2380c486
JR
1413+ else
1414+ err = __open_file(inode, file, parent); /* open a file */
1415+
1416+ /* freeing the allocated resources, and fput the opened files */
1417+ if (err) {
1418+ for (bindex = bstart; bindex <= bend; bindex++) {
1419+ lower_file = unionfs_lower_file_idx(file, bindex);
1420+ if (!lower_file)
1421+ continue;
1422+
1423+ branchput(dentry->d_sb, bindex);
1424+ /* fput calls dput for lower_dentry */
1425+ fput(lower_file);
1426+ }
1427+ }
1428+
1429+out:
1430+ if (err) {
1431+ kfree(UNIONFS_F(file)->lower_files);
1432+ kfree(UNIONFS_F(file)->saved_branch_ids);
1433+ kfree(UNIONFS_F(file));
1434+ }
1435+out_nofree:
1436+ if (!err) {
1437+ unionfs_postcopyup_setmnt(dentry);
1438+ unionfs_copy_attr_times(inode);
1439+ unionfs_check_file(file);
1440+ unionfs_check_inode(inode);
1441+ }
1442+ unionfs_unlock_dentry(dentry);
1443+ unionfs_unlock_parent(dentry, parent);
1444+ unionfs_read_unlock(inode->i_sb);
1445+ return err;
1446+}
1447+
1448+/*
1449+ * release all lower object references & free the file info structure
1450+ *
1451+ * No need to grab sb info's rwsem.
1452+ */
1453+int unionfs_file_release(struct inode *inode, struct file *file)
1454+{
1455+ struct file *lower_file = NULL;
1456+ struct unionfs_file_info *fileinfo;
1457+ struct unionfs_inode_info *inodeinfo;
1458+ struct super_block *sb = inode->i_sb;
1459+ struct dentry *dentry = file->f_path.dentry;
1460+ struct dentry *parent;
1461+ int bindex, bstart, bend;
63b09289 1462+ int err = 0;
2380c486 1463+
4ae1df7a
JR
1464+ /*
1465+ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
1466+ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
1467+ * has been causing false positives in file system stacking layers.
1468+ * In particular, our ->mmap is called after sys_mmap2 already holds
1469+ * mmap_sem, then we lock our own mutexes; but earlier, it's
1470+ * possible for lockdep to have locked our mutexes first, and then
1471+ * we call a lower ->readdir which could call might_fault. The
1472+ * different ordering of the locks is what lockdep complains about
1473+ * -- unnecessarily. Therefore, we have no choice but to tell
1474+ * lockdep to temporarily turn off lockdep here. Note: the comments
1475+ * inside might_sleep also suggest that it would have been
1476+ * nicer to only annotate paths that need that might_lock_read.
1477+ */
1478+ lockdep_off();
2380c486
JR
1479+ unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1480+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1481+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1482+
1483+ /*
1484+ * We try to revalidate, but the VFS ignores return return values
1485+ * from file->release, so we must always try to succeed here,
1486+ * including to do the kfree and dput below. So if revalidation
1487+ * failed, all we can do is print some message and keep going.
1488+ */
1489+ err = unionfs_file_revalidate(file, parent,
1490+ UNIONFS_F(file)->wrote_to_file);
1491+ if (!err)
1492+ unionfs_check_file(file);
1493+ fileinfo = UNIONFS_F(file);
1494+ BUG_ON(file->f_path.dentry->d_inode != inode);
1495+ inodeinfo = UNIONFS_I(inode);
1496+
1497+ /* fput all the lower files */
2380c486
JR
1498+ bstart = fbstart(file);
1499+ bend = fbend(file);
1500+
1501+ for (bindex = bstart; bindex <= bend; bindex++) {
1502+ lower_file = unionfs_lower_file_idx(file, bindex);
1503+
1504+ if (lower_file) {
1505+ unionfs_set_lower_file_idx(file, bindex, NULL);
1506+ fput(lower_file);
1507+ branchput(sb, bindex);
1508+ }
1509+
1510+ /* if there are no more refs to the dentry, dput it */
1511+ if (d_deleted(dentry)) {
1512+ dput(unionfs_lower_dentry_idx(dentry, bindex));
1513+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1514+ }
1515+ }
1516+
1517+ kfree(fileinfo->lower_files);
1518+ kfree(fileinfo->saved_branch_ids);
1519+
1520+ if (fileinfo->rdstate) {
1521+ fileinfo->rdstate->access = jiffies;
1522+ spin_lock(&inodeinfo->rdlock);
1523+ inodeinfo->rdcount++;
1524+ list_add_tail(&fileinfo->rdstate->cache,
1525+ &inodeinfo->readdircache);
1526+ mark_inode_dirty(inode);
1527+ spin_unlock(&inodeinfo->rdlock);
1528+ fileinfo->rdstate = NULL;
1529+ }
1530+ kfree(fileinfo);
1531+
1532+ unionfs_unlock_dentry(dentry);
1533+ unionfs_unlock_parent(dentry, parent);
1534+ unionfs_read_unlock(sb);
4ae1df7a 1535+ lockdep_on();
2380c486
JR
1536+ return err;
1537+}
1538+
1539+/* pass the ioctl to the lower fs */
1540+static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1541+{
1542+ struct file *lower_file;
1543+ int err;
1544+
1545+ lower_file = unionfs_lower_file(file);
1546+
1547+ err = -ENOTTY;
1548+ if (!lower_file || !lower_file->f_op)
1549+ goto out;
1550+ if (lower_file->f_op->unlocked_ioctl) {
1551+ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
0c5527e5 1552+#ifdef CONFIG_COMPAT
63b09289
JR
1553+ } else if (lower_file->f_op->compat_ioctl) {
1554+ err = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
0c5527e5 1555+#endif
2380c486
JR
1556+ }
1557+
1558+out:
1559+ return err;
1560+}
1561+
1562+/*
1563+ * return to user-space the branch indices containing the file in question
1564+ *
1565+ * We use fd_set, and therefore the number of branches is limited
1566+ * to FD_SETSIZE, which is currently 1024 - plenty for most people
1567+ */
1568+static int unionfs_ioctl_queryfile(struct file *file, struct dentry *parent,
1569+ unsigned int cmd, unsigned long arg)
1570+{
1571+ int err = 0;
1572+ fd_set branchlist;
1573+ int bstart = 0, bend = 0, bindex = 0;
1574+ int orig_bstart, orig_bend;
1575+ struct dentry *dentry, *lower_dentry;
1576+ struct vfsmount *mnt;
1577+
1578+ dentry = file->f_path.dentry;
1579+ orig_bstart = dbstart(dentry);
1580+ orig_bend = dbend(dentry);
1581+ err = unionfs_partial_lookup(dentry, parent);
1582+ if (err)
1583+ goto out;
1584+ bstart = dbstart(dentry);
1585+ bend = dbend(dentry);
1586+
1587+ FD_ZERO(&branchlist);
1588+
1589+ for (bindex = bstart; bindex <= bend; bindex++) {
1590+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1591+ if (!lower_dentry)
1592+ continue;
1593+ if (likely(lower_dentry->d_inode))
1594+ FD_SET(bindex, &branchlist);
1595+ /* purge any lower objects after partial_lookup */
1596+ if (bindex < orig_bstart || bindex > orig_bend) {
1597+ dput(lower_dentry);
1598+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1599+ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1600+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1601+ NULL);
1602+ mnt = unionfs_lower_mnt_idx(dentry, bindex);
1603+ if (!mnt)
1604+ continue;
1605+ unionfs_mntput(dentry, bindex);
1606+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1607+ }
1608+ }
1609+ /* restore original dentry's offsets */
1610+ dbstart(dentry) = orig_bstart;
1611+ dbend(dentry) = orig_bend;
1612+ ibstart(dentry->d_inode) = orig_bstart;
1613+ ibend(dentry->d_inode) = orig_bend;
1614+
1615+ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1616+ if (unlikely(err))
1617+ err = -EFAULT;
1618+
1619+out:
1620+ return err < 0 ? err : bend;
1621+}
1622+
1623+long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1624+{
1625+ long err;
1626+ struct dentry *dentry = file->f_path.dentry;
1627+ struct dentry *parent;
1628+
1629+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1630+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1631+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1632+
1633+ err = unionfs_file_revalidate(file, parent, true);
1634+ if (unlikely(err))
1635+ goto out;
1636+
1637+ /* check if asked for local commands */
1638+ switch (cmd) {
1639+ case UNIONFS_IOCTL_INCGEN:
1640+ /* Increment the superblock generation count */
1641+ pr_info("unionfs: incgen ioctl deprecated; "
1642+ "use \"-o remount,incgen\"\n");
1643+ err = -ENOSYS;
1644+ break;
1645+
1646+ case UNIONFS_IOCTL_QUERYFILE:
1647+ /* Return list of branches containing the given file */
1648+ err = unionfs_ioctl_queryfile(file, parent, cmd, arg);
1649+ break;
1650+
1651+ default:
1652+ /* pass the ioctl down */
1653+ err = do_ioctl(file, cmd, arg);
1654+ break;
1655+ }
1656+
1657+out:
1658+ unionfs_check_file(file);
1659+ unionfs_unlock_dentry(dentry);
1660+ unionfs_unlock_parent(dentry, parent);
1661+ unionfs_read_unlock(dentry->d_sb);
1662+ return err;
1663+}
1664+/*
+ * ->flush handler: propagate a flush to the lower files of @file.
+ *
+ * After revalidating @file, walks the branches from fbstart(file) to
+ * fbend(file) and calls the lower file's ->flush(lower_file, @id) wherever
+ * a lower file with that method exists.  The walk stops at the first
+ * nonzero result, which is returned; otherwise returns 0 (or the
+ * revalidation error if revalidation failed).
+ */
1665+int unionfs_flush(struct file *file, fl_owner_t id)
1666+{
1667+ int err = 0;
1668+ struct file *lower_file = NULL;
1669+ struct dentry *dentry = file->f_path.dentry;
1670+ struct dentry *parent;
1671+ int bindex, bstart, bend;
1672+
1673+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1674+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
1675+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1676+
1677+ err = unionfs_file_revalidate(file, parent,
1678+ UNIONFS_F(file)->wrote_to_file);
1679+ if (unlikely(err))
1680+ goto out;
1681+ unionfs_check_file(file);
1682+
1683+ bstart = fbstart(file);
1684+ bend = fbend(file);
1685+ for (bindex = bstart; bindex <= bend; bindex++) {
1686+ lower_file = unionfs_lower_file_idx(file, bindex);
1687+
1688+ if (lower_file && lower_file->f_op &&
1689+ lower_file->f_op->flush) {
1690+ err = lower_file->f_op->flush(lower_file, id);
1691+ if (err)
1692+ goto out;
1693+ }
1694+
1695+ }
1696+
1697+out:
1698+ if (!err) /* the sanity check is skipped when returning an error */
1699+ unionfs_check_file(file);
1700+ unionfs_unlock_dentry(dentry);
1701+ unionfs_unlock_parent(dentry, parent);
1702+ unionfs_read_unlock(dentry->d_sb);
1703+ return err;
1704+}
0c5527e5
AM
1705diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1706new file mode 100644
6b53c3da 1707index 0000000..078ca27
0c5527e5
AM
1708--- /dev/null
1709+++ b/fs/unionfs/copyup.c
6b53c3da 1710@@ -0,0 +1,899 @@
2380c486 1711+/*
63b09289 1712+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
1713+ * Copyright (c) 2003-2006 Charles P. Wright
1714+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1715+ * Copyright (c) 2005-2006 Junjiro Okajima
1716+ * Copyright (c) 2005 Arun M. Krishnakumar
1717+ * Copyright (c) 2004-2006 David P. Quigley
1718+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1719+ * Copyright (c) 2003 Puja Gupta
1720+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
1721+ * Copyright (c) 2003-2011 Stony Brook University
1722+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
1723+ *
1724+ * This program is free software; you can redistribute it and/or modify
1725+ * it under the terms of the GNU General Public License version 2 as
1726+ * published by the Free Software Foundation.
1727+ */
1728+
1729+#include "union.h"
1730+
1731+/*
1732+ * For detailed explanation of copyup see:
1733+ * Documentation/filesystems/unionfs/concepts.txt
1734+ */
1735+
1736+#ifdef CONFIG_UNION_FS_XATTR
1737+/*
+ * Copy up all extended attributes of @old_lower_dentry onto
+ * @new_lower_dentry.
+ *
+ * The attribute name list is fetched with vfs_listxattr() (first to size
+ * it, then to fill it) and each name is read with vfs_getxattr() and
+ * written with vfs_setxattr().  If a set fails with -EPERM and the caller
+ * lacks CAP_FOWNER, the set is retried once with CAP_FOWNER raised in
+ * temporary credentials.
+ *
+ * Returns 0 on success, when the source has no xattrs, or when the error
+ * is -ENOTSUPP/-EOPNOTSUPP; otherwise a negative errno.
+ */
1738+static int copyup_xattrs(struct dentry *old_lower_dentry,
1739+ struct dentry *new_lower_dentry)
1740+{
1741+ int err = 0;
1742+ ssize_t list_size = -1;
1743+ char *name_list = NULL;
1744+ char *attr_value = NULL;
1745+ char *name_list_buf = NULL;
1746+
1747+ /* query the actual size of the xattr list */
1748+ list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1749+ if (list_size <= 0) {
1750+ err = list_size;
1751+ goto out;
1752+ }
1753+
1754+ /* allocate space for the actual list */
1755+ name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1756+ if (unlikely(!name_list || IS_ERR(name_list))) {
1757+ err = PTR_ERR(name_list);
1758+ goto out;
1759+ }
1760+
1761+ name_list_buf = name_list; /* save for kfree at end */
1762+
1763+ /* now get the actual xattr list of the source file */
1764+ list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1765+ if (list_size <= 0) {
1766+ err = list_size;
1767+ goto out;
1768+ }
+ /*
+ * The loop below stops at an empty name, so make sure one follows the
+ * last entry.  The buffer has room: it was allocated one byte larger
+ * than the size passed to vfs_listxattr() above.
+ */
+ name_list[list_size] = '\0';
1769+
1770+ /* allocate space to hold each xattr's value */
1771+ attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1772+ if (unlikely(!attr_value || IS_ERR(attr_value))) {
+ /* report the failure of this allocation, not of name_list */
1773+ err = PTR_ERR(attr_value);
1774+ goto out;
1775+ }
1776+
1777+ /* in a loop, get and set each xattr from src to dst file */
1778+ while (*name_list) {
1779+ ssize_t size;
1780+
1781+ /* Lock here since vfs_getxattr doesn't lock for us */
1782+ mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1783+ size = vfs_getxattr(old_lower_dentry, name_list,
1784+ attr_value, XATTR_SIZE_MAX);
1785+ mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1786+ if (size < 0) {
1787+ err = size;
1788+ goto out;
1789+ }
1790+ if (size > XATTR_SIZE_MAX) {
1791+ err = -E2BIG;
1792+ goto out;
1793+ }
1794+ /* Don't lock here since vfs_setxattr does it for us. */
1795+ err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1796+ size, 0);
1797+ /*
1798+ * Selinux depends on "security.*" xattrs, so to maintain
1799+ * the security of copied-up files, if Selinux is active,
1800+ * then we must copy these xattrs as well. So we need to
1801+ * temporarily get FOWNER privileges.
1802+ * XXX: move entire copyup code to SIOQ.
1803+ */
1804+ if (err == -EPERM && !capable(CAP_FOWNER)) {
1805+ const struct cred *old_creds;
1806+ struct cred *new_creds;
1807+
1808+ new_creds = prepare_creds();
1809+ if (unlikely(!new_creds)) {
1810+ err = -ENOMEM;
1811+ goto out;
1812+ }
1813+ cap_raise(new_creds->cap_effective, CAP_FOWNER);
1814+ old_creds = override_creds(new_creds);
1815+ err = vfs_setxattr(new_lower_dentry, name_list,
1816+ attr_value, size, 0);
1817+ revert_creds(old_creds);
+ /* drop the reference obtained from prepare_creds() */
+ put_cred(new_creds);
1818+ }
1819+ if (err < 0)
1820+ goto out;
1821+ name_list += strlen(name_list) + 1;
1822+ }
1823+out:
1824+ unionfs_xattr_kfree(name_list_buf);
1825+ unionfs_xattr_kfree(attr_value);
1826+ /* Ignore if xattr isn't supported */
1827+ if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1828+ err = 0;
1829+ return err;
1830+}
1831+#endif /* CONFIG_UNION_FS_XATTR */
1832+
1833+/*
1834+ * Copy owner, group, timestamps and mode from old_lower_dentry's inode
+ * onto new_lower_dentry, using two notify_change() calls made while
+ * holding new_lower_dentry's inode mutex.  @sb is not used here.
1835+ *
1836+ * Handle file systems which may not support certain options. For example
1837+ * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless
1838+ * errors, rather than propagating them up, which results in copyup errors
1839+ * and errors returned back to users.
+ *
+ * Returns 0 on success or the error from notify_change(); -EOPNOTSUPP
+ * from the mode change is turned into 0 (with a warning) for symlinks.
1840+ */
1841+static int copyup_permissions(struct super_block *sb,
1842+ struct dentry *old_lower_dentry,
1843+ struct dentry *new_lower_dentry)
1844+{
1845+ struct inode *i = old_lower_dentry->d_inode;
1846+ struct iattr newattrs;
1847+ int err;
1848+
1849+ newattrs.ia_atime = i->i_atime;
1850+ newattrs.ia_mtime = i->i_mtime;
1851+ newattrs.ia_ctime = i->i_ctime;
1852+ newattrs.ia_gid = i->i_gid;
1853+ newattrs.ia_uid = i->i_uid;
1854+ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1855+ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1856+ ATTR_GID | ATTR_UID;
1857+ mutex_lock(&new_lower_dentry->d_inode->i_mutex);
1858+ err = notify_change(new_lower_dentry, &newattrs);
1859+ if (err)
1860+ goto out;
1861+
1862+ /* now try to change the mode and ignore EOPNOTSUPP on symlinks */
1863+ newattrs.ia_mode = i->i_mode;
1864+ newattrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1865+ err = notify_change(new_lower_dentry, &newattrs);
1866+ if (err == -EOPNOTSUPP &&
1867+ S_ISLNK(new_lower_dentry->d_inode->i_mode)) {
1868+ printk(KERN_WARNING
1869+ "unionfs: changing \"%s\" symlink mode unsupported\n",
1870+ new_lower_dentry->d_name.name);
1871+ err = 0;
1872+ }
1873+
1874+out:
1875+ mutex_unlock(&new_lower_dentry->d_inode->i_mutex);
1876+ return err;
1877+}
1878+
1879+/*
1880+ * create the new device/file/directory/symlink for new_lower_dentry inside
1881+ * new_lower_parent_dentry, matching the type of old_lower_dentry's inode
1882+ *
1883+ * if the object being copied up is a regular file, the file is only created,
1884+ * the contents have to be copied up separately
+ *
+ * The creation itself is run through run_sioq() with the matching
+ * __unionfs_mkdir/__unionfs_symlink/__unionfs_mknod/__unionfs_create
+ * helper, and the helper's args.err is returned.  @symbuf is only used
+ * for symlinks, where it is passed on as the symlink.symbuf argument.
+ * An inode type that matches none of the cases triggers BUG().
1885+ */
1886+static int __copyup_ndentry(struct dentry *old_lower_dentry,
1887+ struct dentry *new_lower_dentry,
1888+ struct dentry *new_lower_parent_dentry,
1889+ char *symbuf)
1890+{
1891+ int err = 0;
1892+ umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1893+ struct sioq_args args;
1894+
1895+ if (S_ISDIR(old_mode)) {
1896+ args.mkdir.parent = new_lower_parent_dentry->d_inode;
1897+ args.mkdir.dentry = new_lower_dentry;
1898+ args.mkdir.mode = old_mode;
1899+
1900+ run_sioq(__unionfs_mkdir, &args);
1901+ err = args.err;
1902+ } else if (S_ISLNK(old_mode)) {
1903+ args.symlink.parent = new_lower_parent_dentry->d_inode;
1904+ args.symlink.dentry = new_lower_dentry;
1905+ args.symlink.symbuf = symbuf;
1906+
1907+ run_sioq(__unionfs_symlink, &args);
1908+ err = args.err;
1909+ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1910+ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1911+ args.mknod.parent = new_lower_parent_dentry->d_inode;
1912+ args.mknod.dentry = new_lower_dentry;
1913+ args.mknod.mode = old_mode;
1914+ args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1915+
1916+ run_sioq(__unionfs_mknod, &args);
1917+ err = args.err;
1918+ } else if (S_ISREG(old_mode)) {
1919+ struct nameidata nd;
1920+ err = init_lower_nd(&nd, LOOKUP_CREATE);
1921+ if (unlikely(err < 0))
1922+ goto out;
1923+ args.create.nd = &nd;
1924+ args.create.parent = new_lower_parent_dentry->d_inode;
1925+ args.create.dentry = new_lower_dentry;
1926+ args.create.mode = old_mode;
1927+
1928+ run_sioq(__unionfs_create, &args);
1929+ err = args.err;
1930+ release_lower_nd(&nd, err);
1931+ } else {
1932+ printk(KERN_CRIT "unionfs: unknown inode type %d\n",
1933+ old_mode);
1934+ BUG();
1935+ }
1936+
1937+out:
1938+ return err;
1939+}
1940+/*
+ * Copy up to @len bytes of regular-file data from @old_lower_dentry
+ * (branch @old_bindex) into @new_lower_dentry (branch @new_bindex), one
+ * PAGE_SIZE buffer at a time, using the lower files' ->read and ->write
+ * methods under KERNEL_DS.
+ *
+ * On success with a non-NULL @copyup_file, the still-open output file is
+ * stored in *@copyup_file and the branch reference taken on @new_bindex
+ * is kept; in every other case the output file and that branch reference
+ * are released here.  The input file and the @old_bindex branch reference
+ * are always released.
+ *
+ * Returns 0 on success or a negative errno.  A short write is reported as
+ * -EIO rather than as the (positive) number of bytes written.
+ */
1941+static int __copyup_reg_data(struct dentry *dentry,
1942+ struct dentry *new_lower_dentry, int new_bindex,
1943+ struct dentry *old_lower_dentry, int old_bindex,
1944+ struct file **copyup_file, loff_t len)
1945+{
1946+ struct super_block *sb = dentry->d_sb;
1947+ struct file *input_file;
1948+ struct file *output_file;
1949+ struct vfsmount *output_mnt;
1950+ mm_segment_t old_fs;
1951+ char *buf = NULL;
1952+ ssize_t read_bytes, write_bytes;
1953+ loff_t size;
1954+ int err = 0;
1955+
1956+ /* open old file */
1957+ unionfs_mntget(dentry, old_bindex);
1958+ branchget(sb, old_bindex);
1959+ /* dentry_open calls dput and mntput if it returns an error */
1960+ input_file = dentry_open(old_lower_dentry,
1961+ unionfs_lower_mnt_idx(dentry, old_bindex),
1962+ O_RDONLY | O_LARGEFILE, current_cred());
1963+ if (IS_ERR(input_file)) {
1964+ dput(old_lower_dentry);
1965+ err = PTR_ERR(input_file);
1966+ goto out;
1967+ }
1968+ if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
1969+ err = -EINVAL;
1970+ goto out_close_in;
1971+ }
1972+
1973+ /* open new file */
1974+ dget(new_lower_dentry);
1975+ output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1976+ branchget(sb, new_bindex);
1977+ output_file = dentry_open(new_lower_dentry, output_mnt,
1978+ O_RDWR | O_LARGEFILE, current_cred());
1979+ if (IS_ERR(output_file)) {
1980+ err = PTR_ERR(output_file);
1981+ goto out_close_in2;
1982+ }
1983+ if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
1984+ err = -EINVAL;
1985+ goto out_close_out;
1986+ }
1987+
1988+ /* allocating a buffer */
1989+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1990+ if (unlikely(!buf)) {
1991+ err = -ENOMEM;
1992+ goto out_close_out;
1993+ }
1994+
1995+ input_file->f_pos = 0;
1996+ output_file->f_pos = 0;
1997+
1998+ old_fs = get_fs();
1999+ set_fs(KERNEL_DS);
2000+
2001+ size = len;
2002+ err = 0;
2003+ do {
2004+ if (len >= PAGE_SIZE)
2005+ size = PAGE_SIZE;
2006+ else if ((len < PAGE_SIZE) && (len > 0))
2007+ size = len;
2008+
2009+ len -= PAGE_SIZE;
2010+
2011+ read_bytes =
2012+ input_file->f_op->read(input_file,
2013+ (char __user *)buf, size,
2014+ &input_file->f_pos);
2015+ if (read_bytes <= 0) {
2016+ err = read_bytes;
2017+ break;
2018+ }
2019+
2020+ /* see Documentation/filesystems/unionfs/issues.txt */
2021+ lockdep_off();
2022+ write_bytes =
2023+ output_file->f_op->write(output_file,
2024+ (char __user *)buf,
2025+ read_bytes,
2026+ &output_file->f_pos);
2027+ lockdep_on();
2028+ if ((write_bytes < 0) || (write_bytes < read_bytes)) {
+ /* a short write must not escape as a positive "error" */
2029+ err = (write_bytes < 0) ? write_bytes : -EIO;
2030+ break;
2031+ }
2032+ } while ((read_bytes > 0) && (len > 0));
2033+
2034+ set_fs(old_fs);
2035+
2036+ kfree(buf);
2037+
6b53c3da
AM
2038+#if 0
2039+ /* XXX: code no longer needed? */
2380c486 2040+ if (!err)
0c5527e5 2041+ err = output_file->f_op->fsync(output_file, 0);
6b53c3da 2042+#endif
2380c486
JR
2043+
2044+ if (err)
2045+ goto out_close_out;
2046+
2047+ if (copyup_file) {
+ /* hand the open file to the caller; keep the new_bindex branch ref */
2048+ *copyup_file = output_file;
2049+ goto out_close_in;
2050+ }
2051+
2052+out_close_out:
2053+ fput(output_file);
2054+
2055+out_close_in2:
2056+ branchput(sb, new_bindex);
2057+
2058+out_close_in:
2059+ fput(input_file);
2060+
2061+out:
2062+ branchput(sb, old_bindex);
2063+
2064+ return err;
2065+}
2066+
2067+/*
2068+ * dput the lower references for old and new dentry & clear a lower dentry
2069+ * pointer
+ *
+ * Sets @dentry's lower dentry at @new_bindex to NULL, restores
+ * dbstart/dbend of @dentry to @old_bstart/@old_bend, and then dputs
+ * @new_lower_dentry and @old_lower_dentry (one caller in this file passes
+ * NULL for the latter).
2070+ */
2071+static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
2072+ int old_bstart, int old_bend,
2073+ struct dentry *new_lower_dentry, int new_bindex)
2074+{
2075+ /* get rid of the lower dentry and all its traces */
2076+ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
2077+ dbstart(dentry) = old_bstart;
2078+ dbend(dentry) = old_bend;
2079+
2080+ dput(new_lower_dentry);
2081+ dput(old_lower_dentry);
2082+}
2083+
2084+/*
2085+ * Copy up a dentry to a file of specified name.
2086+ *
2087+ * @dir: used to pull the ->i_sb to access other branches
2088+ * @dentry: the non-negative dentry whose lower_inode we should copy
2089+ * @bstart: the branch of the lower_inode to copy from
2090+ * @new_bindex: the branch to create the new file in
2091+ * @name: the name of the file to create
2092+ * @namelen: length of @name (not used by this function)
2093+ * @copyup_file: the "struct file" to return (optional)
2094+ * @len: how many bytes to copy-up?
+ *
+ * Steps: create the parent directories in @new_bindex, read the link
+ * target for symlinks, create the new object, copy file data for regular
+ * files, then copy permissions and (if configured) xattrs.  If a step
+ * after object creation fails, the new object is unlinked again.
+ *
+ * Returns 0 on success or a negative errno.  On failure nothing is left
+ * in *@copyup_file: a file that was already returned is closed and the
+ * pointer reset to NULL.  This relies on callers initializing
+ * *@copyup_file to NULL, as both callers in this file do.
2095+ */
2096+int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
2097+ int new_bindex, const char *name, int namelen,
2098+ struct file **copyup_file, loff_t len)
2099+{
2100+ struct dentry *new_lower_dentry;
2101+ struct dentry *old_lower_dentry = NULL;
2102+ struct super_block *sb;
2103+ int err = 0;
2104+ int old_bindex;
2105+ int old_bstart;
2106+ int old_bend;
2107+ struct dentry *new_lower_parent_dentry = NULL;
2108+ mm_segment_t oldfs;
2109+ char *symbuf = NULL;
2110+
2111+ verify_locked(dentry);
2112+
2113+ old_bindex = bstart;
2114+ old_bstart = dbstart(dentry);
2115+ old_bend = dbend(dentry);
2116+
2117+ BUG_ON(new_bindex < 0);
2118+ BUG_ON(new_bindex >= old_bindex);
2119+
2120+ sb = dir->i_sb;
2121+
2122+ err = is_robranch_super(sb, new_bindex);
2123+ if (err)
2124+ goto out;
2125+
2126+ /* Create the directory structure above this dentry. */
2127+ new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
2128+ if (IS_ERR(new_lower_dentry)) {
2129+ err = PTR_ERR(new_lower_dentry);
2130+ goto out;
2131+ }
2132+
2133+ old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
2134+ /* we conditionally dput this old_lower_dentry at end of function */
2135+ dget(old_lower_dentry);
2136+
2137+ /* For symlinks, we must read the link before we lock the directory. */
2138+ if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
2139+
2140+ symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
2141+ if (unlikely(!symbuf)) {
2142+ __clear(dentry, old_lower_dentry,
2143+ old_bstart, old_bend,
2144+ new_lower_dentry, new_bindex);
2145+ err = -ENOMEM;
2146+ goto out_free;
2147+ }
2148+
2149+ oldfs = get_fs();
2150+ set_fs(KERNEL_DS);
+ /* leave one byte of symbuf free for the terminator written below */
2151+ err = old_lower_dentry->d_inode->i_op->readlink(
2152+ old_lower_dentry,
2153+ (char __user *)symbuf,
2154+ PATH_MAX - 1);
2155+ set_fs(oldfs);
2156+ if (err < 0) {
2157+ __clear(dentry, old_lower_dentry,
2158+ old_bstart, old_bend,
2159+ new_lower_dentry, new_bindex);
2160+ goto out_free;
2161+ }
2162+ symbuf[err] = '\0';
2163+ }
2164+
2165+ /* Now we lock the parent, and create the object in the new branch. */
2166+ new_lower_parent_dentry = lock_parent(new_lower_dentry);
2167+
2168+ /* create the new inode */
2169+ err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2170+ new_lower_parent_dentry, symbuf);
2171+
2172+ if (err) {
2173+ __clear(dentry, old_lower_dentry,
2174+ old_bstart, old_bend,
2175+ new_lower_dentry, new_bindex);
2176+ goto out_unlock;
2177+ }
2178+
2179+ /* We actually copyup the file here. */
2180+ if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2181+ err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2182+ old_lower_dentry, old_bindex,
2183+ copyup_file, len);
2184+ if (err)
2185+ goto out_unlink;
2186+
2187+ /* Set permissions. */
2188+ err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2189+ if (err)
2190+ goto out_unlink;
2191+
2192+#ifdef CONFIG_UNION_FS_XATTR
2193+ /* Selinux uses extended attributes for permissions. */
2194+ err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2195+ if (err)
2196+ goto out_unlink;
2197+#endif /* CONFIG_UNION_FS_XATTR */
2198+
2199+ /* do not allow files getting deleted to be re-interposed */
2200+ if (!d_deleted(dentry))
2201+ unionfs_reinterpose(dentry);
2202+
2203+ goto out_unlock;
2204+
2205+out_unlink:
2206+ /*
2207+ * copyup failed, because we possibly ran out of space or
2208+ * quota, or something else happened so let's unlink; we don't
2209+ * really care about the return value of vfs_unlink
2210+ */
2211+ vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2212+
+ /*
+ * Only undo what __copyup_reg_data() handed back.  If it failed, or
+ * was never called (non-regular file), *copyup_file is still NULL and
+ * there is neither a file to close nor a branch reference to drop.
+ */
2213+ if (copyup_file && *copyup_file) {
2214+ /* need to close the file */
2215+
2216+ fput(*copyup_file);
+ *copyup_file = NULL;
2217+ branchput(sb, new_bindex);
2218+ }
2219+
2220+ /*
2221+ * TODO: should we reset the error to something like -EIO?
2222+ *
2223+ * If we don't reset, the user may get some nonsensical errors, but
2224+ * on the other hand, if we reset to EIO, we guarantee that the user
2225+ * will get a "confusing" error message.
2226+ */
2227+
2228+out_unlock:
2229+ unlock_dir(new_lower_parent_dentry);
2230+
2231+out_free:
2232+ /*
2233+ * If old_lower_dentry was not a file, then we need to dput it. If
2234+ * it was a file, then it was already dput indirectly by other
2235+ * functions we call above which operate on regular files.
+ *
+ * NOTE(review): the error paths above that call __clear() have
+ * already dput old_lower_dentry once; for non-regular files this
+ * looks like a second dput of the same reference - confirm.
2236+ */
2237+ if (old_lower_dentry && old_lower_dentry->d_inode &&
2238+ !S_ISREG(old_lower_dentry->d_inode->i_mode))
2239+ dput(old_lower_dentry);
2240+ kfree(symbuf);
2241+
2242+ if (err) {
2243+ /*
2244+ * if directory creation succeeded, but inode copyup failed,
2245+ * then purge new dentries.
2246+ */
2247+ if (dbstart(dentry) < old_bstart &&
2248+ ibstart(dentry->d_inode) > dbstart(dentry))
2249+ __clear(dentry, NULL, old_bstart, old_bend,
2250+ unionfs_lower_dentry(dentry), dbstart(dentry));
2251+ goto out;
2252+ }
2253+ if (!S_ISDIR(dentry->d_inode->i_mode)) {
2254+ unionfs_postcopyup_release(dentry);
2255+ if (!unionfs_lower_inode(dentry->d_inode)) {
2256+ /*
2257+ * If we got here, then we copied up to an
2258+ * unlinked-open file, whose name is .unionfsXXXXX.
2259+ */
2260+ struct inode *inode = new_lower_dentry->d_inode;
2261+ atomic_inc(&inode->i_count);
2262+ unionfs_set_lower_inode_idx(dentry->d_inode,
2263+ ibstart(dentry->d_inode),
2264+ inode);
2265+ }
2266+ }
2267+ unionfs_postcopyup_setmnt(dentry);
2268+ /* sync inode times from copied-up inode to our inode */
2269+ unionfs_copy_attr_times(dentry->d_inode);
2270+ unionfs_check_inode(dir);
2271+ unionfs_check_dentry(dentry);
2272+out:
2273+ return err;
2274+}
2275+
2276+/*
2277+ * This function creates a copy of a file represented by 'file' which
2278+ * currently resides in branch 'bstart' to branch 'new_bindex.' The copy
2279+ * will be named "name".
+ *
+ * It wraps copyup_dentry() and, on success, sets fbstart(file) to
+ * 'new_bindex' and installs the returned output file as the lower file
+ * of that branch.  Returns copyup_dentry()'s result.
2280+ */
2281+int copyup_named_file(struct inode *dir, struct file *file, char *name,
2282+ int bstart, int new_bindex, loff_t len)
2283+{
2284+ int err = 0;
2285+ struct file *output_file = NULL;
2286+
2287+ err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2288+ name, strlen(name), &output_file, len);
2289+ if (!err) {
2290+ fbstart(file) = new_bindex;
2291+ unionfs_set_lower_file_idx(file, new_bindex, output_file);
2292+ }
2293+
2294+ return err;
2295+}
2296+
2297+/*
2298+ * This function creates a copy of a file represented by 'file' which
2299+ * currently resides in branch 'bstart' to branch 'new_bindex'.
+ *
+ * The copy keeps the name of file's dentry.  On success, fbstart(file)
+ * is set to 'new_bindex' and the output file returned by copyup_dentry()
+ * becomes the lower file of that branch.  Returns copyup_dentry()'s
+ * result.
2300+ */
2301+int copyup_file(struct inode *dir, struct file *file, int bstart,
2302+ int new_bindex, loff_t len)
2303+{
2304+ int err = 0;
2305+ struct file *output_file = NULL;
2306+ struct dentry *dentry = file->f_path.dentry;
2307+
2308+ err = copyup_dentry(dir, dentry, bstart, new_bindex,
2309+ dentry->d_name.name, dentry->d_name.len,
2310+ &output_file, len);
2311+ if (!err) {
2312+ fbstart(file) = new_bindex;
2313+ unionfs_set_lower_file_idx(file, new_bindex, output_file);
2314+ }
2315+
2316+ return err;
2317+}
2318+
2319+/*
+ * purge a dentry's lower-branch states (dput/mntput, etc.)
+ *
+ * Scans the branches from min(old_bstart, bindex) to max(old_bend, bindex).
+ * Negative lower dentries other than the one at @bindex are dput and
+ * cleared together with their lower mnt; dbstart/dbend of @dentry are then
+ * set to the first and last branch that still has a lower dentry (both
+ * fall back to @bindex if none was found).
+ */
2320+static void __cleanup_dentry(struct dentry *dentry, int bindex,
2321+ int old_bstart, int old_bend)
2322+{
2323+ int loop_start;
2324+ int loop_end;
2325+ int new_bstart = -1;
2326+ int new_bend = -1;
2327+ int i;
2328+
2329+ loop_start = min(old_bstart, bindex);
2330+ loop_end = max(old_bend, bindex);
2331+
2332+ /*
2333+ * This loop sets the bstart and bend for the new dentry by
2334+ * traversing from left to right. It also dputs all negative
2335+ * dentries except bindex
2336+ */
2337+ for (i = loop_start; i <= loop_end; i++) {
2338+ if (!unionfs_lower_dentry_idx(dentry, i))
2339+ continue;
2340+
2341+ if (i == bindex) {
2342+ new_bend = i;
2343+ if (new_bstart < 0)
2344+ new_bstart = i;
2345+ continue;
2346+ }
2347+
2348+ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2349+ dput(unionfs_lower_dentry_idx(dentry, i));
2350+ unionfs_set_lower_dentry_idx(dentry, i, NULL);
2351+
2352+ unionfs_mntput(dentry, i);
2353+ unionfs_set_lower_mnt_idx(dentry, i, NULL);
2354+ } else {
2355+ if (new_bstart < 0)
2356+ new_bstart = i;
2357+ new_bend = i;
2358+ }
2359+ }
2360+
2361+ if (new_bstart < 0)
2362+ new_bstart = bindex;
2363+ if (new_bend < 0)
2364+ new_bend = bindex;
2365+ dbstart(dentry) = new_bstart;
2366+ dbend(dentry) = new_bend;
2367+
2368+}
2369+
2370+/*
+ * set lower inode ptr and update bstart & bend if necessary
+ *
+ * Stores igrab(lower->d_inode) as upper's lower inode for @bindex, then
+ * widens ibstart/ibend of upper's inode so that the range includes
+ * @bindex.
+ */
2371+static void __set_inode(struct dentry *upper, struct dentry *lower,
2372+ int bindex)
2373+{
2374+ unionfs_set_lower_inode_idx(upper->d_inode, bindex,
2375+ igrab(lower->d_inode));
2376+ if (likely(ibstart(upper->d_inode) > bindex))
2377+ ibstart(upper->d_inode) = bindex;
2378+ if (likely(ibend(upper->d_inode) < bindex))
2379+ ibend(upper->d_inode) = bindex;
2380+
2381+}
2382+
2383+/*
+ * set lower dentry ptr and update bstart & bend if necessary
+ *
+ * Stores @lower as upper's lower dentry for @bindex (no extra reference
+ * is taken here), then widens dbstart/dbend of @upper so that the range
+ * includes @bindex.
+ */
2384+static void __set_dentry(struct dentry *upper, struct dentry *lower,
2385+ int bindex)
2386+{
2387+ unionfs_set_lower_dentry_idx(upper, bindex, lower);
2388+ if (likely(dbstart(upper) > bindex))
2389+ dbstart(upper) = bindex;
2390+ if (likely(dbend(upper) < bindex))
2391+ dbend(upper) = bindex;
2392+}
2393+
2394+/*
2395+ * This function replicates the directory structure up-to given dentry
2396+ * in the bindex branch.
+ *
+ * @dir: only its ->i_sb is used (read-only branch check, permissions)
+ * @dentry: the locked unionfs dentry whose ancestors must exist in @bindex
+ * @name: name to look up for @dentry itself in its lower parent
+ * @bindex: the branch in which to create the missing directories
+ *
+ * Walks up from @dentry until an ancestor with a lower dentry in @bindex
+ * is found, then walks back down, creating each missing directory and
+ * copying its permissions.  Finally looks up @name in the lower parent
+ * and installs the result as @dentry's lower dentry for @bindex.
+ *
+ * Returns that lower dentry, or an ERR_PTR() on failure.
2397+ */
2398+struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2399+ const char *name, int bindex)
2400+{
2401+ int err;
2402+ struct dentry *child_dentry;
2403+ struct dentry *parent_dentry;
2404+ struct dentry *lower_parent_dentry = NULL;
2405+ struct dentry *lower_dentry = NULL;
2406+ const char *childname;
2407+ unsigned int childnamelen;
2408+ int nr_dentry;
2409+ int count = 0;
2410+ int old_bstart;
2411+ int old_bend;
2412+ struct dentry **path = NULL;
2413+ struct super_block *sb;
2414+
2415+ verify_locked(dentry);
2416+
2417+ err = is_robranch_super(dir->i_sb, bindex);
2418+ if (err) {
2419+ lower_dentry = ERR_PTR(err);
2420+ goto out;
2421+ }
2422+
2423+ old_bstart = dbstart(dentry);
2424+ old_bend = dbend(dentry);
2425+
2426+ lower_dentry = ERR_PTR(-ENOMEM);
2427+
2428+ /* There is no sense allocating any less than the minimum. */
2429+ nr_dentry = 1;
2430+ path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2431+ if (unlikely(!path))
2432+ goto out;
2433+
2434+ /* assume the negative dentry of unionfs as the parent dentry */
2435+ parent_dentry = dentry;
2436+
2437+ /*
2438+ * This loop finds the first parent that exists in the given branch.
2439+ * We start building the directory structure from there. At the end
2440+ * of the loop, the following should hold:
2441+ * - child_dentry is the first nonexistent child
2442+ * - parent_dentry is the first existent parent
2443+ * - path[0] is the deepest child
2444+ * - path[count] is the first child to create
2445+ */
2446+ do {
2447+ child_dentry = parent_dentry;
2448+
2449+ /* find the parent directory dentry in unionfs */
2450+ parent_dentry = dget_parent(child_dentry);
2451+
2452+ /* find out the lower_parent_dentry in the given branch */
2453+ lower_parent_dentry =
2454+ unionfs_lower_dentry_idx(parent_dentry, bindex);
2455+
2456+ /* grow path table */
2457+ if (count == nr_dentry) {
2458+ void *p;
2459+
2460+ nr_dentry *= 2;
2461+ p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2462+ GFP_KERNEL);
2463+ if (unlikely(!p)) {
+ /*
+ * child_dentry is not stored in path[] yet, so
+ * release it and its parent by hand, and step
+ * count back to the last valid slot so the
+ * cleanup loop at "out" stays inside the array.
+ */
+ dput(parent_dentry);
+ dput(child_dentry);
+ count--;
2464+ lower_dentry = ERR_PTR(-ENOMEM);
2465+ goto out;
2466+ }
2467+ path = p;
2468+ }
2469+
2470+ /* store the child dentry */
2471+ path[count++] = child_dentry;
2472+ } while (!lower_parent_dentry);
2473+ count--;
2474+
2475+ sb = dentry->d_sb;
2476+
2477+ /*
2478+ * This code goes between the begin/end labels and basically
2479+ * emulates a while(child_dentry != dentry), only cleaner and
2480+ * shorter than what would be a much longer while loop.
2481+ */
2482+begin:
2483+ /* get lower parent dir in the current branch */
2484+ lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2485+ dput(parent_dentry);
2486+
2487+ /* init the values to lookup */
2488+ childname = child_dentry->d_name.name;
2489+ childnamelen = child_dentry->d_name.len;
2490+
2491+ if (child_dentry != dentry) {
2492+ /* lookup child in the underlying file system */
4ae1df7a 2493+ lower_dentry = lookup_lck_len(childname, lower_parent_dentry,
2380c486
JR
2494+ childnamelen);
2495+ if (IS_ERR(lower_dentry))
2496+ goto out;
2497+ } else {
2498+ /*
2499+ * Is the name a whiteout of the child name ? lookup the
2500+ * whiteout child in the underlying file system
2501+ */
4ae1df7a 2502+ lower_dentry = lookup_lck_len(name, lower_parent_dentry,
2380c486
JR
2503+ strlen(name));
2504+ if (IS_ERR(lower_dentry))
2505+ goto out;
2506+
2507+ /* Replace the current dentry (if any) with the new one */
2508+ dput(unionfs_lower_dentry_idx(dentry, bindex));
2509+ unionfs_set_lower_dentry_idx(dentry, bindex,
2510+ lower_dentry);
2511+
2512+ __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2513+ goto out;
2514+ }
2515+
2516+ if (lower_dentry->d_inode) {
2517+ /*
2518+ * since this already exists we dput to avoid
2519+ * multiple references on the same dentry
2520+ */
2521+ dput(lower_dentry);
2522+ } else {
2523+ struct sioq_args args;
2524+
2525+ /* it's a negative dentry, create a new dir */
2526+ lower_parent_dentry = lock_parent(lower_dentry);
2527+
2528+ args.mkdir.parent = lower_parent_dentry->d_inode;
2529+ args.mkdir.dentry = lower_dentry;
2530+ args.mkdir.mode = child_dentry->d_inode->i_mode;
2531+
2532+ run_sioq(__unionfs_mkdir, &args);
2533+ err = args.err;
2534+
2535+ if (!err)
2536+ err = copyup_permissions(dir->i_sb, child_dentry,
2537+ lower_dentry);
2538+ unlock_dir(lower_parent_dentry);
2539+ if (err) {
2540+ dput(lower_dentry);
2541+ lower_dentry = ERR_PTR(err);
2542+ goto out;
2543+ }
2544+
2545+ }
2546+
2547+ __set_inode(child_dentry, lower_dentry, bindex);
2548+ __set_dentry(child_dentry, lower_dentry, bindex);
2549+ /*
2550+ * update times of this dentry, but also the parent, because if
2551+ * we changed, the parent may have changed too.
2552+ */
2553+ fsstack_copy_attr_times(parent_dentry->d_inode,
2554+ lower_parent_dentry->d_inode);
2555+ unionfs_copy_attr_times(child_dentry->d_inode);
2556+
2557+ parent_dentry = child_dentry;
2558+ child_dentry = path[--count];
2559+ goto begin;
2560+out:
2561+ /* drop the dget_parent() references still held in path[1..count] */
2562+ if (IS_ERR(lower_dentry))
2563+ while (count)
2564+ dput(path[count--]);
2565+ kfree(path);
2566+ return lower_dentry;
2567+}
2568+
2569+/*
2570+ * Post-copyup helper to ensure we have valid mnts: set lower mnt of
2571+ * dentry+parents to the first parent node that has an mnt.
+ *
+ * Works on branch dbstart(dentry) only and returns immediately if
+ * @dentry already has a lower mnt there.  Each dentry that gets a mnt
+ * receives its own reference via unionfs_mntget().
2572+ */
2573+void unionfs_postcopyup_setmnt(struct dentry *dentry)
2574+{
2575+ struct dentry *parent, *hasone;
2576+ int bindex = dbstart(dentry);
2577+
2578+ if (unionfs_lower_mnt_idx(dentry, bindex))
2579+ return;
2580+ hasone = dentry->d_parent;
2581+ /* this loop should stop at root dentry */
2582+ while (!unionfs_lower_mnt_idx(hasone, bindex))
2583+ hasone = hasone->d_parent;
2584+ parent = dentry;
2585+ while (!unionfs_lower_mnt_idx(parent, bindex)) { /* ends at hasone */
2586+ unionfs_set_lower_mnt_idx(parent, bindex,
2587+ unionfs_mntget(hasone, bindex));
2588+ parent = parent->d_parent;
2589+ }
2590+}
2591+
2592+/*
2593+ * Post-copyup helper to release all non-directory source objects of a
2594+ * copied-up file. Regular files should have only one lower object.
+ *
+ * Releases the lower paths and lower inodes of the branches from
+ * dbstart(dentry) + 1 to dbend(dentry), then collapses the dentry's and
+ * inode's branch ranges to dbstart(dentry).  Must not be called on a
+ * directory (BUG_ON).
2595+ */
2596+void unionfs_postcopyup_release(struct dentry *dentry)
2597+{
2598+ int bstart, bend;
2599+
2600+ BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2601+ bstart = dbstart(dentry);
2602+ bend = dbend(dentry);
2603+
2604+ path_put_lowers(dentry, bstart + 1, bend, false);
2605+ iput_lowers(dentry->d_inode, bstart + 1, bend, false);
2606+
2607+ dbend(dentry) = bstart;
2608+ ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bstart;
2609+}
0c5527e5
AM
2610diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2611new file mode 100644
6b53c3da 2612index 0000000..c07f697
0c5527e5
AM
2613--- /dev/null
2614+++ b/fs/unionfs/debug.c
6b53c3da 2615@@ -0,0 +1,549 @@
2380c486 2616+/*
63b09289 2617+ * Copyright (c) 2003-2011 Erez Zadok
2380c486 2618+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
63b09289
JR
2619+ * Copyright (c) 2003-2011 Stony Brook University
2620+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
2621+ *
2622+ * This program is free software; you can redistribute it and/or modify
2623+ * it under the terms of the GNU General Public License version 2 as
2624+ * published by the Free Software Foundation.
2625+ */
2626+
2627+#include "union.h"
2628+
2629+/*
2630+ * Helper debugging functions for maintainers (and for users to report
2631+ * useful information back to maintainers)
2632+ */
2633+
2634+/*
+ * it's always useful to know what part of the code called us
+ *
+ * Prints the caller's file, function and line once per checker call.  It
+ * relies on an int variable named printed_caller being in scope at the
+ * point of use, and sets it to 1 after printing.
+ */
2635+#define PRINT_CALLER(fname, fxn, line) \
2636+ do { \
2637+ if (!printed_caller) { \
2638+ pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \
2639+ printed_caller = 1; \
2640+ } \
2641+ } while (0)
2642+
2643+/*
2644+ * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2645+ * the fan-out of various Unionfs objects. We check that no lower objects
2646+ * exist outside the start/end branch range; that all objects within are
2647+ * non-NULL (with some allowed exceptions); that for every lower file
2648+ * there's a lower dentry+inode; that the start/end ranges match for all
2649+ * corresponding lower objects; that open files/symlinks have only one lower
2650+ * object, but directories can have several; and more.
+ *
+ * __unionfs_check_inode() reports problems via pr_debug(), each tagged:
+ *   Ci0 - istart > iend
+ *   Ci1 - exactly one of istart/iend is -1
+ *   Ci2 - non-directory with istart != iend
+ *   Ci3 - no unionfs inode info
+ *   Ci4 - no lower_inodes array
+ *   Ci5 - lower inode present outside istart..iend
+ *   Ci6 - lower inode pointer equals the POISON_INUSE byte pattern
+ *   Ci7 - lower inode missing inside istart..iend (directories may have
+ *         gaps strictly between istart and iend)
+ * @fname, @fxn and @line identify the caller and are printed once.
2651+ */
2652+void __unionfs_check_inode(const struct inode *inode,
2653+ const char *fname, const char *fxn, int line)
2654+{
2655+ int bindex;
2656+ int istart, iend;
2657+ struct inode *lower_inode;
2658+ struct super_block *sb;
2659+ int printed_caller = 0;
2660+ void *poison_ptr;
2661+
2662+ /* for inodes now */
2663+ BUG_ON(!inode);
2664+ sb = inode->i_sb;
2665+ istart = ibstart(inode);
2666+ iend = ibend(inode);
2667+ /* don't check inode if no lower branches */
2668+ if (istart < 0 && iend < 0)
2669+ return;
2670+ if (unlikely(istart > iend)) {
2671+ PRINT_CALLER(fname, fxn, line);
2672+ pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2673+ inode, istart, iend);
2674+ }
2675+ if (unlikely((istart == -1 && iend != -1) ||
2676+ (istart != -1 && iend == -1))) {
2677+ PRINT_CALLER(fname, fxn, line);
2678+ pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2679+ inode, istart, iend);
2680+ }
2681+ if (!S_ISDIR(inode->i_mode)) {
2682+ if (unlikely(iend != istart)) {
2683+ PRINT_CALLER(fname, fxn, line);
2684+ pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2685+ inode, istart, iend);
2686+ }
2687+ }
2688+
2689+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2690+ if (unlikely(!UNIONFS_I(inode))) {
2691+ PRINT_CALLER(fname, fxn, line);
2692+ pr_debug(" Ci3: no inode_info %p\n", inode);
2693+ return;
2694+ }
2695+ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2696+ PRINT_CALLER(fname, fxn, line);
2697+ pr_debug(" Ci4: no lower_inodes %p\n", inode);
2698+ return;
2699+ }
2700+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2701+ if (lower_inode) {
2702+ memset(&poison_ptr, POISON_INUSE, sizeof(void *)); /* pointer made of poison bytes */
2703+ if (unlikely(bindex < istart || bindex > iend)) {
2704+ PRINT_CALLER(fname, fxn, line);
2705+ pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2706+ "istart/end=%d:%d\n", inode,
2707+ lower_inode, bindex, istart, iend);
2708+ } else if (unlikely(lower_inode == poison_ptr)) {
2709+ /* freed inode! */
2710+ PRINT_CALLER(fname, fxn, line);
2711+ pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2712+ "istart/end=%d:%d\n", inode,
2713+ lower_inode, bindex, istart, iend);
2714+ }
2715+ continue;
2716+ }
2717+ /* if we get here, then lower_inode == NULL */
2718+ if (bindex < istart || bindex > iend)
2719+ continue;
2720+ /*
2721+ * directories can have NULL lower inodes in b/t start/end,
2722+ * but NOT if at the start/end range.
2723+ */
2724+ if (unlikely(S_ISDIR(inode->i_mode) &&
2725+ bindex > istart && bindex < iend))
2726+ continue;
2727+ PRINT_CALLER(fname, fxn, line);
2728+ pr_debug(" Ci7: inode/linode=%p:%p "
2729+ "bindex=%d istart/end=%d:%d\n",
2730+ inode, lower_inode, bindex, istart, iend);
2731+ }
2732+}
2733+
2734+void __unionfs_check_dentry(const struct dentry *dentry,
2735+ const char *fname, const char *fxn, int line)
2736+{
2737+ int bindex;
2738+ int dstart, dend, istart, iend;
2739+ struct dentry *lower_dentry;
2740+ struct inode *inode, *lower_inode;
2741+ struct super_block *sb;
2742+ struct vfsmount *lower_mnt;
2743+ int printed_caller = 0;
2744+ void *poison_ptr;
2745+
2746+ BUG_ON(!dentry);
2747+ sb = dentry->d_sb;
2748+ inode = dentry->d_inode;
2749+ dstart = dbstart(dentry);
2750+ dend = dbend(dentry);
2751+ /* don't check dentry/mnt if no lower branches */
2752+ if (dstart < 0 && dend < 0)
2753+ goto check_inode;
2754+ BUG_ON(dstart > dend);
2755+
2756+ if (unlikely((dstart == -1 && dend != -1) ||
2757+ (dstart != -1 && dend == -1))) {
2758+ PRINT_CALLER(fname, fxn, line);
2759+ pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2760+ dentry, dstart, dend);
2761+ }
2762+ /*
2763+ * check for NULL dentries inside the start/end range, or
2764+ * non-NULL dentries outside the start/end range.
2765+ */
2766+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2767+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2768+ if (lower_dentry) {
2769+ if (unlikely(bindex < dstart || bindex > dend)) {
2770+ PRINT_CALLER(fname, fxn, line);
2771+ pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2772+ "bindex=%d dstart/end=%d:%d\n",
2773+ dentry, lower_dentry,
2774+ (lower_dentry ? lower_dentry->d_inode :
2775+ (void *) -1L),
2776+ bindex, dstart, dend);
2777+ }
2778+ } else { /* lower_dentry == NULL */
2779+ if (bindex < dstart || bindex > dend)
2780+ continue;
2781+ /*
2782+ * Directories can have NULL lower inodes in b/t
2783+ * start/end, but NOT if at the start/end range.
2784+ * Ignore this rule, however, if this is a NULL
2785+ * dentry or a deleted dentry.
2786+ */
2787+ if (unlikely(!d_deleted((struct dentry *) dentry) &&
2788+ inode &&
2789+ !(inode && S_ISDIR(inode->i_mode) &&
2790+ bindex > dstart && bindex < dend))) {
2791+ PRINT_CALLER(fname, fxn, line);
2792+ pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2793+ "bindex=%d dstart/end=%d:%d\n",
2794+ dentry, lower_dentry,
2795+ (lower_dentry ?
2796+ lower_dentry->d_inode :
2797+ (void *) -1L),
2798+ bindex, dstart, dend);
2799+ }
2800+ }
2801+ }
2802+
2803+ /* check for vfsmounts same as for dentries */
2804+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2805+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2806+ if (lower_mnt) {
2807+ if (unlikely(bindex < dstart || bindex > dend)) {
2808+ PRINT_CALLER(fname, fxn, line);
2809+ pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2810+ "dstart/end=%d:%d\n", dentry,
2811+ lower_mnt, bindex, dstart, dend);
2812+ }
2813+ } else { /* lower_mnt == NULL */
2814+ if (bindex < dstart || bindex > dend)
2815+ continue;
2816+ /*
2817+ * Directories can have NULL lower inodes in b/t
2818+ * start/end, but NOT if at the start/end range.
2819+ * Ignore this rule, however, if this is a NULL
2820+ * dentry.
2821+ */
2822+ if (unlikely(inode &&
2823+ !(inode && S_ISDIR(inode->i_mode) &&
2824+ bindex > dstart && bindex < dend))) {
2825+ PRINT_CALLER(fname, fxn, line);
2826+ pr_debug(" CM1: dentry/lmnt=%p:%p "
2827+ "bindex=%d dstart/end=%d:%d\n",
2828+ dentry, lower_mnt, bindex,
2829+ dstart, dend);
2830+ }
2831+ }
2832+ }
2833+
2834+check_inode:
2835+ /* for inodes now */
2836+ if (!inode)
2837+ return;
2838+ istart = ibstart(inode);
2839+ iend = ibend(inode);
2840+ /* don't check inode if no lower branches */
2841+ if (istart < 0 && iend < 0)
2842+ return;
2843+ BUG_ON(istart > iend);
2844+ if (unlikely((istart == -1 && iend != -1) ||
2845+ (istart != -1 && iend == -1))) {
2846+ PRINT_CALLER(fname, fxn, line);
2847+ pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2848+ dentry, inode, istart, iend);
2849+ }
2850+ if (unlikely(istart != dstart)) {
2851+ PRINT_CALLER(fname, fxn, line);
2852+ pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2853+ dentry, inode, istart, dstart);
2854+ }
2855+ if (unlikely(iend != dend)) {
2856+ PRINT_CALLER(fname, fxn, line);
2857+ pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2858+ dentry, inode, iend, dend);
2859+ }
2860+
2861+ if (!S_ISDIR(inode->i_mode)) {
2862+ if (unlikely(dend != dstart)) {
2863+ PRINT_CALLER(fname, fxn, line);
2864+ pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2865+ dentry, inode, dstart, dend);
2866+ }
2867+ if (unlikely(iend != istart)) {
2868+ PRINT_CALLER(fname, fxn, line);
2869+ pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2870+ dentry, inode, istart, iend);
2871+ }
2872+ }
2873+
2874+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2875+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2876+ if (lower_inode) {
2877+ memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2878+ if (unlikely(bindex < istart || bindex > iend)) {
2879+ PRINT_CALLER(fname, fxn, line);
2880+ pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2881+ "istart/end=%d:%d\n", dentry,
2882+ lower_inode, bindex, istart, iend);
2883+ } else if (unlikely(lower_inode == poison_ptr)) {
2884+ /* freed inode! */
2885+ PRINT_CALLER(fname, fxn, line);
2886+ pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2887+ "istart/end=%d:%d\n", dentry,
2888+ lower_inode, bindex, istart, iend);
2889+ }
2890+ continue;
2891+ }
2892+ /* if we get here, then lower_inode == NULL */
2893+ if (bindex < istart || bindex > iend)
2894+ continue;
2895+ /*
2896+ * directories can have NULL lower inodes in b/t start/end,
2897+ * but NOT if at the start/end range.
2898+ */
2899+ if (unlikely(S_ISDIR(inode->i_mode) &&
2900+ bindex > istart && bindex < iend))
2901+ continue;
2902+ PRINT_CALLER(fname, fxn, line);
2903+ pr_debug(" CI7: dentry/linode=%p:%p "
2904+ "bindex=%d istart/end=%d:%d\n",
2905+ dentry, lower_inode, bindex, istart, iend);
2906+ }
2907+
2908+ /*
2909+ * If it's a directory, then intermediate objects b/t start/end can
2910+ * be NULL. But, check that all three are NULL: lower dentry, mnt,
2911+ * and inode.
2912+ */
2913+ if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2914+ for (bindex = dstart+1; bindex < dend; bindex++) {
2915+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2916+ lower_dentry = unionfs_lower_dentry_idx(dentry,
2917+ bindex);
2918+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2919+ if (unlikely(!((lower_inode && lower_dentry &&
2920+ lower_mnt) ||
2921+ (!lower_inode &&
2922+ !lower_dentry && !lower_mnt)))) {
2923+ PRINT_CALLER(fname, fxn, line);
2924+ pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2925+ "bindex=%d dstart/end=%d:%d\n",
2926+ lower_mnt, lower_dentry, lower_inode,
2927+ bindex, dstart, dend);
2928+ }
2929+ }
2930+ /* check if lower inode is newer than upper one (it shouldn't) */
2931+ if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) {
2932+ PRINT_CALLER(fname, fxn, line);
2933+ for (bindex = ibstart(inode); bindex <= ibend(inode);
2934+ bindex++) {
2935+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
2936+ if (unlikely(!lower_inode))
2937+ continue;
2938+ pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2939+ "ctime/lctime=%lu.%lu/%lu.%lu\n",
2940+ bindex,
2941+ inode->i_mtime.tv_sec,
2942+ inode->i_mtime.tv_nsec,
2943+ lower_inode->i_mtime.tv_sec,
2944+ lower_inode->i_mtime.tv_nsec,
2945+ inode->i_ctime.tv_sec,
2946+ inode->i_ctime.tv_nsec,
2947+ lower_inode->i_ctime.tv_sec,
2948+ lower_inode->i_ctime.tv_nsec);
2949+ }
2950+ }
2951+}
2952+
2953+void __unionfs_check_file(const struct file *file,
2954+ const char *fname, const char *fxn, int line)
2955+{
2956+ int bindex;
2957+ int dstart, dend, fstart, fend;
2958+ struct dentry *dentry;
2959+ struct file *lower_file;
2960+ struct inode *inode;
2961+ struct super_block *sb;
2962+ int printed_caller = 0;
2963+
2964+ BUG_ON(!file);
2965+ dentry = file->f_path.dentry;
2966+ sb = dentry->d_sb;
2967+ dstart = dbstart(dentry);
2968+ dend = dbend(dentry);
2969+ BUG_ON(dstart > dend);
2970+ fstart = fbstart(file);
2971+ fend = fbend(file);
2972+ BUG_ON(fstart > fend);
2973+
2974+ if (unlikely((fstart == -1 && fend != -1) ||
2975+ (fstart != -1 && fend == -1))) {
2976+ PRINT_CALLER(fname, fxn, line);
2977+ pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2978+ file, dentry, fstart, fend);
2979+ }
6b53c3da
AM
2980+ /* d_deleted dentries can be ignored for this test */
2981+ if (unlikely(fstart != dstart) && !d_deleted(dentry)) {
2380c486
JR
2982+ PRINT_CALLER(fname, fxn, line);
2983+ pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2984+ file, dentry, fstart, dstart);
2985+ }
6b53c3da 2986+ if (unlikely(fend != dend) && !d_deleted(dentry)) {
2380c486
JR
2987+ PRINT_CALLER(fname, fxn, line);
2988+ pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2989+ file, dentry, fend, dend);
2990+ }
2991+ inode = dentry->d_inode;
2992+ if (!S_ISDIR(inode->i_mode)) {
2993+ if (unlikely(fend != fstart)) {
2994+ PRINT_CALLER(fname, fxn, line);
2995+ pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2996+ file, inode, fstart, fend);
2997+ }
2998+ if (unlikely(dend != dstart)) {
2999+ PRINT_CALLER(fname, fxn, line);
3000+ pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
3001+ file, dentry, dstart, dend);
3002+ }
3003+ }
3004+
3005+ /*
3006+ * check for NULL dentries inside the start/end range, or
3007+ * non-NULL dentries outside the start/end range.
3008+ */
3009+ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
3010+ lower_file = unionfs_lower_file_idx(file, bindex);
3011+ if (lower_file) {
3012+ if (unlikely(bindex < fstart || bindex > fend)) {
3013+ PRINT_CALLER(fname, fxn, line);
3014+ pr_debug(" CF5: file/lower=%p:%p bindex=%d "
3015+ "fstart/end=%d:%d\n", file,
3016+ lower_file, bindex, fstart, fend);
3017+ }
3018+ } else { /* lower_file == NULL */
3019+ if (bindex >= fstart && bindex <= fend) {
3020+ /*
3021+ * directories can have NULL lower inodes in
3022+ * b/t start/end, but NOT if at the
3023+ * start/end range.
3024+ */
3025+ if (unlikely(!(S_ISDIR(inode->i_mode) &&
3026+ bindex > fstart &&
3027+ bindex < fend))) {
3028+ PRINT_CALLER(fname, fxn, line);
3029+ pr_debug(" CF6: file/lower=%p:%p "
3030+ "bindex=%d fstart/end=%d:%d\n",
3031+ file, lower_file, bindex,
3032+ fstart, fend);
3033+ }
3034+ }
3035+ }
3036+ }
3037+
3038+ __unionfs_check_dentry(dentry, fname, fxn, line);
3039+}
3040+
3041+void __unionfs_check_nd(const struct nameidata *nd,
3042+ const char *fname, const char *fxn, int line)
3043+{
3044+ struct file *file;
3045+ int printed_caller = 0;
3046+
3047+ if (unlikely(!nd))
3048+ return;
3049+ if (nd->flags & LOOKUP_OPEN) {
3050+ file = nd->intent.open.file;
3051+ if (unlikely(file->f_path.dentry &&
3052+ strcmp(file->f_path.dentry->d_sb->s_type->name,
3053+ UNIONFS_NAME))) {
3054+ PRINT_CALLER(fname, fxn, line);
3055+ pr_debug(" CND1: lower_file of type %s\n",
3056+ file->f_path.dentry->d_sb->s_type->name);
2380c486
JR
3057+ }
3058+ }
3059+}
3060+
82260373
AM
3061+static unsigned int __mnt_get_count(struct vfsmount *mnt)
3062+{
3063+#ifdef CONFIG_SMP
3064+ unsigned int count = 0;
3065+ int cpu;
3066+
3067+ for_each_possible_cpu(cpu) {
3068+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
3069+ }
3070+
3071+ return count;
3072+#else
3073+ return mnt->mnt_count;
3074+#endif
3075+}
3076+
2380c486
JR
3077+/* useful to track vfsmount leaks that could cause EBUSY on unmount */
3078+void __show_branch_counts(const struct super_block *sb,
3079+ const char *file, const char *fxn, int line)
3080+{
3081+ int i;
3082+ struct vfsmount *mnt;
3083+
3084+ pr_debug("BC:");
3085+ for (i = 0; i < sbmax(sb); i++) {
3086+ if (likely(sb->s_root))
3087+ mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
3088+ else
3089+ mnt = NULL;
3090+ printk(KERN_CONT "%d:",
82260373 3091+ (mnt ? __mnt_get_count(mnt) : -99));
2380c486
JR
3092+ }
3093+ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
3094+}
3095+
3096+void __show_inode_times(const struct inode *inode,
3097+ const char *file, const char *fxn, int line)
3098+{
3099+ struct inode *lower_inode;
3100+ int bindex;
3101+
3102+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3103+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3104+ if (unlikely(!lower_inode))
3105+ continue;
3106+ pr_debug("IT(%lu:%d): %s:%s:%d "
3107+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3108+ inode->i_ino, bindex,
3109+ file, fxn, line,
3110+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3111+ lower_inode->i_mtime.tv_sec,
3112+ lower_inode->i_mtime.tv_nsec,
3113+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3114+ lower_inode->i_ctime.tv_sec,
3115+ lower_inode->i_ctime.tv_nsec);
3116+ }
3117+}
3118+
3119+void __show_dinode_times(const struct dentry *dentry,
3120+ const char *file, const char *fxn, int line)
3121+{
3122+ struct inode *inode = dentry->d_inode;
3123+ struct inode *lower_inode;
3124+ int bindex;
3125+
3126+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3127+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3128+ if (!lower_inode)
3129+ continue;
3130+ pr_debug("DT(%s:%lu:%d): %s:%s:%d "
3131+ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
3132+ dentry->d_name.name, inode->i_ino, bindex,
3133+ file, fxn, line,
3134+ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
3135+ lower_inode->i_mtime.tv_sec,
3136+ lower_inode->i_mtime.tv_nsec,
3137+ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
3138+ lower_inode->i_ctime.tv_sec,
3139+ lower_inode->i_ctime.tv_nsec);
3140+ }
3141+}
3142+
3143+void __show_inode_counts(const struct inode *inode,
3144+ const char *file, const char *fxn, int line)
3145+{
3146+ struct inode *lower_inode;
3147+ int bindex;
3148+
3149+ if (unlikely(!inode)) {
3150+ pr_debug("SiC: Null inode\n");
3151+ return;
3152+ }
3153+ for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb);
3154+ bindex++) {
3155+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3156+ if (unlikely(!lower_inode))
3157+ continue;
3158+ pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
3159+ inode->i_ino, bindex,
3160+ atomic_read(&(inode)->i_count),
3161+ atomic_read(&(lower_inode)->i_count),
3162+ file, fxn, line);
3163+ }
3164+}
0c5527e5
AM
3165diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
3166new file mode 100644
6b53c3da 3167index 0000000..1628dad
0c5527e5
AM
3168--- /dev/null
3169+++ b/fs/unionfs/dentry.c
6b53c3da 3170@@ -0,0 +1,409 @@
2380c486 3171+/*
63b09289 3172+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
3173+ * Copyright (c) 2003-2006 Charles P. Wright
3174+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3175+ * Copyright (c) 2005-2006 Junjiro Okajima
3176+ * Copyright (c) 2005 Arun M. Krishnakumar
3177+ * Copyright (c) 2004-2006 David P. Quigley
3178+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3179+ * Copyright (c) 2003 Puja Gupta
3180+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
3181+ * Copyright (c) 2003-2011 Stony Brook University
3182+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
3183+ *
3184+ * This program is free software; you can redistribute it and/or modify
3185+ * it under the terms of the GNU General Public License version 2 as
3186+ * published by the Free Software Foundation.
3187+ */
3188+
3189+#include "union.h"
3190+
3191+bool is_negative_lower(const struct dentry *dentry)
3192+{
3193+ int bindex;
3194+ struct dentry *lower_dentry;
3195+
3196+ BUG_ON(!dentry);
3197+ /* cache coherency: check if file was deleted on lower branch */
3198+ if (dbstart(dentry) < 0)
3199+ return true;
3200+ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3201+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3202+ /* unhashed (i.e., unlinked) lower dentries don't count */
3203+ if (lower_dentry && lower_dentry->d_inode &&
3204+ !d_deleted(lower_dentry) &&
3205+ !(lower_dentry->d_flags & DCACHE_NFSFS_RENAMED))
3206+ return false;
3207+ }
3208+ return true;
3209+}
3210+
3211+static inline void __dput_lowers(struct dentry *dentry, int start, int end)
3212+{
3213+ struct dentry *lower_dentry;
3214+ int bindex;
3215+
3216+ if (start < 0)
3217+ return;
3218+ for (bindex = start; bindex <= end; bindex++) {
3219+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3220+ if (!lower_dentry)
3221+ continue;
3222+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3223+ dput(lower_dentry);
3224+ }
3225+}
3226+
3227+/*
3228+ * Purge and invalidate as many data pages of a unionfs inode. This is
3229+ * called when the lower inode has changed, and we want to force processes
3230+ * to re-get the new data.
3231+ */
3232+static inline void purge_inode_data(struct inode *inode)
3233+{
3234+ /* remove all non-private mappings */
3235+ unmap_mapping_range(inode->i_mapping, 0, 0, 0);
3236+ /* invalidate as many pages as possible */
3237+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
3238+ /*
3239+ * Don't try to truncate_inode_pages here, because this could lead
3240+ * to a deadlock between some of address_space ops and dentry
3241+ * revalidation: the address space op is invoked with a lock on our
3242+ * own page, and truncate_inode_pages will block on locked pages.
3243+ */
3244+}
3245+
3246+/*
3247+ * Revalidate a single file/symlink/special dentry. Assume that info nodes
3248+ * of the @dentry and its @parent are locked. Assume parent is valid,
3249+ * otherwise return false (and let's hope the VFS will try to re-lookup this
3250+ * dentry). Returns true if valid, false otherwise.
3251+ */
3252+bool __unionfs_d_revalidate(struct dentry *dentry, struct dentry *parent,
3253+ bool willwrite)
3254+{
3255+ bool valid = true; /* default is valid */
3256+ struct dentry *lower_dentry;
3257+ struct dentry *result;
3258+ int bindex, bstart, bend;
3259+ int sbgen, dgen, pdgen;
3260+ int positive = 0;
3261+ int interpose_flag;
3262+
3263+ verify_locked(dentry);
3264+ verify_locked(parent);
3265+
3266+ /* if the dentry is unhashed, do NOT revalidate */
3267+ if (d_deleted(dentry))
3268+ goto out;
3269+
3270+ dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3271+
3272+ if (is_newer_lower(dentry)) {
3273+ /* root dentry is always valid */
3274+ if (IS_ROOT(dentry)) {
3275+ unionfs_copy_attr_times(dentry->d_inode);
3276+ } else {
3277+ /*
3278+ * reset generation number to zero, guaranteed to be
3279+ * "old"
3280+ */
3281+ dgen = 0;
3282+ atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3283+ }
3284+ if (!willwrite)
3285+ purge_inode_data(dentry->d_inode);
3286+ }
3287+
3288+ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3289+
3290+ BUG_ON(dbstart(dentry) == -1);
3291+ if (dentry->d_inode)
3292+ positive = 1;
3293+
3294+ /* if our dentry is valid, then validate all lower ones */
3295+ if (sbgen == dgen)
3296+ goto validate_lowers;
3297+
3298+ /* The root entry should always be valid */
3299+ BUG_ON(IS_ROOT(dentry));
3300+
3301+ /* We can't work correctly if our parent isn't valid. */
3302+ pdgen = atomic_read(&UNIONFS_D(parent)->generation);
3303+
3304+ /* Free the pointers for our inodes and this dentry. */
3305+ path_put_lowers_all(dentry, false);
3306+
3307+ interpose_flag = INTERPOSE_REVAL_NEG;
3308+ if (positive) {
3309+ interpose_flag = INTERPOSE_REVAL;
3310+ iput_lowers_all(dentry->d_inode, true);
3311+ }
3312+
3313+ if (realloc_dentry_private_data(dentry) != 0) {
3314+ valid = false;
3315+ goto out;
3316+ }
3317+
3318+ result = unionfs_lookup_full(dentry, parent, interpose_flag);
3319+ if (result) {
3320+ if (IS_ERR(result)) {
3321+ valid = false;
3322+ goto out;
3323+ }
3324+ /*
3325+ * current unionfs_lookup_backend() doesn't return
3326+ * a valid dentry
3327+ */
3328+ dput(dentry);
3329+ dentry = result;
3330+ }
3331+
3332+ if (unlikely(positive && is_negative_lower(dentry))) {
3333+ /* call make_bad_inode here ? */
3334+ d_drop(dentry);
3335+ valid = false;
3336+ goto out;
3337+ }
3338+
3339+ /*
3340+ * if we got here then we have revalidated our dentry and all lower
3341+ * ones, so we can return safely.
3342+ */
3343+ if (!valid) /* lower dentry revalidation failed */
3344+ goto out;
3345+
3346+ /*
3347+ * If the parent's gen no. matches the superblock's gen no., then
3348+ * we can update our denty's gen no. If they didn't match, then it
3349+ * was OK to revalidate this dentry with a stale parent, but we'll
3350+ * purposely not update our dentry's gen no. (so it can be redone);
3351+ * and, we'll mark our parent dentry as invalid so it'll force it
3352+ * (and our dentry) to be revalidated.
3353+ */
3354+ if (pdgen == sbgen)
3355+ atomic_set(&UNIONFS_D(dentry)->generation, sbgen);
3356+ goto out;
3357+
3358+validate_lowers:
3359+
3360+ /* The revalidation must occur across all branches */
3361+ bstart = dbstart(dentry);
3362+ bend = dbend(dentry);
3363+ BUG_ON(bstart == -1);
3364+ for (bindex = bstart; bindex <= bend; bindex++) {
63b09289
JR
3365+ int err;
3366+ struct nameidata lower_nd;
3367+
2380c486
JR
3368+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3369+ if (!lower_dentry || !lower_dentry->d_op
3370+ || !lower_dentry->d_op->d_revalidate)
3371+ continue;
3372+ /*
3373+ * Don't pass nameidata to lower file system, because we
3374+ * don't want an arbitrary lower file being opened or
3375+ * returned to us: it may be useless to us because of the
3376+ * fanout nature of unionfs (cf. file/directory open-file
3377+ * invariants). We will open lower files as and when needed
3378+ * later on.
3379+ */
63b09289
JR
3380+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
3381+ if (unlikely(err < 0)) {
2380c486 3382+ valid = false;
63b09289
JR
3383+ break;
3384+ }
3385+ if (!lower_dentry->d_op->d_revalidate(lower_dentry, &lower_nd))
3386+ valid = false;
3387+ release_lower_nd(&lower_nd, err);
2380c486
JR
3388+ }
3389+
3390+ if (!dentry->d_inode ||
3391+ ibstart(dentry->d_inode) < 0 ||
3392+ ibend(dentry->d_inode) < 0) {
3393+ valid = false;
3394+ goto out;
3395+ }
3396+
3397+ if (valid) {
3398+ /*
3399+ * If we get here, and we copy the meta-data from the lower
3400+ * inode to our inode, then it is vital that we have already
3401+ * purged all unionfs-level file data. We do that in the
3402+ * caller (__unionfs_d_revalidate) by calling
3403+ * purge_inode_data.
3404+ */
3405+ unionfs_copy_attr_all(dentry->d_inode,
3406+ unionfs_lower_inode(dentry->d_inode));
3407+ fsstack_copy_inode_size(dentry->d_inode,
3408+ unionfs_lower_inode(dentry->d_inode));
3409+ }
3410+
3411+out:
3412+ return valid;
3413+}
3414+
3415+/*
3416+ * Determine if the lower inode objects have changed from below the unionfs
3417+ * inode. Return true if changed, false otherwise.
3418+ *
3419+ * We check if the mtime or ctime have changed. However, the inode times
3420+ * can be changed by anyone without much protection, including
3421+ * asynchronously. This can sometimes cause unionfs to find that the lower
3422+ * file system doesn't change its inode times quick enough, resulting in a
3423+ * false positive indication (which is harmless, it just makes unionfs do
3424+ * extra work in re-validating the objects). To minimize the chances of
3425+ * these situations, we still consider such small time changes valid, but we
3426+ * don't print debugging messages unless the time changes are greater than
3427+ * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin)
3428+ * because significant changes are more likely due to users manually
3429+ * touching lower files.
3430+ */
3431+bool is_newer_lower(const struct dentry *dentry)
3432+{
3433+ int bindex;
3434+ struct inode *inode;
3435+ struct inode *lower_inode;
3436+
3437+ /* ignore if we're called on semi-initialized dentries/inodes */
3438+ if (!dentry || !UNIONFS_D(dentry))
3439+ return false;
3440+ inode = dentry->d_inode;
3441+ if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3442+ ibstart(inode) < 0 || ibend(inode) < 0)
3443+ return false;
3444+
3445+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3446+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
3447+ if (!lower_inode)
3448+ continue;
3449+
3450+ /* check if mtime/ctime have changed */
3451+ if (unlikely(timespec_compare(&inode->i_mtime,
3452+ &lower_inode->i_mtime) < 0)) {
3453+ if ((lower_inode->i_mtime.tv_sec -
3454+ inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3455+ pr_info("unionfs: new lower inode mtime "
3456+ "(bindex=%d, name=%s)\n", bindex,
3457+ dentry->d_name.name);
3458+ show_dinode_times(dentry);
3459+ }
3460+ return true;
3461+ }
3462+ if (unlikely(timespec_compare(&inode->i_ctime,
3463+ &lower_inode->i_ctime) < 0)) {
3464+ if ((lower_inode->i_ctime.tv_sec -
3465+ inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3466+ pr_info("unionfs: new lower inode ctime "
3467+ "(bindex=%d, name=%s)\n", bindex,
3468+ dentry->d_name.name);
3469+ show_dinode_times(dentry);
3470+ }
3471+ return true;
3472+ }
3473+ }
3474+
3475+ /*
3476+ * Last check: if this is a positive dentry, but somehow all lower
3477+ * dentries are negative or unhashed, then this dentry needs to be
3478+ * revalidated, because someone probably deleted the objects from
3479+ * the lower branches directly.
3480+ */
3481+ if (is_negative_lower(dentry))
3482+ return true;
3483+
3484+ return false; /* default: lower is not newer */
3485+}
3486+
3487+static int unionfs_d_revalidate(struct dentry *dentry,
6b53c3da 3488+ struct nameidata *nd)
2380c486
JR
3489+{
3490+ bool valid = true;
3491+ int err = 1; /* 1 means valid for the VFS */
3492+ struct dentry *parent;
3493+
6b53c3da
AM
3494+ if (nd && nd->flags & LOOKUP_RCU)
3495+ return -ECHILD;
3496+
2380c486
JR
3497+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3498+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3499+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3500+
3501+ valid = __unionfs_d_revalidate(dentry, parent, false);
3502+ if (valid) {
3503+ unionfs_postcopyup_setmnt(dentry);
3504+ unionfs_check_dentry(dentry);
3505+ } else {
3506+ d_drop(dentry);
3507+ err = valid;
3508+ }
3509+ unionfs_unlock_dentry(dentry);
3510+ unionfs_unlock_parent(dentry, parent);
3511+ unionfs_read_unlock(dentry->d_sb);
3512+
3513+ return err;
3514+}
3515+
3516+static void unionfs_d_release(struct dentry *dentry)
3517+{
3518+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3519+ if (unlikely(!UNIONFS_D(dentry)))
3520+ goto out; /* skip if no lower branches */
3521+ /* must lock our branch configuration here */
3522+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3523+
3524+ unionfs_check_dentry(dentry);
3525+ /* this could be a negative dentry, so check first */
3526+ if (dbstart(dentry) < 0) {
3527+ unionfs_unlock_dentry(dentry);
3528+ goto out; /* due to a (normal) failed lookup */
3529+ }
3530+
3531+ /* Release all the lower dentries */
3532+ path_put_lowers_all(dentry, true);
3533+
3534+ unionfs_unlock_dentry(dentry);
3535+
3536+out:
3537+ free_dentry_private_data(dentry);
3538+ unionfs_read_unlock(dentry->d_sb);
3539+ return;
3540+}
3541+
3542+/*
3543+ * Called when we're removing the last reference to our dentry. So we
3544+ * should drop all lower references too.
3545+ */
3546+static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3547+{
3548+ int rc;
3549+
3550+ BUG_ON(!dentry);
3551+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3552+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3553+
3554+ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3555+ goto drop_lower_inodes;
3556+ path_put_lowers_all(dentry, false);
3557+
3558+drop_lower_inodes:
3559+ rc = atomic_read(&inode->i_count);
3560+ if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3561+ /* see Documentation/filesystems/unionfs/issues.txt */
3562+ lockdep_off();
3563+ iput(unionfs_lower_inode(inode));
3564+ lockdep_on();
3565+ unionfs_set_lower_inode(inode, NULL);
3566+ /* XXX: may need to set start/end to -1? */
3567+ }
3568+
3569+ iput(inode);
3570+
3571+ unionfs_unlock_dentry(dentry);
3572+ unionfs_read_unlock(dentry->d_sb);
3573+}
3574+
3575+struct dentry_operations unionfs_dops = {
3576+ .d_revalidate = unionfs_d_revalidate,
3577+ .d_release = unionfs_d_release,
3578+ .d_iput = unionfs_d_iput,
3579+};
0c5527e5
AM
3580diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3581new file mode 100644
63b09289 3582index 0000000..72a9c1a
0c5527e5
AM
3583--- /dev/null
3584+++ b/fs/unionfs/dirfops.c
2380c486
JR
3585@@ -0,0 +1,302 @@
3586+/*
63b09289 3587+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
3588+ * Copyright (c) 2003-2006 Charles P. Wright
3589+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3590+ * Copyright (c) 2005-2006 Junjiro Okajima
3591+ * Copyright (c) 2005 Arun M. Krishnakumar
3592+ * Copyright (c) 2004-2006 David P. Quigley
3593+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3594+ * Copyright (c) 2003 Puja Gupta
3595+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
3596+ * Copyright (c) 2003-2011 Stony Brook University
3597+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
3598+ *
3599+ * This program is free software; you can redistribute it and/or modify
3600+ * it under the terms of the GNU General Public License version 2 as
3601+ * published by the Free Software Foundation.
3602+ */
3603+
3604+#include "union.h"
3605+
3606+/* Make sure our rdstate is playing by the rules. */
3607+static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3608+{
3609+ BUG_ON(rdstate->offset >= DIREOF);
3610+ BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3611+}
3612+
3613+struct unionfs_getdents_callback {
3614+ struct unionfs_dir_state *rdstate;
3615+ void *dirent;
3616+ int entries_written;
3617+ int filldir_called;
3618+ int filldir_error;
3619+ filldir_t filldir;
3620+ struct super_block *sb;
3621+};
3622+
3623+/* based on generic filldir in fs/readir.c */
3624+static int unionfs_filldir(void *dirent, const char *oname, int namelen,
3625+ loff_t offset, u64 ino, unsigned int d_type)
3626+{
3627+ struct unionfs_getdents_callback *buf = dirent;
3628+ struct filldir_node *found = NULL;
3629+ int err = 0;
3630+ int is_whiteout;
3631+ char *name = (char *) oname;
3632+
3633+ buf->filldir_called++;
3634+
3635+ is_whiteout = is_whiteout_name(&name, &namelen);
3636+
3637+ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3638+
3639+ if (found) {
3640+ /*
3641+ * If we had non-whiteout entry in dir cache, then mark it
3642+ * as a whiteout and but leave it in the dir cache.
3643+ */
3644+ if (is_whiteout && !found->whiteout)
3645+ found->whiteout = is_whiteout;
3646+ goto out;
3647+ }
3648+
3649+ /* if 'name' isn't a whiteout, filldir it. */
3650+ if (!is_whiteout) {
3651+ off_t pos = rdstate2offset(buf->rdstate);
3652+ u64 unionfs_ino = ino;
3653+
3654+ err = buf->filldir(buf->dirent, name, namelen, pos,
3655+ unionfs_ino, d_type);
3656+ buf->rdstate->offset++;
3657+ verify_rdstate_offset(buf->rdstate);
3658+ }
3659+ /*
3660+ * If we did fill it, stuff it in our hash, otherwise return an
3661+ * error.
3662+ */
3663+ if (err) {
3664+ buf->filldir_error = err;
3665+ goto out;
3666+ }
3667+ buf->entries_written++;
3668+ err = add_filldir_node(buf->rdstate, name, namelen,
3669+ buf->rdstate->bindex, is_whiteout);
3670+ if (err)
3671+ buf->filldir_error = err;
3672+
3673+out:
3674+ return err;
3675+}
3676+
3677+static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3678+{
3679+ int err = 0;
3680+ struct file *lower_file = NULL;
3681+ struct dentry *dentry = file->f_path.dentry;
3682+ struct dentry *parent;
3683+ struct inode *inode = NULL;
3684+ struct unionfs_getdents_callback buf;
3685+ struct unionfs_dir_state *uds;
3686+ int bend;
3687+ loff_t offset;
3688+
3689+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3690+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3691+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3692+
3693+ err = unionfs_file_revalidate(file, parent, false);
3694+ if (unlikely(err))
3695+ goto out;
3696+
3697+ inode = dentry->d_inode;
3698+
3699+ uds = UNIONFS_F(file)->rdstate;
3700+ if (!uds) {
3701+ if (file->f_pos == DIREOF) {
3702+ goto out;
3703+ } else if (file->f_pos > 0) {
3704+ uds = find_rdstate(inode, file->f_pos);
3705+ if (unlikely(!uds)) {
3706+ err = -ESTALE;
3707+ goto out;
3708+ }
3709+ UNIONFS_F(file)->rdstate = uds;
3710+ } else {
3711+ init_rdstate(file);
3712+ uds = UNIONFS_F(file)->rdstate;
3713+ }
3714+ }
3715+ bend = fbend(file);
3716+
3717+ while (uds->bindex <= bend) {
3718+ lower_file = unionfs_lower_file_idx(file, uds->bindex);
3719+ if (!lower_file) {
3720+ uds->bindex++;
3721+ uds->dirpos = 0;
3722+ continue;
3723+ }
3724+
3725+ /* prepare callback buffer */
3726+ buf.filldir_called = 0;
3727+ buf.filldir_error = 0;
3728+ buf.entries_written = 0;
3729+ buf.dirent = dirent;
3730+ buf.filldir = filldir;
3731+ buf.rdstate = uds;
3732+ buf.sb = inode->i_sb;
3733+
3734+ /* Read starting from where we last left off. */
3735+ offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3736+ if (offset < 0) {
3737+ err = offset;
3738+ goto out;
3739+ }
3740+ err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3741+
3742+ /* Save the position for when we continue. */
3743+ offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3744+ if (offset < 0) {
3745+ err = offset;
3746+ goto out;
3747+ }
3748+ uds->dirpos = offset;
3749+
3750+ /* Copy the atime. */
3751+ fsstack_copy_attr_atime(inode,
3752+ lower_file->f_path.dentry->d_inode);
3753+
3754+ if (err < 0)
3755+ goto out;
3756+
3757+ if (buf.filldir_error)
3758+ break;
3759+
3760+ if (!buf.entries_written) {
3761+ uds->bindex++;
3762+ uds->dirpos = 0;
3763+ }
3764+ }
3765+
3766+ if (!buf.filldir_error && uds->bindex >= bend) {
3767+ /* Save the number of hash entries for next time. */
3768+ UNIONFS_I(inode)->hashsize = uds->hashentries;
3769+ free_rdstate(uds);
3770+ UNIONFS_F(file)->rdstate = NULL;
3771+ file->f_pos = DIREOF;
3772+ } else {
3773+ file->f_pos = rdstate2offset(uds);
3774+ }
3775+
3776+out:
3777+ if (!err)
3778+ unionfs_check_file(file);
3779+ unionfs_unlock_dentry(dentry);
3780+ unionfs_unlock_parent(dentry, parent);
3781+ unionfs_read_unlock(dentry->d_sb);
3782+ return err;
3783+}
3784+
3785+/*
3786+ * This is not meant to be a generic repositioning function. If you do
3787+ * things that aren't supported, then we return EINVAL.
3788+ *
3789+ * What is allowed:
3790+ * (1) seeking to the same position that you are currently at
3791+ * This really has no effect, but returns where you are.
3792+ * (2) seeking to the beginning of the file
3793+ * This throws out all state, and lets you begin again.
3794+ */
3795+static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3796+{
3797+ struct unionfs_dir_state *rdstate;
3798+ struct dentry *dentry = file->f_path.dentry;
3799+ struct dentry *parent;
3800+ loff_t err;
3801+
3802+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3803+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
3804+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3805+
3806+ err = unionfs_file_revalidate(file, parent, false);
3807+ if (unlikely(err))
3808+ goto out;
3809+
3810+ rdstate = UNIONFS_F(file)->rdstate;
3811+
3812+ /*
3813+ * we let users seek to their current position, but not anywhere
3814+ * else.
3815+ */
3816+ if (!offset) {
3817+ switch (origin) {
3818+ case SEEK_SET:
3819+ if (rdstate) {
3820+ free_rdstate(rdstate);
3821+ UNIONFS_F(file)->rdstate = NULL;
3822+ }
3823+ init_rdstate(file);
3824+ err = 0;
3825+ break;
3826+ case SEEK_CUR:
3827+ err = file->f_pos;
3828+ break;
3829+ case SEEK_END:
3830+ /* Unsupported, because we would break everything. */
3831+ err = -EINVAL;
3832+ break;
3833+ }
3834+ } else {
3835+ switch (origin) {
3836+ case SEEK_SET:
3837+ if (rdstate) {
3838+ if (offset == rdstate2offset(rdstate))
3839+ err = offset;
3840+ else if (file->f_pos == DIREOF)
3841+ err = DIREOF;
3842+ else
3843+ err = -EINVAL;
3844+ } else {
3845+ struct inode *inode;
3846+ inode = dentry->d_inode;
3847+ rdstate = find_rdstate(inode, offset);
3848+ if (rdstate) {
3849+ UNIONFS_F(file)->rdstate = rdstate;
3850+ err = rdstate->offset;
3851+ } else {
3852+ err = -EINVAL;
3853+ }
3854+ }
3855+ break;
3856+ case SEEK_CUR:
3857+ case SEEK_END:
3858+ /* Unsupported, because we would break everything. */
3859+ err = -EINVAL;
3860+ break;
3861+ }
3862+ }
3863+
3864+out:
3865+ if (!err)
3866+ unionfs_check_file(file);
3867+ unionfs_unlock_dentry(dentry);
3868+ unionfs_unlock_parent(dentry, parent);
3869+ unionfs_read_unlock(dentry->d_sb);
3870+ return err;
3871+}
3872+
/*
 * Trimmed directory operations: we shouldn't pass everything down since
 * we don't want to operate on partial directories.
 *
 * llseek and readdir are the directory-specific implementations defined
 * in this file; read is generic_read_dir; the remaining handlers are
 * unionfs_* functions defined elsewhere.
 */
struct file_operations unionfs_dir_fops = {
	.llseek = unionfs_dir_llseek,
	.read = generic_read_dir,
	.readdir = unionfs_readdir,
	.unlocked_ioctl = unionfs_ioctl,
	.open = unionfs_open,
	.release = unionfs_file_release,
	.flush = unionfs_flush,
	.fsync = unionfs_fsync,
	.fasync = unionfs_fasync,
};
0c5527e5
AM
3888diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3889new file mode 100644
63b09289 3890index 0000000..62ec9af
0c5527e5
AM
3891--- /dev/null
3892+++ b/fs/unionfs/dirhelper.c
2380c486
JR
3893@@ -0,0 +1,158 @@
3894+/*
63b09289 3895+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
3896+ * Copyright (c) 2003-2006 Charles P. Wright
3897+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3898+ * Copyright (c) 2005-2006 Junjiro Okajima
3899+ * Copyright (c) 2005 Arun M. Krishnakumar
3900+ * Copyright (c) 2004-2006 David P. Quigley
3901+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3902+ * Copyright (c) 2003 Puja Gupta
3903+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
3904+ * Copyright (c) 2003-2011 Stony Brook University
3905+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
3906+ *
3907+ * This program is free software; you can redistribute it and/or modify
3908+ * it under the terms of the GNU General Public License version 2 as
3909+ * published by the Free Software Foundation.
3910+ */
3911+
3912+#include "union.h"
3913+
/* Values for unionfs_rdutil_callback.mode. */
#define RD_NONE 0
#define RD_CHECK_EMPTY 1	/* fail with -ENOTEMPTY on a real entry */
/* The callback structure for check_empty. */
struct unionfs_rdutil_callback {
	int err;		/* last value returned by the callback */
	int filldir_called;	/* set to 1 whenever the callback runs */
	struct unionfs_dir_state *rdstate;	/* hash of names seen so far */
	int mode;		/* one of the RD_* values above */
};
3923+
3924+/* This filldir function makes sure only whiteouts exist within a directory. */
3925+static int readdir_util_callback(void *dirent, const char *oname, int namelen,
3926+ loff_t offset, u64 ino, unsigned int d_type)
3927+{
3928+ int err = 0;
3929+ struct unionfs_rdutil_callback *buf = dirent;
3930+ int is_whiteout;
3931+ struct filldir_node *found;
3932+ char *name = (char *) oname;
3933+
3934+ buf->filldir_called = 1;
3935+
3936+ if (name[0] == '.' && (namelen == 1 ||
3937+ (name[1] == '.' && namelen == 2)))
3938+ goto out;
3939+
3940+ is_whiteout = is_whiteout_name(&name, &namelen);
3941+
3942+ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout);
3943+ /* If it was found in the table there was a previous whiteout. */
3944+ if (found)
3945+ goto out;
3946+
3947+ /*
3948+ * if it wasn't found and isn't a whiteout, the directory isn't
3949+ * empty.
3950+ */
3951+ err = -ENOTEMPTY;
3952+ if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout)
3953+ goto out;
3954+
3955+ err = add_filldir_node(buf->rdstate, name, namelen,
3956+ buf->rdstate->bindex, is_whiteout);
3957+
3958+out:
3959+ buf->err = err;
3960+ return err;
3961+}
3962+
3963+/* Is a directory logically empty? */
3964+int check_empty(struct dentry *dentry, struct dentry *parent,
3965+ struct unionfs_dir_state **namelist)
3966+{
3967+ int err = 0;
3968+ struct dentry *lower_dentry = NULL;
3969+ struct vfsmount *mnt;
3970+ struct super_block *sb;
3971+ struct file *lower_file;
3972+ struct unionfs_rdutil_callback *buf = NULL;
3973+ int bindex, bstart, bend, bopaque;
3974+
3975+ sb = dentry->d_sb;
3976+
3977+
3978+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
3979+
3980+ err = unionfs_partial_lookup(dentry, parent);
3981+ if (err)
3982+ goto out;
3983+
3984+ bstart = dbstart(dentry);
3985+ bend = dbend(dentry);
3986+ bopaque = dbopaque(dentry);
3987+ if (0 <= bopaque && bopaque < bend)
3988+ bend = bopaque;
3989+
3990+ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
3991+ if (unlikely(!buf)) {
3992+ err = -ENOMEM;
3993+ goto out;
3994+ }
3995+ buf->err = 0;
3996+ buf->mode = RD_CHECK_EMPTY;
3997+ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
3998+ if (unlikely(!buf->rdstate)) {
3999+ err = -ENOMEM;
4000+ goto out;
4001+ }
4002+
4003+ /* Process the lower directories with rdutil_callback as a filldir. */
4004+ for (bindex = bstart; bindex <= bend; bindex++) {
4005+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4006+ if (!lower_dentry)
4007+ continue;
4008+ if (!lower_dentry->d_inode)
4009+ continue;
4010+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
4011+ continue;
4012+
4013+ dget(lower_dentry);
4014+ mnt = unionfs_mntget(dentry, bindex);
4015+ branchget(sb, bindex);
4016+ lower_file = dentry_open(lower_dentry, mnt, O_RDONLY, current_cred());
4017+ if (IS_ERR(lower_file)) {
4018+ err = PTR_ERR(lower_file);
4019+ branchput(sb, bindex);
4020+ goto out;
4021+ }
4022+
4023+ do {
4024+ buf->filldir_called = 0;
4025+ buf->rdstate->bindex = bindex;
4026+ err = vfs_readdir(lower_file,
4027+ readdir_util_callback, buf);
4028+ if (buf->err)
4029+ err = buf->err;
4030+ } while ((err >= 0) && buf->filldir_called);
4031+
4032+ /* fput calls dput for lower_dentry */
4033+ fput(lower_file);
4034+ branchput(sb, bindex);
4035+
4036+ if (err < 0)
4037+ goto out;
4038+ }
4039+
4040+out:
4041+ if (buf) {
4042+ if (namelist && !err)
4043+ *namelist = buf->rdstate;
4044+ else if (buf->rdstate)
4045+ free_rdstate(buf->rdstate);
4046+ kfree(buf);
4047+ }
4048+
4049+
4050+ return err;
4051+}
0c5527e5
AM
4052diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4053new file mode 100644
63b09289 4054index 0000000..ae1b86a
0c5527e5
AM
4055--- /dev/null
4056+++ b/fs/unionfs/fanout.h
2380c486
JR
4057@@ -0,0 +1,407 @@
4058+/*
63b09289 4059+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
4060+ * Copyright (c) 2003-2006 Charles P. Wright
4061+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4062+ * Copyright (c) 2005 Arun M. Krishnakumar
4063+ * Copyright (c) 2004-2006 David P. Quigley
4064+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4065+ * Copyright (c) 2003 Puja Gupta
4066+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
4067+ * Copyright (c) 2003-2011 Stony Brook University
4068+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
4069+ *
4070+ * This program is free software; you can redistribute it and/or modify
4071+ * it under the terms of the GNU General Public License version 2 as
4072+ * published by the Free Software Foundation.
4073+ */
4074+
4075+#ifndef _FANOUT_H_
4076+#define _FANOUT_H_
4077+
4078+/*
4079+ * Inode to private data
4080+ *
4081+ * Since we use containers and the struct inode is _inside_ the
4082+ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4083+ * inode pointer), return a valid non-NULL pointer.
4084+ */
4085+static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4086+{
4087+ return container_of(inode, struct unionfs_inode_info, vfs_inode);
4088+}
4089+
/*
 * First/last branch index of an inode.  These expand to plain member
 * accesses, so they can also be used as assignment targets.
 */
#define ibstart(ino) (UNIONFS_I(ino)->bstart)
#define ibend(ino) (UNIONFS_I(ino)->bend)

/* Dentry to private data (start, end and opaque branch indices) */
#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata)
#define dbstart(dent) (UNIONFS_D(dent)->bstart)
#define dbend(dent) (UNIONFS_D(dent)->bend)
#define dbopaque(dent) (UNIONFS_D(dent)->bopaque)

/* Superblock to private data; the first branch index is always 0 */
#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
#define sbstart(sb) 0
#define sbend(sb) (UNIONFS_SB(sb)->bend)
#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)

/* File to private data */
#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
#define fbstart(file) (UNIONFS_F(file)->bstart)
#define fbend(file) (UNIONFS_F(file)->bend)
4110+
4111+/* macros to manipulate branch IDs in stored in our superblock */
4112+static inline int branch_id(struct super_block *sb, int index)
4113+{
4114+ BUG_ON(!sb || index < 0);
4115+ return UNIONFS_SB(sb)->data[index].branch_id;
4116+}
4117+
4118+static inline void set_branch_id(struct super_block *sb, int index, int val)
4119+{
4120+ BUG_ON(!sb || index < 0);
4121+ UNIONFS_SB(sb)->data[index].branch_id = val;
4122+}
4123+
4124+static inline void new_branch_id(struct super_block *sb, int index)
4125+{
4126+ BUG_ON(!sb || index < 0);
4127+ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4128+}
4129+
4130+/*
4131+ * Find new index of matching branch with an existing superblock of a known
4132+ * (possibly old) id. This is needed because branches could have been
4133+ * added/deleted causing the branches of any open files to shift.
4134+ *
4135+ * @sb: the new superblock which may have new/different branch IDs
4136+ * @id: the old/existing id we're looking for
4137+ * Returns index of newly found branch (0 or greater), -1 otherwise.
4138+ */
4139+static inline int branch_id_to_idx(struct super_block *sb, int id)
4140+{
4141+ int i;
4142+ for (i = 0; i < sbmax(sb); i++) {
4143+ if (branch_id(sb, i) == id)
4144+ return i;
4145+ }
4146+ /* in the non-ODF code, this should really never happen */
4147+ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4148+ return -1;
4149+}
4150+
4151+/* File to lower file. */
4152+static inline struct file *unionfs_lower_file(const struct file *f)
4153+{
4154+ BUG_ON(!f);
4155+ return UNIONFS_F(f)->lower_files[fbstart(f)];
4156+}
4157+
4158+static inline struct file *unionfs_lower_file_idx(const struct file *f,
4159+ int index)
4160+{
4161+ BUG_ON(!f || index < 0);
4162+ return UNIONFS_F(f)->lower_files[index];
4163+}
4164+
4165+static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4166+ struct file *val)
4167+{
4168+ BUG_ON(!f || index < 0);
4169+ UNIONFS_F(f)->lower_files[index] = val;
4170+ /* save branch ID (may be redundant?) */
4171+ UNIONFS_F(f)->saved_branch_ids[index] =
4172+ branch_id((f)->f_path.dentry->d_sb, index);
4173+}
4174+
/* Store @val as the lower file at the file's starting branch. */
static inline void unionfs_set_lower_file(struct file *f, struct file *val)
{
	int start;

	BUG_ON(!f);
	start = fbstart(f);
	unionfs_set_lower_file_idx(f, start, val);
}
4180+
4181+/* Inode to lower inode. */
4182+static inline struct inode *unionfs_lower_inode(const struct inode *i)
4183+{
4184+ BUG_ON(!i);
4185+ return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4186+}
4187+
4188+static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4189+ int index)
4190+{
4191+ BUG_ON(!i || index < 0);
4192+ return UNIONFS_I(i)->lower_inodes[index];
4193+}
4194+
4195+static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4196+ struct inode *val)
4197+{
4198+ BUG_ON(!i || index < 0);
4199+ UNIONFS_I(i)->lower_inodes[index] = val;
4200+}
4201+
4202+static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4203+{
4204+ BUG_ON(!i);
4205+ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4206+}
4207+
4208+/* Superblock to lower superblock. */
4209+static inline struct super_block *unionfs_lower_super(
4210+ const struct super_block *sb)
4211+{
4212+ BUG_ON(!sb);
4213+ return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4214+}
4215+
4216+static inline struct super_block *unionfs_lower_super_idx(
4217+ const struct super_block *sb,
4218+ int index)
4219+{
4220+ BUG_ON(!sb || index < 0);
4221+ return UNIONFS_SB(sb)->data[index].sb;
4222+}
4223+
4224+static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4225+ int index,
4226+ struct super_block *val)
4227+{
4228+ BUG_ON(!sb || index < 0);
4229+ UNIONFS_SB(sb)->data[index].sb = val;
4230+}
4231+
4232+static inline void unionfs_set_lower_super(struct super_block *sb,
4233+ struct super_block *val)
4234+{
4235+ BUG_ON(!sb);
4236+ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4237+}
4238+
4239+/* Branch count macros. */
4240+static inline int branch_count(const struct super_block *sb, int index)
4241+{
4242+ BUG_ON(!sb || index < 0);
4243+ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4244+}
4245+
4246+static inline void set_branch_count(struct super_block *sb, int index, int val)
4247+{
4248+ BUG_ON(!sb || index < 0);
4249+ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4250+}
4251+
4252+static inline void branchget(struct super_block *sb, int index)
4253+{
4254+ BUG_ON(!sb || index < 0);
4255+ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4256+}
4257+
4258+static inline void branchput(struct super_block *sb, int index)
4259+{
4260+ BUG_ON(!sb || index < 0);
4261+ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4262+}
4263+
4264+/* Dentry macros */
4265+static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4266+ struct dentry *val)
4267+{
4268+ BUG_ON(!dent || index < 0);
4269+ UNIONFS_D(dent)->lower_paths[index].dentry = val;
4270+}
4271+
4272+static inline struct dentry *unionfs_lower_dentry_idx(
4273+ const struct dentry *dent,
4274+ int index)
4275+{
4276+ BUG_ON(!dent || index < 0);
4277+ return UNIONFS_D(dent)->lower_paths[index].dentry;
4278+}
4279+
/* Return the lower dentry at the dentry's starting branch. */
static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
{
	int start;

	BUG_ON(!dent);
	start = dbstart(dent);
	return unionfs_lower_dentry_idx(dent, start);
}
4285+
4286+static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4287+ struct vfsmount *mnt)
4288+{
4289+ BUG_ON(!dent || index < 0);
4290+ UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4291+}
4292+
4293+static inline struct vfsmount *unionfs_lower_mnt_idx(
4294+ const struct dentry *dent,
4295+ int index)
4296+{
4297+ BUG_ON(!dent || index < 0);
4298+ return UNIONFS_D(dent)->lower_paths[index].mnt;
4299+}
4300+
/* Return the lower vfsmount at the dentry's starting branch. */
static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
{
	int start;

	BUG_ON(!dent);
	start = dbstart(dent);
	return unionfs_lower_mnt_idx(dent, start);
}
4306+
/*
 * Lock subclasses for the per-dentry mutex.  These values are passed as
 * the subclass argument of mutex_lock_nested() by unionfs_lock_dentry()
 * and unionfs_lock_parent().
 */
enum unionfs_dentry_lock_class {
	UNIONFS_DMUTEX_NORMAL,
	UNIONFS_DMUTEX_ROOT,
	UNIONFS_DMUTEX_PARENT,
	UNIONFS_DMUTEX_CHILD,
	UNIONFS_DMUTEX_WHITEOUT,
	UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
	UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */
};
4317+
4318+static inline void unionfs_lock_dentry(struct dentry *d,
4319+ unsigned int subclass)
4320+{
4321+ BUG_ON(!d);
4322+ mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);
4323+}
4324+
4325+static inline void unionfs_unlock_dentry(struct dentry *d)
4326+{
4327+ BUG_ON(!d);
4328+ mutex_unlock(&UNIONFS_D(d)->lock);
4329+}
4330+
4331+static inline struct dentry *unionfs_lock_parent(struct dentry *d,
4332+ unsigned int subclass)
4333+{
4334+ struct dentry *p;
4335+
4336+ BUG_ON(!d);
4337+ p = dget_parent(d);
4338+ if (p != d)
4339+ mutex_lock_nested(&UNIONFS_D(p)->lock, subclass);
4340+ return p;
4341+}
4342+
4343+static inline void unionfs_unlock_parent(struct dentry *d, struct dentry *p)
4344+{
4345+ BUG_ON(!d);
4346+ BUG_ON(!p);
4347+ if (p != d) {
4348+ BUG_ON(!mutex_is_locked(&UNIONFS_D(p)->lock));
4349+ mutex_unlock(&UNIONFS_D(p)->lock);
4350+ }
4351+ dput(p);
4352+}
4353+
4354+static inline void verify_locked(struct dentry *d)
4355+{
4356+ BUG_ON(!d);
4357+ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
4358+}
4359+
4360+/* macros to put lower objects */
4361+
4362+/*
4363+ * iput lower inodes of an unionfs dentry, from bstart to bend. If
4364+ * @free_lower is true, then also kfree the memory used to hold the lower
4365+ * object pointers.
4366+ */
4367+static inline void iput_lowers(struct inode *inode,
4368+ int bstart, int bend, bool free_lower)
4369+{
4370+ struct inode *lower_inode;
4371+ int bindex;
4372+
4373+ BUG_ON(!inode);
4374+ BUG_ON(!UNIONFS_I(inode));
4375+ BUG_ON(bstart < 0);
4376+
4377+ for (bindex = bstart; bindex <= bend; bindex++) {
4378+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4379+ if (lower_inode) {
4380+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
4381+ /* see Documentation/filesystems/unionfs/issues.txt */
4382+ lockdep_off();
4383+ iput(lower_inode);
4384+ lockdep_on();
4385+ }
4386+ }
4387+
4388+ if (free_lower) {
4389+ kfree(UNIONFS_I(inode)->lower_inodes);
4390+ UNIONFS_I(inode)->lower_inodes = NULL;
4391+ }
4392+}
4393+
4394+/* iput all lower inodes, and reset start/end branch indices to -1 */
4395+static inline void iput_lowers_all(struct inode *inode, bool free_lower)
4396+{
4397+ int bstart, bend;
4398+
4399+ BUG_ON(!inode);
4400+ BUG_ON(!UNIONFS_I(inode));
4401+ bstart = ibstart(inode);
4402+ bend = ibend(inode);
4403+ BUG_ON(bstart < 0);
4404+
4405+ iput_lowers(inode, bstart, bend, free_lower);
4406+ ibstart(inode) = ibend(inode) = -1;
4407+}
4408+
4409+/*
4410+ * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from
4411+ * bstart to bend. If @free_lower is true, then also kfree the memory used
4412+ * to hold the lower object pointers.
4413+ *
4414+ * XXX: implement using path_put VFS macros
4415+ */
4416+static inline void path_put_lowers(struct dentry *dentry,
4417+ int bstart, int bend, bool free_lower)
4418+{
4419+ struct dentry *lower_dentry;
4420+ struct vfsmount *lower_mnt;
4421+ int bindex;
4422+
4423+ BUG_ON(!dentry);
4424+ BUG_ON(!UNIONFS_D(dentry));
4425+ BUG_ON(bstart < 0);
4426+
4427+ for (bindex = bstart; bindex <= bend; bindex++) {
4428+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4429+ if (lower_dentry) {
4430+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
4431+ dput(lower_dentry);
4432+ }
4433+ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
4434+ if (lower_mnt) {
4435+ unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
4436+ mntput(lower_mnt);
4437+ }
4438+ }
4439+
4440+ if (free_lower) {
4441+ kfree(UNIONFS_D(dentry)->lower_paths);
4442+ UNIONFS_D(dentry)->lower_paths = NULL;
4443+ }
4444+}
4445+
4446+/*
4447+ * dput/mntput all lower dentries and vfsmounts, and reset start/end branch
4448+ * indices to -1.
4449+ */
4450+static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower)
4451+{
4452+ int bstart, bend;
4453+
4454+ BUG_ON(!dentry);
4455+ BUG_ON(!UNIONFS_D(dentry));
4456+ bstart = dbstart(dentry);
4457+ bend = dbend(dentry);
4458+ BUG_ON(bstart < 0);
4459+
4460+ path_put_lowers(dentry, bstart, bend, free_lower);
4461+ dbstart(dentry) = dbend(dentry) = -1;
4462+}
4463+
4464+#endif /* not _FANOUT_H */
0c5527e5
AM
4465diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4466new file mode 100644
6b53c3da 4467index 0000000..f583c8f
0c5527e5
AM
4468--- /dev/null
4469+++ b/fs/unionfs/file.c
6b53c3da 4470@@ -0,0 +1,386 @@
2380c486 4471+/*
63b09289 4472+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
4473+ * Copyright (c) 2003-2006 Charles P. Wright
4474+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4475+ * Copyright (c) 2005-2006 Junjiro Okajima
4476+ * Copyright (c) 2005 Arun M. Krishnakumar
4477+ * Copyright (c) 2004-2006 David P. Quigley
4478+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4479+ * Copyright (c) 2003 Puja Gupta
4480+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
4481+ * Copyright (c) 2003-2011 Stony Brook University
4482+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
4483+ *
4484+ * This program is free software; you can redistribute it and/or modify
4485+ * it under the terms of the GNU General Public License version 2 as
4486+ * published by the Free Software Foundation.
4487+ */
4488+
4489+#include "union.h"
4490+
4491+static ssize_t unionfs_read(struct file *file, char __user *buf,
4492+ size_t count, loff_t *ppos)
4493+{
4494+ int err;
4495+ struct file *lower_file;
4496+ struct dentry *dentry = file->f_path.dentry;
4497+ struct dentry *parent;
4498+
4499+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4500+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4501+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4502+
4503+ err = unionfs_file_revalidate(file, parent, false);
4504+ if (unlikely(err))
4505+ goto out;
4506+
4507+ lower_file = unionfs_lower_file(file);
4508+ err = vfs_read(lower_file, buf, count, ppos);
4509+ /* update our inode atime upon a successful lower read */
4510+ if (err >= 0) {
4511+ fsstack_copy_attr_atime(dentry->d_inode,
4512+ lower_file->f_path.dentry->d_inode);
4513+ unionfs_check_file(file);
4514+ }
4515+
4516+out:
4517+ unionfs_unlock_dentry(dentry);
4518+ unionfs_unlock_parent(dentry, parent);
4519+ unionfs_read_unlock(dentry->d_sb);
4520+ return err;
4521+}
4522+
4523+static ssize_t unionfs_write(struct file *file, const char __user *buf,
4524+ size_t count, loff_t *ppos)
4525+{
4526+ int err = 0;
4527+ struct file *lower_file;
4528+ struct dentry *dentry = file->f_path.dentry;
4529+ struct dentry *parent;
4530+
4531+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4532+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4533+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4534+
4535+ err = unionfs_file_revalidate(file, parent, true);
4536+ if (unlikely(err))
4537+ goto out;
4538+
4539+ lower_file = unionfs_lower_file(file);
4540+ err = vfs_write(lower_file, buf, count, ppos);
4541+ /* update our inode times+sizes upon a successful lower write */
4542+ if (err >= 0) {
4543+ fsstack_copy_inode_size(dentry->d_inode,
4544+ lower_file->f_path.dentry->d_inode);
4545+ fsstack_copy_attr_times(dentry->d_inode,
4546+ lower_file->f_path.dentry->d_inode);
4547+ UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */
4548+ unionfs_check_file(file);
4549+ }
4550+
4551+out:
4552+ unionfs_unlock_dentry(dentry);
4553+ unionfs_unlock_parent(dentry, parent);
4554+ unionfs_read_unlock(dentry->d_sb);
4555+ return err;
4556+}
4557+
4558+static int unionfs_file_readdir(struct file *file, void *dirent,
4559+ filldir_t filldir)
4560+{
4561+ return -ENOTDIR;
4562+}
4563+
4564+static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4565+{
4566+ int err = 0;
4567+ bool willwrite;
4568+ struct file *lower_file;
4569+ struct dentry *dentry = file->f_path.dentry;
4570+ struct dentry *parent;
7670a7fc 4571+ const struct vm_operations_struct *saved_vm_ops = NULL;
2380c486
JR
4572+
4573+ /*
4574+ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was
4575+ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this
4576+ * has been causing false positives in file system stacking layers.
4577+ * In particular, our ->mmap is called after sys_mmap2 already holds
4578+ * mmap_sem, then we lock our own mutexes; but earlier, it's
4579+ * possible for lockdep to have locked our mutexes first, and then
4580+ * we call a lower ->readdir which could call might_fault. The
4581+ * different ordering of the locks is what lockdep complains about
4582+ * -- unnecessarily. Therefore, we have no choice but to tell
4583+ * lockdep to temporarily turn off lockdep here. Note: the comments
4584+ * inside might_sleep also suggest that it would have been
4585+ * nicer to only annotate paths that needs that might_lock_read.
4586+ */
4587+ lockdep_off();
4588+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4589+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4590+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4591+
4592+ /* This might be deferred to mmap's writepage */
4593+ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4594+ err = unionfs_file_revalidate(file, parent, willwrite);
4595+ if (unlikely(err))
4596+ goto out;
4597+ unionfs_check_file(file);
4598+
4599+ /*
4600+ * File systems which do not implement ->writepage may use
4601+ * generic_file_readonly_mmap as their ->mmap op. If you call
4602+ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4603+ * But we cannot call the lower ->mmap op, so we can't tell that
4604+ * writeable mappings won't work. Therefore, our only choice is to
4605+ * check if the lower file system supports the ->writepage, and if
4606+ * not, return EINVAL (the same error that
4607+ * generic_file_readonly_mmap returns in that case).
4608+ */
4609+ lower_file = unionfs_lower_file(file);
4610+ if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4611+ err = -EINVAL;
4612+ printk(KERN_ERR "unionfs: branch %d file system does not "
4613+ "support writeable mmap\n", fbstart(file));
4614+ goto out;
4615+ }
4616+
4617+ /*
4618+ * find and save lower vm_ops.
4619+ *
4620+ * XXX: the VFS should have a cleaner way of finding the lower vm_ops
4621+ */
4622+ if (!UNIONFS_F(file)->lower_vm_ops) {
4623+ err = lower_file->f_op->mmap(lower_file, vma);
4624+ if (err) {
4625+ printk(KERN_ERR "unionfs: lower mmap failed %d\n", err);
4626+ goto out;
4627+ }
4628+ saved_vm_ops = vma->vm_ops;
4629+ err = do_munmap(current->mm, vma->vm_start,
4630+ vma->vm_end - vma->vm_start);
4631+ if (err) {
4632+ printk(KERN_ERR "unionfs: do_munmap failed %d\n", err);
4633+ goto out;
4634+ }
4635+ }
4636+
4637+ file->f_mapping->a_ops = &unionfs_dummy_aops;
4638+ err = generic_file_mmap(file, vma);
4639+ file->f_mapping->a_ops = &unionfs_aops;
4640+ if (err) {
4641+ printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err);
4642+ goto out;
4643+ }
4644+ vma->vm_ops = &unionfs_vm_ops;
4645+ if (!UNIONFS_F(file)->lower_vm_ops)
4646+ UNIONFS_F(file)->lower_vm_ops = saved_vm_ops;
4647+
4648+out:
4649+ if (!err) {
4650+ /* copyup could cause parent dir times to change */
4651+ unionfs_copy_attr_times(parent->d_inode);
4652+ unionfs_check_file(file);
4653+ }
4654+ unionfs_unlock_dentry(dentry);
4655+ unionfs_unlock_parent(dentry, parent);
4656+ unionfs_read_unlock(dentry->d_sb);
4657+ lockdep_on();
4658+ return err;
4659+}
4660+
6b53c3da 4661+int unionfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2380c486
JR
4662+{
4663+ int bindex, bstart, bend;
4664+ struct file *lower_file;
0c5527e5 4665+ struct dentry *dentry = file->f_path.dentry;
2380c486
JR
4666+ struct dentry *lower_dentry;
4667+ struct dentry *parent;
4668+ struct inode *lower_inode, *inode;
4669+ int err = -EINVAL;
4670+
6b53c3da 4671+ lockdep_off();
2380c486
JR
4672+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4673+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4674+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4675+
4676+ err = unionfs_file_revalidate(file, parent, true);
4677+ if (unlikely(err))
4678+ goto out;
4679+ unionfs_check_file(file);
4680+
6b53c3da
AM
4681+ err = generic_file_fsync(file, start, end, datasync);
4682+ if (err)
4683+ goto out;
4684+
2380c486
JR
4685+ bstart = fbstart(file);
4686+ bend = fbend(file);
4687+ if (bstart < 0 || bend < 0)
4688+ goto out;
4689+
4690+ inode = dentry->d_inode;
4691+ if (unlikely(!inode)) {
4692+ printk(KERN_ERR
4693+ "unionfs: null lower inode in unionfs_fsync\n");
4694+ goto out;
4695+ }
4696+ for (bindex = bstart; bindex <= bend; bindex++) {
4697+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4698+ if (!lower_inode || !lower_inode->i_fop->fsync)
4699+ continue;
4700+ lower_file = unionfs_lower_file_idx(file, bindex);
4701+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6b53c3da 4702+ err = vfs_fsync_range(lower_file, start, end, datasync);
2380c486
JR
4703+ if (!err && bindex == bstart)
4704+ fsstack_copy_attr_times(inode, lower_inode);
2380c486
JR
4705+ if (err)
4706+ goto out;
4707+ }
4708+
4709+out:
4710+ if (!err)
4711+ unionfs_check_file(file);
4712+ unionfs_unlock_dentry(dentry);
4713+ unionfs_unlock_parent(dentry, parent);
4714+ unionfs_read_unlock(dentry->d_sb);
6b53c3da 4715+ lockdep_on();
2380c486
JR
4716+ return err;
4717+}
4718+
4719+int unionfs_fasync(int fd, struct file *file, int flag)
4720+{
4721+ int bindex, bstart, bend;
4722+ struct file *lower_file;
4723+ struct dentry *dentry = file->f_path.dentry;
4724+ struct dentry *parent;
4725+ struct inode *lower_inode, *inode;
4726+ int err = 0;
4727+
4728+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4729+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4730+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4731+
4732+ err = unionfs_file_revalidate(file, parent, true);
4733+ if (unlikely(err))
4734+ goto out;
4735+ unionfs_check_file(file);
4736+
4737+ bstart = fbstart(file);
4738+ bend = fbend(file);
4739+ if (bstart < 0 || bend < 0)
4740+ goto out;
4741+
4742+ inode = dentry->d_inode;
4743+ if (unlikely(!inode)) {
4744+ printk(KERN_ERR
4745+ "unionfs: null lower inode in unionfs_fasync\n");
4746+ goto out;
4747+ }
4748+ for (bindex = bstart; bindex <= bend; bindex++) {
4749+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
4750+ if (!lower_inode || !lower_inode->i_fop->fasync)
4751+ continue;
4752+ lower_file = unionfs_lower_file_idx(file, bindex);
4753+ mutex_lock(&lower_inode->i_mutex);
4754+ err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4755+ if (!err && bindex == bstart)
4756+ fsstack_copy_attr_times(inode, lower_inode);
4757+ mutex_unlock(&lower_inode->i_mutex);
4758+ if (err)
4759+ goto out;
4760+ }
4761+
4762+out:
4763+ if (!err)
4764+ unionfs_check_file(file);
4765+ unionfs_unlock_dentry(dentry);
4766+ unionfs_unlock_parent(dentry, parent);
4767+ unionfs_read_unlock(dentry->d_sb);
4768+ return err;
4769+}
4770+
4771+static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos,
4772+ struct pipe_inode_info *pipe, size_t len,
4773+ unsigned int flags)
4774+{
4775+ ssize_t err;
4776+ struct file *lower_file;
4777+ struct dentry *dentry = file->f_path.dentry;
4778+ struct dentry *parent;
4779+
4780+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4781+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4782+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4783+
4784+ err = unionfs_file_revalidate(file, parent, false);
4785+ if (unlikely(err))
4786+ goto out;
4787+
4788+ lower_file = unionfs_lower_file(file);
4789+ err = vfs_splice_to(lower_file, ppos, pipe, len, flags);
4790+ /* update our inode atime upon a successful lower splice-read */
4791+ if (err >= 0) {
4792+ fsstack_copy_attr_atime(dentry->d_inode,
4793+ lower_file->f_path.dentry->d_inode);
4794+ unionfs_check_file(file);
4795+ }
4796+
4797+out:
4798+ unionfs_unlock_dentry(dentry);
4799+ unionfs_unlock_parent(dentry, parent);
4800+ unionfs_read_unlock(dentry->d_sb);
4801+ return err;
4802+}
4803+
4804+static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe,
4805+ struct file *file, loff_t *ppos,
4806+ size_t len, unsigned int flags)
4807+{
4808+ ssize_t err = 0;
4809+ struct file *lower_file;
4810+ struct dentry *dentry = file->f_path.dentry;
4811+ struct dentry *parent;
4812+
4813+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4814+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4815+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4816+
4817+ err = unionfs_file_revalidate(file, parent, true);
4818+ if (unlikely(err))
4819+ goto out;
4820+
4821+ lower_file = unionfs_lower_file(file);
4822+ err = vfs_splice_from(pipe, lower_file, ppos, len, flags);
4823+ /* update our inode times+sizes upon a successful lower write */
4824+ if (err >= 0) {
4825+ fsstack_copy_inode_size(dentry->d_inode,
4826+ lower_file->f_path.dentry->d_inode);
4827+ fsstack_copy_attr_times(dentry->d_inode,
4828+ lower_file->f_path.dentry->d_inode);
4829+ unionfs_check_file(file);
4830+ }
4831+
4832+out:
4833+ unionfs_unlock_dentry(dentry);
4834+ unionfs_unlock_parent(dentry, parent);
4835+ unionfs_read_unlock(dentry->d_sb);
4836+ return err;
4837+}
4838+
4839+struct file_operations unionfs_main_fops = {
4840+ .llseek = generic_file_llseek,
4841+ .read = unionfs_read,
4842+ .write = unionfs_write,
4843+ .readdir = unionfs_file_readdir,
4844+ .unlocked_ioctl = unionfs_ioctl,
0c5527e5
AM
4845+#ifdef CONFIG_COMPAT
4846+ .compat_ioctl = unionfs_ioctl,
4847+#endif
2380c486
JR
4848+ .mmap = unionfs_mmap,
4849+ .open = unionfs_open,
4850+ .flush = unionfs_flush,
4851+ .release = unionfs_file_release,
4852+ .fsync = unionfs_fsync,
4853+ .fasync = unionfs_fasync,
4854+ .splice_read = unionfs_splice_read,
4855+ .splice_write = unionfs_splice_write,
4856+};
0c5527e5
AM
4857diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4858new file mode 100644
6b53c3da 4859index 0000000..83cb681
0c5527e5
AM
4860--- /dev/null
4861+++ b/fs/unionfs/inode.c
6b53c3da 4862@@ -0,0 +1,1084 @@
2380c486 4863+/*
63b09289 4864+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
4865+ * Copyright (c) 2003-2006 Charles P. Wright
4866+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4867+ * Copyright (c) 2005-2006 Junjiro Okajima
4868+ * Copyright (c) 2005 Arun M. Krishnakumar
4869+ * Copyright (c) 2004-2006 David P. Quigley
4870+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4871+ * Copyright (c) 2003 Puja Gupta
4872+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
4873+ * Copyright (c) 2003-2011 Stony Brook University
4874+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
4875+ *
4876+ * This program is free software; you can redistribute it and/or modify
4877+ * it under the terms of the GNU General Public License version 2 as
4878+ * published by the Free Software Foundation.
4879+ */
4880+
4881+#include "union.h"
4882+
4883+/*
4884+ * Find a writeable branch to create new object in. Checks all writeable
4885+ * branches of the parent inode, from istart to iend order; if none are
4886+ * suitable, also tries branch 0 (which may require a copyup).
4887+ *
4888+ * Return a lower_dentry we can use to create object in, or ERR_PTR.
4889+ */
4890+static struct dentry *find_writeable_branch(struct inode *parent,
4891+ struct dentry *dentry)
4892+{
4893+ int err = -EINVAL;
4894+ int bindex, istart, iend;
4895+ struct dentry *lower_dentry = NULL;
4896+
4897+ istart = ibstart(parent);
4898+ iend = ibend(parent);
4899+ if (istart < 0)
4900+ goto out;
4901+
4902+begin:
4903+ for (bindex = istart; bindex <= iend; bindex++) {
4904+ /* skip non-writeable branches */
4905+ err = is_robranch_super(dentry->d_sb, bindex);
4906+ if (err) {
4907+ err = -EROFS;
4908+ continue;
4909+ }
4910+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4911+ if (!lower_dentry)
4912+ continue;
4913+ /*
4914+ * check for whiteouts in writeable branch, and remove them
4915+ * if necessary.
4916+ */
4917+ err = check_unlink_whiteout(dentry, lower_dentry, bindex);
4918+ if (err > 0) /* ignore if whiteout found and removed */
4919+ err = 0;
4920+ if (err)
4921+ continue;
4922+ /* if get here, we can write to the branch */
4923+ break;
4924+ }
4925+ /*
4926+ * If istart wasn't already branch 0, and we got any error, then try
4927+ * branch 0 (which may require copyup)
4928+ */
4929+ if (err && istart > 0) {
4930+ istart = iend = 0;
4931+ goto begin;
4932+ }
4933+
4934+ /*
4935+ * If we tried even branch 0, and still got an error, abort. But if
4936+ * the error was an EROFS, then we should try to copyup.
4937+ */
4938+ if (err && err != -EROFS)
4939+ goto out;
4940+
4941+ /*
4942+ * If we get here, then check if copyup needed. If lower_dentry is
4943+ * NULL, create the entire dentry directory structure in branch 0.
4944+ */
4945+ if (!lower_dentry) {
4946+ bindex = 0;
4947+ lower_dentry = create_parents(parent, dentry,
4948+ dentry->d_name.name, bindex);
4949+ if (IS_ERR(lower_dentry)) {
4950+ err = PTR_ERR(lower_dentry);
4951+ goto out;
4952+ }
4953+ }
4954+ err = 0; /* all's well */
4955+out:
4956+ if (err)
4957+ return ERR_PTR(err);
4958+ return lower_dentry;
4959+}
4960+
4961+static int unionfs_create(struct inode *dir, struct dentry *dentry,
4962+ int mode, struct nameidata *nd_unused)
4963+{
4964+ int err = 0;
4965+ struct dentry *lower_dentry = NULL;
4966+ struct dentry *lower_parent_dentry = NULL;
4967+ struct dentry *parent;
4968+ int valid = 0;
4969+ struct nameidata lower_nd;
4970+
4971+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4972+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
4973+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4974+
4975+ valid = __unionfs_d_revalidate(dentry, parent, false);
4976+ if (unlikely(!valid)) {
4977+ err = -ESTALE; /* same as what real_lookup does */
4978+ goto out;
4979+ }
4980+
4981+ lower_dentry = find_writeable_branch(dir, dentry);
4982+ if (IS_ERR(lower_dentry)) {
4983+ err = PTR_ERR(lower_dentry);
4984+ goto out;
4985+ }
4986+
4987+ lower_parent_dentry = lock_parent(lower_dentry);
4988+ if (IS_ERR(lower_parent_dentry)) {
4989+ err = PTR_ERR(lower_parent_dentry);
7670a7fc 4990+ goto out; /* lower parent not locked: skip unlock_dir() */
2380c486
JR
4991+ }
4992+
4993+ err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
4994+ if (unlikely(err < 0))
7670a7fc 4995+ goto out_unlock;
2380c486
JR
4996+ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
4997+ &lower_nd);
4998+ release_lower_nd(&lower_nd, err);
4999+
5000+ if (!err) {
5001+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5002+ if (!err) {
5003+ unionfs_copy_attr_times(dir);
5004+ fsstack_copy_inode_size(dir,
5005+ lower_parent_dentry->d_inode);
5006+ /* update no. of links on parent directory */
f4ea99f3 5007+ set_nlink(dir, unionfs_get_nlinks(dir));
2380c486
JR
5008+ }
5009+ }
5010+
7670a7fc 5011+out_unlock:
2380c486 5012+ unlock_dir(lower_parent_dentry);
2380c486
JR
5013+out:
5014+ if (!err) {
5015+ unionfs_postcopyup_setmnt(dentry);
5016+ unionfs_check_inode(dir);
5017+ unionfs_check_dentry(dentry);
5018+ }
5019+ unionfs_unlock_dentry(dentry);
5020+ unionfs_unlock_parent(dentry, parent);
5021+ unionfs_read_unlock(dentry->d_sb);
5022+ return err;
5023+}
5024+
5025+/*
5026+ * unionfs_lookup is the only special function which takes a dentry, yet we
5027+ * do NOT want to call __unionfs_d_revalidate_chain because by definition,
5028+ * we don't have a valid dentry here yet.
5029+ */
5030+static struct dentry *unionfs_lookup(struct inode *dir,
5031+ struct dentry *dentry,
5032+ struct nameidata *nd_unused)
5033+{
5034+ struct dentry *ret, *parent;
5035+ int err = 0;
5036+
5037+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5038+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5039+
5040+ /*
5041+ * As long as we lock/dget the parent, then can skip validating the
5042+ * parent now; we may have to rebuild this dentry on the next
5043+ * ->d_revalidate, however.
5044+ */
5045+
5046+ /* allocate dentry private data. We free it in ->d_release */
5047+ err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
5048+ if (unlikely(err)) {
5049+ ret = ERR_PTR(err);
5050+ goto out;
5051+ }
5052+
5053+ ret = unionfs_lookup_full(dentry, parent, INTERPOSE_LOOKUP);
5054+
5055+ if (!IS_ERR(ret)) {
5056+ if (ret)
5057+ dentry = ret;
5058+ /* lookup_full can return multiple positive dentries */
5059+ if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) {
5060+ BUG_ON(dbstart(dentry) < 0);
5061+ unionfs_postcopyup_release(dentry);
5062+ }
5063+ unionfs_copy_attr_times(dentry->d_inode);
5064+ }
5065+
5066+ unionfs_check_inode(dir);
5067+ if (!IS_ERR(ret))
5068+ unionfs_check_dentry(dentry);
5069+ unionfs_check_dentry(parent);
5070+ unionfs_unlock_dentry(dentry); /* locked in new_dentry_private data */
5071+
5072+out:
5073+ unionfs_unlock_parent(dentry, parent);
5074+ unionfs_read_unlock(dentry->d_sb);
5075+
5076+ return ret;
5077+}
5078+
5079+static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
5080+ struct dentry *new_dentry)
5081+{
5082+ int err = 0;
5083+ struct dentry *lower_old_dentry = NULL;
5084+ struct dentry *lower_new_dentry = NULL;
5085+ struct dentry *lower_dir_dentry = NULL;
5086+ struct dentry *old_parent, *new_parent;
5087+ char *name = NULL;
5088+ bool valid;
5089+
5090+ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5091+ old_parent = dget_parent(old_dentry);
5092+ new_parent = dget_parent(new_dentry);
5093+ unionfs_double_lock_parents(old_parent, new_parent);
5094+ unionfs_double_lock_dentry(old_dentry, new_dentry);
5095+
5096+ valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
5097+ if (unlikely(!valid)) {
5098+ err = -ESTALE;
5099+ goto out;
5100+ }
5101+ if (new_dentry->d_inode) {
5102+ valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
5103+ if (unlikely(!valid)) {
5104+ err = -ESTALE;
5105+ goto out;
5106+ }
5107+ }
5108+
5109+ lower_new_dentry = unionfs_lower_dentry(new_dentry);
5110+
5111+ /* check for a whiteout in new dentry branch, and delete it */
5112+ err = check_unlink_whiteout(new_dentry, lower_new_dentry,
5113+ dbstart(new_dentry));
5114+ if (err > 0) { /* whiteout found and removed successfully */
5115+ lower_dir_dentry = dget_parent(lower_new_dentry);
5116+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5117+ dput(lower_dir_dentry);
f4ea99f3 5118+ set_nlink(dir, unionfs_get_nlinks(dir));
2380c486
JR
5119+ err = 0;
5120+ }
5121+ if (err)
5122+ goto out;
5123+
5124+ /* check if parent hierarchy is needed, then link in same branch */
5125+ if (dbstart(old_dentry) != dbstart(new_dentry)) {
5126+ lower_new_dentry = create_parents(dir, new_dentry,
5127+ new_dentry->d_name.name,
5128+ dbstart(old_dentry));
5129+ err = PTR_ERR(lower_new_dentry);
5130+ if (IS_COPYUP_ERR(err))
5131+ goto docopyup;
5132+ if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5133+ goto out;
5134+ }
5135+ lower_new_dentry = unionfs_lower_dentry(new_dentry);
5136+ lower_old_dentry = unionfs_lower_dentry(old_dentry);
5137+
5138+ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5139+ lower_dir_dentry = lock_parent(lower_new_dentry);
5140+ err = is_robranch(old_dentry);
5141+ if (!err) {
5142+ /* see Documentation/filesystems/unionfs/issues.txt */
5143+ lockdep_off();
5144+ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5145+ lower_new_dentry);
5146+ lockdep_on();
5147+ }
5148+ unlock_dir(lower_dir_dentry);
5149+
5150+docopyup:
5151+ if (IS_COPYUP_ERR(err)) {
5152+ int old_bstart = dbstart(old_dentry);
5153+ int bindex;
5154+
5155+ for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
5156+ err = copyup_dentry(old_parent->d_inode,
5157+ old_dentry, old_bstart,
5158+ bindex, old_dentry->d_name.name,
5159+ old_dentry->d_name.len, NULL,
5160+ i_size_read(old_dentry->d_inode));
5161+ if (err)
5162+ continue;
5163+ lower_new_dentry =
5164+ create_parents(dir, new_dentry,
5165+ new_dentry->d_name.name,
5166+ bindex);
5167+ lower_old_dentry = unionfs_lower_dentry(old_dentry);
5168+ lower_dir_dentry = lock_parent(lower_new_dentry);
5169+ /* see Documentation/filesystems/unionfs/issues.txt */
5170+ lockdep_off();
5171+ /* do vfs_link */
5172+ err = vfs_link(lower_old_dentry,
5173+ lower_dir_dentry->d_inode,
5174+ lower_new_dentry);
5175+ lockdep_on();
5176+ unlock_dir(lower_dir_dentry);
5177+ goto check_link;
5178+ }
5179+ goto out;
5180+ }
5181+
5182+check_link:
5183+ if (err || !lower_new_dentry->d_inode)
5184+ goto out;
5185+
5186+ /* It's a hard link, so use the same inode */
5187+ new_dentry->d_inode = igrab(old_dentry->d_inode);
5188+ d_add(new_dentry, new_dentry->d_inode);
5189+ unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5190+ fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5191+
5192+ /* propagate number of hard-links */
f4ea99f3 5193+ set_nlink(old_dentry->d_inode, unionfs_get_nlinks(old_dentry->d_inode));
2380c486
JR
5194+ /* new dentry's ctime may have changed due to hard-link counts */
5195+ unionfs_copy_attr_times(new_dentry->d_inode);
5196+
5197+out:
5198+ if (!new_dentry->d_inode)
5199+ d_drop(new_dentry);
5200+
5201+ kfree(name);
5202+ if (!err)
5203+ unionfs_postcopyup_setmnt(new_dentry);
5204+
5205+ unionfs_check_inode(dir);
5206+ unionfs_check_dentry(new_dentry);
5207+ unionfs_check_dentry(old_dentry);
5208+
5209+ unionfs_double_unlock_dentry(old_dentry, new_dentry);
5210+ unionfs_double_unlock_parents(old_parent, new_parent);
5211+ dput(new_parent);
5212+ dput(old_parent);
5213+ unionfs_read_unlock(old_dentry->d_sb);
5214+
5215+ return err;
5216+}
5217+
5218+static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
5219+ const char *symname)
5220+{
5221+ int err = 0;
5222+ struct dentry *lower_dentry = NULL;
5223+ struct dentry *wh_dentry = NULL;
5224+ struct dentry *lower_parent_dentry = NULL;
5225+ struct dentry *parent;
5226+ char *name = NULL;
5227+ int valid = 0;
5228+ umode_t mode;
5229+
5230+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5231+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5232+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5233+
5234+ valid = __unionfs_d_revalidate(dentry, parent, false);
5235+ if (unlikely(!valid)) {
5236+ err = -ESTALE;
5237+ goto out;
5238+ }
5239+
5240+ /*
5241+ * It's only a bug if this dentry was not negative and couldn't be
5242+ * revalidated (shouldn't happen).
5243+ */
5244+ BUG_ON(!valid && dentry->d_inode);
5245+
5246+ lower_dentry = find_writeable_branch(dir, dentry);
5247+ if (IS_ERR(lower_dentry)) {
5248+ err = PTR_ERR(lower_dentry);
5249+ goto out;
5250+ }
5251+
5252+ lower_parent_dentry = lock_parent(lower_dentry);
5253+ if (IS_ERR(lower_parent_dentry)) {
5254+ err = PTR_ERR(lower_parent_dentry);
7670a7fc 5255+ goto out; /* lower parent not locked: skip unlock_dir() */
2380c486
JR
5256+ }
5257+
5258+ mode = S_IALLUGO;
5259+ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname);
5260+ if (!err) {
5261+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5262+ if (!err) {
5263+ unionfs_copy_attr_times(dir);
5264+ fsstack_copy_inode_size(dir,
5265+ lower_parent_dentry->d_inode);
5266+ /* update no. of links on parent directory */
f4ea99f3 5267+ set_nlink(dir, unionfs_get_nlinks(dir));
2380c486
JR
5268+ }
5269+ }
5270+
7670a7fc 5271+out_unlock:
2380c486 5272+ unlock_dir(lower_parent_dentry);
2380c486
JR
5273+out:
5274+ dput(wh_dentry);
5275+ kfree(name);
5276+
5277+ if (!err) {
5278+ unionfs_postcopyup_setmnt(dentry);
5279+ unionfs_check_inode(dir);
5280+ unionfs_check_dentry(dentry);
5281+ }
5282+ unionfs_unlock_dentry(dentry);
5283+ unionfs_unlock_parent(dentry, parent);
5284+ unionfs_read_unlock(dentry->d_sb);
5285+ return err;
5286+}
5287+
5288+static int unionfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
5289+{
5290+ int err = 0;
5291+ struct dentry *lower_dentry = NULL;
5292+ struct dentry *lower_parent_dentry = NULL;
5293+ struct dentry *parent;
5294+ int bindex = 0, bstart;
5295+ char *name = NULL;
5296+ int valid;
5297+
5298+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5299+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5300+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5301+
5302+ valid = __unionfs_d_revalidate(dentry, parent, false);
5303+ if (unlikely(!valid)) {
5304+ err = -ESTALE; /* same as what real_lookup does */
5305+ goto out;
5306+ }
5307+
5308+ bstart = dbstart(dentry);
5309+
5310+ lower_dentry = unionfs_lower_dentry(dentry);
5311+
5312+ /* check for a whiteout in new dentry branch, and delete it */
5313+ err = check_unlink_whiteout(dentry, lower_dentry, bstart);
5314+ if (err > 0) /* whiteout found and removed successfully */
5315+ err = 0;
5316+ if (err) {
5317+ /* exit if the error returned was NOT -EROFS */
5318+ if (!IS_COPYUP_ERR(err))
5319+ goto out;
5320+ bstart--;
5321+ }
5322+
5323+ /* check if copyup's needed, and mkdir */
5324+ for (bindex = bstart; bindex >= 0; bindex--) {
5325+ int i;
5326+ int bend = dbend(dentry);
5327+
5328+ if (is_robranch_super(dentry->d_sb, bindex))
5329+ continue;
5330+
5331+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5332+ if (!lower_dentry) {
5333+ lower_dentry = create_parents(dir, dentry,
5334+ dentry->d_name.name,
5335+ bindex);
5336+ if (!lower_dentry || IS_ERR(lower_dentry)) {
5337+ printk(KERN_ERR "unionfs: lower dentry "
5338+ " NULL for bindex = %d\n", bindex);
5339+ continue;
5340+ }
5341+ }
5342+
5343+ lower_parent_dentry = lock_parent(lower_dentry);
5344+
5345+ if (IS_ERR(lower_parent_dentry)) {
5346+ err = PTR_ERR(lower_parent_dentry);
5347+ goto out;
5348+ }
5349+
5350+ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5351+ mode);
5352+
5353+ unlock_dir(lower_parent_dentry);
5354+
5355+ /* did the mkdir succeed? */
5356+ if (err)
5357+ break;
5358+
5359+ for (i = bindex + 1; i <= bend; i++) {
5360+ /* XXX: use path_put_lowers? */
5361+ if (unionfs_lower_dentry_idx(dentry, i)) {
5362+ dput(unionfs_lower_dentry_idx(dentry, i));
5363+ unionfs_set_lower_dentry_idx(dentry, i, NULL);
5364+ }
5365+ }
5366+ dbend(dentry) = bindex;
5367+
5368+ /*
5369+ * Only INTERPOSE_LOOKUP can return a value other than 0 on
5370+ * err.
5371+ */
5372+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5373+ if (!err) {
5374+ unionfs_copy_attr_times(dir);
5375+ fsstack_copy_inode_size(dir,
5376+ lower_parent_dentry->d_inode);
5377+
5378+ /* update number of links on parent directory */
f4ea99f3 5379+ set_nlink(dir, unionfs_get_nlinks(dir));
2380c486
JR
5380+ }
5381+
5382+ err = make_dir_opaque(dentry, dbstart(dentry));
5383+ if (err) {
5384+ printk(KERN_ERR "unionfs: mkdir: error creating "
5385+ ".wh.__dir_opaque: %d\n", err);
5386+ goto out;
5387+ }
5388+
5389+ /* we are done! */
5390+ break;
5391+ }
5392+
5393+out:
5394+ if (!dentry->d_inode)
5395+ d_drop(dentry);
5396+
5397+ kfree(name);
5398+
5399+ if (!err) {
5400+ unionfs_copy_attr_times(dentry->d_inode);
5401+ unionfs_postcopyup_setmnt(dentry);
5402+ }
5403+ unionfs_check_inode(dir);
5404+ unionfs_check_dentry(dentry);
5405+ unionfs_unlock_dentry(dentry);
5406+ unionfs_unlock_parent(dentry, parent);
5407+ unionfs_read_unlock(dentry->d_sb);
5408+
5409+ return err;
5410+}
5411+
5412+static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
5413+ dev_t dev)
5414+{
5415+ int err = 0;
5416+ struct dentry *lower_dentry = NULL;
5417+ struct dentry *wh_dentry = NULL;
5418+ struct dentry *lower_parent_dentry = NULL;
5419+ struct dentry *parent;
5420+ char *name = NULL;
5421+ int valid = 0;
5422+
5423+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5424+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5425+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5426+
5427+ valid = __unionfs_d_revalidate(dentry, parent, false);
5428+ if (unlikely(!valid)) {
5429+ err = -ESTALE;
5430+ goto out;
5431+ }
5432+
5433+ /*
5434+ * It's only a bug if this dentry was not negative and couldn't be
5435+ * revalidated (shouldn't happen).
5436+ */
5437+ BUG_ON(!valid && dentry->d_inode);
5438+
5439+ lower_dentry = find_writeable_branch(dir, dentry);
5440+ if (IS_ERR(lower_dentry)) {
5441+ err = PTR_ERR(lower_dentry);
5442+ goto out;
5443+ }
5444+
5445+ lower_parent_dentry = lock_parent(lower_dentry);
5446+ if (IS_ERR(lower_parent_dentry)) {
5447+ err = PTR_ERR(lower_parent_dentry);
7670a7fc 5448+ goto out; /* lower parent not locked: skip unlock_dir() */
2380c486
JR
5449+ }
5450+
5451+ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5452+ if (!err) {
5453+ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0));
5454+ if (!err) {
5455+ unionfs_copy_attr_times(dir);
5456+ fsstack_copy_inode_size(dir,
5457+ lower_parent_dentry->d_inode);
5458+ /* update no. of links on parent directory */
f4ea99f3 5459+ set_nlink(dir, unionfs_get_nlinks(dir));
2380c486
JR
5460+ }
5461+ }
5462+
7670a7fc 5463+out_unlock:
2380c486 5464+ unlock_dir(lower_parent_dentry);
2380c486
JR
5465+out:
5466+ dput(wh_dentry);
5467+ kfree(name);
5468+
5469+ if (!err) {
5470+ unionfs_postcopyup_setmnt(dentry);
5471+ unionfs_check_inode(dir);
5472+ unionfs_check_dentry(dentry);
5473+ }
5474+ unionfs_unlock_dentry(dentry);
5475+ unionfs_unlock_parent(dentry, parent);
5476+ unionfs_read_unlock(dentry->d_sb);
5477+ return err;
5478+}
5479+
5480+/* requires sb, dentry, and parent to already be locked */
5481+static int __unionfs_readlink(struct dentry *dentry, char __user *buf,
5482+ int bufsiz)
5483+{
5484+ int err;
5485+ struct dentry *lower_dentry;
5486+
5487+ lower_dentry = unionfs_lower_dentry(dentry);
5488+
5489+ if (!lower_dentry->d_inode->i_op ||
5490+ !lower_dentry->d_inode->i_op->readlink) {
5491+ err = -EINVAL;
5492+ goto out;
5493+ }
5494+
5495+ err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5496+ buf, bufsiz);
5497+ if (err >= 0)
5498+ fsstack_copy_attr_atime(dentry->d_inode,
5499+ lower_dentry->d_inode);
5500+
5501+out:
5502+ return err;
5503+}
5504+
5505+static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5506+ int bufsiz)
5507+{
5508+ int err;
5509+ struct dentry *parent;
5510+
5511+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5512+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5513+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5514+
5515+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
5516+ err = -ESTALE;
5517+ goto out;
5518+ }
5519+
5520+ err = __unionfs_readlink(dentry, buf, bufsiz);
5521+
5522+out:
5523+ unionfs_check_dentry(dentry);
5524+ unionfs_unlock_dentry(dentry);
5525+ unionfs_unlock_parent(dentry, parent);
5526+ unionfs_read_unlock(dentry->d_sb);
5527+
5528+ return err;
5529+}
5530+
5531+static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5532+{
5533+ char *buf;
5534+ int len = PAGE_SIZE, err;
5535+ mm_segment_t old_fs;
5536+ struct dentry *parent;
5537+
5538+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5539+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5540+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5541+
5542+ /* This is freed by the put_link method assuming a successful call. */
5543+ buf = kmalloc(len, GFP_KERNEL);
5544+ if (unlikely(!buf)) {
5545+ err = -ENOMEM;
5546+ goto out;
5547+ }
5548+
5549+ /* read the symlink, and then we will follow it */
5550+ old_fs = get_fs();
5551+ set_fs(KERNEL_DS);
5552+ err = __unionfs_readlink(dentry, buf, len);
5553+ set_fs(old_fs);
5554+ if (err < 0) {
5555+ kfree(buf);
5556+ buf = NULL;
5557+ goto out;
5558+ }
5559+ buf[err] = 0;
5560+ nd_set_link(nd, buf);
5561+ err = 0;
5562+
5563+out:
5564+ if (err >= 0) {
5565+ unionfs_check_nd(nd);
5566+ unionfs_check_dentry(dentry);
5567+ }
5568+
5569+ unionfs_unlock_dentry(dentry);
5570+ unionfs_unlock_parent(dentry, parent);
5571+ unionfs_read_unlock(dentry->d_sb);
5572+
5573+ return ERR_PTR(err);
5574+}
5575+
5576+/* this @nd *IS* still used */
5577+static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5578+ void *cookie)
5579+{
5580+ struct dentry *parent;
0c5527e5 5581+ char *buf;
2380c486
JR
5582+
5583+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5584+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
5585+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5586+
5587+ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false)))
5588+ printk(KERN_ERR
5589+ "unionfs: put_link failed to revalidate dentry\n");
5590+
5591+ unionfs_check_dentry(dentry);
0c5527e5
AM
5592+#if 0
5593+ /* XXX: can't run this check b/c this fxn can receive a poisoned 'nd' PTR */
2380c486 5594+ unionfs_check_nd(nd);
0c5527e5
AM
5595+#endif
5596+ buf = nd_get_link(nd);
5597+ if (!IS_ERR(buf))
5598+ kfree(buf);
2380c486
JR
5599+ unionfs_unlock_dentry(dentry);
5600+ unionfs_unlock_parent(dentry, parent);
5601+ unionfs_read_unlock(dentry->d_sb);
5602+}
5603+
5604+/*
5605+ * This is a variant of fs/namei.c:permission() or inode_permission() which
5606+ * skips over EROFS tests (because we perform copyup on EROFS).
5607+ */
6b53c3da 5608+static int __inode_permission(struct inode *inode, int mask)
2380c486
JR
5609+{
5610+ int retval;
5611+
5612+ /* nobody gets write access to an immutable file */
5613+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
5614+ return -EACCES;
5615+
5616+ /* Ordinary permission routines do not understand MAY_APPEND. */
5617+ if (inode->i_op && inode->i_op->permission) {
6b53c3da 5618+ retval = inode->i_op->permission(inode, mask);
2380c486
JR
5619+ if (!retval) {
5620+ /*
5621+ * Exec permission on a regular file is denied if none
5622+ * of the execute bits are set.
5623+ *
5624+ * This check should be done by the ->permission()
5625+ * method.
5626+ */
5627+ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
5628+ !(inode->i_mode & S_IXUGO))
5629+ return -EACCES;
5630+ }
5631+ } else {
6b53c3da 5632+ retval = generic_permission(inode, mask);
2380c486
JR
5633+ }
5634+ if (retval)
5635+ return retval;
5636+
5637+ return security_inode_permission(inode,
5638+ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
5639+}
5640+
5641+/*
5642+ * Don't grab the superblock read-lock in unionfs_permission, which prevents
5643+ * a deadlock with the branch-management "add branch" code (which grabbed
5644+ * the write lock). It is safe to not grab the read lock here, because even
5645+ * with branch management taking place, there is no chance that
5646+ * unionfs_permission, or anything it calls, will use stale branch
5647+ * information.
5648+ */
6b53c3da 5649+static int unionfs_permission(struct inode *inode, int mask)
2380c486
JR
5650+{
5651+ struct inode *lower_inode = NULL;
5652+ int err = 0;
5653+ int bindex, bstart, bend;
63b09289 5654+ int is_file;
2380c486 5655+ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
63b09289 5656+ struct inode *inode_grabbed;
2380c486 5657+
63b09289
JR
5658+ inode_grabbed = igrab(inode);
5659+ is_file = !S_ISDIR(inode->i_mode);
5660+
2380c486
JR
5661+ if (!UNIONFS_I(inode)->lower_inodes) {
5662+ if (is_file) /* dirs can be unlinked but chdir'ed to */
5663+ err = -ESTALE; /* force revalidate */
5664+ goto out;
5665+ }
5666+ bstart = ibstart(inode);
5667+ bend = ibend(inode);
5668+ if (unlikely(bstart < 0 || bend < 0)) {
5669+ /*
5670+ * With branch-management, we can get a stale inode here.
5671+ * If so, we return ESTALE back to link_path_walk, which
5672+ * would discard the dcache entry and re-lookup the
5673+ * dentry+inode. This should be equivalent to issuing
5674+ * __unionfs_d_revalidate_chain on nd.dentry here.
5675+ */
5676+ if (is_file) /* dirs can be unlinked but chdir'ed to */
5677+ err = -ESTALE; /* force revalidate */
5678+ goto out;
5679+ }
5680+
5681+ for (bindex = bstart; bindex <= bend; bindex++) {
5682+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
5683+ if (!lower_inode)
5684+ continue;
5685+
5686+ /*
5687+ * check the condition for D-F-D underlying files/directories,
5688+ * we don't have to check for files, if we are checking for
5689+ * directories.
5690+ */
5691+ if (!is_file && !S_ISDIR(lower_inode->i_mode))
5692+ continue;
5693+
5694+ /*
5695+ * We check basic permissions, but we ignore any conditions
5696+ * such as readonly file systems or branches marked as
5697+ * readonly, because those conditions should lead to a
5698+ * copyup taking place later on. However, if user never had
5699+ * access to the file, then no copyup could ever take place.
5700+ */
6b53c3da 5701+ err = __inode_permission(lower_inode, mask);
2380c486
JR
5702+ if (err && err != -EACCES && err != -EPERM && bindex > 0) {
5703+ umode_t mode = lower_inode->i_mode;
5704+ if ((is_robranch_super(inode->i_sb, bindex) ||
4ae1df7a 5705+ __is_rdonly(lower_inode)) &&
2380c486
JR
5706+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5707+ err = 0;
5708+ if (IS_COPYUP_ERR(err))
5709+ err = 0;
5710+ }
5711+
5712+ /*
4ae1df7a
JR
5713+ * NFS HACK: NFSv2/3 return EACCES on readonly-exported,
5714+ * locally readonly-mounted file systems, instead of EROFS
5715+ * like other file systems do. So we have no choice here
5716+ * but to intercept this and ignore it for NFS branches
5717+ * marked readonly. Specifically, we avoid using NFS's own
5718+ * "broken" ->permission method, and rely on
5719+ * generic_permission() to do basic checking for us.
5720+ */
5721+ if (err && err == -EACCES &&
5722+ is_robranch_super(inode->i_sb, bindex) &&
5723+ lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC)
6b53c3da 5724+ err = generic_permission(lower_inode, mask);
4ae1df7a
JR
5725+
5726+ /*
2380c486
JR
5727+ * The permissions are an intersection of the overall directory
5728+ * permissions, so we fail if one fails.
5729+ */
5730+ if (err)
5731+ goto out;
5732+
5733+ /* only the leftmost file matters. */
5734+ if (is_file || write_mask) {
5735+ if (is_file && write_mask) {
5736+ err = get_write_access(lower_inode);
5737+ if (!err)
5738+ put_write_access(lower_inode);
5739+ }
5740+ break;
5741+ }
5742+ }
5743+ /* sync times which may have changed (asynchronously) below */
5744+ unionfs_copy_attr_times(inode);
5745+
5746+out:
5747+ unionfs_check_inode(inode);
2380c486
JR
5748+ iput(inode_grabbed);
5749+ return err;
5750+}
5751+
/*
 * Apply the attribute changes in @ia to @dentry.
 *
 * If the leftmost lower object is on a read-only branch (or is itself
 * read-only), it is first copied up to a branch to the left.  The change
 * is then passed to the lower file system via notify_change() and the
 * resulting lower attributes are copied back into the unionfs inode.
 *
 * Returns 0 on success or a negative errno.
 */
static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
{
	int err = 0;
	struct dentry *lower_dentry;
	struct dentry *parent;
	struct inode *inode;
	struct inode *lower_inode;
	int bstart, bend, bindex;
	loff_t size;
	struct iattr lower_ia;

	/* check if user has permission to change inode */
	err = inode_change_ok(dentry->d_inode, ia);
	if (err)
		goto out_err;	/* no locks taken yet */

	unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
	parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
	unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);

	if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) {
		err = -ESTALE;
		goto out;
	}

	bstart = dbstart(dentry);
	bend = dbend(dentry);
	inode = dentry->d_inode;

	/*
	 * mode change is for clearing setuid/setgid. Allow lower filesystem
	 * to reinterpret it in its own way.
	 */
	if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
		ia->ia_valid &= ~ATTR_MODE;

	lower_dentry = unionfs_lower_dentry(dentry);
	if (!lower_dentry) { /* should never happen after above revalidate */
		err = -EINVAL;
		goto out;
	}

	/*
	 * Get the lower inode directly from the lower dentry, in case
	 * ibstart is -1 (which happens when the file is open but unlinked).
	 */
	lower_inode = lower_dentry->d_inode;

	/* check if user has permission to change lower inode */
	err = inode_change_ok(lower_inode, ia);
	if (err)
		goto out;

	/* copyup if the file is on a read only branch */
	if (is_robranch_super(dentry->d_sb, bstart)
	    || __is_rdonly(lower_inode)) {
		/* check if we have a branch to copy up to */
		if (bstart <= 0) {
			err = -EACCES;
			goto out;
		}

		/* copy up only as many bytes as the file will end up having */
		if (ia->ia_valid & ATTR_SIZE)
			size = ia->ia_size;
		else
			size = i_size_read(inode);
		/* copyup to next available branch */
		for (bindex = bstart - 1; bindex >= 0; bindex--) {
			err = copyup_dentry(parent->d_inode,
					    dentry, bstart, bindex,
					    dentry->d_name.name,
					    dentry->d_name.len,
					    NULL, size);
			if (!err)
				break;
		}
		if (err)
			goto out;
		/* get updated lower_dentry/inode after copyup */
		lower_dentry = unionfs_lower_dentry(dentry);
		lower_inode = unionfs_lower_inode(inode);
		/*
		 * check for whiteouts in writeable branch, and remove them
		 * if necessary.
		 */
		if (lower_dentry) {
			err = check_unlink_whiteout(dentry, lower_dentry,
						    bindex);
			if (err > 0) /* ignore if whiteout found and removed */
				err = 0;
			/*
			 * NOTE(review): a negative err is not acted on here;
			 * it is overwritten by notify_change() below.
			 * Confirm this is intended.
			 */
		}
	}

	/*
	 * If shrinking, first truncate upper level to cancel writing dirty
	 * pages beyond the new eof; and also if its maxbytes is more
	 * limiting (fail with -EFBIG before making any change to the lower
	 * level).  There is no need to vmtruncate the upper level
	 * afterwards in the other cases: we fsstack_copy_inode_size from
	 * the lower level.
	 */
	if (ia->ia_valid & ATTR_SIZE) {
		size = i_size_read(inode);
		if (ia->ia_size < size || (ia->ia_size > size &&
		    inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
			err = vmtruncate(inode, ia->ia_size);
			if (err)
				goto out;
		}
	}

	/* notify the (possibly copied-up) lower inode */
	/*
	 * Note: we use lower_dentry->d_inode, because lower_inode may be
	 * unlinked (no inode->i_sb and i_ino==0).  This happens if someone
	 * tries to open(), unlink(), then ftruncate() a file.
	 */
	/* prepare our own lower struct iattr (with our own lower file) */
	memcpy(&lower_ia, ia, sizeof(lower_ia));
	if (ia->ia_valid & ATTR_FILE) {
		lower_ia.ia_file = unionfs_lower_file(ia->ia_file);
		BUG_ON(!lower_ia.ia_file); // XXX?
	}

	/* notify_change() is called with the lower inode's i_mutex held */
	mutex_lock(&lower_dentry->d_inode->i_mutex);
	err = notify_change(lower_dentry, &lower_ia);
	mutex_unlock(&lower_dentry->d_inode->i_mutex);
	if (err)
		goto out;

	/* get attributes from the first lower inode */
	if (ibstart(inode) >= 0)
		unionfs_copy_attr_all(inode, lower_inode);
	/*
	 * unionfs_copy_attr_all will copy the lower times to our inode if
	 * the lower ones are newer (useful for cache coherency).  However,
	 * ->setattr is the only place in which we may have to copy the
	 * lower inode times absolutely, to support utimes(2).
	 */
	if (ia->ia_valid & ATTR_MTIME_SET)
		inode->i_mtime = lower_inode->i_mtime;
	if (ia->ia_valid & ATTR_CTIME)
		inode->i_ctime = lower_inode->i_ctime;
	if (ia->ia_valid & ATTR_ATIME_SET)
		inode->i_atime = lower_inode->i_atime;
	fsstack_copy_inode_size(inode, lower_inode);

out:
	if (!err)
		unionfs_check_dentry(dentry);
	/* release in reverse order of acquisition */
	unionfs_unlock_dentry(dentry);
	unionfs_unlock_parent(dentry, parent);
	unionfs_read_unlock(dentry->d_sb);
out_err:
	return err;
}
5908+
5909+struct inode_operations unionfs_symlink_iops = {
5910+ .readlink = unionfs_readlink,
5911+ .permission = unionfs_permission,
5912+ .follow_link = unionfs_follow_link,
5913+ .setattr = unionfs_setattr,
5914+ .put_link = unionfs_put_link,
5915+};
5916+
5917+struct inode_operations unionfs_dir_iops = {
5918+ .create = unionfs_create,
5919+ .lookup = unionfs_lookup,
5920+ .link = unionfs_link,
5921+ .unlink = unionfs_unlink,
5922+ .symlink = unionfs_symlink,
5923+ .mkdir = unionfs_mkdir,
5924+ .rmdir = unionfs_rmdir,
5925+ .mknod = unionfs_mknod,
5926+ .rename = unionfs_rename,
5927+ .permission = unionfs_permission,
5928+ .setattr = unionfs_setattr,
5929+#ifdef CONFIG_UNION_FS_XATTR
5930+ .setxattr = unionfs_setxattr,
5931+ .getxattr = unionfs_getxattr,
5932+ .removexattr = unionfs_removexattr,
5933+ .listxattr = unionfs_listxattr,
5934+#endif /* CONFIG_UNION_FS_XATTR */
5935+};
5936+
5937+struct inode_operations unionfs_main_iops = {
5938+ .permission = unionfs_permission,
5939+ .setattr = unionfs_setattr,
5940+#ifdef CONFIG_UNION_FS_XATTR
5941+ .setxattr = unionfs_setxattr,
5942+ .getxattr = unionfs_getxattr,
5943+ .removexattr = unionfs_removexattr,
5944+ .listxattr = unionfs_listxattr,
5945+#endif /* CONFIG_UNION_FS_XATTR */
5946+};
0c5527e5
AM
5947diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5948new file mode 100644
6b53c3da 5949index 0000000..041d674
0c5527e5
AM
5950--- /dev/null
5951+++ b/fs/unionfs/lookup.c
6b53c3da 5952@@ -0,0 +1,570 @@
2380c486 5953+/*
63b09289 5954+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
5955+ * Copyright (c) 2003-2006 Charles P. Wright
5956+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5957+ * Copyright (c) 2005-2006 Junjiro Okajima
5958+ * Copyright (c) 2005 Arun M. Krishnakumar
5959+ * Copyright (c) 2004-2006 David P. Quigley
5960+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5961+ * Copyright (c) 2003 Puja Gupta
5962+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
5963+ * Copyright (c) 2003-2011 Stony Brook University
5964+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
5965+ *
5966+ * This program is free software; you can redistribute it and/or modify
5967+ * it under the terms of the GNU General Public License version 2 as
5968+ * published by the Free Software Foundation.
5969+ */
5970+
5971+#include "union.h"
5972+
5973+/*
5974+ * Lookup one path component @name relative to a <base,mnt> path pair.
5975+ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry
5976+ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and
5977+ * other lower mounts properly. If @new_mnt is non-null, will fill in the
5978+ * new mnt there. Caller is responsible to dput/mntput/path_put returned
5979+ * @dentry and @new_mnt.
5980+ */
5981+struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt,
5982+ const char *name, struct vfsmount **new_mnt)
5983+{
5984+ struct dentry *dentry = NULL;
6b53c3da 5985+ struct path lower_path = {NULL, NULL};
2380c486
JR
5986+ int err;
5987+
5988+ /* we use flags=0 to get basic lookup */
6b53c3da 5989+ err = vfs_path_lookup(base, mnt, name, 0, &lower_path);
2380c486
JR
5990+
5991+ switch (err) {
5992+ case 0: /* no error */
6b53c3da 5993+ dentry = lower_path.dentry;
2380c486 5994+ if (new_mnt)
6b53c3da 5995+ *new_mnt = lower_path.mnt; /* rc already inc'ed */
2380c486
JR
5996+ break;
5997+ case -ENOENT:
5998+ /*
5999+ * We don't consider ENOENT an error, and we want to return
6000+ * a negative dentry (ala lookup_one_len). As we know
6001+ * there was no inode for this name before (-ENOENT), then
6002+ * it's safe to call lookup_one_len (which doesn't take a
6003+ * vfsmount).
6004+ */
4ae1df7a 6005+ dentry = lookup_lck_len(name, base, strlen(name));
2380c486 6006+ if (new_mnt)
6b53c3da 6007+ *new_mnt = mntget(lower_path.mnt);
2380c486
JR
6008+ break;
6009+ default: /* all other real errors */
6010+ dentry = ERR_PTR(err);
6011+ break;
6012+ }
6013+
6014+ return dentry;
6015+}
6016+
6017+/*
6018+ * This is a utility function that fills in a unionfs dentry.
6019+ * Caller must lock this dentry with unionfs_lock_dentry.
6020+ *
6021+ * Returns: 0 (ok), or -ERRNO if an error occurred.
6022+ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly
6023+ */
6024+int unionfs_partial_lookup(struct dentry *dentry, struct dentry *parent)
6025+{
6026+ struct dentry *tmp;
6027+ int err = -ENOSYS;
6028+
6029+ tmp = unionfs_lookup_full(dentry, parent, INTERPOSE_PARTIAL);
6030+
6031+ if (!tmp) {
6032+ err = 0;
6033+ goto out;
6034+ }
6035+ if (IS_ERR(tmp)) {
6036+ err = PTR_ERR(tmp);
6037+ goto out;
6038+ }
6039+ /* XXX: need to change the interface */
6040+ BUG_ON(tmp != dentry);
6041+out:
6042+ return err;
6043+}
6044+
6045+/* The dentry cache is just so we have properly sized dentries. */
6046+static struct kmem_cache *unionfs_dentry_cachep;
6047+int unionfs_init_dentry_cache(void)
6048+{
6049+ unionfs_dentry_cachep =
6050+ kmem_cache_create("unionfs_dentry",
6051+ sizeof(struct unionfs_dentry_info),
6052+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
6053+
6054+ return (unionfs_dentry_cachep ? 0 : -ENOMEM);
6055+}
6056+
6057+void unionfs_destroy_dentry_cache(void)
6058+{
6059+ if (unionfs_dentry_cachep)
6060+ kmem_cache_destroy(unionfs_dentry_cachep);
6061+}
6062+
6063+void free_dentry_private_data(struct dentry *dentry)
6064+{
6065+ if (!dentry || !dentry->d_fsdata)
6066+ return;
6067+ kfree(UNIONFS_D(dentry)->lower_paths);
6068+ UNIONFS_D(dentry)->lower_paths = NULL;
6069+ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6070+ dentry->d_fsdata = NULL;
6071+}
6072+
6073+static inline int __realloc_dentry_private_data(struct dentry *dentry)
6074+{
6075+ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6076+ void *p;
6077+ int size;
6078+
6079+ BUG_ON(!info);
6080+
6081+ size = sizeof(struct path) * sbmax(dentry->d_sb);
6082+ p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6083+ if (unlikely(!p))
6084+ return -ENOMEM;
6085+
6086+ info->lower_paths = p;
6087+
6088+ info->bstart = -1;
6089+ info->bend = -1;
6090+ info->bopaque = -1;
6091+ info->bcount = sbmax(dentry->d_sb);
6092+ atomic_set(&info->generation,
6093+ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6094+
6095+ memset(info->lower_paths, 0, size);
6096+
6097+ return 0;
6098+}
6099+
6100+/* UNIONFS_D(dentry)->lock must be locked */
6101+int realloc_dentry_private_data(struct dentry *dentry)
6102+{
6103+ if (!__realloc_dentry_private_data(dentry))
6104+ return 0;
6105+
6106+ kfree(UNIONFS_D(dentry)->lower_paths);
6107+ free_dentry_private_data(dentry);
6108+ return -ENOMEM;
6109+}
6110+
6111+/* allocate new dentry private data */
6112+int new_dentry_private_data(struct dentry *dentry, int subclass)
6113+{
6114+ struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6115+
6116+ BUG_ON(info);
6117+
6118+ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6119+ if (unlikely(!info))
6120+ return -ENOMEM;
6121+
6122+ mutex_init(&info->lock);
6123+ mutex_lock_nested(&info->lock, subclass);
6124+
6125+ info->lower_paths = NULL;
6126+
6127+ dentry->d_fsdata = info;
6128+
6129+ if (!__realloc_dentry_private_data(dentry))
6130+ return 0;
6131+
6132+ mutex_unlock(&info->lock);
6133+ free_dentry_private_data(dentry);
6134+ return -ENOMEM;
6135+}
6136+
6137+/*
6138+ * scan through the lower dentry objects, and set bstart to reflect the
6139+ * starting branch
6140+ */
6141+void update_bstart(struct dentry *dentry)
6142+{
6143+ int bindex;
6144+ int bstart = dbstart(dentry);
6145+ int bend = dbend(dentry);
6146+ struct dentry *lower_dentry;
6147+
6148+ for (bindex = bstart; bindex <= bend; bindex++) {
6149+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6150+ if (!lower_dentry)
6151+ continue;
6152+ if (lower_dentry->d_inode) {
6153+ dbstart(dentry) = bindex;
6154+ break;
6155+ }
6156+ dput(lower_dentry);
6157+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6158+ }
6159+}
6160+
6161+
6162+/*
6163+ * Initialize a nameidata structure (the intent part) we can pass to a lower
6164+ * file system. Returns 0 on success or -error (only -ENOMEM possible).
6165+ * Inside that nd structure, this function may also return an allocated
6166+ * struct file (for open intents). The caller, when done with this nd, must
6167+ * kfree the intent file (using release_lower_nd).
6168+ *
6169+ * XXX: this code, and the callers of this code, should be redone using
6170+ * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6171+ * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6172+ * function and a method that other file systems can call.
6173+ */
6174+int init_lower_nd(struct nameidata *nd, unsigned int flags)
6175+{
6176+ int err = 0;
6177+#ifdef ALLOC_LOWER_ND_FILE
6178+ /*
6179+ * XXX: one day we may need to have the lower return an open file
6180+ * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6181+ * very well be needed for nfs4.
6182+ */
6183+ struct file *file;
6184+#endif /* ALLOC_LOWER_ND_FILE */
6185+
6186+ memset(nd, 0, sizeof(struct nameidata));
6187+ if (!flags)
6188+ return err;
6189+
6190+ switch (flags) {
6191+ case LOOKUP_CREATE:
6192+ nd->intent.open.flags |= O_CREAT;
6193+ /* fall through: shared code for create/open cases */
6194+ case LOOKUP_OPEN:
6195+ nd->flags = flags;
6196+ nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6197+#ifdef ALLOC_LOWER_ND_FILE
6198+ file = kzalloc(sizeof(struct file), GFP_KERNEL);
6199+ if (unlikely(!file)) {
6200+ err = -ENOMEM;
6201+ break; /* exit switch statement and thus return */
6202+ }
6203+ nd->intent.open.file = file;
6204+#endif /* ALLOC_LOWER_ND_FILE */
6205+ break;
6206+ default:
6207+ /*
6208+ * We should never get here, for now.
6209+ * We can add new cases here later on.
6210+ */
6211+ pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6212+ BUG();
6213+ break;
6214+ }
6215+
6216+ return err;
6217+}
6218+
6219+void release_lower_nd(struct nameidata *nd, int err)
6220+{
6221+ if (!nd->intent.open.file)
6222+ return;
6223+ else if (!err)
6224+ release_open_intent(nd);
6225+#ifdef ALLOC_LOWER_ND_FILE
6226+ kfree(nd->intent.open.file);
6227+#endif /* ALLOC_LOWER_ND_FILE */
6228+}
6229+
/*
 * Main (and complex) driver function for Unionfs's lookup
 *
 * Looks @dentry's name up in every branch of @parent (stopping at a
 * whiteout or an opaque directory), records the lower dentries/mnts found,
 * and then either leaves @dentry negative or interposes an inode on it.
 *
 * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
 * PTR if d_splice returned a different dentry.
 *
 * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
 * inode info must be locked.  If lookupmode is INTERPOSE_LOOKUP (i.e., a
 * newly looked-up dentry), then this function will return a locked
 * dentry's info, which the caller must unlock.
 */
struct dentry *unionfs_lookup_full(struct dentry *dentry,
				   struct dentry *parent, int lookupmode)
{
	int err = 0;
	struct dentry *lower_dentry = NULL;
	struct vfsmount *lower_mnt;
	struct vfsmount *lower_dir_mnt;
	struct dentry *wh_lower_dentry = NULL;
	struct dentry *lower_dir_dentry = NULL;
	struct dentry *d_interposed = NULL;
	int bindex, bstart, bend, bopaque;
	int opaque, num_positive = 0;
	const char *name;
	int namelen;
	int pos_start, pos_end;

	/*
	 * We should already have a lock on this dentry in the case of a
	 * partial lookup, or a revalidation.  Otherwise it is returned from
	 * new_dentry_private_data already locked.
	 */
	verify_locked(dentry);
	verify_locked(parent);

	/* must initialize dentry operations */
	if (lookupmode == INTERPOSE_LOOKUP)
		d_set_d_op(dentry, &unionfs_dops);

	/* We never partial lookup the root directory. */
	if (IS_ROOT(dentry))
		goto out;

	name = dentry->d_name.name;
	namelen = dentry->d_name.len;

	/* No dentries should get created for possible whiteout names. */
	if (!is_validname(name)) {
		err = -EPERM;
		goto out_free;
	}

	/* Now start the actual lookup procedure. */
	bstart = dbstart(parent);
	bend = dbend(parent);
	bopaque = dbopaque(parent);
	BUG_ON(bstart < 0);

	/* adjust bend to bopaque if needed */
	if ((bopaque >= 0) && (bopaque < bend))
		bend = bopaque;

	/* lookup all possible dentries */
	for (bindex = bstart; bindex <= bend; bindex++) {

		lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
		lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);

		/*
		 * skip branches for which we already hold a lower dentry
		 * (positive or negative); only positive ones are counted
		 */
		if (lower_dentry) {
			if (dbstart(dentry) < 0)
				dbstart(dentry) = bindex;
			if (bindex > dbend(dentry))
				dbend(dentry) = bindex;
			if (lower_dentry->d_inode)
				num_positive++;
			continue;
		}

		lower_dir_dentry =
			unionfs_lower_dentry_idx(parent, bindex);
		/* if the lower dentry's parent does not exist, skip this */
		if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
			continue;

		/* also skip it if the parent isn't a directory. */
		if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
			continue; /* XXX: should be BUG_ON */

		/* check for whiteouts: stop lookup if found */
		wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry);
		if (IS_ERR(wh_lower_dentry)) {
			err = PTR_ERR(wh_lower_dentry);
			goto out_free;
		}
		if (wh_lower_dentry->d_inode) {
			/* whiteout exists: this branch ends the search */
			dbend(dentry) = dbopaque(dentry) = bindex;
			if (dbstart(dentry) < 0)
				dbstart(dentry) = bindex;
			dput(wh_lower_dentry);
			break;
		}
		dput(wh_lower_dentry);

		/* Now do regular lookup; lookup @name */
		lower_dir_mnt = unionfs_lower_mnt_idx(parent, bindex);
		lower_mnt = NULL; /* XXX: needed? */

		lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt,
					    name, &lower_mnt);

		if (IS_ERR(lower_dentry)) {
			err = PTR_ERR(lower_dentry);
			goto out_free;
		}
		unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
		/* fall back to the branch's root mnt if none was returned */
		if (!lower_mnt)
			lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
						   bindex);
		unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);

		/* adjust dbstart/end */
		if (dbstart(dentry) < 0)
			dbstart(dentry) = bindex;
		if (bindex > dbend(dentry))
			dbend(dentry) = bindex;
		/*
		 * We always store the lower dentries above, and update
		 * dbstart/dbend, even if the whole unionfs dentry is
		 * negative (i.e., no lower inodes).
		 */
		if (!lower_dentry->d_inode)
			continue;
		num_positive++;

		/*
		 * check if we just found an opaque directory, if so, stop
		 * lookups here.
		 */
		if (!S_ISDIR(lower_dentry->d_inode->i_mode))
			continue;
		opaque = is_opaque_dir(dentry, bindex);
		if (opaque < 0) {
			err = opaque;
			goto out_free;
		} else if (opaque) {
			dbend(dentry) = dbopaque(dentry) = bindex;
			break;
		}
		dbend(dentry) = bindex;

		/* update parent directory's atime with the bindex */
		fsstack_copy_attr_atime(parent->d_inode,
					lower_dir_dentry->d_inode);
	}

	/* sanity checks, then decide if to process a negative dentry */
	BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
	BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);

	if (num_positive > 0)
		goto out_positive;

	/*** handle NEGATIVE dentries ***/

	/*
	 * If negative, keep only first lower negative dentry, to save on
	 * memory.
	 */
	if (dbstart(dentry) < dbend(dentry)) {
		path_put_lowers(dentry, dbstart(dentry) + 1,
				dbend(dentry), false);
		dbend(dentry) = dbstart(dentry);
	}
	if (lookupmode == INTERPOSE_PARTIAL)
		goto out;
	if (lookupmode == INTERPOSE_LOOKUP) {
		/*
		 * If all we found was a whiteout in the first available
		 * branch, then create a negative dentry for a possibly new
		 * file to be created.
		 */
		if (dbopaque(dentry) < 0)
			goto out;
		/* XXX: need to get mnt here */
		bindex = dbstart(dentry);
		if (unionfs_lower_dentry_idx(dentry, bindex))
			goto out;
		lower_dir_dentry =
			unionfs_lower_dentry_idx(parent, bindex);
		if (!lower_dir_dentry || !lower_dir_dentry->d_inode)
			goto out;
		if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
			goto out; /* XXX: should be BUG_ON */
		/* XXX: do we need to cross bind mounts here? */
		lower_dentry = lookup_lck_len(name, lower_dir_dentry, namelen);
		if (IS_ERR(lower_dentry)) {
			err = PTR_ERR(lower_dentry);
			goto out;
		}
		/* XXX: need to mntget/mntput as needed too! */
		unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
		/* XXX: wrong mnt for crossing bind mounts! */
		lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex);
		unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt);

		goto out;
	}

	/* if we're revalidating a positive dentry, don't make it negative */
	if (lookupmode != INTERPOSE_REVAL)
		d_add(dentry, NULL);

	goto out;

out_positive:
	/*** handle POSITIVE dentries ***/

	/*
	 * This unionfs dentry is positive (at least one lower inode
	 * exists), so scan entire dentry from beginning to end, and remove
	 * any negative lower dentries, if any.  Then, update dbstart/dbend
	 * to reflect the start/end of positive dentries.
	 */
	pos_start = pos_end = -1;
	for (bindex = bstart; bindex <= bend; bindex++) {
		lower_dentry = unionfs_lower_dentry_idx(dentry,
							bindex);
		if (lower_dentry && lower_dentry->d_inode) {
			if (pos_start < 0)
				pos_start = bindex;
			if (bindex > pos_end)
				pos_end = bindex;
			continue;
		}
		path_put_lowers(dentry, bindex, bindex, false);
	}
	if (pos_start >= 0)
		dbstart(dentry) = pos_start;
	if (pos_end >= 0)
		dbend(dentry) = pos_end;

	/* Partial lookups need to re-interpose, or throw away older negs. */
	if (lookupmode == INTERPOSE_PARTIAL) {
		if (dentry->d_inode) {
			unionfs_reinterpose(dentry);
			goto out;
		}

		/*
		 * The dentry had no inode but positive lower dentries were
		 * found, so treat it as a negative revalidation.
		 */
		lookupmode = INTERPOSE_REVAL_NEG;
		update_bstart(dentry);
	}

	/*
	 * Interpose can return a dentry if d_splice returned a different
	 * dentry.
	 */
	d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
	if (IS_ERR(d_interposed))
		err = PTR_ERR(d_interposed);
	else if (d_interposed)
		dentry = d_interposed;

	if (!err)
		goto out;
	d_drop(dentry);

out_free:
	/* should dput/mntput all the underlying dentries on error condition */
	if (dbstart(dentry) >= 0)
		path_put_lowers_all(dentry, false);
	/* free lower_paths unconditionally */
	kfree(UNIONFS_D(dentry)->lower_paths);
	UNIONFS_D(dentry)->lower_paths = NULL;

out:
	/* dbstart/dbend must be either both valid or both unset */
	if (dentry && UNIONFS_D(dentry)) {
		BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0);
		BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0);
	}
	if (d_interposed && UNIONFS_D(d_interposed)) {
		BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0);
		BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0);
	}

	if (!err && d_interposed)
		return d_interposed;
	/* with err == 0 this yields NULL, i.e. plain success */
	return ERR_PTR(err);
}
0c5527e5
AM
6523diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6524new file mode 100644
6b53c3da 6525index 0000000..87cd1fc
0c5527e5
AM
6526--- /dev/null
6527+++ b/fs/unionfs/main.c
6b53c3da 6528@@ -0,0 +1,752 @@
2380c486 6529+/*
63b09289 6530+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
6531+ * Copyright (c) 2003-2006 Charles P. Wright
6532+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6533+ * Copyright (c) 2005-2006 Junjiro Okajima
6534+ * Copyright (c) 2005 Arun M. Krishnakumar
6535+ * Copyright (c) 2004-2006 David P. Quigley
6536+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6537+ * Copyright (c) 2003 Puja Gupta
6538+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
6539+ * Copyright (c) 2003-2011 Stony Brook University
6540+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
6541+ *
6542+ * This program is free software; you can redistribute it and/or modify
6543+ * it under the terms of the GNU General Public License version 2 as
6544+ * published by the Free Software Foundation.
6545+ */
6546+
6547+#include "union.h"
6548+#include <linux/module.h>
6549+#include <linux/moduleparam.h>
6550+
6551+static void unionfs_fill_inode(struct dentry *dentry,
6552+ struct inode *inode)
6553+{
6554+ struct inode *lower_inode;
6555+ struct dentry *lower_dentry;
6556+ int bindex, bstart, bend;
6557+
6558+ bstart = dbstart(dentry);
6559+ bend = dbend(dentry);
6560+
6561+ for (bindex = bstart; bindex <= bend; bindex++) {
6562+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6563+ if (!lower_dentry) {
6564+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
6565+ continue;
6566+ }
6567+
6568+ /* Initialize the lower inode to the new lower inode. */
6569+ if (!lower_dentry->d_inode)
6570+ continue;
6571+
6572+ unionfs_set_lower_inode_idx(inode, bindex,
6573+ igrab(lower_dentry->d_inode));
6574+ }
6575+
6576+ ibstart(inode) = dbstart(dentry);
6577+ ibend(inode) = dbend(dentry);
6578+
6579+ /* Use attributes from the first branch. */
6580+ lower_inode = unionfs_lower_inode(inode);
6581+
6582+ /* Use different set of inode ops for symlinks & directories */
6583+ if (S_ISLNK(lower_inode->i_mode))
6584+ inode->i_op = &unionfs_symlink_iops;
6585+ else if (S_ISDIR(lower_inode->i_mode))
6586+ inode->i_op = &unionfs_dir_iops;
6587+
6588+ /* Use different set of file ops for directories */
6589+ if (S_ISDIR(lower_inode->i_mode))
6590+ inode->i_fop = &unionfs_dir_fops;
6591+
6592+ /* properly initialize special inodes */
6593+ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6594+ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6595+ init_special_inode(inode, lower_inode->i_mode,
6596+ lower_inode->i_rdev);
6597+
6598+ /* all well, copy inode attributes */
6599+ unionfs_copy_attr_all(inode, lower_inode);
6600+ fsstack_copy_inode_size(inode, lower_inode);
6601+}
6602+
/*
 * Connect a unionfs inode dentry/inode with several lower ones.  This is
 * the classic stackable file system "vnode interposition" action.
 *
 * @dentry: unionfs dentry to interpose on; must be locked
 * @sb: unionfs's super_block
 * @flag: one of INTERPOSE_DEFAULT, INTERPOSE_LOOKUP, INTERPOSE_REVAL or
 *        INTERPOSE_REVAL_NEG; any other value hits BUG()
 *
 * Returns ERR_PTR(-errno) on failure.  On success returns NULL, except for
 * INTERPOSE_LOOKUP when d_splice_alias() handed back a different dentry,
 * in which case that dentry is returned.
 */
struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
				 int flag)
{
	int err = 0;
	struct inode *inode;
	int need_fill_inode = 1;	/* cleared once the inode is filled */
	struct dentry *spliced = NULL;

	verify_locked(dentry);

	/*
	 * We allocate our new inode below by calling unionfs_iget,
	 * which will initialize some of the new inode's fields
	 */

	/*
	 * On revalidate we've already got our own inode and just need
	 * to fix it up.
	 */
	if (flag == INTERPOSE_REVAL) {
		inode = dentry->d_inode;
		UNIONFS_I(inode)->bstart = -1;
		UNIONFS_I(inode)->bend = -1;
		atomic_set(&UNIONFS_I(inode)->generation,
			   atomic_read(&UNIONFS_SB(sb)->generation));

		/*
		 * NOTE(review): any previous lower_inodes array is
		 * overwritten here -- presumably freed by the caller
		 * beforehand; confirm.
		 */
		UNIONFS_I(inode)->lower_inodes =
			kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
		if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
			err = -ENOMEM;
			goto out;
		}
	} else {
		/* get unique inode number for unionfs */
		inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			goto out;
		}
		/* inode has other users: defer filling (need_fill_inode) */
		if (atomic_read(&inode->i_count) > 1)
			goto skip;
	}

	need_fill_inode = 0;
	unionfs_fill_inode(dentry, inode);

skip:
	/* only (our) lookup wants to do a d_add */
	switch (flag) {
	case INTERPOSE_DEFAULT:
		/* for operations which create new inodes */
		d_add(dentry, inode);
		break;
	case INTERPOSE_REVAL_NEG:
		d_instantiate(dentry, inode);
		break;
	case INTERPOSE_LOOKUP:
		spliced = d_splice_alias(inode, dentry);
		if (spliced && spliced != dentry) {
			/*
			 * d_splice can return a dentry if it was
			 * disconnected and had to be moved.  We must ensure
			 * that the private data of the new dentry is
			 * correct and that the inode info was filled
			 * properly.  Finally we must return this new
			 * dentry.
			 */
			d_set_d_op(spliced, &unionfs_dops);
			/* hand our private data over to the spliced dentry */
			spliced->d_fsdata = dentry->d_fsdata;
			dentry->d_fsdata = NULL;
			dentry = spliced;
			if (need_fill_inode) {
				need_fill_inode = 0;
				unionfs_fill_inode(dentry, inode);
			}
			goto out_spliced;
		} else if (!spliced) {
			if (need_fill_inode) {
				need_fill_inode = 0;
				unionfs_fill_inode(dentry, inode);
				goto out_spliced;
			}
		}
		break;
	case INTERPOSE_REVAL:
		/* Do nothing. */
		break;
	default:
		printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
		BUG();
	}
	goto out;

out_spliced:
	if (!err)
		return spliced;	/* may be NULL if nothing was spliced */
out:
	/* ERR_PTR(0) is NULL, so this also covers plain success */
	return ERR_PTR(err);
}
6708+
6709+/* like interpose above, but for an already existing dentry */
6710+void unionfs_reinterpose(struct dentry *dentry)
6711+{
6712+ struct dentry *lower_dentry;
6713+ struct inode *inode;
6714+ int bindex, bstart, bend;
6715+
6716+ verify_locked(dentry);
6717+
6718+ /* This is a pre-allocated inode */
6719+ inode = dentry->d_inode;
6720+
6721+ bstart = dbstart(dentry);
6722+ bend = dbend(dentry);
6723+ for (bindex = bstart; bindex <= bend; bindex++) {
6724+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6725+ if (!lower_dentry)
6726+ continue;
6727+
6728+ if (!lower_dentry->d_inode)
6729+ continue;
6730+ if (unionfs_lower_inode_idx(inode, bindex))
6731+ continue;
6732+ unionfs_set_lower_inode_idx(inode, bindex,
6733+ igrab(lower_dentry->d_inode));
6734+ }
6735+ ibstart(inode) = dbstart(dentry);
6736+ ibend(inode) = dbend(dentry);
6737+}
6738+
6739+/*
6740+ * make sure the branch we just looked up (path) makes sense:
6741+ *
6742+ * 1) we're not trying to stack unionfs on top of unionfs
6743+ * 2) it exists
6744+ * 3) is a directory
6745+ */
63b09289 6746+int check_branch(const struct path *path)
2380c486
JR
6747+{
6748+ /* XXX: remove in ODF code -- stacking unions allowed there */
63b09289 6749+ if (!strcmp(path->dentry->d_sb->s_type->name, UNIONFS_NAME))
2380c486 6750+ return -EINVAL;
63b09289 6751+ if (!path->dentry->d_inode)
2380c486 6752+ return -ENOENT;
63b09289 6753+ if (!S_ISDIR(path->dentry->d_inode->i_mode))
2380c486
JR
6754+ return -ENOTDIR;
6755+ return 0;
6756+}
6757+
6758+/* checks if two lower_dentries have overlapping branches */
6759+static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6760+{
6761+ struct dentry *dent = NULL;
6762+
6763+ dent = dent1;
6764+ while ((dent != dent2) && (dent->d_parent != dent))
6765+ dent = dent->d_parent;
6766+
6767+ if (dent == dent2)
6768+ return 1;
6769+
6770+ dent = dent2;
6771+ while ((dent != dent1) && (dent->d_parent != dent))
6772+ dent = dent->d_parent;
6773+
6774+ return (dent == dent1);
6775+}
6776+
6777+/*
6778+ * Parse "ro" or "rw" options, but default to "rw" if no mode option was
6779+ * specified. Fill the mode bits in @perms. If we encounter an unknown
6780+ * string, return -EINVAL. Otherwise return 0.
6781+ */
6782+int parse_branch_mode(const char *name, int *perms)
6783+{
6784+ if (!name || !strcmp(name, "rw")) {
6785+ *perms = MAY_READ | MAY_WRITE;
6786+ return 0;
6787+ }
6788+ if (!strcmp(name, "ro")) {
6789+ *perms = MAY_READ;
6790+ return 0;
6791+ }
6792+ return -EINVAL;
6793+}
6794+
6795+/*
6796+ * parse the dirs= mount argument
6797+ *
6798+ * We don't need to lock the superblock private data's rwsem, as we get
6799+ * called only by unionfs_read_super - it is still a long time before anyone
6800+ * can even get a reference to us.
6801+ */
6802+static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6803+ *lower_root_info, char *options)
6804+{
63b09289 6805+ struct path path;
2380c486
JR
6806+ char *name;
6807+ int err = 0;
6808+ int branches = 1;
6809+ int bindex = 0;
6810+ int i = 0;
6811+ int j = 0;
6812+ struct dentry *dent1;
6813+ struct dentry *dent2;
6814+
6815+ if (options[0] == '\0') {
6816+ printk(KERN_ERR "unionfs: no branches specified\n");
6817+ err = -EINVAL;
82260373 6818+ goto out_return;
2380c486
JR
6819+ }
6820+
6821+ /*
6822+ * Each colon means we have a separator, this is really just a rough
6823+ * guess, since strsep will handle empty fields for us.
6824+ */
6825+ for (i = 0; options[i]; i++)
6826+ if (options[i] == ':')
6827+ branches++;
6828+
6829+ /* allocate space for underlying pointers to lower dentry */
6830+ UNIONFS_SB(sb)->data =
6831+ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6832+ if (unlikely(!UNIONFS_SB(sb)->data)) {
6833+ err = -ENOMEM;
82260373 6834+ goto out_return;
2380c486
JR
6835+ }
6836+
6837+ lower_root_info->lower_paths =
6838+ kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6839+ if (unlikely(!lower_root_info->lower_paths)) {
6840+ err = -ENOMEM;
82260373
AM
6841+ /* free the underlying pointer array */
6842+ kfree(UNIONFS_SB(sb)->data);
6843+ UNIONFS_SB(sb)->data = NULL;
6844+ goto out_return;
2380c486
JR
6845+ }
6846+
6847+ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6848+ branches = 0;
6849+ while ((name = strsep(&options, ":")) != NULL) {
6850+ int perms;
6851+ char *mode = strchr(name, '=');
6852+
6853+ if (!name)
6854+ continue;
6855+ if (!*name) { /* bad use of ':' (extra colons) */
6856+ err = -EINVAL;
6857+ goto out;
6858+ }
6859+
6860+ branches++;
6861+
6862+ /* strip off '=' if any */
6863+ if (mode)
6864+ *mode++ = '\0';
6865+
6866+ err = parse_branch_mode(mode, &perms);
6867+ if (err) {
6868+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6869+ "branch %d\n", mode, bindex);
6870+ goto out;
6871+ }
6872+ /* ensure that leftmost branch is writeable */
6873+ if (!bindex && !(perms & MAY_WRITE)) {
6874+ printk(KERN_ERR "unionfs: leftmost branch cannot be "
6875+ "read-only (use \"-o ro\" to create a "
6876+ "read-only union)\n");
6877+ err = -EINVAL;
6878+ goto out;
6879+ }
6880+
63b09289 6881+ err = kern_path(name, LOOKUP_FOLLOW, &path);
2380c486
JR
6882+ if (err) {
6883+ printk(KERN_ERR "unionfs: error accessing "
6884+ "lower directory '%s' (error %d)\n",
6885+ name, err);
6886+ goto out;
6887+ }
6888+
63b09289 6889+ err = check_branch(&path);
2380c486
JR
6890+ if (err) {
6891+ printk(KERN_ERR "unionfs: lower directory "
6892+ "'%s' is not a valid branch\n", name);
63b09289 6893+ path_put(&path);
2380c486
JR
6894+ goto out;
6895+ }
6896+
63b09289
JR
6897+ lower_root_info->lower_paths[bindex].dentry = path.dentry;
6898+ lower_root_info->lower_paths[bindex].mnt = path.mnt;
2380c486
JR
6899+
6900+ set_branchperms(sb, bindex, perms);
6901+ set_branch_count(sb, bindex, 0);
6902+ new_branch_id(sb, bindex);
6903+
6904+ if (lower_root_info->bstart < 0)
6905+ lower_root_info->bstart = bindex;
6906+ lower_root_info->bend = bindex;
6907+ bindex++;
6908+ }
6909+
6910+ if (branches == 0) {
6911+ printk(KERN_ERR "unionfs: no branches specified\n");
6912+ err = -EINVAL;
6913+ goto out;
6914+ }
6915+
6916+ BUG_ON(branches != (lower_root_info->bend + 1));
6917+
6918+ /*
6919+ * Ensure that no overlaps exist in the branches.
6920+ *
6921+ * This test is required because the Linux kernel has no support
6922+ * currently for ensuring coherency between stackable layers and
6923+ * branches. If we were to allow overlapping branches, it would be
6924+ * possible, for example, to delete a file via one branch, which
6925+ * would not be reflected in another branch. Such incoherency could
6926+ * lead to inconsistencies and even kernel oopses. Rather than
6927+ * implement hacks to work around some of these cache-coherency
6928+ * problems, we prevent branch overlapping, for now. A complete
6929+ * solution will involve proper kernel/VFS support for cache
6930+ * coherency, at which time we could safely remove this
6931+ * branch-overlapping test.
6932+ */
6933+ for (i = 0; i < branches; i++) {
6934+ dent1 = lower_root_info->lower_paths[i].dentry;
6935+ for (j = i + 1; j < branches; j++) {
6936+ dent2 = lower_root_info->lower_paths[j].dentry;
6937+ if (is_branch_overlap(dent1, dent2)) {
6938+ printk(KERN_ERR "unionfs: branches %d and "
6939+ "%d overlap\n", i, j);
6940+ err = -EINVAL;
6941+ goto out;
6942+ }
6943+ }
6944+ }
6945+
6946+out:
6947+ if (err) {
6948+ for (i = 0; i < branches; i++)
6949+ path_put(&lower_root_info->lower_paths[i]);
6950+
6951+ kfree(lower_root_info->lower_paths);
6952+ kfree(UNIONFS_SB(sb)->data);
6953+
6954+ /*
6955+ * MUST clear the pointers to prevent potential double free if
6956+ * the caller dies later on
6957+ */
6958+ lower_root_info->lower_paths = NULL;
6959+ UNIONFS_SB(sb)->data = NULL;
6960+ }
82260373 6961+out_return:
2380c486
JR
6962+ return err;
6963+}
6964+
6965+/*
6966+ * Parse mount options. See the manual page for usage instructions.
6967+ *
6968+ * Returns the unionfs_dentry_info for the lower-level directories (or an
6969+ * ERR_PTR on failure); we mount our stackable file system on top of them.
6970+ */
6971+static struct unionfs_dentry_info *unionfs_parse_options(
6972+ struct super_block *sb,
6973+ char *options)
6974+{
6975+ struct unionfs_dentry_info *lower_root_info;
6976+ char *optname;
6977+ int err = 0;
6978+ int bindex;
6979+ int dirsfound = 0;
6980+
6981+ /* allocate private data area */
6982+ err = -ENOMEM;
6983+ lower_root_info =
6984+ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6985+ if (unlikely(!lower_root_info))
6986+ goto out_error;
6987+ lower_root_info->bstart = -1;
6988+ lower_root_info->bend = -1;
6989+ lower_root_info->bopaque = -1;
6990+
6991+ while ((optname = strsep(&options, ",")) != NULL) {
6992+ char *optarg;
6993+
6994+ if (!optname || !*optname)
6995+ continue;
6996+
6997+ optarg = strchr(optname, '=');
6998+ if (optarg)
6999+ *optarg++ = '\0';
7000+
7001+ /*
7002+ * All of our options take an argument now. Insert ones that
7003+ * don't, above this check.
7004+ */
7005+ if (!optarg) {
7006+ printk(KERN_ERR "unionfs: %s requires an argument\n",
7007+ optname);
7008+ err = -EINVAL;
7009+ goto out_error;
7010+ }
7011+
7012+ if (!strcmp("dirs", optname)) {
7013+ if (++dirsfound > 1) {
7014+ printk(KERN_ERR
7015+ "unionfs: multiple dirs specified\n");
7016+ err = -EINVAL;
7017+ goto out_error;
7018+ }
7019+ err = parse_dirs_option(sb, lower_root_info, optarg);
7020+ if (err)
7021+ goto out_error;
7022+ continue;
7023+ }
7024+
7025+ err = -EINVAL;
7026+ printk(KERN_ERR
7027+ "unionfs: unrecognized option '%s'\n", optname);
7028+ goto out_error;
7029+ }
7030+ if (dirsfound != 1) {
7031+ printk(KERN_ERR "unionfs: dirs option required\n");
7032+ err = -EINVAL;
7033+ goto out_error;
7034+ }
7035+ goto out;
7036+
7037+out_error:
7038+ if (lower_root_info && lower_root_info->lower_paths) {
7039+ for (bindex = lower_root_info->bstart;
7040+ bindex >= 0 && bindex <= lower_root_info->bend;
7041+ bindex++)
7042+ path_put(&lower_root_info->lower_paths[bindex]);
7043+ }
7044+
7045+ kfree(lower_root_info->lower_paths);
7046+ kfree(lower_root_info);
7047+
7048+ kfree(UNIONFS_SB(sb)->data);
7049+ UNIONFS_SB(sb)->data = NULL;
7050+
7051+ lower_root_info = ERR_PTR(err);
7052+out:
7053+ return lower_root_info;
7054+}
7055+
7056+/*
2380c486
JR
7057+ * There is no need to lock the unionfs_sb_info's rwsem as there is no
7058+ * way anyone can have a reference to the superblock at this point in time.
7059+ */
7060+static int unionfs_read_super(struct super_block *sb, void *raw_data,
7061+ int silent)
7062+{
7063+ int err = 0;
7064+ struct unionfs_dentry_info *lower_root_info = NULL;
7065+ int bindex, bstart, bend;
6b53c3da 7066+ struct inode *inode = NULL;
2380c486
JR
7067+
7068+ if (!raw_data) {
7069+ printk(KERN_ERR
7070+ "unionfs: read_super: missing data argument\n");
7071+ err = -EINVAL;
7072+ goto out;
7073+ }
7074+
7075+ /* Allocate superblock private data */
7076+ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
7077+ if (unlikely(!UNIONFS_SB(sb))) {
7078+ printk(KERN_CRIT "unionfs: read_super: out of memory\n");
7079+ err = -ENOMEM;
7080+ goto out;
7081+ }
7082+
7083+ UNIONFS_SB(sb)->bend = -1;
7084+ atomic_set(&UNIONFS_SB(sb)->generation, 1);
7085+ init_rwsem(&UNIONFS_SB(sb)->rwsem);
7086+ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7087+
7088+ lower_root_info = unionfs_parse_options(sb, raw_data);
7089+ if (IS_ERR(lower_root_info)) {
7090+ printk(KERN_ERR
7091+ "unionfs: read_super: error while parsing options "
7092+ "(err = %ld)\n", PTR_ERR(lower_root_info));
7093+ err = PTR_ERR(lower_root_info);
7094+ lower_root_info = NULL;
7095+ goto out_free;
7096+ }
7097+ if (lower_root_info->bstart == -1) {
7098+ err = -ENOENT;
7099+ goto out_free;
7100+ }
7101+
7102+ /* set the lower superblock field of upper superblock */
7103+ bstart = lower_root_info->bstart;
7104+ BUG_ON(bstart != 0);
7105+ sbend(sb) = bend = lower_root_info->bend;
7106+ for (bindex = bstart; bindex <= bend; bindex++) {
7107+ struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7108+ atomic_inc(&d->d_sb->s_active);
7109+ unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7110+ }
7111+
7112+ /* s_maxbytes is inherited from the highest priority branch (index 0) */
7113+ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7114+
7115+ /*
7116+ * Our c/m/atime granularity is 1 ns because we may stack on file
7117+ * systems whose granularity is as good. This is important for our
7118+ * time-based cache coherency.
7119+ */
7120+ sb->s_time_gran = 1;
7121+
7122+ sb->s_op = &unionfs_sops;
7123+
6b53c3da
AM
7124+ /* get a new inode and allocate our root dentry */
7125+ inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
7126+ if (IS_ERR(inode)) {
7127+ err = PTR_ERR(inode);
7128+ goto out_dput;
7129+ }
7130+ sb->s_root = d_alloc_root(inode);
2380c486
JR
7131+ if (unlikely(!sb->s_root)) {
7132+ err = -ENOMEM;
6b53c3da 7133+ goto out_iput;
2380c486 7134+ }
6b53c3da 7135+ d_set_d_op(sb->s_root, &unionfs_dops);
2380c486
JR
7136+
7137+ /* link the upper and lower dentries */
7138+ sb->s_root->d_fsdata = NULL;
7139+ err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
7140+ if (unlikely(err))
7141+ goto out_freedpd;
7142+
6b53c3da
AM
7143+ /* if we get here, there cannot be an error */
7144+
2380c486
JR
7145+ /* Set the lower dentries for s_root */
7146+ for (bindex = bstart; bindex <= bend; bindex++) {
7147+ struct dentry *d;
7148+ struct vfsmount *m;
7149+
7150+ d = lower_root_info->lower_paths[bindex].dentry;
7151+ m = lower_root_info->lower_paths[bindex].mnt;
7152+
7153+ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7154+ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7155+ }
7156+ dbstart(sb->s_root) = bstart;
7157+ dbend(sb->s_root) = bend;
7158+
7159+ /* Set the generation number to one, since this is for the mount. */
7160+ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7161+
6b53c3da
AM
7162+ if (atomic_read(&inode->i_count) <= 1)
7163+ unionfs_fill_inode(sb->s_root, inode);
7164+
2380c486 7165+ /*
6b53c3da
AM
7166+ * No need to call interpose because we already have a positive
7167+ * dentry, which was instantiated by d_alloc_root. Just need to
7168+ * d_rehash it.
2380c486 7169+ */
6b53c3da
AM
7170+ d_rehash(sb->s_root);
7171+
2380c486 7172+ unionfs_unlock_dentry(sb->s_root);
6b53c3da 7173+ goto out; /* all is well */
2380c486
JR
7174+
7175+out_freedpd:
7176+ if (UNIONFS_D(sb->s_root)) {
7177+ kfree(UNIONFS_D(sb->s_root)->lower_paths);
7178+ free_dentry_private_data(sb->s_root);
7179+ }
7180+ dput(sb->s_root);
7181+
6b53c3da
AM
7182+out_iput:
7183+ iput(inode);
7184+
2380c486
JR
7185+out_dput:
7186+ if (lower_root_info && !IS_ERR(lower_root_info)) {
7187+ for (bindex = lower_root_info->bstart;
7188+ bindex <= lower_root_info->bend; bindex++) {
7189+ struct dentry *d;
7190+ d = lower_root_info->lower_paths[bindex].dentry;
7191+ /* drop refs we took earlier */
7192+ atomic_dec(&d->d_sb->s_active);
7193+ path_put(&lower_root_info->lower_paths[bindex]);
7194+ }
7195+ kfree(lower_root_info->lower_paths);
7196+ kfree(lower_root_info);
7197+ lower_root_info = NULL;
7198+ }
7199+
7200+out_free:
7201+ kfree(UNIONFS_SB(sb)->data);
7202+ kfree(UNIONFS_SB(sb));
7203+ sb->s_fs_info = NULL;
7204+
7205+out:
7206+ if (lower_root_info && !IS_ERR(lower_root_info)) {
7207+ kfree(lower_root_info->lower_paths);
7208+ kfree(lower_root_info);
7209+ }
7210+ return err;
7211+}
7212+
63b09289
JR
7213+static struct dentry *unionfs_mount(struct file_system_type *fs_type,
7214+ int flags, const char *dev_name,
7215+ void *raw_data)
2380c486 7216+{
63b09289
JR
7217+ struct dentry *dentry;
7218+
7219+ dentry = mount_nodev(fs_type, flags, raw_data, unionfs_read_super);
7220+ if (!PTR_ERR(dentry))
7221+ UNIONFS_SB(dentry->d_sb)->dev_name =
2380c486 7222+ kstrdup(dev_name, GFP_KERNEL);
63b09289 7223+ return dentry;
2380c486
JR
7224+}
7225+
7226+static struct file_system_type unionfs_fs_type = {
7227+ .owner = THIS_MODULE,
7228+ .name = UNIONFS_NAME,
63b09289 7229+ .mount = unionfs_mount,
2380c486
JR
7230+ .kill_sb = generic_shutdown_super,
7231+ .fs_flags = FS_REVAL_DOT,
7232+};
7233+
7234+static int __init init_unionfs_fs(void)
7235+{
7236+ int err;
7237+
7238+ pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7239+
7240+ err = unionfs_init_filldir_cache();
7241+ if (unlikely(err))
7242+ goto out;
7243+ err = unionfs_init_inode_cache();
7244+ if (unlikely(err))
7245+ goto out;
7246+ err = unionfs_init_dentry_cache();
7247+ if (unlikely(err))
7248+ goto out;
7249+ err = init_sioq();
7250+ if (unlikely(err))
7251+ goto out;
7252+ err = register_filesystem(&unionfs_fs_type);
7253+out:
7254+ if (unlikely(err)) {
7255+ stop_sioq();
7256+ unionfs_destroy_filldir_cache();
7257+ unionfs_destroy_inode_cache();
7258+ unionfs_destroy_dentry_cache();
7259+ }
7260+ return err;
7261+}
7262+
7263+static void __exit exit_unionfs_fs(void)
7264+{
7265+ stop_sioq();
7266+ unionfs_destroy_filldir_cache();
7267+ unionfs_destroy_inode_cache();
7268+ unionfs_destroy_dentry_cache();
7269+ unregister_filesystem(&unionfs_fs_type);
7270+ pr_info("Completed unionfs module unload\n");
7271+}
7272+
7273+MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7274+ " (http://www.fsl.cs.sunysb.edu)");
7275+MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7276+ " (http://unionfs.filesystems.org)");
7277+MODULE_LICENSE("GPL");
7278+
7279+module_init(init_unionfs_fs);
7280+module_exit(exit_unionfs_fs);
0c5527e5
AM
7281diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7282new file mode 100644
63b09289 7283index 0000000..bcc5652
0c5527e5
AM
7284--- /dev/null
7285+++ b/fs/unionfs/mmap.c
2380c486
JR
7286@@ -0,0 +1,89 @@
7287+/*
63b09289 7288+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
7289+ * Copyright (c) 2003-2006 Charles P. Wright
7290+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7291+ * Copyright (c) 2005-2006 Junjiro Okajima
7292+ * Copyright (c) 2006 Shaya Potter
7293+ * Copyright (c) 2005 Arun M. Krishnakumar
7294+ * Copyright (c) 2004-2006 David P. Quigley
7295+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7296+ * Copyright (c) 2003 Puja Gupta
7297+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
7298+ * Copyright (c) 2003-2011 Stony Brook University
7299+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
7300+ *
7301+ * This program is free software; you can redistribute it and/or modify
7302+ * it under the terms of the GNU General Public License version 2 as
7303+ * published by the Free Software Foundation.
7304+ */
7305+
7306+#include "union.h"
7307+
7308+
7309+/*
7310+ * XXX: we need a dummy readpage handler because generic_file_mmap (which we
7311+ * use in unionfs_mmap) checks for the existence of
7312+ * mapping->a_ops->readpage, else it returns -ENOEXEC. The VFS will need to
7313+ * be fixed to allow a file system to define vm_ops->fault without any
7314+ * address_space_ops whatsoever.
7315+ *
7316+ * Otherwise, we don't want to use our readpage method at all.
7317+ */
7318+static int unionfs_readpage(struct file *file, struct page *page)
7319+{
7320+ BUG();
7321+ return -EINVAL;
7322+}
7323+
7324+static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
7325+{
7326+ int err;
7327+ struct file *file, *lower_file;
7670a7fc 7328+ const struct vm_operations_struct *lower_vm_ops;
2380c486
JR
7329+ struct vm_area_struct lower_vma;
7330+
7331+ BUG_ON(!vma);
7332+ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
7333+ file = lower_vma.vm_file;
7334+ lower_vm_ops = UNIONFS_F(file)->lower_vm_ops;
7335+ BUG_ON(!lower_vm_ops);
7336+
7337+ lower_file = unionfs_lower_file(file);
7338+ BUG_ON(!lower_file);
7339+ /*
7340+ * XXX: vm_ops->fault may be called in parallel. Because we have to
7341+ * resort to temporarily changing the vma->vm_file to point to the
7342+ * lower file, a concurrent invocation of unionfs_fault could see a
7343+ * different value. In this workaround, we keep a different copy of
7344+ * the vma structure in our stack, so we never expose a different
7345+ * value of the vma->vm_file passed to us, even temporarily. A
7346+ * better fix would be to change the calling semantics of ->fault to
7347+ * take an explicit file pointer.
7348+ */
7349+ lower_vma.vm_file = lower_file;
7350+ err = lower_vm_ops->fault(&lower_vma, vmf);
7351+ return err;
7352+}
7353+
7354+/*
7355+ * XXX: the default address_space_ops for unionfs is empty. We cannot set
7356+ * our inode->i_mapping->a_ops to NULL because too many code paths expect
7357+ * the a_ops vector to be non-NULL.
7358+ */
7359+struct address_space_operations unionfs_aops = {
7360+ /* empty on purpose */
7361+};
7362+
7363+/*
7364+ * XXX: we need a second, dummy address_space_ops vector, to be used
7365+ * temporarily during unionfs_mmap, because the latter calls
7366+ * generic_file_mmap, which checks if ->readpage exists, else returns
7367+ * -ENOEXEC.
7368+ */
7369+struct address_space_operations unionfs_dummy_aops = {
7370+ .readpage = unionfs_readpage,
7371+};
7372+
7373+struct vm_operations_struct unionfs_vm_ops = {
7374+ .fault = unionfs_fault,
7375+};
0c5527e5
AM
7376diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7377new file mode 100644
63b09289 7378index 0000000..59b7333
0c5527e5
AM
7379--- /dev/null
7380+++ b/fs/unionfs/rdstate.c
2380c486
JR
7381@@ -0,0 +1,285 @@
7382+/*
63b09289 7383+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
7384+ * Copyright (c) 2003-2006 Charles P. Wright
7385+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7386+ * Copyright (c) 2005-2006 Junjiro Okajima
7387+ * Copyright (c) 2005 Arun M. Krishnakumar
7388+ * Copyright (c) 2004-2006 David P. Quigley
7389+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7390+ * Copyright (c) 2003 Puja Gupta
7391+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
7392+ * Copyright (c) 2003-2011 Stony Brook University
7393+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
7394+ *
7395+ * This program is free software; you can redistribute it and/or modify
7396+ * it under the terms of the GNU General Public License version 2 as
7397+ * published by the Free Software Foundation.
7398+ */
7399+
7400+#include "union.h"
7401+
7402+/* This file contains the routines for maintaining readdir state. */
7403+
7404+/*
7405+ * There are two structures here, rdstate which is a hash table
7406+ * of the second structure which is a filldir_node.
7407+ */
7408+
7409+/*
7410+ * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7411+ * of them and they shouldn't waste memory. If the node has a small name
7412+ * (as defined by the dentry structure), then we use an inline name to
7413+ * preserve kmalloc space.
7414+ */
7415+static struct kmem_cache *unionfs_filldir_cachep;
7416+
7417+int unionfs_init_filldir_cache(void)
7418+{
7419+ unionfs_filldir_cachep =
7420+ kmem_cache_create("unionfs_filldir",
7421+ sizeof(struct filldir_node), 0,
7422+ SLAB_RECLAIM_ACCOUNT, NULL);
7423+
7424+ return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7425+}
7426+
7427+void unionfs_destroy_filldir_cache(void)
7428+{
7429+ if (unionfs_filldir_cachep)
7430+ kmem_cache_destroy(unionfs_filldir_cachep);
7431+}
7432+
7433+/*
7434+ * This is a tuning parameter that tells us roughly how big to make the
7435+ * hash table in directory entries per page. This isn't perfect, but
7436+ * at least we get a hash table size that shouldn't be too overloaded.
7437+ * The following averages are based on my home directory.
7438+ * 14.44693 Overall
7439+ * 12.29 Single Page Directories
7440+ * 117.93 Multi-page directories
7441+ */
7442+#define DENTPAGE 4096
7443+#define DENTPERONEPAGE 12
7444+#define DENTPERPAGE 118
7445+#define MINHASHSIZE 1
7446+static int guesstimate_hash_size(struct inode *inode)
7447+{
7448+ struct inode *lower_inode;
7449+ int bindex;
7450+ int hashsize = MINHASHSIZE;
7451+
7452+ if (UNIONFS_I(inode)->hashsize > 0)
7453+ return UNIONFS_I(inode)->hashsize;
7454+
7455+ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7456+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
7457+ if (!lower_inode)
7458+ continue;
7459+
7460+ if (i_size_read(lower_inode) == DENTPAGE)
7461+ hashsize += DENTPERONEPAGE;
7462+ else
7463+ hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7464+ DENTPERPAGE;
7465+ }
7466+
7467+ return hashsize;
7468+}
7469+
7470+int init_rdstate(struct file *file)
7471+{
7472+ BUG_ON(sizeof(loff_t) !=
7473+ (sizeof(unsigned int) + sizeof(unsigned int)));
7474+ BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7475+
7476+ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7477+ fbstart(file));
7478+
7479+ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7480+}
7481+
7482+struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7483+{
7484+ struct unionfs_dir_state *rdstate = NULL;
7485+ struct list_head *pos;
7486+
7487+ spin_lock(&UNIONFS_I(inode)->rdlock);
7488+ list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7489+ struct unionfs_dir_state *r =
7490+ list_entry(pos, struct unionfs_dir_state, cache);
7491+ if (fpos == rdstate2offset(r)) {
7492+ UNIONFS_I(inode)->rdcount--;
7493+ list_del(&r->cache);
7494+ rdstate = r;
7495+ break;
7496+ }
7497+ }
7498+ spin_unlock(&UNIONFS_I(inode)->rdlock);
7499+ return rdstate;
7500+}
7501+
7502+struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7503+{
7504+ int i = 0;
7505+ int hashsize;
7506+ unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7507+ struct unionfs_dir_state *rdstate;
7508+
7509+ hashsize = guesstimate_hash_size(inode);
7510+ mallocsize += hashsize * sizeof(struct list_head);
7511+ mallocsize = __roundup_pow_of_two(mallocsize);
7512+
7513+ /* This should give us about 500 entries anyway. */
7514+ if (mallocsize > PAGE_SIZE)
7515+ mallocsize = PAGE_SIZE;
7516+
7517+ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7518+ sizeof(struct list_head);
7519+
7520+ rdstate = kmalloc(mallocsize, GFP_KERNEL);
7521+ if (unlikely(!rdstate))
7522+ return NULL;
7523+
7524+ spin_lock(&UNIONFS_I(inode)->rdlock);
7525+ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7526+ UNIONFS_I(inode)->cookie = 1;
7527+ else
7528+ UNIONFS_I(inode)->cookie++;
7529+
7530+ rdstate->cookie = UNIONFS_I(inode)->cookie;
7531+ spin_unlock(&UNIONFS_I(inode)->rdlock);
7532+ rdstate->offset = 1;
7533+ rdstate->access = jiffies;
7534+ rdstate->bindex = bindex;
7535+ rdstate->dirpos = 0;
7536+ rdstate->hashentries = 0;
7537+ rdstate->size = hashsize;
7538+ for (i = 0; i < rdstate->size; i++)
7539+ INIT_LIST_HEAD(&rdstate->list[i]);
7540+
7541+ return rdstate;
7542+}
7543+
7544+static void free_filldir_node(struct filldir_node *node)
7545+{
82260373 7546+ if (node->namelen >= DNAME_INLINE_LEN)
2380c486
JR
7547+ kfree(node->name);
7548+ kmem_cache_free(unionfs_filldir_cachep, node);
7549+}
7550+
7551+void free_rdstate(struct unionfs_dir_state *state)
7552+{
7553+ struct filldir_node *tmp;
7554+ int i;
7555+
7556+ for (i = 0; i < state->size; i++) {
7557+ struct list_head *head = &(state->list[i]);
7558+ struct list_head *pos, *n;
7559+
7560+ /* traverse the list and deallocate space */
7561+ list_for_each_safe(pos, n, head) {
7562+ tmp = list_entry(pos, struct filldir_node, file_list);
7563+ list_del(&tmp->file_list);
7564+ free_filldir_node(tmp);
7565+ }
7566+ }
7567+
7568+ kfree(state);
7569+}
7570+
7571+struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7572+ const char *name, int namelen,
7573+ int is_whiteout)
7574+{
7575+ int index;
7576+ unsigned int hash;
7577+ struct list_head *head;
7578+ struct list_head *pos;
7579+ struct filldir_node *cursor = NULL;
7580+ int found = 0;
7581+
7582+ BUG_ON(namelen <= 0);
7583+
7584+ hash = full_name_hash(name, namelen);
7585+ index = hash % rdstate->size;
7586+
7587+ head = &(rdstate->list[index]);
7588+ list_for_each(pos, head) {
7589+ cursor = list_entry(pos, struct filldir_node, file_list);
7590+
7591+ if (cursor->namelen == namelen && cursor->hash == hash &&
7592+ !strncmp(cursor->name, name, namelen)) {
7593+ /*
7594+ * a duplicate exists, hence there is no need to add
7595+ * another entry to the list
7596+ */
7597+ found = 1;
7598+
7599+ /*
7600+ * if a duplicate is found in this branch, and is
7601+ * not due to the caller looking for an entry to
7602+ * whiteout, then the file system may be corrupted.
7603+ */
7604+ if (unlikely(!is_whiteout &&
7605+ cursor->bindex == rdstate->bindex))
7606+ printk(KERN_ERR "unionfs: filldir: possible "
7607+ "I/O error: a file is duplicated "
7608+ "in the same branch %d: %s\n",
7609+ rdstate->bindex, cursor->name);
7610+ break;
7611+ }
7612+ }
7613+
7614+ if (!found)
7615+ cursor = NULL;
7616+
7617+ return cursor;
7618+}
7619+
7620+int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7621+ int namelen, int bindex, int whiteout)
7622+{
7623+ struct filldir_node *new;
7624+ unsigned int hash;
7625+ int index;
7626+ int err = 0;
7627+ struct list_head *head;
7628+
7629+ BUG_ON(namelen <= 0);
7630+
7631+ hash = full_name_hash(name, namelen);
7632+ index = hash % rdstate->size;
7633+ head = &(rdstate->list[index]);
7634+
7635+ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7636+ if (unlikely(!new)) {
7637+ err = -ENOMEM;
7638+ goto out;
7639+ }
7640+
7641+ INIT_LIST_HEAD(&new->file_list);
7642+ new->namelen = namelen;
7643+ new->hash = hash;
7644+ new->bindex = bindex;
7645+ new->whiteout = whiteout;
7646+
82260373 7647+ if (namelen < DNAME_INLINE_LEN) {
2380c486
JR
7648+ new->name = new->iname;
7649+ } else {
7650+ new->name = kmalloc(namelen + 1, GFP_KERNEL);
7651+ if (unlikely(!new->name)) {
7652+ kmem_cache_free(unionfs_filldir_cachep, new);
7653+ new = NULL;
7654+ goto out;
7655+ }
7656+ }
7657+
7658+ memcpy(new->name, name, namelen);
7659+ new->name[namelen] = '\0';
7660+
7661+ rdstate->hashentries++;
7662+
7663+ list_add(&(new->file_list), head);
7664+out:
7665+ return err;
7666+}
0c5527e5
AM
7667diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7668new file mode 100644
63b09289 7669index 0000000..c8ab910
0c5527e5
AM
7670--- /dev/null
7671+++ b/fs/unionfs/rename.c
63b09289 7672@@ -0,0 +1,522 @@
2380c486 7673+/*
63b09289 7674+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
7675+ * Copyright (c) 2003-2006 Charles P. Wright
7676+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7677+ * Copyright (c) 2005-2006 Junjiro Okajima
7678+ * Copyright (c) 2005 Arun M. Krishnakumar
7679+ * Copyright (c) 2004-2006 David P. Quigley
7680+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7681+ * Copyright (c) 2003 Puja Gupta
7682+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
7683+ * Copyright (c) 2003-2011 Stony Brook University
7684+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
7685+ *
7686+ * This program is free software; you can redistribute it and/or modify
7687+ * it under the terms of the GNU General Public License version 2 as
7688+ * published by the Free Software Foundation.
7689+ */
7690+
7691+#include "union.h"
7692+
7693+/*
7694+ * This is a helper function for rename, used when rename ends up with hosed
7695+ * over dentries and we need to revert.
7696+ */
7697+static int unionfs_refresh_lower_dentry(struct dentry *dentry,
7698+ struct dentry *parent, int bindex)
7699+{
7700+ struct dentry *lower_dentry;
7701+ struct dentry *lower_parent;
7702+ int err = 0;
63b09289 7703+ struct nameidata lower_nd;
2380c486
JR
7704+
7705+ verify_locked(dentry);
7706+
7707+ lower_parent = unionfs_lower_dentry_idx(parent, bindex);
7708+
7709+ BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
7710+
63b09289
JR
7711+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
7712+ if (unlikely(err < 0))
7713+ goto out;
7714+ lower_dentry = lookup_one_len_nd(dentry->d_name.name, lower_parent,
7715+ dentry->d_name.len, &lower_nd);
7716+ release_lower_nd(&lower_nd, err);
2380c486
JR
7717+ if (IS_ERR(lower_dentry)) {
7718+ err = PTR_ERR(lower_dentry);
7719+ goto out;
7720+ }
7721+
7722+ dput(unionfs_lower_dentry_idx(dentry, bindex));
7723+ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
7724+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
7725+
7726+ if (!lower_dentry->d_inode) {
7727+ dput(lower_dentry);
7728+ unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
7729+ } else {
7730+ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
7731+ unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
7732+ igrab(lower_dentry->d_inode));
7733+ }
7734+
7735+out:
7736+ return err;
7737+}
7738+
7739+static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7740+ struct dentry *old_parent,
7741+ struct inode *new_dir, struct dentry *new_dentry,
7742+ struct dentry *new_parent,
7743+ int bindex)
7744+{
7745+ int err = 0;
7746+ struct dentry *lower_old_dentry;
7747+ struct dentry *lower_new_dentry;
7748+ struct dentry *lower_old_dir_dentry;
7749+ struct dentry *lower_new_dir_dentry;
7750+ struct dentry *trap;
7751+
7752+ lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7753+ lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7754+
7755+ if (!lower_new_dentry) {
7756+ lower_new_dentry =
7757+ create_parents(new_parent->d_inode,
7758+ new_dentry, new_dentry->d_name.name,
7759+ bindex);
7760+ if (IS_ERR(lower_new_dentry)) {
7761+ err = PTR_ERR(lower_new_dentry);
7762+ if (IS_COPYUP_ERR(err))
7763+ goto out;
7764+ printk(KERN_ERR "unionfs: error creating directory "
7765+ "tree for rename, bindex=%d err=%d\n",
7766+ bindex, err);
7767+ goto out;
7768+ }
7769+ }
7770+
7771+ /* check for and remove whiteout, if any */
7772+ err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex);
7773+ if (err > 0) /* ignore if whiteout found and successfully removed */
7774+ err = 0;
7775+ if (err)
7776+ goto out;
7777+
7778+ /* check of old_dentry branch is writable */
7779+ err = is_robranch_super(old_dentry->d_sb, bindex);
7780+ if (err)
7781+ goto out;
7782+
7783+ dget(lower_old_dentry);
7784+ dget(lower_new_dentry);
7785+ lower_old_dir_dentry = dget_parent(lower_old_dentry);
7786+ lower_new_dir_dentry = dget_parent(lower_new_dentry);
7787+
2380c486
JR
7788+ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
7789+ /* source should not be ancenstor of target */
7790+ if (trap == lower_old_dentry) {
7791+ err = -EINVAL;
7792+ goto out_err_unlock;
7793+ }
7794+ /* target should not be ancenstor of source */
7795+ if (trap == lower_new_dentry) {
7796+ err = -ENOTEMPTY;
7797+ goto out_err_unlock;
7798+ }
7799+ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
7800+ lower_new_dir_dentry->d_inode, lower_new_dentry);
7801+out_err_unlock:
7802+ if (!err) {
7803+ /* update parent dir times */
7804+ fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode);
7805+ fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode);
7806+ }
7807+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
2380c486
JR
7808+
7809+ dput(lower_old_dir_dentry);
7810+ dput(lower_new_dir_dentry);
7811+ dput(lower_old_dentry);
7812+ dput(lower_new_dentry);
7813+
7814+out:
7815+ if (!err) {
7816+ /* Fixup the new_dentry. */
7817+ if (bindex < dbstart(new_dentry))
7818+ dbstart(new_dentry) = bindex;
7819+ else if (bindex > dbend(new_dentry))
7820+ dbend(new_dentry) = bindex;
7821+ }
7822+
7823+ return err;
7824+}
7825+
7826+/*
7827+ * Main rename code. This is sufficiently complex, that it's documented in
7828+ * Documentation/filesystems/unionfs/rename.txt. This routine calls
7829+ * __unionfs_rename() above to perform some of the work.
 *
 * Outline of what the code below does:
 *  1. Try the rename in the source's leftmost branch (old_bstart).  A
 *     copy-up error schedules a copy-up; any other error is returned.
 *  2. Unlink lower instances of the destination in the branches from
 *     old_bstart - 1 down to new_bstart.
 *  3. If a copy-up was scheduled, copy the source up into the first branch
 *     (walking left) where copy-up and whiteout creation succeed, and
 *     rename it there.
 *  4. Make a renamed directory opaque, and create a whiteout for the
 *     source when it has more than one lower instance and was not copied up.
 *
 * Returns 0 on success or a negative errno; the revert path turns any
 * failure to restore the previous state into -EIO.
7830+ */
7831+static int do_unionfs_rename(struct inode *old_dir,
7832+ struct dentry *old_dentry,
7833+ struct dentry *old_parent,
7834+ struct inode *new_dir,
7835+ struct dentry *new_dentry,
7836+ struct dentry *new_parent)
7837+{
7838+ int err = 0;
7839+ int bindex;
7840+ int old_bstart, old_bend;
7841+ int new_bstart, new_bend;
7842+ int do_copyup = -1; /* -1: no copy-up scheduled; else first branch to try */
7843+ int local_err = 0;
7844+ int eio = 0;
7845+ int revert = 0; /* set once the rename in old_bstart has succeeded */
7846+
7847+ old_bstart = dbstart(old_dentry);
7848+ old_bend = dbend(old_dentry);
7849+
7850+ new_bstart = dbstart(new_dentry);
7851+ new_bend = dbend(new_dentry);
7852+
7853+ /* Rename source to destination. */
7854+ err = __unionfs_rename(old_dir, old_dentry, old_parent,
7855+ new_dir, new_dentry, new_parent,
7856+ old_bstart);
7857+ if (err) {
7858+ if (!IS_COPYUP_ERR(err))
7859+ goto out;
7860+ do_copyup = old_bstart - 1;
7861+ } else {
7862+ revert = 1;
7863+ }
7864+
7865+ /*
7866+ * Unlink all instances of destination that exist to the left of
7867+ * bstart of source. On error, revert back, goto out.
7868+ */
7869+ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
7870+ struct dentry *unlink_dentry;
7871+ struct dentry *unlink_dir_dentry;
7872+
7873+ BUG_ON(bindex < 0);
7874+ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7875+ if (!unlink_dentry)
7876+ continue;
7877+
7878+ unlink_dir_dentry = lock_parent(unlink_dentry);
7879+ err = is_robranch_super(old_dir->i_sb, bindex);
7880+ if (!err)
7881+ err = vfs_unlink(unlink_dir_dentry->d_inode,
7882+ unlink_dentry);
7883+
7884+ fsstack_copy_attr_times(new_parent->d_inode,
7885+ unlink_dir_dentry->d_inode);
7886+ /* propagate number of hard-links */
f4ea99f3
AM
7887+ set_nlink(new_parent->d_inode,
7888+ unionfs_get_nlinks(new_parent->d_inode));
2380c486
JR
7889+
7890+ unlock_dir(unlink_dir_dentry);
7891+ if (!err) {
/* the lower dentry at new_bstart is kept; all others are released */
7892+ if (bindex != new_bstart) {
7893+ dput(unlink_dentry);
7894+ unionfs_set_lower_dentry_idx(new_dentry,
7895+ bindex, NULL);
7896+ }
7897+ } else if (IS_COPYUP_ERR(err)) {
7898+ do_copyup = bindex - 1;
7899+ } else if (revert) {
7900+ goto revert;
7901+ }
/* NOTE(review): a non-copyup error with revert == 0 just continues the loop -- confirm intended */
7902+ }
7903+
7904+ if (do_copyup != -1) {
7905+ for (bindex = do_copyup; bindex >= 0; bindex--) {
7906+ /*
7907+ * copyup the file into some left directory, so that
7908+ * you can rename it
7909+ */
7910+ err = copyup_dentry(old_parent->d_inode,
7911+ old_dentry, old_bstart, bindex,
7912+ old_dentry->d_name.name,
7913+ old_dentry->d_name.len, NULL,
7914+ i_size_read(old_dentry->d_inode));
7915+ /* if copyup failed, try next branch to the left */
7916+ if (err)
7917+ continue;
7918+ /*
7919+ * create whiteout before calling __unionfs_rename
7920+ * because the latter will change the old_dentry's
7921+ * lower name and parent dir, resulting in the
7922+ * whiteout getting created in the wrong dir.
7923+ */
7924+ err = create_whiteout(old_dentry, bindex);
7925+ if (err) {
7926+ printk(KERN_ERR "unionfs: can't create a "
7927+ "whiteout for %s in rename (err=%d)\n",
7928+ old_dentry->d_name.name, err);
7929+ continue;
7930+ }
7931+ err = __unionfs_rename(old_dir, old_dentry, old_parent,
7932+ new_dir, new_dentry, new_parent,
7933+ bindex);
7934+ break;
7935+ }
7936+ }
7937+
7938+ /* make it opaque */
7939+ if (S_ISDIR(old_dentry->d_inode->i_mode)) {
7940+ err = make_dir_opaque(old_dentry, dbstart(old_dentry));
/* NOTE(review): this jumps to revert even when revert == 0 -- confirm intended */
7941+ if (err)
7942+ goto revert;
7943+ }
7944+
7945+ /*
7946+ * Create whiteout for source, only if:
7947+ * (1) There is more than one underlying instance of source.
7948+ * (2) No copy-up was scheduled (that path creates its whiteout above).
7949+ */
7950+ if ((old_bstart != old_bend) && (do_copyup == -1)) {
7951+ err = create_whiteout(old_dentry, old_bstart);
7952+ if (err) {
7953+ /* can't fix anything now, so we exit with -EIO */
7954+ printk(KERN_ERR "unionfs: can't create a whiteout for "
7955+ "%s in rename!\n", old_dentry->d_name.name);
7956+ err = -EIO;
7957+ }
7958+ }
7959+
7960+out:
7961+ return err;
7962+
7963+revert:
7964+ /* Do revert here. */
7965+ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
7966+ old_bstart);
7967+ if (local_err) {
7968+ printk(KERN_ERR "unionfs: revert failed in rename: "
7969+ "the new refresh failed\n");
7970+ eio = -EIO;
7971+ }
7972+
7973+ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
7974+ old_bstart);
7975+ if (local_err) {
7976+ printk(KERN_ERR "unionfs: revert failed in rename: "
7977+ "the old refresh failed\n");
7978+ eio = -EIO;
7979+ goto revert_out;
7980+ }
7981+
/*
 * NOTE(review): the two refreshes above use old_bstart, but the checks
 * and refreshes below index with bindex, i.e. whatever value the loops
 * above left behind -- confirm this is the intended branch.
 */
7982+ if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
7983+ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
7984+ printk(KERN_ERR "unionfs: revert failed in rename: "
7985+ "the object disappeared from under us!\n");
7986+ eio = -EIO;
7987+ goto revert_out;
7988+ }
7989+
7990+ if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
7991+ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
7992+ printk(KERN_ERR "unionfs: revert failed in rename: "
7993+ "the object was created underneath us!\n");
7994+ eio = -EIO;
7995+ goto revert_out;
7996+ }
7997+
/* rename back: note that the new and old arguments are swapped here */
7998+ local_err = __unionfs_rename(new_dir, new_dentry, new_parent,
7999+ old_dir, old_dentry, old_parent,
8000+ old_bstart);
8001+
8002+ /* If we can't fix it, then we cop-out with -EIO. */
8003+ if (local_err) {
8004+ printk(KERN_ERR "unionfs: revert failed in rename!\n");
8005+ eio = -EIO;
8006+ }
8007+
8008+ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent,
8009+ bindex);
8010+ if (local_err)
8011+ eio = -EIO;
8012+ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent,
8013+ bindex);
8014+ if (local_err)
8015+ eio = -EIO;
8016+
8017+revert_out:
/* a failed revert overrides the original error with -EIO */
8018+ if (eio)
8019+ err = eio;
8020+ return err;
8021+}
8022+
8023+/*
8024+ * We can't copyup a directory, because it may involve huge numbers of
8025+ * children, etc. Doing that in the kernel would be bad, so instead we
8026+ * return EXDEV to the user-space utility that caused this, and let the
8027+ * user-space recurse and ask us to copy up each file separately.
8028+ */
8029+static int may_rename_dir(struct dentry *dentry, struct dentry *parent)
8030+{
8031+ int err, bstart;
8032+
8033+ err = check_empty(dentry, parent, NULL);
8034+ if (err == -ENOTEMPTY) {
8035+ if (is_robranch(dentry))
8036+ return -EXDEV;
8037+ } else if (err) {
8038+ return err;
8039+ }
8040+
8041+ bstart = dbstart(dentry);
8042+ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
8043+ return 0;
8044+
8045+ dbstart(dentry) = bstart + 1;
8046+ err = check_empty(dentry, parent, NULL);
8047+ dbstart(dentry) = bstart;
8048+ if (err == -ENOTEMPTY)
8049+ err = -EXDEV;
8050+ return err;
8051+}
8052+
8053+/*
8054+ * The locking rules in unionfs_rename are complex. We could use a simpler
8055+ * superblock-level name-space lock for renames and copy-ups.
 *
 * Rename entry point.  The code below takes the superblock read lock, then
 * the parent dentries (skipping any that coincide with one of the dentries
 * being renamed, or with each other), then both dentries.  After
 * revalidation and the lookups/emptiness checks it hands over to
 * do_unionfs_rename() and finally fixes up lower inodes and times.
 * Everything is unlocked in the reverse order.
 *
 * Returns 0 on success or a negative errno; on error new_dentry is
 * d_drop()ped.
8056+ */
8057+int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8058+ struct inode *new_dir, struct dentry *new_dentry)
8059+{
8060+ int err = 0;
8061+ struct dentry *wh_dentry;
8062+ struct dentry *old_parent, *new_parent;
8063+ int valid = true;
8064+
8065+ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
8066+ old_parent = dget_parent(old_dentry);
8067+ new_parent = dget_parent(new_dentry);
8068+ /* un/lock parent dentries only if they differ from old/new_dentry */
8069+ if (old_parent != old_dentry &&
8070+ old_parent != new_dentry)
8071+ unionfs_lock_dentry(old_parent, UNIONFS_DMUTEX_REVAL_PARENT);
8072+ if (new_parent != old_dentry &&
8073+ new_parent != new_dentry &&
8074+ new_parent != old_parent)
8075+ unionfs_lock_dentry(new_parent, UNIONFS_DMUTEX_REVAL_CHILD);
8076+ unionfs_double_lock_dentry(old_dentry, new_dentry);
8077+
8078+ valid = __unionfs_d_revalidate(old_dentry, old_parent, false);
8079+ if (!valid) {
8080+ err = -ESTALE;
8081+ goto out;
8082+ }
/* the destination is only revalidated when it is positive and not deleted */
8083+ if (!d_deleted(new_dentry) && new_dentry->d_inode) {
8084+ valid = __unionfs_d_revalidate(new_dentry, new_parent, false);
8085+ if (!valid) {
8086+ err = -ESTALE;
8087+ goto out;
8088+ }
8089+ }
8090+
8091+ if (!S_ISDIR(old_dentry->d_inode->i_mode))
8092+ err = unionfs_partial_lookup(old_dentry, old_parent);
8093+ else
8094+ err = may_rename_dir(old_dentry, old_parent);
8095+
8096+ if (err)
8097+ goto out;
8098+
8099+ err = unionfs_partial_lookup(new_dentry, new_parent);
8100+ if (err)
8101+ goto out;
8102+
8103+ /*
8104+ * if new_dentry is already lower because of whiteout,
8105+ * simply override it even if the whited-out dir is not empty.
8106+ */
8107+ wh_dentry = find_first_whiteout(new_dentry);
8108+ if (!IS_ERR(wh_dentry)) {
8109+ dput(wh_dentry);
8110+ } else if (new_dentry->d_inode) {
/* source and destination must both be directories, or both not */
8111+ if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8112+ S_ISDIR(new_dentry->d_inode->i_mode)) {
8113+ err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8114+ -ENOTDIR : -EISDIR;
8115+ goto out;
8116+ }
8117+
8118+ if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8119+ struct unionfs_dir_state *namelist = NULL;
8120+ /* check if this unionfs directory is empty or not */
8121+ err = check_empty(new_dentry, new_parent, &namelist);
8122+ if (err)
8123+ goto out;
8124+
8125+ if (!is_robranch(new_dentry))
8126+ err = delete_whiteouts(new_dentry,
8127+ dbstart(new_dentry),
8128+ namelist);
8129+
8130+ free_rdstate(namelist);
8131+
8132+ if (err)
8133+ goto out;
8134+ }
8135+ }
8136+
8137+ err = do_unionfs_rename(old_dir, old_dentry, old_parent,
8138+ new_dir, new_dentry, new_parent);
8139+ if (err)
8140+ goto out;
8141+
8142+ /*
8143+ * force re-lookup since the dir on ro branch is not renamed, and
8144+ * lower dentries still indicate the un-renamed ones.
8145+ */
8146+ if (S_ISDIR(old_dentry->d_inode->i_mode))
8147+ atomic_dec(&UNIONFS_D(old_dentry)->generation);
8148+ else
8149+ unionfs_postcopyup_release(old_dentry);
8150+ if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8151+ unionfs_postcopyup_release(new_dentry);
8152+ unionfs_postcopyup_setmnt(new_dentry);
8153+ if (!unionfs_lower_inode(new_dentry->d_inode)) {
8154+ /*
8155+ * If we get here, it means that no copyup was
8156+ * needed, and that a file by the old name already
8157+ * existing on the destination branch; that file got
8158+ * renamed earlier in this function, so all we need
8159+ * to do here is set the lower inode.
8160+ */
8161+ struct inode *inode;
8162+ inode = unionfs_lower_inode(old_dentry->d_inode);
8163+ igrab(inode);
8164+ unionfs_set_lower_inode_idx(new_dentry->d_inode,
8165+ dbstart(new_dentry),
8166+ inode);
8167+ }
8168+ }
8169+ /* if all of this renaming succeeded, update our times */
8170+ unionfs_copy_attr_times(old_dentry->d_inode);
8171+ unionfs_copy_attr_times(new_dentry->d_inode);
8172+ unionfs_check_inode(old_dir);
8173+ unionfs_check_inode(new_dir);
8174+ unionfs_check_dentry(old_dentry);
8175+ unionfs_check_dentry(new_dentry);
8176+
8177+out:
8178+ if (err) /* clear the new_dentry stuff created */
8179+ d_drop(new_dentry);
8180+
/* unlock in reverse order, using the same conditions as when locking */
8181+ unionfs_double_unlock_dentry(old_dentry, new_dentry);
8182+ if (new_parent != old_dentry &&
8183+ new_parent != new_dentry &&
8184+ new_parent != old_parent)
8185+ unionfs_unlock_dentry(new_parent);
8186+ if (old_parent != old_dentry &&
8187+ old_parent != new_dentry)
8188+ unionfs_unlock_dentry(old_parent);
8189+ dput(new_parent);
8190+ dput(old_parent);
8191+ unionfs_read_unlock(old_dentry->d_sb);
8192+
8193+ return err;
8194+}
0c5527e5
AM
8195diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8196new file mode 100644
63b09289 8197index 0000000..b923742
0c5527e5
AM
8198--- /dev/null
8199+++ b/fs/unionfs/sioq.c
2380c486
JR
8200@@ -0,0 +1,101 @@
8201+/*
63b09289 8202+ * Copyright (c) 2006-2011 Erez Zadok
2380c486
JR
8203+ * Copyright (c) 2006 Charles P. Wright
8204+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8205+ * Copyright (c) 2006 Junjiro Okajima
8206+ * Copyright (c) 2006 David P. Quigley
63b09289
JR
8207+ * Copyright (c) 2006-2011 Stony Brook University
8208+ * Copyright (c) 2006-2011 The Research Foundation of SUNY
2380c486
JR
8209+ *
8210+ * This program is free software; you can redistribute it and/or modify
8211+ * it under the terms of the GNU General Public License version 2 as
8212+ * published by the Free Software Foundation.
8213+ */
8214+
8215+#include "union.h"
8216+
8217+/*
8218+ * Super-user IO work Queue - sometimes we need to perform actions which
8219+ * would fail due to the unix permissions on the parent directory (e.g.,
8220+ * rmdir a directory which appears empty, but in reality contains
8221+ * whiteouts).
8222+ */
8223+
8224+static struct workqueue_struct *superio_workqueue;
8225+
8226+int __init init_sioq(void)
8227+{
8228+ int err;
8229+
8230+ superio_workqueue = create_workqueue("unionfs_siod");
8231+ if (!IS_ERR(superio_workqueue))
8232+ return 0;
8233+
8234+ err = PTR_ERR(superio_workqueue);
8235+ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8236+ superio_workqueue = NULL;
8237+ return err;
8238+}
8239+
8240+void stop_sioq(void)
8241+{
8242+ if (superio_workqueue)
8243+ destroy_workqueue(superio_workqueue);
8244+}
8245+
8246+void run_sioq(work_func_t func, struct sioq_args *args)
8247+{
8248+ INIT_WORK(&args->work, func);
8249+
8250+ init_completion(&args->comp);
8251+ while (!queue_work(superio_workqueue, &args->work)) {
8252+ /* TODO: do accounting if needed */
8253+ schedule();
8254+ }
8255+ wait_for_completion(&args->comp);
8256+}
8257+
8258+void __unionfs_create(struct work_struct *work)
8259+{
8260+ struct sioq_args *args = container_of(work, struct sioq_args, work);
8261+ struct create_args *c = &args->create;
8262+
8263+ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
8264+ complete(&args->comp);
8265+}
8266+
8267+void __unionfs_mkdir(struct work_struct *work)
8268+{
8269+ struct sioq_args *args = container_of(work, struct sioq_args, work);
8270+ struct mkdir_args *m = &args->mkdir;
8271+
8272+ args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
8273+ complete(&args->comp);
8274+}
8275+
8276+void __unionfs_mknod(struct work_struct *work)
8277+{
8278+ struct sioq_args *args = container_of(work, struct sioq_args, work);
8279+ struct mknod_args *m = &args->mknod;
8280+
8281+ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
8282+ complete(&args->comp);
8283+}
8284+
8285+void __unionfs_symlink(struct work_struct *work)
8286+{
8287+ struct sioq_args *args = container_of(work, struct sioq_args, work);
8288+ struct symlink_args *s = &args->symlink;
8289+
8290+ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf);
8291+ complete(&args->comp);
8292+}
8293+
8294+void __unionfs_unlink(struct work_struct *work)
8295+{
8296+ struct sioq_args *args = container_of(work, struct sioq_args, work);
8297+ struct unlink_args *u = &args->unlink;
8298+
8299+ args->err = vfs_unlink(u->parent, u->dentry);
8300+ complete(&args->comp);
8301+}
0c5527e5
AM
8302diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8303new file mode 100644
63b09289 8304index 0000000..c2dfb94
0c5527e5
AM
8305--- /dev/null
8306+++ b/fs/unionfs/sioq.h
2380c486
JR
8307@@ -0,0 +1,91 @@
8308+/*
63b09289 8309+ * Copyright (c) 2006-2011 Erez Zadok
2380c486
JR
8310+ * Copyright (c) 2006 Charles P. Wright
8311+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8312+ * Copyright (c) 2006 Junjiro Okajima
8313+ * Copyright (c) 2006 David P. Quigley
63b09289
JR
8314+ * Copyright (c) 2006-2011 Stony Brook University
8315+ * Copyright (c) 2006-2011 The Research Foundation of SUNY
2380c486
JR
8316+ *
8317+ * This program is free software; you can redistribute it and/or modify
8318+ * it under the terms of the GNU General Public License version 2 as
8319+ * published by the Free Software Foundation.
8320+ */
8321+
8322+#ifndef _SIOQ_H
8323+#define _SIOQ_H
8324+
8325+struct deletewh_args { /* presumably the arguments for __delete_whiteouts() -- TODO confirm */
8326+ struct unionfs_dir_state *namelist;
8327+ struct dentry *dentry;
8328+ int bindex;
8329+};
8330+
8331+struct is_opaque_args { /* presumably the argument for __is_opaque_dir() -- TODO confirm */
8332+ struct dentry *dentry;
8333+};
8334+
8335+struct create_args { /* passed, in this order, to vfs_create() by __unionfs_create() */
8336+ struct inode *parent;
8337+ struct dentry *dentry;
8338+ umode_t mode;
8339+ struct nameidata *nd;
8340+};
8341+
8342+struct mkdir_args { /* passed, in this order, to vfs_mkdir() by __unionfs_mkdir() */
8343+ struct inode *parent;
8344+ struct dentry *dentry;
8345+ umode_t mode;
8346+};
8347+
8348+struct mknod_args { /* passed, in this order, to vfs_mknod() by __unionfs_mknod() */
8349+ struct inode *parent;
8350+ struct dentry *dentry;
8351+ umode_t mode;
8352+ dev_t dev;
8353+};
8354+
8355+struct symlink_args { /* passed, in this order, to vfs_symlink() by __unionfs_symlink() */
8356+ struct inode *parent;
8357+ struct dentry *dentry;
8358+ char *symbuf;
8359+};
8360+
8361+struct unlink_args { /* passed, in this order, to vfs_unlink() by __unionfs_unlink() */
8362+ struct inode *parent;
8363+ struct dentry *dentry;
8364+};
8365+
8366+
8367+struct sioq_args { /* one request handed to run_sioq() */
8368+ struct completion comp; /* completed by the work handler; run_sioq() waits on it */
8369+ struct work_struct work; /* work item queued on the sioq workqueue */
8370+ int err; /* result of the vfs_*() call, stored by the handler */
8371+ void *ret; /* NOTE(review): not used by the handlers in sioq.c -- confirm its users */
8372+
8373+ union { /* per-operation arguments; each handler reads its own member */
8374+ struct deletewh_args deletewh;
8375+ struct is_opaque_args is_opaque;
8376+ struct create_args create;
8377+ struct mkdir_args mkdir;
8378+ struct mknod_args mknod;
8379+ struct symlink_args symlink;
8380+ struct unlink_args unlink;
8381+ };
8382+};
8383+
8384+/* Extern definitions for SIOQ functions */
8385+extern int __init init_sioq(void);
8386+extern void stop_sioq(void);
8387+extern void run_sioq(work_func_t func, struct sioq_args *args);
8388+
8389+/* Extern definitions for our privilege escalation helpers */
8390+extern void __unionfs_create(struct work_struct *work);
8391+extern void __unionfs_mkdir(struct work_struct *work);
8392+extern void __unionfs_mknod(struct work_struct *work);
8393+extern void __unionfs_symlink(struct work_struct *work);
8394+extern void __unionfs_unlink(struct work_struct *work);
8395+extern void __delete_whiteouts(struct work_struct *work);
8396+extern void __is_opaque_dir(struct work_struct *work);
8397+
8398+#endif /* not _SIOQ_H */
0c5527e5
AM
8399diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8400new file mode 100644
63b09289 8401index 0000000..bdca2f7
0c5527e5
AM
8402--- /dev/null
8403+++ b/fs/unionfs/subr.c
2380c486
JR
8404@@ -0,0 +1,95 @@
8405+/*
63b09289 8406+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
8407+ * Copyright (c) 2003-2006 Charles P. Wright
8408+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8409+ * Copyright (c) 2005-2006 Junjiro Okajima
8410+ * Copyright (c) 2005 Arun M. Krishnakumar
8411+ * Copyright (c) 2004-2006 David P. Quigley
8412+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8413+ * Copyright (c) 2003 Puja Gupta
8414+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
8415+ * Copyright (c) 2003-2011 Stony Brook University
8416+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
8417+ *
8418+ * This program is free software; you can redistribute it and/or modify
8419+ * it under the terms of the GNU General Public License version 2 as
8420+ * published by the Free Software Foundation.
8421+ */
8422+
8423+#include "union.h"
8424+
8425+/*
8426+ * returns the right n_link value based on the inode type
8427+ */
8428+int unionfs_get_nlinks(const struct inode *inode)
8429+{
8430+ /* don't bother to do all the work since we're unlinked */
8431+ if (inode->i_nlink == 0)
8432+ return 0;
8433+
8434+ if (!S_ISDIR(inode->i_mode))
8435+ return unionfs_lower_inode(inode)->i_nlink;
8436+
8437+ /*
8438+ * For directories, we return 1. The only place that could cares
8439+ * about links is readdir, and there's d_type there so even that
8440+ * doesn't matter.
8441+ */
8442+ return 1;
8443+}
8444+
8445+/* copy a/m/ctime from the lower branch with the newest times */
8446+void unionfs_copy_attr_times(struct inode *upper)
8447+{
8448+ int bindex;
8449+ struct inode *lower;
8450+
8451+ if (!upper)
8452+ return;
8453+ if (ibstart(upper) < 0) {
8454+#ifdef CONFIG_UNION_FS_DEBUG
8455+ WARN_ON(ibstart(upper) < 0);
8456+#endif /* CONFIG_UNION_FS_DEBUG */
8457+ return;
8458+ }
8459+ for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
8460+ lower = unionfs_lower_inode_idx(upper, bindex);
8461+ if (!lower)
8462+ continue; /* not all lower dir objects may exist */
8463+ if (unlikely(timespec_compare(&upper->i_mtime,
8464+ &lower->i_mtime) < 0))
8465+ upper->i_mtime = lower->i_mtime;
8466+ if (unlikely(timespec_compare(&upper->i_ctime,
8467+ &lower->i_ctime) < 0))
8468+ upper->i_ctime = lower->i_ctime;
8469+ if (unlikely(timespec_compare(&upper->i_atime,
8470+ &lower->i_atime) < 0))
8471+ upper->i_atime = lower->i_atime;
8472+ }
8473+}
8474+
8475+/*
8476+ * A unionfs/fanout version of fsstack_copy_attr_all. Uses a
8477+ * unionfs_get_nlinks to properly calcluate the number of links to a file.
8478+ * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
8479+ * important if the lower inode is a directory type)
8480+ */
8481+void unionfs_copy_attr_all(struct inode *dest,
8482+ const struct inode *src)
8483+{
8484+ dest->i_mode = src->i_mode;
8485+ dest->i_uid = src->i_uid;
8486+ dest->i_gid = src->i_gid;
8487+ dest->i_rdev = src->i_rdev;
8488+
8489+ unionfs_copy_attr_times(dest);
8490+
8491+ dest->i_blkbits = src->i_blkbits;
8492+ dest->i_flags = src->i_flags;
8493+
8494+ /*
8495+ * Update the nlinks AFTER updating the above fields, because the
8496+ * get_links callback may depend on them.
8497+ */
f4ea99f3 8498+ set_nlink(dest, unionfs_get_nlinks(dest));
2380c486 8499+}
0c5527e5
AM
8500diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8501new file mode 100644
63b09289 8502index 0000000..c3ac814
0c5527e5
AM
8503--- /dev/null
8504+++ b/fs/unionfs/super.c
63b09289 8505@@ -0,0 +1,1030 @@
2380c486 8506+/*
63b09289 8507+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
8508+ * Copyright (c) 2003-2006 Charles P. Wright
8509+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8510+ * Copyright (c) 2005-2006 Junjiro Okajima
8511+ * Copyright (c) 2005 Arun M. Krishnakumar
8512+ * Copyright (c) 2004-2006 David P. Quigley
8513+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8514+ * Copyright (c) 2003 Puja Gupta
8515+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
8516+ * Copyright (c) 2003-2011 Stony Brook University
8517+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
8518+ *
8519+ * This program is free software; you can redistribute it and/or modify
8520+ * it under the terms of the GNU General Public License version 2 as
8521+ * published by the Free Software Foundation.
8522+ */
8523+
8524+#include "union.h"
8525+
8526+/*
8527+ * The inode cache is used with alloc_inode for both our inode info and the
8528+ * vfs inode.
8529+ */
8530+static struct kmem_cache *unionfs_inode_cachep;
8531+
8532+struct inode *unionfs_iget(struct super_block *sb, unsigned long ino)
8533+{
8534+ int size;
8535+ struct unionfs_inode_info *info;
8536+ struct inode *inode;
8537+
8538+ inode = iget_locked(sb, ino);
8539+ if (!inode)
8540+ return ERR_PTR(-ENOMEM);
8541+ if (!(inode->i_state & I_NEW))
8542+ return inode;
8543+
8544+ info = UNIONFS_I(inode);
8545+ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
8546+ info->bstart = -1;
8547+ info->bend = -1;
8548+ atomic_set(&info->generation,
8549+ atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
8550+ spin_lock_init(&info->rdlock);
8551+ info->rdcount = 1;
8552+ info->hashsize = -1;
8553+ INIT_LIST_HEAD(&info->readdircache);
8554+
8555+ size = sbmax(inode->i_sb) * sizeof(struct inode *);
8556+ info->lower_inodes = kzalloc(size, GFP_KERNEL);
8557+ if (unlikely(!info->lower_inodes)) {
8558+ printk(KERN_CRIT "unionfs: no kernel memory when allocating "
8559+ "lower-pointer array!\n");
8560+ iget_failed(inode);
8561+ return ERR_PTR(-ENOMEM);
8562+ }
8563+
8564+ inode->i_version++;
8565+ inode->i_op = &unionfs_main_iops;
8566+ inode->i_fop = &unionfs_main_fops;
8567+
8568+ inode->i_mapping->a_ops = &unionfs_aops;
8569+
8570+ /*
8571+ * reset times so unionfs_copy_attr_all can keep out time invariants
8572+ * right (upper inode time being the max of all lower ones).
8573+ */
8574+ inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
8575+ inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
8576+ inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
8577+ unlock_new_inode(inode);
8578+ return inode;
8579+}
8580+
8581+/*
2380c486
JR
8582+ * final actions when unmounting a file system
8583+ *
8584+ * No need to lock rwsem.
8585+ */
8586+static void unionfs_put_super(struct super_block *sb)
8587+{
8588+ int bindex, bstart, bend;
8589+ struct unionfs_sb_info *spd;
8590+ int leaks = 0;
8591+
8592+ spd = UNIONFS_SB(sb);
8593+ if (!spd)
8594+ return;
8595+
8596+ bstart = sbstart(sb);
8597+ bend = sbend(sb);
8598+
8599+ /* Make sure we have no leaks of branchget/branchput. */
8600+ for (bindex = bstart; bindex <= bend; bindex++)
8601+ if (unlikely(branch_count(sb, bindex) != 0)) {
8602+ printk(KERN_CRIT
8603+ "unionfs: branch %d has %d references left!\n",
8604+ bindex, branch_count(sb, bindex));
8605+ leaks = 1;
8606+ }
8607+ WARN_ON(leaks != 0);
8608+
8609+ /* decrement lower super references */
8610+ for (bindex = bstart; bindex <= bend; bindex++) {
8611+ struct super_block *s;
8612+ s = unionfs_lower_super_idx(sb, bindex);
8613+ unionfs_set_lower_super_idx(sb, bindex, NULL);
8614+ atomic_dec(&s->s_active);
8615+ }
8616+
8617+ kfree(spd->dev_name);
8618+ kfree(spd->data);
8619+ kfree(spd);
8620+ sb->s_fs_info = NULL;
8621+}
8622+
8623+/*
8624+ * Since people use this to answer the "How big of a file can I write?"
8625+ * question, we report the size of the highest priority branch as the size of
8626+ * the union.
8627+ */
8628+static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
8629+{
8630+ int err = 0;
8631+ struct super_block *sb;
8632+ struct dentry *lower_dentry;
8633+ struct dentry *parent;
0c5527e5 8634+ struct path lower_path;
2380c486
JR
8635+ bool valid;
8636+
8637+ sb = dentry->d_sb;
8638+
8639+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
8640+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
8641+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
8642+
8643+ valid = __unionfs_d_revalidate(dentry, parent, false);
8644+ if (unlikely(!valid)) {
8645+ err = -ESTALE;
8646+ goto out;
8647+ }
8648+ unionfs_check_dentry(dentry);
8649+
8650+ lower_dentry = unionfs_lower_dentry(sb->s_root);
0c5527e5
AM
8651+ lower_path.dentry = lower_dentry;
8652+ lower_path.mnt = unionfs_mntget(sb->s_root, 0);
8653+ err = vfs_statfs(&lower_path, buf);
8654+ mntput(lower_path.mnt);
2380c486
JR
8655+
8656+ /* set return buf to our f/s to avoid confusing user-level utils */
8657+ buf->f_type = UNIONFS_SUPER_MAGIC;
8658+ /*
8659+ * Our maximum file name can is shorter by a few bytes because every
8660+ * file name could potentially be whited-out.
8661+ *
8662+ * XXX: this restriction goes away with ODF.
8663+ */
8664+ unionfs_set_max_namelen(&buf->f_namelen);
8665+
8666+ /*
8667+ * reset two fields to avoid confusing user-land.
8668+ * XXX: is this still necessary?
8669+ */
8670+ memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
8671+ memset(&buf->f_spare, 0, sizeof(buf->f_spare));
8672+
8673+out:
8674+ unionfs_check_dentry(dentry);
8675+ unionfs_unlock_dentry(dentry);
8676+ unionfs_unlock_parent(dentry, parent);
8677+ unionfs_read_unlock(sb);
8678+ return err;
8679+}
8680+
8681+/* handle mode changing during remount */
8682+static noinline_for_stack int do_remount_mode_option(
8683+ char *optarg,
8684+ int cur_branches,
8685+ struct unionfs_data *new_data,
8686+ struct path *new_lower_paths)
8687+{
8688+ int err = -EINVAL;
8689+ int perms, idx;
8690+ char *modename = strchr(optarg, '=');
63b09289 8691+ struct path path;
2380c486
JR
8692+
8693+ /* by now, optarg contains the branch name */
8694+ if (!*optarg) {
8695+ printk(KERN_ERR
8696+ "unionfs: no branch specified for mode change\n");
8697+ goto out;
8698+ }
8699+ if (!modename) {
8700+ printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
8701+ optarg);
8702+ goto out;
8703+ }
8704+ *modename++ = '\0';
8705+ err = parse_branch_mode(modename, &perms);
8706+ if (err) {
8707+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
8708+ modename, optarg);
8709+ goto out;
8710+ }
8711+
8712+ /*
8713+ * Find matching branch index. For now, this assumes that nothing
8714+ * has been mounted on top of this Unionfs stack. Once we have /odf
8715+ * and cache-coherency resolved, we'll address the branch-path
8716+ * uniqueness.
8717+ */
63b09289 8718+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
2380c486
JR
8719+ if (err) {
8720+ printk(KERN_ERR "unionfs: error accessing "
8721+ "lower directory \"%s\" (error %d)\n",
8722+ optarg, err);
8723+ goto out;
8724+ }
8725+ for (idx = 0; idx < cur_branches; idx++)
63b09289
JR
8726+ if (path.mnt == new_lower_paths[idx].mnt &&
8727+ path.dentry == new_lower_paths[idx].dentry)
2380c486 8728+ break;
63b09289 8729+ path_put(&path); /* no longer needed */
2380c486
JR
8730+ if (idx == cur_branches) {
8731+ err = -ENOENT; /* err may have been reset above */
8732+ printk(KERN_ERR "unionfs: branch \"%s\" "
8733+ "not found\n", optarg);
8734+ goto out;
8735+ }
8736+ /* check/change mode for existing branch */
8737+ /* we don't warn if perms==branchperms */
8738+ new_data[idx].branchperms = perms;
8739+ err = 0;
8740+out:
8741+ return err;
8742+}
8743+
8744+/* handle branch deletion during remount */
8745+static noinline_for_stack int do_remount_del_option(
8746+ char *optarg, int cur_branches,
8747+ struct unionfs_data *new_data,
8748+ struct path *new_lower_paths)
8749+{
8750+ int err = -EINVAL;
8751+ int idx;
63b09289 8752+ struct path path;
2380c486
JR
8753+
8754+ /* optarg contains the branch name to delete */
8755+
8756+ /*
8757+ * Find matching branch index. For now, this assumes that nothing
8758+ * has been mounted on top of this Unionfs stack. Once we have /odf
8759+ * and cache-coherency resolved, we'll address the branch-path
8760+ * uniqueness.
8761+ */
63b09289 8762+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
2380c486
JR
8763+ if (err) {
8764+ printk(KERN_ERR "unionfs: error accessing "
8765+ "lower directory \"%s\" (error %d)\n",
8766+ optarg, err);
8767+ goto out;
8768+ }
8769+ for (idx = 0; idx < cur_branches; idx++)
63b09289
JR
8770+ if (path.mnt == new_lower_paths[idx].mnt &&
8771+ path.dentry == new_lower_paths[idx].dentry)
2380c486 8772+ break;
63b09289 8773+ path_put(&path); /* no longer needed */
2380c486
JR
8774+ if (idx == cur_branches) {
8775+ printk(KERN_ERR "unionfs: branch \"%s\" "
8776+ "not found\n", optarg);
8777+ err = -ENOENT;
8778+ goto out;
8779+ }
8780+ /* check if there are any open files on the branch to be deleted */
8781+ if (atomic_read(&new_data[idx].open_files) > 0) {
8782+ err = -EBUSY;
8783+ goto out;
8784+ }
8785+
8786+ /*
8787+ * Now we have to delete the branch. First, release any handles it
8788+ * has. Then, move the remaining array indexes past "idx" in
8789+ * new_data and new_lower_paths one to the left. Finally, adjust
8790+ * cur_branches.
8791+ */
8792+ path_put(&new_lower_paths[idx]);
8793+
8794+ if (idx < cur_branches - 1) {
8795+ /* if idx==cur_branches-1, we delete last branch: easy */
8796+ memmove(&new_data[idx], &new_data[idx+1],
8797+ (cur_branches - 1 - idx) *
8798+ sizeof(struct unionfs_data));
8799+ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
8800+ (cur_branches - 1 - idx) * sizeof(struct path));
8801+ }
8802+
8803+ err = 0;
8804+out:
8805+ return err;
8806+}
8807+
8808+/* handle branch insertion during remount */
8809+static noinline_for_stack int do_remount_add_option(
8810+ char *optarg, int cur_branches,
8811+ struct unionfs_data *new_data,
8812+ struct path *new_lower_paths,
8813+ int *high_branch_id)
8814+{
8815+ int err = -EINVAL;
8816+ int perms;
8817+ int idx = 0; /* default: insert at beginning */
8818+ char *new_branch , *modename = NULL;
63b09289 8819+ struct path path;
2380c486
JR
8820+
8821+ /*
8822+ * optarg can be of several forms:
8823+ *
8824+ * /bar:/foo insert /foo before /bar
8825+ * /bar:/foo=ro insert /foo in ro mode before /bar
8826+ * /foo insert /foo in the beginning (prepend)
8827+ * :/foo insert /foo at the end (append)
8828+ */
8829+ if (*optarg == ':') { /* append? */
8830+ new_branch = optarg + 1; /* skip ':' */
8831+ idx = cur_branches;
8832+ goto found_insertion_point;
8833+ }
8834+ new_branch = strchr(optarg, ':');
8835+ if (!new_branch) { /* prepend? */
8836+ new_branch = optarg;
8837+ goto found_insertion_point;
8838+ }
8839+ *new_branch++ = '\0'; /* holds path+mode of new branch */
8840+
8841+ /*
8842+ * Find matching branch index. For now, this assumes that nothing
8843+ * has been mounted on top of this Unionfs stack. Once we have /odf
8844+ * and cache-coherency resolved, we'll address the branch-path
8845+ * uniqueness.
8846+ */
63b09289 8847+ err = kern_path(optarg, LOOKUP_FOLLOW, &path);
2380c486
JR
8848+ if (err) {
8849+ printk(KERN_ERR "unionfs: error accessing "
8850+ "lower directory \"%s\" (error %d)\n",
8851+ optarg, err);
8852+ goto out;
8853+ }
8854+ for (idx = 0; idx < cur_branches; idx++)
63b09289
JR
8855+ if (path.mnt == new_lower_paths[idx].mnt &&
8856+ path.dentry == new_lower_paths[idx].dentry)
2380c486 8857+ break;
63b09289 8858+ path_put(&path); /* no longer needed */
2380c486
JR
8859+ if (idx == cur_branches) {
8860+ printk(KERN_ERR "unionfs: branch \"%s\" "
8861+ "not found\n", optarg);
8862+ err = -ENOENT;
8863+ goto out;
8864+ }
8865+
8866+ /*
8867+ * At this point idx holds the index before which the new branch
8868+ * should be inserted.
8869+ */
8870+found_insertion_point:
8871+ /* find the mode for the new branch */
8872+ if (new_branch)
8873+ modename = strchr(new_branch, '=');
8874+ if (modename)
8875+ *modename++ = '\0';
8876+ if (!new_branch || !*new_branch) {
8877+ printk(KERN_ERR "unionfs: null new branch\n");
8878+ err = -EINVAL;
8879+ goto out;
8880+ }
8881+ err = parse_branch_mode(modename, &perms);
8882+ if (err) {
8883+ printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
8884+ "branch \"%s\"\n", modename, new_branch);
8885+ goto out;
8886+ }
63b09289 8887+ err = kern_path(new_branch, LOOKUP_FOLLOW, &path);
2380c486
JR
8888+ if (err) {
8889+ printk(KERN_ERR "unionfs: error accessing "
8890+ "lower directory \"%s\" (error %d)\n",
8891+ new_branch, err);
8892+ goto out;
8893+ }
8894+ /*
8895+ * It's probably safe to check_branch the new branch to insert. Note:
8896+ * we don't allow inserting branches which are unionfs's by
8897+ * themselves (check_branch returns EINVAL in that case). This is
8898+ * because this code base doesn't support stacking unionfs: the ODF
8899+ * code base supports that correctly.
8900+ */
63b09289 8901+ err = check_branch(&path);
2380c486
JR
8902+ if (err) {
8903+ printk(KERN_ERR "unionfs: lower directory "
8904+ "\"%s\" is not a valid branch\n", optarg);
63b09289 8905+ path_put(&path);
2380c486
JR
8906+ goto out;
8907+ }
8908+
8909+ /*
8910+ * Now we have to insert the new branch. But first, move the bits
8911+ * to make space for the new branch, if needed. Finally, adjust
8912+ * cur_branches.
8913+ * We don't release path here; it's kept until umount/remount.
8914+ */
8915+ if (idx < cur_branches) {
8916+ /* if idx==cur_branches, we append: easy */
8917+ memmove(&new_data[idx+1], &new_data[idx],
8918+ (cur_branches - idx) * sizeof(struct unionfs_data));
8919+ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
8920+ (cur_branches - idx) * sizeof(struct path));
8921+ }
63b09289
JR
8922+ new_lower_paths[idx].dentry = path.dentry;
8923+ new_lower_paths[idx].mnt = path.mnt;
2380c486 8924+
63b09289 8925+ new_data[idx].sb = path.dentry->d_sb;
2380c486
JR
8926+ atomic_set(&new_data[idx].open_files, 0);
8927+ new_data[idx].branchperms = perms;
8928+ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
8929+
8930+ err = 0;
8931+out:
8932+ return err;
8933+}
8934+
8935+
8936+/*
8937+ * Support branch management options on remount.
8938+ *
8939+ * See Documentation/filesystems/unionfs/ for details.
8940+ *
8941+ * @flags: numeric mount options
8942+ * @options: mount options string
8943+ *
8944+ * This function can rearrange a mounted union dynamically, adding and
8945+ * removing branches, including changing branch modes. Clearly this has to
8946+ * be done safely and atomically. Luckily, the VFS already calls this
8947+ * function with lock_super(sb) and lock_kernel() held, preventing
8948+ * concurrent mixing of new mounts, remounts, and unmounts. Moreover,
8949+ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
8950+ * to purge dentries/inodes from our superblock, and also called
8951+ * fsync_super(sb) to purge any dirty pages. So we're good.
8952+ *
8953+ * XXX: however, our remount code may also need to invalidate mapped pages
8954+ * so as to force them to be re-gotten from the (newly reconfigured) lower
8955+ * branches. This has to wait for proper mmap and cache coherency support
8956+ * in the VFS.
8957+ *
8958+ */
8959+static int unionfs_remount_fs(struct super_block *sb, int *flags,
8960+ char *options)
8961+{
8962+ int err = 0;
8963+ int i;
8964+ char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
8965+ char *optname;
8966+ int cur_branches = 0; /* no. of current branches */
8967+ int new_branches = 0; /* no. of branches actually left in the end */
8968+ int add_branches; /* est. no. of branches to add */
8969+ int del_branches; /* est. no. of branches to del */
8970+ int max_branches; /* max possible no. of branches */
8971+ struct unionfs_data *new_data = NULL, *tmp_data = NULL;
8972+ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
8973+ struct inode **new_lower_inodes = NULL;
8974+ int new_high_branch_id; /* new high branch ID */
8975+ int size; /* memory allocation size, temp var */
8976+ int old_ibstart, old_ibend;
8977+
8978+ unionfs_write_lock(sb);
8979+
8980+ /*
8981+ * The VFS will take care of "ro" and "rw" flags, and we can safely
8982+ * ignore MS_SILENT, but anything else left over is an error. So we
8983+ * need to check if any other flags may have been passed (none are
8984+ * allowed/supported as of now).
8985+ */
8986+ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
8987+ printk(KERN_ERR
8988+ "unionfs: remount flags 0x%x unsupported\n", *flags);
8989+ err = -EINVAL;
8990+ goto out_error;
8991+ }
8992+
8993+ /*
8994+ * If 'options' is NULL, it's probably because the user just changed
8995+ * the union to a "ro" or "rw" and the VFS took care of it. So
8996+ * nothing to do and we're done.
8997+ */
8998+ if (!options || options[0] == '\0')
8999+ goto out_error;
9000+
9001+ /*
9002+ * Find out how many branches we will have in the end, counting
9003+ * "add" and "del" commands. Copy the "options" string because
9004+ * strsep modifies the string and we need it later.
9005+ */
9006+ tmp_to_free = kstrdup(options, GFP_KERNEL);
9007+ optionstmp = tmp_to_free;
9008+ if (unlikely(!optionstmp)) {
9009+ err = -ENOMEM;
9010+ goto out_free;
9011+ }
9012+ cur_branches = sbmax(sb); /* current no. branches */
9013+ new_branches = sbmax(sb);
9014+ del_branches = 0;
9015+ add_branches = 0;
9016+ new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9017+ while ((optname = strsep(&optionstmp, ",")) != NULL) {
9018+ char *optarg;
9019+
9020+ if (!optname || !*optname)
9021+ continue;
9022+
9023+ optarg = strchr(optname, '=');
9024+ if (optarg)
9025+ *optarg++ = '\0';
9026+
9027+ if (!strcmp("add", optname))
9028+ add_branches++;
9029+ else if (!strcmp("del", optname))
9030+ del_branches++;
9031+ }
9032+ kfree(tmp_to_free);
9033+ /* after all changes, will we have at least one branch left? */
9034+ if ((new_branches + add_branches - del_branches) < 1) {
9035+ printk(KERN_ERR
9036+ "unionfs: no branches left after remount\n");
9037+ err = -EINVAL;
9038+ goto out_free;
9039+ }
9040+
9041+ /*
9042+ * Since we haven't actually parsed all the add/del options, nor
9043+ * have we checked them for errors, we don't know for sure how many
9044+ * branches we will have after all changes have taken place. In
9045+ * fact, the total number of branches left could be less than what
9046+ * we have now. So we need to allocate space for a temporary
9047+ * placeholder that is at least as large as the maximum number of
9048+ * branches we *could* have, which is the current number plus all
9049+ * the additions. Once we're done with these temp placeholders, we
9050+ * may have to re-allocate the final size, copy over from the temp,
9051+ * and then free the temps (done near the end of this function).
9052+ */
9053+ max_branches = cur_branches + add_branches;
9054+ /* allocate space for the new per-branch unionfs_data array */
9055+ tmp_data = kcalloc(max_branches,
9056+ sizeof(struct unionfs_data), GFP_KERNEL);
9057+ if (unlikely(!tmp_data)) {
9058+ err = -ENOMEM;
9059+ goto out_free;
9060+ }
9061+ /* allocate space for new pointers to lower paths */
9062+ tmp_lower_paths = kcalloc(max_branches,
9063+ sizeof(struct path), GFP_KERNEL);
9064+ if (unlikely(!tmp_lower_paths)) {
9065+ err = -ENOMEM;
9066+ goto out_free;
9067+ }
9068+ /* copy current info into new placeholders, incrementing refcnts */
9069+ memcpy(tmp_data, UNIONFS_SB(sb)->data,
9070+ cur_branches * sizeof(struct unionfs_data));
9071+ memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9072+ cur_branches * sizeof(struct path));
9073+ for (i = 0; i < cur_branches; i++)
9074+ path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9075+
9076+ /*******************************************************************
63b09289 9077+ * For each branch command, do kern_path on the requested branch,
2380c486
JR
9078+ * and apply the change to a temp branch list. To handle errors, we
9079+ * already dup'ed the old arrays (above), and increased the refcnts
63b09289 9080+ * on various f/s objects. So now we can do all the kern_path calls
2380c486
JR
9081+ * and branch-management commands on the new arrays. If it fails mid
9082+ * way, we free the tmp arrays and *put all objects. If we succeed,
9083+ * then we free old arrays and *put its objects, and then replace
9084+ * the arrays with the new tmp list (we may have to re-allocate the
9085+ * memory because the temp lists could have been larger than what we
9086+ * actually needed).
9087+ *******************************************************************/
9088+
9089+ while ((optname = strsep(&options, ",")) != NULL) {
9090+ char *optarg;
9091+
9092+ if (!optname || !*optname)
9093+ continue;
9094+ /*
9095+ * At this stage optname holds a comma-delimited option, but
9096+ * without the commas. Next, we need to break the string on
9097+ * the '=' symbol to separate CMD=ARG, where ARG itself can
9098+ * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode",
9099+ * KEY is "/foo", and VAL is "rw".
9100+ */
9101+ optarg = strchr(optname, '=');
9102+ if (optarg)
9103+ *optarg++ = '\0';
9104+ /* incgen remount option (instead of old ioctl) */
9105+ if (!strcmp("incgen", optname)) {
9106+ err = 0;
9107+ goto out_no_change;
9108+ }
9109+
9110+ /*
9111+ * All of our options take an argument now. (Insert ones
9112+ * that don't above this check.) So at this stage optname
9113+ * contains the CMD part and optarg contains the ARG part.
9114+ */
9115+ if (!optarg || !*optarg) {
9116+ printk(KERN_ERR "unionfs: all remount options require "
9117+ "an argument (%s)\n", optname);
9118+ err = -EINVAL;
9119+ goto out_release;
9120+ }
9121+
9122+ if (!strcmp("add", optname)) {
9123+ err = do_remount_add_option(optarg, new_branches,
9124+ tmp_data,
9125+ tmp_lower_paths,
9126+ &new_high_branch_id);
9127+ if (err)
9128+ goto out_release;
9129+ new_branches++;
9130+ if (new_branches > UNIONFS_MAX_BRANCHES) {
9131+ printk(KERN_ERR "unionfs: command exceeds "
9132+ "%d branches\n", UNIONFS_MAX_BRANCHES);
9133+ err = -E2BIG;
9134+ goto out_release;
9135+ }
9136+ continue;
9137+ }
9138+ if (!strcmp("del", optname)) {
9139+ err = do_remount_del_option(optarg, new_branches,
9140+ tmp_data,
9141+ tmp_lower_paths);
9142+ if (err)
9143+ goto out_release;
9144+ new_branches--;
9145+ continue;
9146+ }
9147+ if (!strcmp("mode", optname)) {
9148+ err = do_remount_mode_option(optarg, new_branches,
9149+ tmp_data,
9150+ tmp_lower_paths);
9151+ if (err)
9152+ goto out_release;
9153+ continue;
9154+ }
9155+
9156+ /*
9157+ * When you use "mount -o remount,ro", mount(8) will
9158+ * reportedly pass the original dirs= string from
9159+ * /proc/mounts. So for now, we have to ignore dirs= and
9160+ * not consider it an error, unless we want to allow users
9161+ * to pass dirs= in remount. Note that to allow the VFS to
9162+ * actually process the ro/rw remount options, we have to
9163+ * return 0 from this function.
9164+ */
9165+ if (!strcmp("dirs", optname)) {
9166+ printk(KERN_WARNING
9167+ "unionfs: remount ignoring option \"%s\"\n",
9168+ optname);
9169+ continue;
9170+ }
9171+
9172+ err = -EINVAL;
9173+ printk(KERN_ERR
9174+ "unionfs: unrecognized option \"%s\"\n", optname);
9175+ goto out_release;
9176+ }
9177+
9178+out_no_change:
9179+
9180+ /******************************************************************
9181+ * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9182+ * see if we need to allocate a small-sized new vector, copy the
9183+ * vectors to their correct place, release the refcnt of the older
9184+ * ones, and return. Also handle invalidating any pages that will
9185+ * have to be re-read.
9186+ *******************************************************************/
9187+
9188+ if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9189+ printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9190+ "(use \"remount,ro\" to create a read-only union)\n");
9191+ err = -EINVAL;
9192+ goto out_release;
9193+ }
9194+
9195+ /* (re)allocate the per-branch unionfs_data array to its final size */
9196+ size = new_branches * sizeof(struct unionfs_data);
9197+ new_data = krealloc(tmp_data, size, GFP_KERNEL);
9198+ if (unlikely(!new_data)) {
9199+ err = -ENOMEM;
9200+ goto out_release;
9201+ }
9202+
9203+ /* allocate space for new pointers to lower paths */
9204+ size = new_branches * sizeof(struct path);
9205+ new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9206+ if (unlikely(!new_lower_paths)) {
9207+ err = -ENOMEM;
9208+ goto out_release;
9209+ }
9210+
9211+ /* allocate space for new pointers to lower inodes */
9212+ new_lower_inodes = kcalloc(new_branches,
9213+ sizeof(struct inode *), GFP_KERNEL);
9214+ if (unlikely(!new_lower_inodes)) {
9215+ err = -ENOMEM;
9216+ goto out_release;
9217+ }
9218+
9219+ /*
9220+ * OK, just before we actually put the new set of branches in place,
9221+ * we need to ensure that our own f/s has no dirty objects left.
9222+ * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9223+ * fsync_super(sb), taking care of dentries, inodes, and dirty
9224+ * pages. So all that's left is for us to invalidate any leftover
9225+ * (non-dirty) pages to ensure that they will be re-read from the
9226+ * new lower branches (and to support mmap).
9227+ */
9228+
9229+ /*
9230+ * Once we finish the remounting successfully, our superblock
9231+ * generation number will have increased. This will be detected by
9232+ * our dentry-revalidation code upon subsequent f/s operations
9233+ * through unionfs. The revalidation code will rebuild the union of
9234+ * lower inodes for a given unionfs inode and invalidate any pages
9235+ * of such "stale" inodes (by calling our purge_inode_data
9236+ * function). This revalidation will happen lazily and
9237+ * incrementally, as users perform operations on cached inodes. We
9238+ * would like to encourage this revalidation to happen sooner if
9239+ * possible, so we like to try to invalidate as many other pages in
9240+ * our superblock as we can. We used to call drop_pagecache_sb() or
9241+ * a variant thereof, but either method was racy (drop_caches alone
9242+ * is known to be racy). So now we let the revalidation happen on a
9243+ * per file basis in ->d_revalidate.
9244+ */
9245+
9246+ /* grab new lower super references; release old ones */
9247+ for (i = 0; i < new_branches; i++)
9248+ atomic_inc(&new_data[i].sb->s_active);
9249+ for (i = 0; i < sbmax(sb); i++)
9250+ atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active);
9251+
9252+ /* copy new vectors into their correct place */
9253+ tmp_data = UNIONFS_SB(sb)->data;
9254+ UNIONFS_SB(sb)->data = new_data;
9255+ new_data = NULL; /* so don't free good pointers below */
9256+ tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9257+ UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9258+ new_lower_paths = NULL; /* so don't free good pointers below */
9259+
9260+ /* update our unionfs_sb_info and root dentry index of last branch */
9261+ i = sbmax(sb); /* save no. of branches to release at end */
9262+ sbend(sb) = new_branches - 1;
9263+ dbend(sb->s_root) = new_branches - 1;
9264+ old_ibstart = ibstart(sb->s_root->d_inode);
9265+ old_ibend = ibend(sb->s_root->d_inode);
9266+ ibend(sb->s_root->d_inode) = new_branches - 1;
9267+ UNIONFS_D(sb->s_root)->bcount = new_branches;
9268+ new_branches = i; /* no. of branches to release below */
9269+
9270+ /*
9271+ * Update lower inodes: 3 steps
9272+ * 1. grab ref on all new lower inodes
9273+ */
9274+ for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9275+ struct dentry *lower_dentry =
9276+ unionfs_lower_dentry_idx(sb->s_root, i);
9277+ igrab(lower_dentry->d_inode);
9278+ new_lower_inodes[i] = lower_dentry->d_inode;
9279+ }
9280+ /* 2. release reference on all older lower inodes */
9281+ iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true);
9282+ /* 3. update root dentry's inode to new lower_inodes array */
9283+ UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9284+ new_lower_inodes = NULL;
9285+
9286+ /* maxbytes may have changed */
9287+ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9288+ /* update high branch ID */
9289+ sbhbid(sb) = new_high_branch_id;
9290+
9291+ /* update our sb->generation for revalidating objects */
9292+ i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9293+ atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9294+ atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9295+ if (!(*flags & MS_SILENT))
9296+ pr_info("unionfs: %s: new generation number %d\n",
9297+ UNIONFS_SB(sb)->dev_name, i);
9298+ /* finally, update the root dentry's times */
9299+ unionfs_copy_attr_times(sb->s_root->d_inode);
9300+ err = 0; /* reset to success */
9301+
9302+ /*
9303+ * The code above falls through to the next label, and releases the
9304+ * refcnts of the older ones (stored in tmp_*): if we fell through
9305+ * here, it means success. However, if we jump directly to this
9306+ * label from any error above, then an error occurred after we
9307+ * grabbed various refcnts, and so we have to release the
9308+ * temporarily constructed structures.
9309+ */
9310+out_release:
9311+ /* no need to cleanup/release anything in tmp_data */
9312+ if (tmp_lower_paths)
9313+ for (i = 0; i < new_branches; i++)
9314+ path_put(&tmp_lower_paths[i]);
9315+out_free:
9316+ kfree(tmp_lower_paths);
9317+ kfree(tmp_data);
9318+ kfree(new_lower_paths);
9319+ kfree(new_data);
9320+ kfree(new_lower_inodes);
9321+out_error:
9322+ unionfs_check_dentry(sb->s_root);
9323+ unionfs_write_unlock(sb);
9324+ return err;
9325+}
9326+
9327+/*
9328+ * Called by iput() when the inode reference count reached zero
9329+ * and the inode is not hashed anywhere. Used to clear anything
9330+ * that needs to be, before the inode is completely destroyed and put
9331+ * on the inode free list.
9332+ *
9333+ * No need to lock sb info's rwsem.
9334+ */
0c5527e5 9335+static void unionfs_evict_inode(struct inode *inode)
2380c486
JR
9336+{
9337+ int bindex, bstart, bend;
9338+ struct inode *lower_inode;
9339+ struct list_head *pos, *n;
9340+ struct unionfs_dir_state *rdstate;
9341+
0c5527e5
AM
9342+ truncate_inode_pages(&inode->i_data, 0);
9343+ end_writeback(inode);
9344+
2380c486
JR
9345+ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9346+ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9347+ list_del(&rdstate->cache);
9348+ free_rdstate(rdstate);
9349+ }
9350+
9351+ /*
9352+ * Decrement a reference to a lower_inode, which was incremented
9353+ * by our read_inode when it was created initially.
9354+ */
9355+ bstart = ibstart(inode);
9356+ bend = ibend(inode);
9357+ if (bstart >= 0) {
9358+ for (bindex = bstart; bindex <= bend; bindex++) {
9359+ lower_inode = unionfs_lower_inode_idx(inode, bindex);
9360+ if (!lower_inode)
9361+ continue;
9362+ unionfs_set_lower_inode_idx(inode, bindex, NULL);
9363+ /* see Documentation/filesystems/unionfs/issues.txt */
9364+ lockdep_off();
9365+ iput(lower_inode);
9366+ lockdep_on();
9367+ }
9368+ }
9369+
9370+ kfree(UNIONFS_I(inode)->lower_inodes);
9371+ UNIONFS_I(inode)->lower_inodes = NULL;
9372+}
9373+
9374+static struct inode *unionfs_alloc_inode(struct super_block *sb)
9375+{
9376+ struct unionfs_inode_info *i;
9377+
9378+ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9379+ if (unlikely(!i))
9380+ return NULL;
9381+
9382+ /* memset everything up to the inode to 0 */
9383+ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9384+
9385+ i->vfs_inode.i_version = 1;
9386+ return &i->vfs_inode;
9387+}
9388+
9389+static void unionfs_destroy_inode(struct inode *inode)
9390+{
9391+ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9392+}
9393+
9394+/* unionfs inode cache constructor */
9395+static void init_once(void *obj)
9396+{
9397+ struct unionfs_inode_info *i = obj;
9398+
9399+ inode_init_once(&i->vfs_inode);
9400+}
9401+
9402+int unionfs_init_inode_cache(void)
9403+{
9404+ int err = 0;
9405+
9406+ unionfs_inode_cachep =
9407+ kmem_cache_create("unionfs_inode_cache",
9408+ sizeof(struct unionfs_inode_info), 0,
9409+ SLAB_RECLAIM_ACCOUNT, init_once);
9410+ if (unlikely(!unionfs_inode_cachep))
9411+ err = -ENOMEM;
9412+ return err;
9413+}
9414+
9415+/* unionfs inode cache destructor */
9416+void unionfs_destroy_inode_cache(void)
9417+{
9418+ if (unionfs_inode_cachep)
9419+ kmem_cache_destroy(unionfs_inode_cachep);
9420+}
9421+
9422+/*
9423+ * Called when we have a dirty inode, right here we only throw out
9424+ * parts of our readdir list that are too old.
9425+ *
9426+ * No need to grab sb info's rwsem.
9427+ */
0c5527e5
AM
9428+static int unionfs_write_inode(struct inode *inode,
9429+ struct writeback_control *wbc)
2380c486
JR
9430+{
9431+ struct list_head *pos, *n;
9432+ struct unionfs_dir_state *rdstate;
9433+
9434+ spin_lock(&UNIONFS_I(inode)->rdlock);
9435+ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9436+ rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9437+ /* We keep this list in LRU order. */
9438+ if ((rdstate->access + RDCACHE_JIFFIES) > jiffies)
9439+ break;
9440+ UNIONFS_I(inode)->rdcount--;
9441+ list_del(&rdstate->cache);
9442+ free_rdstate(rdstate);
9443+ }
9444+ spin_unlock(&UNIONFS_I(inode)->rdlock);
9445+
9446+ return 0;
9447+}
9448+
9449+/*
9450+ * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9451+ * code can actually succeed and won't leave tasks that need handling.
9452+ */
9453+static void unionfs_umount_begin(struct super_block *sb)
9454+{
9455+ struct super_block *lower_sb;
9456+ int bindex, bstart, bend;
9457+
9458+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9459+
9460+ bstart = sbstart(sb);
9461+ bend = sbend(sb);
9462+ for (bindex = bstart; bindex <= bend; bindex++) {
9463+ lower_sb = unionfs_lower_super_idx(sb, bindex);
9464+
9465+ if (lower_sb && lower_sb->s_op &&
9466+ lower_sb->s_op->umount_begin)
9467+ lower_sb->s_op->umount_begin(lower_sb);
9468+ }
9469+
9470+ unionfs_read_unlock(sb);
9471+}
9472+
9473+static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9474+{
9475+ struct super_block *sb = mnt->mnt_sb;
9476+ int ret = 0;
9477+ char *tmp_page;
9478+ char *path;
9479+ int bindex, bstart, bend;
9480+ int perms;
9481+
63b09289
JR
9482+ /* to prevent a silly lockdep warning with namespace_sem */
9483+ lockdep_off();
2380c486 9484+ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
2380c486
JR
9485+ unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9486+
9487+ tmp_page = (char *) __get_free_page(GFP_KERNEL);
9488+ if (unlikely(!tmp_page)) {
9489+ ret = -ENOMEM;
9490+ goto out;
9491+ }
9492+
9493+ bstart = sbstart(sb);
9494+ bend = sbend(sb);
9495+
9496+ seq_printf(m, ",dirs=");
9497+ for (bindex = bstart; bindex <= bend; bindex++) {
9498+ struct path p;
9499+ p.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex);
9500+ p.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
9501+ path = d_path(&p, tmp_page, PAGE_SIZE);
9502+ if (IS_ERR(path)) {
9503+ ret = PTR_ERR(path);
9504+ goto out;
9505+ }
9506+
9507+ perms = branchperms(sb, bindex);
9508+
9509+ seq_printf(m, "%s=%s", path,
9510+ perms & MAY_WRITE ? "rw" : "ro");
9511+ if (bindex != bend)
9512+ seq_printf(m, ":");
9513+ }
9514+
9515+out:
9516+ free_page((unsigned long) tmp_page);
9517+
9518+ unionfs_unlock_dentry(sb->s_root);
2380c486 9519+ unionfs_read_unlock(sb);
63b09289 9520+ lockdep_on();
2380c486
JR
9521+
9522+ return ret;
9523+}
9524+
9525+struct super_operations unionfs_sops = {
2380c486
JR
9526+ .put_super = unionfs_put_super,
9527+ .statfs = unionfs_statfs,
9528+ .remount_fs = unionfs_remount_fs,
0c5527e5 9529+ .evict_inode = unionfs_evict_inode,
2380c486
JR
9530+ .umount_begin = unionfs_umount_begin,
9531+ .show_options = unionfs_show_options,
9532+ .write_inode = unionfs_write_inode,
9533+ .alloc_inode = unionfs_alloc_inode,
9534+ .destroy_inode = unionfs_destroy_inode,
9535+};
0c5527e5
AM
9536diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
9537new file mode 100644
6b53c3da 9538index 0000000..8e7fcfb
0c5527e5
AM
9539--- /dev/null
9540+++ b/fs/unionfs/union.h
6b53c3da 9541@@ -0,0 +1,681 @@
2380c486 9542+/*
63b09289 9543+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
9544+ * Copyright (c) 2003-2006 Charles P. Wright
9545+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
9546+ * Copyright (c) 2005 Arun M. Krishnakumar
9547+ * Copyright (c) 2004-2006 David P. Quigley
9548+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
9549+ * Copyright (c) 2003 Puja Gupta
9550+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
9551+ * Copyright (c) 2003-2011 Stony Brook University
9552+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
9553+ *
9554+ * This program is free software; you can redistribute it and/or modify
9555+ * it under the terms of the GNU General Public License version 2 as
9556+ * published by the Free Software Foundation.
9557+ */
9558+
9559+#ifndef _UNION_H_
9560+#define _UNION_H_
9561+
9562+#include <linux/dcache.h>
9563+#include <linux/file.h>
9564+#include <linux/list.h>
9565+#include <linux/fs.h>
9566+#include <linux/mm.h>
9567+#include <linux/module.h>
9568+#include <linux/mount.h>
9569+#include <linux/namei.h>
9570+#include <linux/page-flags.h>
9571+#include <linux/pagemap.h>
9572+#include <linux/poll.h>
9573+#include <linux/security.h>
9574+#include <linux/seq_file.h>
9575+#include <linux/slab.h>
9576+#include <linux/spinlock.h>
2380c486
JR
9577+#include <linux/statfs.h>
9578+#include <linux/string.h>
9579+#include <linux/vmalloc.h>
9580+#include <linux/writeback.h>
9581+#include <linux/buffer_head.h>
9582+#include <linux/xattr.h>
9583+#include <linux/fs_stack.h>
9584+#include <linux/magic.h>
9585+#include <linux/log2.h>
9586+#include <linux/poison.h>
9587+#include <linux/mman.h>
9588+#include <linux/backing-dev.h>
9589+#include <linux/splice.h>
63b09289 9590+#include <linux/sched.h>
2380c486
JR
9591+
9592+#include <asm/system.h>
9593+
9594+#include <linux/union_fs.h>
9595+
9596+/* the file system name */
9597+#define UNIONFS_NAME "unionfs"
9598+
9599+/* unionfs root inode number */
9600+#define UNIONFS_ROOT_INO 1
9601+
9602+/* number of times we try to get a unique temporary file name */
9603+#define GET_TMPNAM_MAX_RETRY 5
9604+
9605+/* maximum number of branches we support, to avoid memory blowup */
9606+#define UNIONFS_MAX_BRANCHES 128
9607+
9608+/* minimum time (seconds) required for time-based cache-coherency */
9609+#define UNIONFS_MIN_CC_TIME 3
9610+
9611+/* Operations vectors defined in specific files. */
9612+extern struct file_operations unionfs_main_fops;
9613+extern struct file_operations unionfs_dir_fops;
9614+extern struct inode_operations unionfs_main_iops;
9615+extern struct inode_operations unionfs_dir_iops;
9616+extern struct inode_operations unionfs_symlink_iops;
9617+extern struct super_operations unionfs_sops;
9618+extern struct dentry_operations unionfs_dops;
9619+extern struct address_space_operations unionfs_aops, unionfs_dummy_aops;
9620+extern struct vm_operations_struct unionfs_vm_ops;
9621+
9622+/* How long should an entry be allowed to persist */
9623+#define RDCACHE_JIFFIES (5*HZ)
9624+
9625+/* compatibility with Real-Time patches */
9626+#ifdef CONFIG_PREEMPT_RT
9627+# define unionfs_rw_semaphore compat_rw_semaphore
9628+#else /* not CONFIG_PREEMPT_RT */
9629+# define unionfs_rw_semaphore rw_semaphore
9630+#endif /* not CONFIG_PREEMPT_RT */
9631+
9632+/* file private data. */
9633+struct unionfs_file_info {
9634+ int bstart;
9635+ int bend;
9636+ atomic_t generation;
9637+
9638+ struct unionfs_dir_state *rdstate;
9639+ struct file **lower_files;
9640+ int *saved_branch_ids; /* IDs of branches when file was opened */
7670a7fc 9641+ const struct vm_operations_struct *lower_vm_ops;
2380c486
JR
9642+ bool wrote_to_file; /* for delayed copyup */
9643+};
9644+
9645+/* unionfs inode data in memory */
9646+struct unionfs_inode_info {
9647+ int bstart;
9648+ int bend;
9649+ atomic_t generation;
9650+ /* Stuff for readdir over NFS. */
9651+ spinlock_t rdlock;
9652+ struct list_head readdircache;
9653+ int rdcount;
9654+ int hashsize;
9655+ int cookie;
9656+
9657+ /* The lower inodes */
9658+ struct inode **lower_inodes;
9659+
9660+ struct inode vfs_inode;
9661+};
9662+
9663+/* unionfs dentry data in memory */
9664+struct unionfs_dentry_info {
9665+ /*
9666+ * The mutex is used to lock the dentry as soon as we get into a
9667+ * unionfs function from the VFS. Our lock ordering is that children
9668+ * go before their parents.
9669+ */
9670+ struct mutex lock;
9671+ int bstart;
9672+ int bend;
9673+ int bopaque;
9674+ int bcount;
9675+ atomic_t generation;
9676+ struct path *lower_paths;
9677+};
9678+
9679+/* These are the pointers to our various objects. */
9680+struct unionfs_data {
9681+ struct super_block *sb; /* lower super_block */
9682+ atomic_t open_files; /* number of open files on branch */
9683+ int branchperms;
9684+ int branch_id; /* unique branch ID at re/mount time */
9685+};
9686+
9687+/* unionfs super-block data in memory */
9688+struct unionfs_sb_info {
9689+ int bend;
9690+
9691+ atomic_t generation;
9692+
9693+ /*
9694+ * This rwsem is used to make sure that a branch management
9695+ * operation...
9696+ * 1) will not begin before all currently in-flight operations
9697+ * complete.
9698+ * 2) any new operations do not execute until the currently
9699+ * running branch management operation completes.
9700+ *
9701+ * The write_lock_owner records the PID of the task which grabbed
9702+ * the rw_sem for writing. If the same task also tries to grab the
9703+ * read lock, we allow it. This prevents a self-deadlock when
9704+ * branch-management is used on a pivot_root'ed union, because we
9705+ * have to ->lookup paths which belong to the same union.
9706+ */
9707+ struct unionfs_rw_semaphore rwsem;
9708+ pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */
9709+ int high_branch_id; /* last unique branch ID given */
9710+ char *dev_name; /* to identify different unions in pr_debug */
9711+ struct unionfs_data *data;
9712+};
9713+
9714+/*
9715+ * structure for making the linked list of entries by readdir on left branch
9716+ * to compare with entries on right branch
9717+ */
9718+struct filldir_node {
9719+ struct list_head file_list; /* list for directory entries */
9720+ char *name; /* name entry */
9721+ int hash; /* name hash */
9722+ int namelen; /* name len since name is not 0 terminated */
9723+
9724+ /*
9725+ * we can check for duplicate whiteouts and files in the same branch
9726+ * in order to return -EIO.
9727+ */
9728+ int bindex;
9729+
9730+ /* is this a whiteout entry? */
9731+ int whiteout;
9732+
9733+ /* Inline name, so we don't need to separately kmalloc small ones */
82260373 9734+ char iname[DNAME_INLINE_LEN];
2380c486
JR
9735+};
9736+
9737+/* Directory hash table. */
9738+struct unionfs_dir_state {
9739+ unsigned int cookie; /* the cookie, based off of rdversion */
9740+ unsigned int offset; /* The entry we have returned. */
9741+ int bindex;
9742+ loff_t dirpos; /* offset within the lower level directory */
9743+ int size; /* How big is the hash table? */
9744+ int hashentries; /* How many entries have been inserted? */
9745+ unsigned long access;
9746+
9747+ /* This cache list is used when the inode keeps us around. */
9748+ struct list_head cache;
9749+ struct list_head list[0];
9750+};
9751+
9752+/* externs needed for fanout.h or sioq.h */
9753+extern int unionfs_get_nlinks(const struct inode *inode);
9754+extern void unionfs_copy_attr_times(struct inode *upper);
9755+extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src);
9756+
9757+/* include miscellaneous macros */
9758+#include "fanout.h"
9759+#include "sioq.h"
9760+
9761+/* externs for cache creation/deletion routines */
9762+extern void unionfs_destroy_filldir_cache(void);
9763+extern int unionfs_init_filldir_cache(void);
9764+extern int unionfs_init_inode_cache(void);
9765+extern void unionfs_destroy_inode_cache(void);
9766+extern int unionfs_init_dentry_cache(void);
9767+extern void unionfs_destroy_dentry_cache(void);
9768+
9769+/* Initialize and free readdir-specific state. */
9770+extern int init_rdstate(struct file *file);
9771+extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
9772+ int bindex);
9773+extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
9774+ loff_t fpos);
9775+extern void free_rdstate(struct unionfs_dir_state *state);
9776+extern int add_filldir_node(struct unionfs_dir_state *rdstate,
9777+ const char *name, int namelen, int bindex,
9778+ int whiteout);
9779+extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
9780+ const char *name, int namelen,
9781+ int is_whiteout);
9782+
9783+extern struct dentry **alloc_new_dentries(int objs);
9784+extern struct unionfs_data *alloc_new_data(int objs);
9785+
9786+/* We can only use 32-bits of offset for rdstate --- blech! */
9787+#define DIREOF (0xfffff)
9788+#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */
9789+#define MAXRDCOOKIE (0xfff)
9790+/* Turn an rdstate into an offset. */
9791+static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
9792+{
9793+ off_t tmp;
9794+
9795+ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
9796+ | (buf->offset & DIREOF);
9797+ return tmp;
9798+}
9799+
9800+/* Macros for locking a super_block. */
9801+enum unionfs_super_lock_class {
9802+ UNIONFS_SMUTEX_NORMAL,
9803+ UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */
9804+ UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */
9805+};
9806+static inline void unionfs_read_lock(struct super_block *sb, int subclass)
9807+{
9808+ if (UNIONFS_SB(sb)->write_lock_owner &&
9809+ UNIONFS_SB(sb)->write_lock_owner == current->pid)
9810+ return;
9811+ down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
9812+}
9813+static inline void unionfs_read_unlock(struct super_block *sb)
9814+{
9815+ if (UNIONFS_SB(sb)->write_lock_owner &&
9816+ UNIONFS_SB(sb)->write_lock_owner == current->pid)
9817+ return;
9818+ up_read(&UNIONFS_SB(sb)->rwsem);
9819+}
9820+static inline void unionfs_write_lock(struct super_block *sb)
9821+{
9822+ down_write(&UNIONFS_SB(sb)->rwsem);
9823+ UNIONFS_SB(sb)->write_lock_owner = current->pid;
9824+}
9825+static inline void unionfs_write_unlock(struct super_block *sb)
9826+{
9827+ up_write(&UNIONFS_SB(sb)->rwsem);
9828+ UNIONFS_SB(sb)->write_lock_owner = 0;
9829+}
9830+
9831+static inline void unionfs_double_lock_dentry(struct dentry *d1,
9832+ struct dentry *d2)
9833+{
9834+ BUG_ON(d1 == d2);
9835+ if (d1 < d2) {
9836+ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_PARENT);
9837+ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_CHILD);
9838+ } else {
9839+ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_PARENT);
9840+ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_CHILD);
9841+ }
9842+}
9843+
9844+static inline void unionfs_double_unlock_dentry(struct dentry *d1,
9845+ struct dentry *d2)
9846+{
9847+ BUG_ON(d1 == d2);
9848+ if (d1 < d2) { /* unlock in the same order as double_lock_dentry */
9849+ unionfs_unlock_dentry(d1);
9850+ unionfs_unlock_dentry(d2);
9851+ } else {
9852+ unionfs_unlock_dentry(d2);
9853+ unionfs_unlock_dentry(d1);
9854+ }
9855+}
9856+
9857+static inline void unionfs_double_lock_parents(struct dentry *p1,
9858+ struct dentry *p2)
9859+{
9860+ if (p1 == p2) {
9861+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9862+ return;
9863+ }
9864+ if (p1 < p2) {
9865+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT);
9866+ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_CHILD);
9867+ } else {
9868+ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_PARENT);
9869+ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_CHILD);
9870+ }
9871+}
9872+
9873+static inline void unionfs_double_unlock_parents(struct dentry *p1,
9874+ struct dentry *p2)
9875+{
9876+ if (p1 == p2) {
9877+ unionfs_unlock_dentry(p1);
9878+ return;
9879+ }
9880+ if (p1 < p2) { /* unlock in the same order as double_lock_parents */
9881+ unionfs_unlock_dentry(p1);
9882+ unionfs_unlock_dentry(p2);
9883+ } else {
9884+ unionfs_unlock_dentry(p2);
9885+ unionfs_unlock_dentry(p1);
9886+ }
9887+}
9888+
9889+extern int new_dentry_private_data(struct dentry *dentry, int subclass);
9890+extern int realloc_dentry_private_data(struct dentry *dentry);
9891+extern void free_dentry_private_data(struct dentry *dentry);
9892+extern void update_bstart(struct dentry *dentry);
9893+extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
9894+extern void release_lower_nd(struct nameidata *nd, int err);
9895+
9896+/*
9897+ * EXTERNALS:
9898+ */
9899+
9900+/* replicates the directory structure up to given dentry in given branch */
9901+extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
9902+ const char *name, int bindex);
9903+
9904+/* partial lookup */
9905+extern int unionfs_partial_lookup(struct dentry *dentry,
9906+ struct dentry *parent);
9907+extern struct dentry *unionfs_lookup_full(struct dentry *dentry,
9908+ struct dentry *parent,
9909+ int lookupmode);
9910+
9911+/* copies a file from dbstart to newbindex branch */
9912+extern int copyup_file(struct inode *dir, struct file *file, int bstart,
9913+ int newbindex, loff_t size);
9914+extern int copyup_named_file(struct inode *dir, struct file *file,
9915+ char *name, int bstart, int new_bindex,
9916+ loff_t len);
9917+/* copies a dentry from dbstart to newbindex branch */
9918+extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
9919+ int bstart, int new_bindex, const char *name,
9920+ int namelen, struct file **copyup_file, loff_t len);
9921+/* helper functions for post-copyup actions */
9922+extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
9923+extern void unionfs_postcopyup_release(struct dentry *dentry);
9924+
9925+/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
9926+extern int check_empty(struct dentry *dentry, struct dentry *parent,
9927+ struct unionfs_dir_state **namelist);
9928+/* whiteout and opaque directory helpers */
9929+extern char *alloc_whname(const char *name, int len);
9930+extern bool is_whiteout_name(char **namep, int *namelenp);
9931+extern bool is_validname(const char *name);
9932+extern struct dentry *lookup_whiteout(const char *name,
9933+ struct dentry *lower_parent);
9934+extern struct dentry *find_first_whiteout(struct dentry *dentry);
9935+extern int unlink_whiteout(struct dentry *wh_dentry);
9936+extern int check_unlink_whiteout(struct dentry *dentry,
9937+ struct dentry *lower_dentry, int bindex);
9938+extern int create_whiteout(struct dentry *dentry, int start);
9939+extern int delete_whiteouts(struct dentry *dentry, int bindex,
9940+ struct unionfs_dir_state *namelist);
9941+extern int is_opaque_dir(struct dentry *dentry, int bindex);
9942+extern int make_dir_opaque(struct dentry *dir, int bindex);
9943+extern void unionfs_set_max_namelen(long *namelen);
9944+
9945+extern void unionfs_reinterpose(struct dentry *this_dentry);
9946+extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
9947+
9948+/* Locking functions. */
9949+extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
9950+extern int unionfs_getlk(struct file *file, struct file_lock *fl);
9951+
9952+/* Common file operations. */
9953+extern int unionfs_file_revalidate(struct file *file, struct dentry *parent,
9954+ bool willwrite);
9955+extern int unionfs_open(struct inode *inode, struct file *file);
9956+extern int unionfs_file_release(struct inode *inode, struct file *file);
9957+extern int unionfs_flush(struct file *file, fl_owner_t id);
9958+extern long unionfs_ioctl(struct file *file, unsigned int cmd,
9959+ unsigned long arg);
6b53c3da
AM
9960+extern int unionfs_fsync(struct file *file, loff_t start, loff_t end,
9961+ int datasync);
2380c486
JR
9962+extern int unionfs_fasync(int fd, struct file *file, int flag);
9963+
9964+/* Inode operations */
9965+extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino);
9966+extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9967+ struct inode *new_dir, struct dentry *new_dentry);
9968+extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
9969+extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
9970+
9971+extern bool __unionfs_d_revalidate(struct dentry *dentry,
9972+ struct dentry *parent, bool willwrite);
9973+extern bool is_negative_lower(const struct dentry *dentry);
9974+extern bool is_newer_lower(const struct dentry *dentry);
9975+extern void purge_sb_data(struct super_block *sb);
9976+
9977+/* The values for unionfs_interpose's flag. */
9978+#define INTERPOSE_DEFAULT 0
9979+#define INTERPOSE_LOOKUP 1
9980+#define INTERPOSE_REVAL 2
9981+#define INTERPOSE_REVAL_NEG 3
9982+#define INTERPOSE_PARTIAL 4
9983+
9984+extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
9985+ struct super_block *sb, int flag);
9986+
9987+#ifdef CONFIG_UNION_FS_XATTR
9988+/* Extended attribute functions. */
9989+extern void *unionfs_xattr_alloc(size_t size, size_t limit);
9990+static inline void unionfs_xattr_kfree(const void *p)
9991+{
9992+ kfree(p);
9993+}
9994+extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
9995+ void *value, size_t size);
9996+extern int unionfs_removexattr(struct dentry *dentry, const char *name);
9997+extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
9998+ size_t size);
9999+extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10000+ const void *value, size_t size, int flags);
10001+#endif /* CONFIG_UNION_FS_XATTR */
10002+
10003+/* The root directory is unhashed, but isn't deleted. */
10004+static inline int d_deleted(struct dentry *d)
10005+{
10006+ return d_unhashed(d) && (d != d->d_sb->s_root);
10007+}
10008+
10009+/* unionfs_permission, check if we should bypass error to facilitate copyup */
10010+#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10011+
10012+/* unionfs_open, check if we need to copyup the file */
10013+#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10014+#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10015+
10016+static inline int branchperms(const struct super_block *sb, int index)
10017+{
10018+ BUG_ON(index < 0);
10019+ return UNIONFS_SB(sb)->data[index].branchperms;
10020+}
10021+
10022+static inline int set_branchperms(struct super_block *sb, int index, int perms)
10023+{
10024+ BUG_ON(index < 0);
10025+ UNIONFS_SB(sb)->data[index].branchperms = perms;
10026+ return perms;
10027+}
10028+
4ae1df7a
JR
10029+/* check if readonly lower inode, but possibly unlinked (no inode->i_sb) */
10030+static inline int __is_rdonly(const struct inode *inode)
10031+{
10032+ /* if unlinked, can't be readonly (?) */
10033+ if (!inode->i_sb)
10034+ return 0;
10035+ return IS_RDONLY(inode);
10036+
10037+}
2380c486
JR
10038+/* Is this file on a read-only branch? */
10039+static inline int is_robranch_super(const struct super_block *sb, int index)
10040+{
10041+ int ret;
10042+
10043+ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
10044+ return ret;
10045+}
10046+
10047+/* Is this file on a read-only branch? */
10048+static inline int is_robranch_idx(const struct dentry *dentry, int index)
10049+{
10050+ struct super_block *lower_sb;
10051+
10052+ BUG_ON(index < 0);
10053+
10054+ if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
10055+ return -EROFS;
10056+
10057+ lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
10058+ BUG_ON(lower_sb == NULL);
10059+ /*
10060+ * test sb flags directly, not IS_RDONLY(lower_inode) because the
10061+ * lower_dentry could be a negative.
10062+ */
10063+ if (lower_sb->s_flags & MS_RDONLY)
10064+ return -EROFS;
10065+
10066+ return 0;
10067+}
10068+
10069+static inline int is_robranch(const struct dentry *dentry)
10070+{
10071+ int index;
10072+
10073+ index = UNIONFS_D(dentry)->bstart;
10074+ BUG_ON(index < 0);
10075+
10076+ return is_robranch_idx(dentry, index);
10077+}
10078+
10079+/*
10080+ * EXTERNALS:
10081+ */
63b09289 10082+extern int check_branch(const struct path *path);
2380c486
JR
10083+extern int parse_branch_mode(const char *name, int *perms);
10084+
10085+/* locking helpers */
10086+static inline struct dentry *lock_parent(struct dentry *dentry)
10087+{
10088+ struct dentry *dir = dget_parent(dentry);
10089+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
10090+ return dir;
10091+}
10092+static inline struct dentry *lock_parent_wh(struct dentry *dentry)
10093+{
10094+ struct dentry *dir = dget_parent(dentry);
10095+
10096+ mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
10097+ return dir;
10098+}
10099+
10100+static inline void unlock_dir(struct dentry *dir)
10101+{
10102+ mutex_unlock(&dir->d_inode->i_mutex);
10103+ dput(dir);
10104+}
10105+
4ae1df7a
JR
10106+/* lock base inode mutex before calling lookup_one_len */
10107+static inline struct dentry *lookup_lck_len(const char *name,
10108+ struct dentry *base, int len)
10109+{
10110+ struct dentry *d;
63b09289
JR
10111+ struct nameidata lower_nd;
10112+ int err;
10113+
10114+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
10115+ if (unlikely(err < 0)) {
10116+ d = ERR_PTR(err);
10117+ goto out;
10118+ }
4ae1df7a 10119+ mutex_lock(&base->d_inode->i_mutex);
63b09289
JR
10120+ d = lookup_one_len_nd(name, base, len, &lower_nd);
10121+ release_lower_nd(&lower_nd, err);
4ae1df7a 10122+ mutex_unlock(&base->d_inode->i_mutex);
63b09289 10123+out:
4ae1df7a
JR
10124+ return d;
10125+}
10126+
2380c486
JR
10127+static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10128+ int bindex)
10129+{
10130+ struct vfsmount *mnt;
10131+
10132+ BUG_ON(!dentry || bindex < 0);
10133+
10134+ mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10135+#ifdef CONFIG_UNION_FS_DEBUG
10136+ if (!mnt)
10137+ pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10138+ mnt, bindex);
10139+#endif /* CONFIG_UNION_FS_DEBUG */
10140+
10141+ return mnt;
10142+}
10143+
10144+static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10145+{
10146+ struct vfsmount *mnt;
10147+
10148+ if (!dentry && bindex < 0)
10149+ return;
10150+ BUG_ON(!dentry || bindex < 0);
10151+
10152+ mnt = unionfs_lower_mnt_idx(dentry, bindex);
10153+#ifdef CONFIG_UNION_FS_DEBUG
10154+ /*
10155+ * Directories can have NULL lower objects in between start/end, but
10156+ * NOT if at the start/end range. We cannot verify that this dentry
10157+ * is a type=DIR, because it may already be a negative dentry. But
10158+ * if dbstart is greater than dbend, we know that this couldn't have
10159+ * been a regular file: it had to have been a directory.
10160+ */
10161+ if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10162+ pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10163+#endif /* CONFIG_UNION_FS_DEBUG */
10164+ mntput(mnt);
10165+}
10166+
10167+#ifdef CONFIG_UNION_FS_DEBUG
10168+
10169+/* useful for tracking code reachability */
10170+#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
10171+
10172+#define unionfs_check_inode(i) __unionfs_check_inode((i), \
10173+ __FILE__, __func__, __LINE__)
10174+#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \
10175+ __FILE__, __func__, __LINE__)
10176+#define unionfs_check_file(f) __unionfs_check_file((f), \
10177+ __FILE__, __func__, __LINE__)
10178+#define unionfs_check_nd(n) __unionfs_check_nd((n), \
10179+ __FILE__, __func__, __LINE__)
10180+#define show_branch_counts(sb) __show_branch_counts((sb), \
10181+ __FILE__, __func__, __LINE__)
10182+#define show_inode_times(i) __show_inode_times((i), \
10183+ __FILE__, __func__, __LINE__)
10184+#define show_dinode_times(d) __show_dinode_times((d), \
10185+ __FILE__, __func__, __LINE__)
10186+#define show_inode_counts(i) __show_inode_counts((i), \
10187+ __FILE__, __func__, __LINE__)
10188+
10189+extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10190+ const char *fxn, int line);
10191+extern void __unionfs_check_dentry(const struct dentry *dentry,
10192+ const char *fname, const char *fxn,
10193+ int line);
10194+extern void __unionfs_check_file(const struct file *file,
10195+ const char *fname, const char *fxn, int line);
10196+extern void __unionfs_check_nd(const struct nameidata *nd,
10197+ const char *fname, const char *fxn, int line);
10198+extern void __show_branch_counts(const struct super_block *sb,
10199+ const char *file, const char *fxn, int line);
10200+extern void __show_inode_times(const struct inode *inode,
10201+ const char *file, const char *fxn, int line);
10202+extern void __show_dinode_times(const struct dentry *dentry,
10203+ const char *file, const char *fxn, int line);
10204+extern void __show_inode_counts(const struct inode *inode,
10205+ const char *file, const char *fxn, int line);
10206+
10207+#else /* not CONFIG_UNION_FS_DEBUG */
10208+
10209+/* we leave useful hooks for these check functions throughout the code */
10210+#define unionfs_check_inode(i) do { } while (0)
10211+#define unionfs_check_dentry(d) do { } while (0)
10212+#define unionfs_check_file(f) do { } while (0)
10213+#define unionfs_check_nd(n) do { } while (0)
10214+#define show_branch_counts(sb) do { } while (0)
10215+#define show_inode_times(i) do { } while (0)
10216+#define show_dinode_times(d) do { } while (0)
10217+#define show_inode_counts(i) do { } while (0)
6b53c3da 10218+#define UDBG do { } while (0)
2380c486
JR
10219+
10220+#endif /* not CONFIG_UNION_FS_DEBUG */
10221+
10222+#endif /* not _UNION_H_ */
0c5527e5
AM
10223diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10224new file mode 100644
63b09289 10225index 0000000..bf447bb
0c5527e5
AM
10226--- /dev/null
10227+++ b/fs/unionfs/unlink.c
7670a7fc 10228@@ -0,0 +1,278 @@
2380c486 10229+/*
63b09289 10230+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
10231+ * Copyright (c) 2003-2006 Charles P. Wright
10232+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10233+ * Copyright (c) 2005-2006 Junjiro Okajima
10234+ * Copyright (c) 2005 Arun M. Krishnakumar
10235+ * Copyright (c) 2004-2006 David P. Quigley
10236+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10237+ * Copyright (c) 2003 Puja Gupta
10238+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
10239+ * Copyright (c) 2003-2011 Stony Brook University
10240+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
10241+ *
10242+ * This program is free software; you can redistribute it and/or modify
10243+ * it under the terms of the GNU General Public License version 2 as
10244+ * published by the Free Software Foundation.
10245+ */
10246+
10247+#include "union.h"
10248+
10249+/*
10250+ * Helper function for Unionfs's unlink operation.
10251+ *
10252+ * The main goal of this function is to optimize the unlinking of non-dir
10253+ * objects in unionfs by deleting all possible lower inode objects from the
10254+ * underlying branches having same dentry name as the non-dir dentry on
10255+ * which this unlink operation is called. This way we delete as many lower
10256+ * inodes as possible, and save space. Whiteouts need to be created in
10257+ * branch0 only if unlinking fails on any of the lower branch other than
10257+ * branch0 only if unlinking fails on any of the lower branches other than
10259+ *
10260+ * Also, while unlinking a file, if we encounter any dir type entry in any
10261+ * intermediate branch, then we remove the directory by calling vfs_rmdir.
10262+ * The following special cases are also handled:
10263+ *
10264+ * (1) If an error occurs in branch0 during vfs_unlink, then we return
10265+ * appropriate error.
10266+ *
10267+ * (2) If we get an error during unlink in any lower branch other
10268+ * than branch0, then we create a whiteout in branch0.
10269+ *
10270+ * (3) If a whiteout already exists in any intermediate branch, we delete
10271+ * all possible inodes only up to that branch (this is an "opaqueness"
10272+ * as per Documentation/filesystems/unionfs/concepts.txt).
10273+ *
10274+ */
10275+static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry,
10276+ struct dentry *parent)
10277+{
10278+ struct dentry *lower_dentry;
10279+ struct dentry *lower_dir_dentry;
10280+ int bindex;
10281+ int err = 0;
10282+
10283+ err = unionfs_partial_lookup(dentry, parent);
10284+ if (err)
10285+ goto out;
10286+
10287+ /* trying to unlink all possible valid instances */
10288+ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
10289+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10290+ if (!lower_dentry || !lower_dentry->d_inode)
10291+ continue;
10292+
10293+ lower_dir_dentry = lock_parent(lower_dentry);
10294+
10295+ /* avoid destroying the lower inode if the object is in use */
10296+ dget(lower_dentry);
10297+ err = is_robranch_super(dentry->d_sb, bindex);
10298+ if (!err) {
10299+ /* see Documentation/filesystems/unionfs/issues.txt */
10300+ lockdep_off();
10301+ if (!S_ISDIR(lower_dentry->d_inode->i_mode))
10302+ err = vfs_unlink(lower_dir_dentry->d_inode,
10303+ lower_dentry);
10304+ else
10305+ err = vfs_rmdir(lower_dir_dentry->d_inode,
10306+ lower_dentry);
10307+ lockdep_on();
10308+ }
10309+
10310+ /* if lower object deletion succeeds, update inode's times */
10311+ if (!err)
10312+ unionfs_copy_attr_times(dentry->d_inode);
10313+ dput(lower_dentry);
10314+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10315+ unlock_dir(lower_dir_dentry);
10316+
10317+ if (err)
10318+ break;
10319+ }
10320+
10321+ /*
10322+ * Create the whiteout in branch 0 (highest priority) only if (a)
10323+ * there was an error in any intermediate branch other than branch 0
10324+ * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or
10325+ * mounted read-only.
10326+ */
10327+ if (err) {
10328+ if ((bindex == 0) ||
10329+ ((bindex == dbstart(dentry)) &&
10330+ (!IS_COPYUP_ERR(err))))
10331+ goto out;
10332+ else {
10333+ if (!IS_COPYUP_ERR(err))
10334+ pr_debug("unionfs: lower object deletion "
10335+ "failed in branch:%d\n", bindex);
10336+ err = create_whiteout(dentry, sbstart(dentry->d_sb));
10337+ }
10338+ }
10339+
10340+out:
10341+ if (!err)
10342+ inode_dec_link_count(dentry->d_inode);
10343+
10344+ /* We don't want to leave negative leftover dentries for revalidate. */
10345+ if (!err && (dbopaque(dentry) != -1))
10346+ update_bstart(dentry);
10347+
10348+ return err;
10349+}
10350+
10351+int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10352+{
10353+ int err = 0;
10354+ struct inode *inode = dentry->d_inode;
10355+ struct dentry *parent;
10356+ int valid;
10357+
10358+ BUG_ON(S_ISDIR(inode->i_mode));
10359+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10360+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10361+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10362+
10363+ valid = __unionfs_d_revalidate(dentry, parent, false);
10364+ if (unlikely(!valid)) {
10365+ err = -ESTALE;
10366+ goto out;
10367+ }
10368+ unionfs_check_dentry(dentry);
10369+
10370+ err = unionfs_unlink_whiteout(dir, dentry, parent);
10371+ /* call d_drop so the system "forgets" about us */
10372+ if (!err) {
10373+ unionfs_postcopyup_release(dentry);
10374+ unionfs_postcopyup_setmnt(parent);
10375+ if (inode->i_nlink == 0) /* drop lower inodes */
10376+ iput_lowers_all(inode, false);
10377+ d_drop(dentry);
10378+ /*
10379+ * if unlink/whiteout succeeded, parent dir mtime has
10380+ * changed
10381+ */
10382+ unionfs_copy_attr_times(dir);
10383+ }
10384+
10385+out:
10386+ if (!err) {
10387+ unionfs_check_dentry(dentry);
10388+ unionfs_check_inode(dir);
10389+ }
10390+ unionfs_unlock_dentry(dentry);
10391+ unionfs_unlock_parent(dentry, parent);
10392+ unionfs_read_unlock(dentry->d_sb);
10393+ return err;
10394+}
10395+
10396+static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10397+ struct unionfs_dir_state *namelist)
10398+{
10399+ int err;
10400+ struct dentry *lower_dentry;
10401+ struct dentry *lower_dir_dentry = NULL;
10402+
10403+ /* Here we need to remove whiteout entries. */
10404+ err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10405+ if (err)
10406+ goto out;
10407+
10408+ lower_dentry = unionfs_lower_dentry(dentry);
10409+
10410+ lower_dir_dentry = lock_parent(lower_dentry);
10411+
10412+ /* avoid destroying the lower inode if the file is in use */
10413+ dget(lower_dentry);
10414+ err = is_robranch(dentry);
7670a7fc 10415+ if (!err)
2380c486 10416+ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
2380c486
JR
10417+ dput(lower_dentry);
10418+
10419+ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10420+ /* propagate number of hard-links */
f4ea99f3 10421+ set_nlink(dentry->d_inode, unionfs_get_nlinks(dentry->d_inode));
2380c486
JR
10422+
10423+out:
10424+ if (lower_dir_dentry)
10425+ unlock_dir(lower_dir_dentry);
10426+ return err;
10427+}
10428+
10429+int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10430+{
10431+ int err = 0;
10432+ struct unionfs_dir_state *namelist = NULL;
10433+ struct dentry *parent;
10434+ int dstart, dend;
10435+ bool valid;
10436+
10437+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10438+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
10439+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10440+
10441+ valid = __unionfs_d_revalidate(dentry, parent, false);
10442+ if (unlikely(!valid)) {
10443+ err = -ESTALE;
10444+ goto out;
10445+ }
10446+ unionfs_check_dentry(dentry);
10447+
10448+ /* check if this unionfs directory is empty or not */
10449+ err = check_empty(dentry, parent, &namelist);
10450+ if (err)
10451+ goto out;
10452+
10453+ err = unionfs_rmdir_first(dir, dentry, namelist);
10454+ dstart = dbstart(dentry);
10455+ dend = dbend(dentry);
10456+ /*
10457+ * We create a whiteout for the directory if there was an error to
10458+ * rmdir the first directory entry in the union. Otherwise, we
10459+ * create a whiteout only if there is a chance that a lower
10460+ * priority branch might also have the same named directory. IOW,
10461+ * if there is not another same-named directory at a lower priority
10462+ * branch, then we don't need to create a whiteout for it.
10463+ */
10464+ if (!err) {
10465+ if (dstart < dend)
10466+ err = create_whiteout(dentry, dstart);
10467+ } else {
10468+ int new_err;
10469+
10470+ if (dstart == 0)
10471+ goto out;
10472+
10473+ /* exit if the error returned was NOT -EROFS */
10474+ if (!IS_COPYUP_ERR(err))
10475+ goto out;
10476+
10477+ new_err = create_whiteout(dentry, dstart - 1);
10478+ if (new_err != -EEXIST)
10479+ err = new_err;
10480+ }
10481+
10482+out:
10483+ /*
10484+ * Drop references to lower dentry/inode so storage space for them
10485+ * can be reclaimed. Then, call d_drop so the system "forgets"
10486+ * about us.
10487+ */
10488+ if (!err) {
10489+ iput_lowers_all(dentry->d_inode, false);
10490+ dput(unionfs_lower_dentry_idx(dentry, dstart));
10491+ unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10492+ d_drop(dentry);
10493+ /* update our lower vfsmnts, in case a copyup took place */
10494+ unionfs_postcopyup_setmnt(dentry);
10495+ unionfs_check_dentry(dentry);
10496+ unionfs_check_inode(dir);
10497+ }
10498+
10499+ if (namelist)
10500+ free_rdstate(namelist);
10501+
10502+ unionfs_unlock_dentry(dentry);
10503+ unionfs_unlock_parent(dentry, parent);
10504+ unionfs_read_unlock(dentry->d_sb);
10505+ return err;
10506+}
0c5527e5
AM
10507diff --git a/fs/unionfs/whiteout.c b/fs/unionfs/whiteout.c
10508new file mode 100644
63b09289 10509index 0000000..582cef2
0c5527e5
AM
10510--- /dev/null
10511+++ b/fs/unionfs/whiteout.c
63b09289 10512@@ -0,0 +1,601 @@
2380c486 10513+/*
63b09289 10514+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
10515+ * Copyright (c) 2003-2006 Charles P. Wright
10516+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10517+ * Copyright (c) 2005-2006 Junjiro Okajima
10518+ * Copyright (c) 2005 Arun M. Krishnakumar
10519+ * Copyright (c) 2004-2006 David P. Quigley
10520+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10521+ * Copyright (c) 2003 Puja Gupta
10522+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
10523+ * Copyright (c) 2003-2011 Stony Brook University
10524+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
10525+ *
10526+ * This program is free software; you can redistribute it and/or modify
10527+ * it under the terms of the GNU General Public License version 2 as
10528+ * published by the Free Software Foundation.
10529+ */
10530+
10531+#include "union.h"
10532+
10533+/*
10534+ * whiteout and opaque directory helpers
10535+ */
10536+
10537+/* What do we use for whiteouts. */
10538+#define UNIONFS_WHPFX ".wh."
10539+#define UNIONFS_WHLEN 4
10540+/*
10541+ * If a directory contains this file, then it is opaque. We start with the
10542+ * .wh. flag so that it is blocked by lookup.
10543+ */
10544+#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10545+#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10546+
10547+/* construct whiteout filename */
10548+char *alloc_whname(const char *name, int len)
10549+{
10550+ char *buf;
10551+
10552+ buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
10553+ if (unlikely(!buf))
10554+ return ERR_PTR(-ENOMEM);
10555+
10556+ strcpy(buf, UNIONFS_WHPFX);
10557+ strlcat(buf, name, len + UNIONFS_WHLEN + 1);
10558+
10559+ return buf;
10560+}
10561+
10562+/*
10563+ * XXX: this can be inline or CPP macro, but is here to keep all whiteout
10564+ * code in one place.
10565+ */
10566+void unionfs_set_max_namelen(long *namelen)
10567+{
10568+ *namelen -= UNIONFS_WHLEN;
10569+}
10570+
10571+/* check if @namep is a whiteout, update @namep and @namelenp accordingly */
10572+bool is_whiteout_name(char **namep, int *namelenp)
10573+{
10574+ if (*namelenp > UNIONFS_WHLEN &&
10575+ !strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
10576+ *namep += UNIONFS_WHLEN;
10577+ *namelenp -= UNIONFS_WHLEN;
10578+ return true;
10579+ }
10580+ return false;
10581+}
10582+
10583+/* is the filename valid == !(whiteout for a file or opaque dir marker) */
10584+bool is_validname(const char *name)
10585+{
10586+ if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN))
10587+ return false;
10588+ if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
10589+ sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1))
10590+ return false;
10591+ return true;
10592+}
10593+
10594+/*
10595+ * Look for a whiteout @name in @lower_parent directory. If error, return
10596+ * ERR_PTR. Caller must dput() the returned dentry if not an error.
10597+ *
10598+ * XXX: some callers can reuse the whname allocated buffer to avoid repeated
10599+ * free then re-malloc calls. Need to provide a different API for those
10600+ * callers.
10601+ */
10602+struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent)
10603+{
10604+ char *whname = NULL;
10605+ int err = 0, namelen;
10606+ struct dentry *wh_dentry = NULL;
10607+
10608+ namelen = strlen(name);
10609+ whname = alloc_whname(name, namelen);
10610+ if (unlikely(IS_ERR(whname))) {
10611+ err = PTR_ERR(whname);
10612+ goto out;
10613+ }
10614+
10615+ /* check if whiteout exists in this branch: lookup .wh.foo */
4ae1df7a 10616+ wh_dentry = lookup_lck_len(whname, lower_parent, strlen(whname));
2380c486
JR
10617+ if (IS_ERR(wh_dentry)) {
10618+ err = PTR_ERR(wh_dentry);
10619+ goto out;
10620+ }
10621+
10622+ /* check if negative dentry (ENOENT) */
10623+ if (!wh_dentry->d_inode)
10624+ goto out;
10625+
10626+ /* whiteout found: check if valid type */
10627+ if (!S_ISREG(wh_dentry->d_inode->i_mode)) {
10628+ printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n",
10629+ whname, wh_dentry->d_inode->i_mode);
10630+ dput(wh_dentry);
10631+ err = -EIO;
10632+ goto out;
10633+ }
10634+
10635+out:
10636+ kfree(whname);
10637+ if (err)
10638+ wh_dentry = ERR_PTR(err);
10639+ return wh_dentry;
10640+}
10641+
10642+/* find and return first whiteout in parent directory, else ENOENT */
10643+struct dentry *find_first_whiteout(struct dentry *dentry)
10644+{
10645+ int bindex, bstart, bend;
10646+ struct dentry *parent, *lower_parent, *wh_dentry;
10647+
10648+ parent = dget_parent(dentry);
10649+
10650+ bstart = dbstart(parent);
10651+ bend = dbend(parent);
10652+ wh_dentry = ERR_PTR(-ENOENT);
10653+
10654+ for (bindex = bstart; bindex <= bend; bindex++) {
10655+ lower_parent = unionfs_lower_dentry_idx(parent, bindex);
10656+ if (!lower_parent)
10657+ continue;
10658+ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_parent);
10659+ if (IS_ERR(wh_dentry))
10660+ continue;
10661+ if (wh_dentry->d_inode)
10662+ break;
10663+ dput(wh_dentry);
10664+ wh_dentry = ERR_PTR(-ENOENT);
10665+ }
10666+
10667+ dput(parent);
10668+
10669+ return wh_dentry;
10670+}
10671+
10672+/*
10673+ * Unlink a whiteout dentry. Returns 0 or -errno. Caller must hold and
10674+ * release dentry reference.
10675+ */
10676+int unlink_whiteout(struct dentry *wh_dentry)
10677+{
10678+ int err;
10679+ struct dentry *lower_dir_dentry;
10680+
10681+ /* dget and lock parent dentry */
10682+ lower_dir_dentry = lock_parent_wh(wh_dentry);
10683+
10684+ /* see Documentation/filesystems/unionfs/issues.txt */
10685+ lockdep_off();
10686+ err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
10687+ lockdep_on();
10688+ unlock_dir(lower_dir_dentry);
10689+
10690+ /*
10691+ * Whiteouts are special files and should be deleted no matter what
10692+ * (as if they never existed), in order to allow this create
10693+ * operation to succeed. This is especially important in sticky
10694+ * directories: a whiteout may have been created by one user, but
10695+ * the newly created file may be created by another user.
10696+ * Therefore, in order to maintain Unix semantics, if the vfs_unlink
10697+ * above failed, then we have to try to directly unlink the
10698+ * whiteout. Note: in the ODF version of unionfs, whiteout are
10699+ * handled much more cleanly.
10700+ */
10701+ if (err == -EPERM) {
10702+ struct inode *inode = lower_dir_dentry->d_inode;
10703+ err = inode->i_op->unlink(inode, wh_dentry);
10704+ }
10705+ if (err)
10706+ printk(KERN_ERR "unionfs: could not unlink whiteout %s, "
10707+ "err = %d\n", wh_dentry->d_name.name, err);
10708+
10709+ return err;
10710+
10711+}
10712+
10713+/*
10714+ * Helper function when creating new objects (create, symlink, mknod, etc.).
10715+ * Checks to see if there's a whiteout in @lower_dentry's parent directory,
10716+ * whose name is taken from @dentry. Then tries to remove that whiteout, if
10717+ * found. If <dentry,bindex> is a branch marked readonly, return -EROFS.
63b09289
JR
10718+ * If it finds both a regular file and a whiteout, delete whiteout (this
10719+ * should never happen).
2380c486
JR
10720+ *
10721+ * Return 0 if no whiteout was found. Return 1 if one was found and
10722+ * successfully removed. Therefore a value >= 0 tells the caller that
10723+ * @lower_dentry belongs to a good branch to create the new object in).
10724+ * Return -ERRNO if an error occurred during whiteout lookup or in trying to
10725+ * unlink the whiteout.
10726+ */
10727+int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry,
10728+ int bindex)
10729+{
10730+ int err;
10731+ struct dentry *wh_dentry = NULL;
10732+ struct dentry *lower_dir_dentry = NULL;
10733+
10734+ /* look for whiteout dentry first */
10735+ lower_dir_dentry = dget_parent(lower_dentry);
10736+ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry);
10737+ dput(lower_dir_dentry);
10738+ if (IS_ERR(wh_dentry)) {
10739+ err = PTR_ERR(wh_dentry);
10740+ goto out;
10741+ }
10742+
10743+ if (!wh_dentry->d_inode) { /* no whiteout exists*/
10744+ err = 0;
10745+ goto out_dput;
10746+ }
10747+
10748+ /* check if regular file and whiteout were both found */
63b09289
JR
10749+ if (unlikely(lower_dentry->d_inode))
10750+ printk(KERN_WARNING "unionfs: removing whiteout; regular "
10751+ "file exists in directory %s (branch %d)\n",
2380c486 10752+ lower_dir_dentry->d_name.name, bindex);
2380c486
JR
10753+
10754+ /* check if branch is writeable */
10755+ err = is_robranch_super(dentry->d_sb, bindex);
10756+ if (err)
10757+ goto out_dput;
10758+
10759+ /* .wh.foo has been found, so let's unlink it */
10760+ err = unlink_whiteout(wh_dentry);
10761+ if (!err)
10762+ err = 1; /* a whiteout was found and successfully removed */
10763+out_dput:
10764+ dput(wh_dentry);
10765+out:
10766+ return err;
10767+}
10768+
10769+/*
10770+ * Pass an unionfs dentry and an index. It will try to create a whiteout
10771+ * for the filename in dentry, and will try in branch 'index'. On error,
10772+ * it will proceed to a branch to the left.
10773+ */
10774+int create_whiteout(struct dentry *dentry, int start)
10775+{
10776+ int bstart, bend, bindex;
10777+ struct dentry *lower_dir_dentry;
10778+ struct dentry *lower_dentry;
10779+ struct dentry *lower_wh_dentry;
10780+ struct nameidata nd;
10781+ char *name = NULL;
10782+ int err = -EINVAL;
10783+
10784+ verify_locked(dentry);
10785+
10786+ bstart = dbstart(dentry);
10787+ bend = dbend(dentry);
10788+
10789+ /* create dentry's whiteout equivalent */
10790+ name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
10791+ if (unlikely(IS_ERR(name))) {
10792+ err = PTR_ERR(name);
10793+ goto out;
10794+ }
10795+
10796+ for (bindex = start; bindex >= 0; bindex--) {
10797+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10798+
10799+ if (!lower_dentry) {
10800+ /*
10801+ * if lower dentry is not present, create the
10802+ * entire lower dentry directory structure and go
10803+ * ahead. Since we want to just create whiteout, we
10804+ * only want the parent dentry, and hence get rid of
10805+ * this dentry.
10806+ */
10807+ lower_dentry = create_parents(dentry->d_inode,
10808+ dentry,
10809+ dentry->d_name.name,
10810+ bindex);
10811+ if (!lower_dentry || IS_ERR(lower_dentry)) {
10812+ int ret = PTR_ERR(lower_dentry);
10813+ if (!IS_COPYUP_ERR(ret))
10814+ printk(KERN_ERR
10815+ "unionfs: create_parents for "
10816+ "whiteout failed: bindex=%d "
10817+ "err=%d\n", bindex, ret);
10818+ continue;
10819+ }
10820+ }
10821+
10822+ lower_wh_dentry =
4ae1df7a 10823+ lookup_lck_len(name, lower_dentry->d_parent,
2380c486
JR
10824+ dentry->d_name.len + UNIONFS_WHLEN);
10825+ if (IS_ERR(lower_wh_dentry))
10826+ continue;
10827+
10828+ /*
10829+ * The whiteout already exists. This used to be impossible,
10830+ * but now is possible because of opaqueness.
10831+ */
10832+ if (lower_wh_dentry->d_inode) {
10833+ dput(lower_wh_dentry);
10834+ err = 0;
10835+ goto out;
10836+ }
10837+
10838+ err = init_lower_nd(&nd, LOOKUP_CREATE);
10839+ if (unlikely(err < 0))
10840+ goto out;
10841+ lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
10842+ err = is_robranch_super(dentry->d_sb, bindex);
10843+ if (!err)
10844+ err = vfs_create(lower_dir_dentry->d_inode,
10845+ lower_wh_dentry,
4ae1df7a 10846+ current_umask() & S_IRUGO,
2380c486
JR
10847+ &nd);
10848+ unlock_dir(lower_dir_dentry);
10849+ dput(lower_wh_dentry);
10850+ release_lower_nd(&nd, err);
10851+
10852+ if (!err || !IS_COPYUP_ERR(err))
10853+ break;
10854+ }
10855+
10856+ /* set dbopaque so that lookup will not proceed after this branch */
10857+ if (!err)
10858+ dbopaque(dentry) = bindex;
10859+
10860+out:
10861+ kfree(name);
10862+ return err;
10863+}
10864+
10865+/*
10866+ * Delete all of the whiteouts in a given directory for rmdir.
10867+ *
10868+ * lower directory inode should be locked
10869+ */
10870+static int do_delete_whiteouts(struct dentry *dentry, int bindex,
10871+ struct unionfs_dir_state *namelist)
10872+{
10873+ int err = 0;
10874+ struct dentry *lower_dir_dentry = NULL;
10875+ struct dentry *lower_dentry;
10876+ char *name = NULL, *p;
10877+ struct inode *lower_dir;
10878+ int i;
10879+ struct list_head *pos;
10880+ struct filldir_node *cursor;
10881+
10882+ /* Find out lower parent dentry */
10883+ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10884+ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
10885+ lower_dir = lower_dir_dentry->d_inode;
10886+ BUG_ON(!S_ISDIR(lower_dir->i_mode));
10887+
10888+ err = -ENOMEM;
10889+ name = __getname();
10890+ if (unlikely(!name))
10891+ goto out;
10892+ strcpy(name, UNIONFS_WHPFX);
10893+ p = name + UNIONFS_WHLEN;
10894+
10895+ err = 0;
10896+ for (i = 0; !err && i < namelist->size; i++) {
10897+ list_for_each(pos, &namelist->list[i]) {
10898+ cursor =
10899+ list_entry(pos, struct filldir_node,
10900+ file_list);
10901+ /* Only operate on whiteouts in this branch. */
10902+ if (cursor->bindex != bindex)
10903+ continue;
10904+ if (!cursor->whiteout)
10905+ continue;
10906+
10907+ strlcpy(p, cursor->name, PATH_MAX - UNIONFS_WHLEN);
10908+ lower_dentry =
4ae1df7a 10909+ lookup_lck_len(name, lower_dir_dentry,
2380c486
JR
10910+ cursor->namelen +
10911+ UNIONFS_WHLEN);
10912+ if (IS_ERR(lower_dentry)) {
10913+ err = PTR_ERR(lower_dentry);
10914+ break;
10915+ }
10916+ if (lower_dentry->d_inode)
10917+ err = vfs_unlink(lower_dir, lower_dentry);
10918+ dput(lower_dentry);
10919+ if (err)
10920+ break;
10921+ }
10922+ }
10923+
10924+ __putname(name);
10925+
10926+ /* After all of the removals, we should copy the attributes once. */
10927+ fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
10928+
10929+out:
10930+ return err;
10931+}
10932+
10933+
10934+void __delete_whiteouts(struct work_struct *work)
10935+{
10936+ struct sioq_args *args = container_of(work, struct sioq_args, work);
10937+ struct deletewh_args *d = &args->deletewh;
10938+
10939+ args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist);
10940+ complete(&args->comp);
10941+}
10942+
10943+/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
10944+int delete_whiteouts(struct dentry *dentry, int bindex,
10945+ struct unionfs_dir_state *namelist)
10946+{
10947+ int err;
10948+ struct super_block *sb;
10949+ struct dentry *lower_dir_dentry;
10950+ struct inode *lower_dir;
10951+ struct sioq_args args;
10952+
10953+ sb = dentry->d_sb;
10954+
10955+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
10956+ BUG_ON(bindex < dbstart(dentry));
10957+ BUG_ON(bindex > dbend(dentry));
10958+ err = is_robranch_super(sb, bindex);
10959+ if (err)
10960+ goto out;
10961+
10962+ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10963+ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
10964+ lower_dir = lower_dir_dentry->d_inode;
10965+ BUG_ON(!S_ISDIR(lower_dir->i_mode));
10966+
10967+ if (!inode_permission(lower_dir, MAY_WRITE | MAY_EXEC)) {
10968+ err = do_delete_whiteouts(dentry, bindex, namelist);
10969+ } else {
10970+ args.deletewh.namelist = namelist;
10971+ args.deletewh.dentry = dentry;
10972+ args.deletewh.bindex = bindex;
10973+ run_sioq(__delete_whiteouts, &args);
10974+ err = args.err;
10975+ }
10976+
10977+out:
10978+ return err;
10979+}
10980+
10981+/****************************************************************************
10982+ * Opaque directory helpers *
10983+ ****************************************************************************/
10984+
10985+/*
10986+ * is_opaque_dir: returns 0 if it is NOT an opaque dir, 1 if it is, and
10987+ * -errno if an error occurred trying to figure this out.
10988+ */
10989+int is_opaque_dir(struct dentry *dentry, int bindex)
10990+{
10991+ int err = 0;
10992+ struct dentry *lower_dentry;
10993+ struct dentry *wh_lower_dentry;
10994+ struct inode *lower_inode;
10995+ struct sioq_args args;
63b09289 10996+ struct nameidata lower_nd;
2380c486
JR
10997+
10998+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10999+ lower_inode = lower_dentry->d_inode;
11000+
11001+ BUG_ON(!S_ISDIR(lower_inode->i_mode));
11002+
11003+ mutex_lock(&lower_inode->i_mutex);
11004+
11005+ if (!inode_permission(lower_inode, MAY_EXEC)) {
63b09289
JR
11006+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
11007+ if (unlikely(err < 0)) {
11008+ mutex_unlock(&lower_inode->i_mutex);
11009+ goto out;
11010+ }
2380c486 11011+ wh_lower_dentry =
63b09289
JR
11012+ lookup_one_len_nd(UNIONFS_DIR_OPAQUE, lower_dentry,
11013+ sizeof(UNIONFS_DIR_OPAQUE) - 1,
11014+ &lower_nd);
11015+ release_lower_nd(&lower_nd, err);
2380c486
JR
11016+ } else {
11017+ args.is_opaque.dentry = lower_dentry;
11018+ run_sioq(__is_opaque_dir, &args);
11019+ wh_lower_dentry = args.ret;
11020+ }
11021+
11022+ mutex_unlock(&lower_inode->i_mutex);
11023+
11024+ if (IS_ERR(wh_lower_dentry)) {
11025+ err = PTR_ERR(wh_lower_dentry);
11026+ goto out;
11027+ }
11028+
11029+ /* This is an opaque dir iff wh_lower_dentry is positive */
11030+ err = !!wh_lower_dentry->d_inode;
11031+
11032+ dput(wh_lower_dentry);
11033+out:
11034+ return err;
11035+}
11036+
11037+void __is_opaque_dir(struct work_struct *work)
11038+{
11039+ struct sioq_args *args = container_of(work, struct sioq_args, work);
63b09289
JR
11040+ struct nameidata lower_nd;
11041+ int err;
2380c486 11042+
63b09289
JR
11043+ err = init_lower_nd(&lower_nd, LOOKUP_OPEN);
11044+ if (unlikely(err < 0))
11045+ return;
11046+ args->ret = lookup_one_len_nd(UNIONFS_DIR_OPAQUE,
11047+ args->is_opaque.dentry,
11048+ sizeof(UNIONFS_DIR_OPAQUE) - 1,
11049+ &lower_nd);
11050+ release_lower_nd(&lower_nd, err);
2380c486
JR
11051+ complete(&args->comp);
11052+}
11053+
11054+int make_dir_opaque(struct dentry *dentry, int bindex)
11055+{
11056+ int err = 0;
11057+ struct dentry *lower_dentry, *diropq;
11058+ struct inode *lower_dir;
11059+ struct nameidata nd;
11060+ const struct cred *old_creds;
11061+ struct cred *new_creds;
11062+
11063+ /*
11064+ * Opaque directory whiteout markers are special files (like regular
11065+ * whiteouts), and should appear to the users as if they don't
11066+ * exist. They should be created/deleted regardless of directory
11067+ * search/create permissions, but only for the duration of this
11068+ * creation of the .wh.__dir_opaque: file. Note, this does not
11069+ * circumvent normal ->permission).
11070+ */
11071+ new_creds = prepare_creds();
11072+ if (unlikely(!new_creds)) {
11073+ err = -ENOMEM;
11074+ goto out_err;
11075+ }
11076+ cap_raise(new_creds->cap_effective, CAP_DAC_READ_SEARCH);
11077+ cap_raise(new_creds->cap_effective, CAP_DAC_OVERRIDE);
11078+ old_creds = override_creds(new_creds);
11079+
11080+ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
11081+ lower_dir = lower_dentry->d_inode;
11082+ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
11083+ !S_ISDIR(lower_dir->i_mode));
11084+
11085+ mutex_lock(&lower_dir->i_mutex);
63b09289
JR
11086+ err = init_lower_nd(&nd, LOOKUP_OPEN);
11087+ if (unlikely(err < 0))
11088+ goto out;
11089+ diropq = lookup_one_len_nd(UNIONFS_DIR_OPAQUE, lower_dentry,
11090+ sizeof(UNIONFS_DIR_OPAQUE) - 1, &nd);
11091+ release_lower_nd(&nd, err);
2380c486
JR
11092+ if (IS_ERR(diropq)) {
11093+ err = PTR_ERR(diropq);
11094+ goto out;
11095+ }
11096+
11097+ err = init_lower_nd(&nd, LOOKUP_CREATE);
11098+ if (unlikely(err < 0))
11099+ goto out;
11100+ if (!diropq->d_inode)
11101+ err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
11102+ if (!err)
11103+ dbopaque(dentry) = bindex;
11104+ release_lower_nd(&nd, err);
11105+
11106+ dput(diropq);
11107+
11108+out:
11109+ mutex_unlock(&lower_dir->i_mutex);
11110+ revert_creds(old_creds);
11111+out_err:
11112+ return err;
11113+}
0c5527e5
AM
11114diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
11115new file mode 100644
63b09289 11116index 0000000..a93d803
0c5527e5
AM
11117--- /dev/null
11118+++ b/fs/unionfs/xattr.c
2380c486
JR
11119@@ -0,0 +1,173 @@
11120+/*
63b09289 11121+ * Copyright (c) 2003-2011 Erez Zadok
2380c486
JR
11122+ * Copyright (c) 2003-2006 Charles P. Wright
11123+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11124+ * Copyright (c) 2005-2006 Junjiro Okajima
11125+ * Copyright (c) 2005 Arun M. Krishnakumar
11126+ * Copyright (c) 2004-2006 David P. Quigley
11127+ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
11128+ * Copyright (c) 2003 Puja Gupta
11129+ * Copyright (c) 2003 Harikesavan Krishnan
63b09289
JR
11130+ * Copyright (c) 2003-2011 Stony Brook University
11131+ * Copyright (c) 2003-2011 The Research Foundation of SUNY
2380c486
JR
11132+ *
11133+ * This program is free software; you can redistribute it and/or modify
11134+ * it under the terms of the GNU General Public License version 2 as
11135+ * published by the Free Software Foundation.
11136+ */
11137+
11138+#include "union.h"
11139+
11140+/* This is lifted from fs/xattr.c */
11141+void *unionfs_xattr_alloc(size_t size, size_t limit)
11142+{
11143+ void *ptr;
11144+
11145+ if (size > limit)
11146+ return ERR_PTR(-E2BIG);
11147+
11148+ if (!size) /* size request, no buffer is needed */
11149+ return NULL;
11150+
11151+ ptr = kmalloc(size, GFP_KERNEL);
11152+ if (unlikely(!ptr))
11153+ return ERR_PTR(-ENOMEM);
11154+ return ptr;
11155+}
11156+
11157+/*
11158+ * BKL held by caller.
11159+ * dentry->d_inode->i_mutex locked
11160+ */
11161+ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
11162+ size_t size)
11163+{
11164+ struct dentry *lower_dentry = NULL;
11165+ struct dentry *parent;
11166+ int err = -EOPNOTSUPP;
11167+ bool valid;
11168+
11169+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11170+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11171+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11172+
11173+ valid = __unionfs_d_revalidate(dentry, parent, false);
11174+ if (unlikely(!valid)) {
11175+ err = -ESTALE;
11176+ goto out;
11177+ }
11178+
11179+ lower_dentry = unionfs_lower_dentry(dentry);
11180+
11181+ err = vfs_getxattr(lower_dentry, (char *) name, value, size);
11182+
11183+out:
11184+ unionfs_check_dentry(dentry);
11185+ unionfs_unlock_dentry(dentry);
11186+ unionfs_unlock_parent(dentry, parent);
11187+ unionfs_read_unlock(dentry->d_sb);
11188+ return err;
11189+}
11190+
11191+/*
11192+ * BKL held by caller.
11193+ * dentry->d_inode->i_mutex locked
11194+ */
11195+int unionfs_setxattr(struct dentry *dentry, const char *name,
11196+ const void *value, size_t size, int flags)
11197+{
11198+ struct dentry *lower_dentry = NULL;
11199+ struct dentry *parent;
11200+ int err = -EOPNOTSUPP;
11201+ bool valid;
11202+
11203+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11204+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11205+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11206+
11207+ valid = __unionfs_d_revalidate(dentry, parent, false);
11208+ if (unlikely(!valid)) {
11209+ err = -ESTALE;
11210+ goto out;
11211+ }
11212+
11213+ lower_dentry = unionfs_lower_dentry(dentry);
11214+
11215+ err = vfs_setxattr(lower_dentry, (char *) name, (void *) value,
11216+ size, flags);
11217+
11218+out:
11219+ unionfs_check_dentry(dentry);
11220+ unionfs_unlock_dentry(dentry);
11221+ unionfs_unlock_parent(dentry, parent);
11222+ unionfs_read_unlock(dentry->d_sb);
11223+ return err;
11224+}
11225+
11226+/*
11227+ * BKL held by caller.
11228+ * dentry->d_inode->i_mutex locked
11229+ */
11230+int unionfs_removexattr(struct dentry *dentry, const char *name)
11231+{
11232+ struct dentry *lower_dentry = NULL;
11233+ struct dentry *parent;
11234+ int err = -EOPNOTSUPP;
11235+ bool valid;
11236+
11237+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11238+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11239+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11240+
11241+ valid = __unionfs_d_revalidate(dentry, parent, false);
11242+ if (unlikely(!valid)) {
11243+ err = -ESTALE;
11244+ goto out;
11245+ }
11246+
11247+ lower_dentry = unionfs_lower_dentry(dentry);
11248+
11249+ err = vfs_removexattr(lower_dentry, (char *) name);
11250+
11251+out:
11252+ unionfs_check_dentry(dentry);
11253+ unionfs_unlock_dentry(dentry);
11254+ unionfs_unlock_parent(dentry, parent);
11255+ unionfs_read_unlock(dentry->d_sb);
11256+ return err;
11257+}
11258+
11259+/*
11260+ * BKL held by caller.
11261+ * dentry->d_inode->i_mutex locked
11262+ */
11263+ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
11264+{
11265+ struct dentry *lower_dentry = NULL;
11266+ struct dentry *parent;
11267+ int err = -EOPNOTSUPP;
11268+ char *encoded_list = NULL;
11269+ bool valid;
11270+
11271+ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11272+ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT);
11273+ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11274+
11275+ valid = __unionfs_d_revalidate(dentry, parent, false);
11276+ if (unlikely(!valid)) {
11277+ err = -ESTALE;
11278+ goto out;
11279+ }
11280+
11281+ lower_dentry = unionfs_lower_dentry(dentry);
11282+
11283+ encoded_list = list;
11284+ err = vfs_listxattr(lower_dentry, encoded_list, size);
11285+
11286+out:
11287+ unionfs_check_dentry(dentry);
11288+ unionfs_unlock_dentry(dentry);
11289+ unionfs_unlock_parent(dentry, parent);
11290+ unionfs_read_unlock(dentry->d_sb);
11291+ return err;
11292+}
0c5527e5
AM
11293diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
11294index da317c7..64f1ced 100644
11295--- a/include/linux/fs_stack.h
11296+++ b/include/linux/fs_stack.h
7670a7fc
AM
11297@@ -1,7 +1,19 @@
11298+/*
11299+ * Copyright (c) 2006-2009 Erez Zadok
11300+ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
11301+ * Copyright (c) 2006-2009 Stony Brook University
11302+ * Copyright (c) 2006-2009 The Research Foundation of SUNY
11303+ *
11304+ * This program is free software; you can redistribute it and/or modify
11305+ * it under the terms of the GNU General Public License version 2 as
11306+ * published by the Free Software Foundation.
11307+ */
11308+
11309 #ifndef _LINUX_FS_STACK_H
11310 #define _LINUX_FS_STACK_H
11311
11312-/* This file defines generic functions used primarily by stackable
11313+/*
11314+ * This file defines generic functions used primarily by stackable
11315 * filesystems; none of these functions require i_mutex to be held.
11316 */
11317
0c5527e5 11318diff --git a/include/linux/magic.h b/include/linux/magic.h
63b09289 11319index 1e5df2a..01ee54d 100644
0c5527e5
AM
11320--- a/include/linux/magic.h
11321+++ b/include/linux/magic.h
63b09289 11322@@ -50,6 +50,8 @@
2380c486
JR
11323 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
11324 #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
11325
11326+#define UNIONFS_SUPER_MAGIC 0xf15f083d
11327+
11328 #define SMB_SUPER_MAGIC 0x517B
11329 #define USBDEVICE_SUPER_MAGIC 0x9fa2
11330 #define CGROUP_SUPER_MAGIC 0x27e0eb
0c5527e5 11331diff --git a/include/linux/namei.h b/include/linux/namei.h
6b53c3da 11332index 76fe2c6..7230829 100644
0c5527e5
AM
11333--- a/include/linux/namei.h
11334+++ b/include/linux/namei.h
6b53c3da 11335@@ -82,8 +82,11 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
7670a7fc
AM
11336
11337 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
11338 int (*open)(struct inode *, struct file *));
11339+extern void release_open_intent(struct nameidata *);
11340
11341 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
63b09289
JR
11342+extern struct dentry *lookup_one_len_nd(const char *, struct dentry *, int,
11343+ struct nameidata *nd);
7670a7fc 11344
63b09289
JR
11345 extern int follow_down_one(struct path *);
11346 extern int follow_down(struct path *);
0c5527e5 11347diff --git a/include/linux/splice.h b/include/linux/splice.h
6b53c3da 11348index 26e5b61..28213e6 100644
0c5527e5
AM
11349--- a/include/linux/splice.h
11350+++ b/include/linux/splice.h
11351@@ -81,6 +81,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
2380c486
JR
11352 struct splice_pipe_desc *);
11353 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
11354 splice_direct_actor *);
11355+extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out,
11356+ loff_t *ppos, size_t len, unsigned int flags);
11357+extern long vfs_splice_to(struct file *in, loff_t *ppos,
11358+ struct pipe_inode_info *pipe, size_t len,
11359+ unsigned int flags);
11360
76514441
AM
11361 /*
11362 * for dynamic pipe sizing
0c5527e5
AM
11363diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
11364new file mode 100644
11365index 0000000..c84d97e
11366--- /dev/null
11367+++ b/include/linux/union_fs.h
2380c486
JR
11368@@ -0,0 +1,22 @@
11369+/*
11370+ * Copyright (c) 2003-2009 Erez Zadok
11371+ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11372+ * Copyright (c) 2003-2009 Stony Brook University
11373+ * Copyright (c) 2003-2009 The Research Foundation of SUNY
11374+ *
11375+ * This program is free software; you can redistribute it and/or modify
11376+ * it under the terms of the GNU General Public License version 2 as
11377+ * published by the Free Software Foundation.
11378+ */
11379+
11380+#ifndef _LINUX_UNION_FS_H
11381+#define _LINUX_UNION_FS_H
11382+
11383+/*
11384+ * DEFINITIONS FOR USER AND KERNEL CODE:
11385+ */
11386+# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int)
11387+# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int)
11388+
11389+#endif /* _LINUX_UNION_FS_H */
11390+
0c5527e5 11391diff --git a/security/security.c b/security/security.c
6b53c3da 11392index 0e4fccf..4d1b37d 100644
0c5527e5
AM
11393--- a/security/security.c
11394+++ b/security/security.c
63b09289 11395@@ -520,6 +520,7 @@ int security_inode_permission(struct inode *inode, int mask)
2380c486 11396 return 0;
6b53c3da 11397 return security_ops->inode_permission(inode, mask);
2380c486
JR
11398 }
11399+EXPORT_SYMBOL(security_inode_permission);
11400
6b53c3da 11401 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
2380c486 11402 {
This page took 1.538415 seconds and 4 git commands to generate.