kernel-unionfs.patch

   1 diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
   2 index e68021c..9dcbd6d 100644
   3 --- a/Documentation/filesystems/00-INDEX
   4 +++ b/Documentation/filesystems/00-INDEX
   5 @@ -100,6 +100,8 @@ udf.txt
   6         - info and mount options for the UDF filesystem.
   7  ufs.txt
   8         - info on the ufs filesystem.
   9 +unionfs/
  10 +       - info on the unionfs filesystem
  11  vfat.txt
  12         - info on using the VFAT filesystem used in Windows NT and Windows 95
  13  vfs.txt
  14 diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX
  15 new file mode 100644
  16 index 0000000..96fdf67
  17 --- /dev/null
  18 +++ b/Documentation/filesystems/unionfs/00-INDEX
  19 @@ -0,0 +1,10 @@
  20 +00-INDEX
  21 +       - this file.
  22 +concepts.txt
  23 +       - A brief introduction of concepts.
  24 +issues.txt
  25 +       - A summary of known issues with unionfs.
  26 +rename.txt
  27 +       - Information regarding rename operations.
  28 +usage.txt
  29 +       - Usage information and examples.
  30 diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
  31 new file mode 100644
  32 index 0000000..8d9a1c5
  33 --- /dev/null
  34 +++ b/Documentation/filesystems/unionfs/concepts.txt
  35 @@ -0,0 +1,226 @@
  36 +Unionfs 2.x CONCEPTS:
  37 +=====================
  38 +
  39 +This file describes the concepts needed by a namespace unification file
  40 +system.
  41 +
  42 +
  43 +Branch Priority:
  44 +================
  45 +
  46 +Each branch is assigned a unique priority - starting from 0 (highest
  47 +priority).  No two branches can have the same priority.
  48 +
  49 +
  50 +Branch Mode:
  51 +============
  52 +
  53 +Each branch is assigned a mode - read-write or read-only. This allows
  54 +directories on media mounted read-write to be used in a read-only manner.
  55 +
  56 +
  57 +Whiteouts:
  58 +==========
  59 +
  60 +A whiteout removes a file name from the namespace. Whiteouts are needed when
  61 +one attempts to remove a file on a read-only branch.
  62 +
  63 +Suppose we have a two-branch union, where branch 0 is read-write and branch
  64 +1 is read-only. And a file 'foo' on branch 1:
  65 +
  66 +./b0/
  67 +./b1/
  68 +./b1/foo
  69 +
  70 +The unified view would simply be:
  71 +
  72 +./union/
  73 +./union/foo
  74 +
  75 +Since 'foo' is stored on a read-only branch, it cannot be removed. A
  76 +whiteout is used to remove the name 'foo' from the unified namespace. Again,
  77 +since branch 1 is read-only, the whiteout cannot be created there. So, we
  78 +try on a higher priority (lower numerically) branch and create the whiteout
  79 +there.
  80 +
  81 +./b0/
  82 +./b0/.wh.foo
  83 +./b1/
  84 +./b1/foo
  85 +
  86 +Later, when Unionfs traverses branches (due to lookup or readdir), it
  87 +eliminates 'foo' from the namespace (as well as the whiteout itself.)
  88 +
  89 +
  90 +Duplicate Elimination:
  91 +======================
  92 +
  93 +It is possible for files on different branches to have the same name.
  94 +Unionfs then has to select which instance of the file to show to the user.
  95 +Given the fact that each branch has a priority associated with it, the
  96 +simplest solution is to take the instance from the highest priority
  97 +(numerically lowest value) and "hide" the others.
  98 +
  99 +
 100 +Copyup:
 101 +=======
 102 +
 103 +When a change is made to the contents of a file's data or meta-data, they
 104 +have to be stored somewhere.  The best way is to create a copy of the
 105 +original file on a branch that is writable, and then redirect the write
 106 +through to this copy.  The copy must be made on a higher priority branch so
 107 +that lookup and readdir return this newer "version" of the file rather than
 108 +the original (see duplicate elimination).
 109 +
 110 +An entire unionfs mount can be read-only or read-write.  If it's read-only,
 111 +then none of the branches will be written to, even if some of the branches
 112 +are physically writeable.  If the unionfs mount is read-write, then the
 113 +leftmost (highest priority) branch must be writeable (for copyup to take
 114 +place); the remaining branches can be any mix of read-write and read-only.
 115 +
 116 +In a writeable mount, unionfs will create new files/dir in the leftmost
 117 +branch.  If one tries to modify a file in a read-only branch/media, unionfs
 118 +will copyup the file to the leftmost branch and modify it there.  If you try
 119 +to modify a file from a writeable branch which is not the leftmost branch,
 120 +then unionfs will modify it in that branch; this is useful if you, say,
 121 +unify different packages (e.g., apache, sendmail, ftpd, etc.) and you want
 122 +changes to specific package files to remain logically in the directory where
 123 +they came from.
 124 +
 125 +Cache Coherency:
 126 +================
 127 +
 128 +Unionfs users often want to be able to modify files and directories directly
 129 +on the lower branches, and have those changes be visible at the Unionfs
 130 +level.  This means that data (e.g., pages) and meta-data (dentries, inodes,
 131 +open files, etc.) have to be synchronized between the upper and lower
 132 +layers.  In other words, the newest changes from a layer below have to be
 133 +propagated to the Unionfs layer above.  If the two layers are not in sync, a
 134 +cache incoherency ensues, which could lead to application failures and even
 135 +oopses.  The Linux kernel, however, has a rather limited set of mechanisms
 136 +to ensure this inter-layer cache coherency---so Unionfs has to do most of
 137 +the hard work on its own.
 138 +
 139 +Maintaining Invariants:
 140 +
 141 +The way Unionfs ensures cache coherency is as follows.  At each entry point
 142 +to a Unionfs file system method, we call a utility function to validate the
 143 +primary objects of this method.  Generally, we call unionfs_file_revalidate
 144 +on open files, and __unionfs_d_revalidate_chain on dentries (which also
 145 +validates inodes).  These utility functions check to see whether the upper
 146 +Unionfs object is in sync with any of the lower objects that it represents.
 147 +The checks we perform include whether the Unionfs superblock has a newer
 148 +generation number, or if any of the lower objects mtime's or ctime's are
 149 +newer.  (Note: generation numbers change when branch-management commands are
 150 +issued, so in a way, maintaining cache coherency is also very important for
 151 +branch-management.)  If indeed we determine that any Unionfs object is no
 152 +longer in sync with its lower counterparts, then we rebuild that object
 153 +similarly to how we do so for branch-management.
 154 +
 155 +While rebuilding Unionfs's objects, we also purge any page mappings and
 156 +truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data).  This is to
 157 +ensure that Unionfs will re-get the newer data from the lower branches.  We
 158 +perform this purging only if the Unionfs operation in question is a reading
 159 +operation; if Unionfs is performing a data writing operation (e.g., ->write,
 160 +->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is
 161 +because (1) a self-deadlock could occur and (2) the upper Unionfs pages are
 162 +considered more authoritative anyway, as they are newer and will overwrite
 163 +any lower pages.
 164 +
 165 +Unionfs maintains the following important invariant regarding mtime's,
 166 +ctime's, and atime's: the upper inode object's times are the max() of all of
 167 +the lower ones.  For non-directory objects, there's only one object below,
 168 +so the mapping is simple; for directory objects, there could be multiple
 169 +lower objects and we have to sync up with the newest one of all the lower
 170 +ones.  This invariant is important to maintain, especially for directories
 171 +(besides, we need this to be POSIX compliant).  A union could comprise
 172 +multiple writable branches, each of which could change.  If we don't reflect
 173 +the newest possible mtime/ctime, some applications could fail.  For example,
 174 +NFSv2/v3 exports check for newer directory mtimes on the server to determine
 175 +if the client-side attribute cache should be purged.
 176 +
 177 +To maintain these important invariants, of course, Unionfs carefully
 178 +synchronizes upper and lower times in various places.  For example, if we
 179 +copy-up a file to a top-level branch, the parent directory where the file
 180 +was copied up to will now have a new mtime: so after a successful copy-up,
 181 +we sync up with the new top-level branch's parent directory mtime.
 182 +
 183 +Implementation:
 184 +
 185 +This cache-coherency implementation is efficient because it defers any
 186 +synchronizing between the upper and lower layers until absolutely needed.
 187 +Consider the example of a common situation where users perform a lot of lower
 188 +changes, such as untarring a whole package.  While these take place,
 189 +typically the user doesn't access the files via Unionfs; only after the
 190 +lower changes are done, does the user try to access the lower files.  With
 191 +our cache-coherency implementation, the entirety of the changes to the lower
 192 +branches will not result in a single CPU cycle spent at the Unionfs level
 193 +until the user invokes a system call that goes through Unionfs.
 194 +
 195 +We have considered two alternate cache-coherency designs.  (1) Using the
 196 +dentry/inode notify functionality to register interest in finding out about
 197 +any lower changes.  This is a somewhat limited and also a heavy-handed
 198 +approach which could result in many notifications to the Unionfs layer upon
 199 +each small change at the lower layer (imagine a file being modified multiple
 200 +times in rapid succession).  (2) Rewriting the VFS to support explicit
 201 +callbacks from lower objects to upper objects.  We began exploring such an
 202 +implementation, but found it to be very complicated--it would have resulted
 203 +in massive VFS/MM changes which are unlikely to be accepted by the LKML
 204 +community.  We therefore believe that our current cache-coherency design and
 205 +implementation represent the best approach at this time.
 206 +
 207 +Limitations:
 208 +
 209 +Our implementation works in that as long as a user process will have caused
 210 +Unionfs to be called, directly or indirectly, even to just do
 211 +->d_revalidate; then we will have purged the current Unionfs data and the
 212 +process will see the new data.  For example, a process that continually
 213 +re-reads the same file's data will see the NEW data as soon as the lower
 214 +file had changed, upon the next read(2) syscall (even if the file is still
 215 +open!)  However, this doesn't work when the process re-reads the open file's
 216 +data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens
 217 +it).  Once we respond to ->readpage(s), then the kernel maps the page into
 218 +the process's address space and there doesn't appear to be a way to force
 219 +the kernel to invalidate those pages/mappings, and force the process to
 220 +re-issue ->readpage.  If there's a way to invalidate active mappings and
 221 +force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do
 222 +the trick).
 223 +
 224 +Our current Unionfs code has to perform many file-revalidation calls.  It
 225 +would be really nice if the VFS would export an optional file system hook
 226 +->file_revalidate (similarly to dentry->d_revalidate) that will be called
 227 +before each VFS op that has a "struct file" in it.
 228 +
 229 +Certain file systems have micro-second granularity (or better) for inode
 230 +times, and asynchronous actions could cause those times to change with some
 231 +small delay.  In such cases, Unionfs may see a changed inode time that only
 232 +differs by a tiny fraction of a second: such a change may be a false
 233 +positive indication that the lower object has changed, whereas if unionfs
 234 +waits a little longer, that false indication will not be seen.  (These false
 235 +positives are harmless, because they would at most cause unionfs to
 236 +re-validate an object that may need no revalidation, and print a debugging
 237 +message that clutters the console/logs.)  Therefore, to minimize the chances
 238 +of these situations, we delay the detection of changed times by a small
 239 +factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
 240 +seconds, as does NFS).  This means that we will detect the change, only a
 241 +couple of seconds later, if indeed the time change persists in the lower
 242 +file object.  This delayed detection has an added performance benefit: we
 243 +reduce the number of times that unionfs has to revalidate objects, in case
 244 +there's a lot of concurrent activity on both the upper and lower objects,
 245 +for the same file(s).  Lastly, this delayed time attribute detection is
 246 +similar to how NFS clients operate (e.g., acregmin).
 247 +
 248 +Finally, there is no way currently in Linux to prevent lower directories
 249 +from being moved around (i.e., topology changes); there's no way to prevent
 250 +modifications to directory sub-trees of whole file systems which are mounted
 251 +read-write.  It is therefore possible for in-flight operations in unionfs to
 252 +take place, while a lower directory is being moved around.  Therefore, if
 253 +you try to, say, create a new file in a directory through unionfs, while the
 254 +directory is being moved around directly, then the new file may get created
 255 +in the new location where that directory was moved to.  This is somewhat
 256 +similar to the behaviour in NFS: an NFS client could be creating a new file while
 257 +the NFS server is moving the directory around; the file will get successfully
 258 +created in the new location.  (The one exception in unionfs is that if the
 259 +branch is marked read-only by unionfs, then a copyup will take place.)
 260 +
 261 +For more information, see <http://unionfs.filesystems.org/>.
 262 diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt
 263 new file mode 100644
 264 index 0000000..f4b7e7e
 265 --- /dev/null
 266 +++ b/Documentation/filesystems/unionfs/issues.txt
 267 @@ -0,0 +1,28 @@
 268 +KNOWN Unionfs 2.x ISSUES:
 269 +=========================
 270 +
 271 +1. Unionfs should not use lookup_one_len() on the underlying f/s as it
 272 +   confuses NFSv4.  Currently, unionfs_lookup() passes lookup intents to the
 273 +   lower file-system, this eliminates part of the problem.  The remaining
 274 +   calls to lookup_one_len may need to be changed to pass an intent.  We are
 275 +   currently introducing VFS changes to fs/namei.c's do_path_lookup() to
 276 +   allow proper file lookup and opening in stackable file systems.
 277 +
 278 +2. Lockdep (a debugging feature) isn't aware of stacking, and so it
 279 +   incorrectly complains about locking problems.  The problem boils down to
 280 +   this: Lockdep considers all objects of a certain type to be in the same
 281 +   class, for example, all inodes.  Lockdep doesn't like to see a lock held
 282 +   on two inodes within the same task, and warns that it could lead to a
 283 +   deadlock.  However, stackable file systems do precisely that: they lock
 284 +   an upper object, and then a lower object, in a strict order to avoid
 285 +   locking problems; in addition, Unionfs, as a fan-out file system, may
 286 +   have to lock several lower inodes.  We are currently looking into Lockdep
 287 +   to see how to make it aware of stackable file systems.  For now, we
 288 +   temporarily disable lockdep when calling vfs methods on lower objects,
 289 +   but only for those places where lockdep complained.  While this solution
 290 +   may seem unclean, it is not without precedent: other places in the kernel
 291 +   also do similar temporary disabling, of course after carefully having
 292 +   checked that it is the right thing to do.  Anyway, if you get any warnings
 293 +   from Lockdep, please report them to the Unionfs maintainers.
 294 +
 295 +For more information, see <http://unionfs.filesystems.org/>.
 296 diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt
 297 new file mode 100644
 298 index 0000000..e20bb82
 299 --- /dev/null
 300 +++ b/Documentation/filesystems/unionfs/rename.txt
 301 @@ -0,0 +1,31 @@
 302 +Rename is a complex beast. The following table shows which rename(2) operations
 303 +should succeed and which should fail.
 304 +
 305 +o: success
 306 +E: error (either unionfs or vfs)
 307 +X: EXDEV
 308 +
 309 +none = file does not exist
 310 +file = file is a file
 311 +dir  = file is an empty directory
 312 +child= file is a non-empty directory
 313 +wh   = file is a directory containing only whiteouts; this makes it logically
 314 +               empty
 315 +
 316 +                      none    file    dir     child   wh
 317 +file                  o       o       E       E       E
 318 +dir                   o       E       o       E       o
 319 +child                 X       E       X       E       X
 320 +wh                    o       E       o       E       o
 321 +
 322 +
 323 +Renaming directories:
 324 +=====================
 325 +
 326 +Whenever an empty (either physically or logically) directory is being renamed,
 327 +the following sequence of events should take place:
 328 +
 329 +1) Remove whiteouts from both source and destination directory
 330 +2) Rename source to destination
 331 +3) Make destination opaque to prevent anything under it from showing up
 332 +
 333 diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt
 334 new file mode 100644
 335 index 0000000..1adde69
 336 --- /dev/null
 337 +++ b/Documentation/filesystems/unionfs/usage.txt
 338 @@ -0,0 +1,134 @@
 339 +Unionfs is a stackable unification file system, which can appear to merge
 340 +the contents of several directories (branches), while keeping their physical
 341 +content separate.  Unionfs is useful for unified source tree management,
 342 +merged contents of split CD-ROM, merged separate software package
 343 +directories, data grids, and more.  Unionfs allows any mix of read-only and
 344 +read-write branches, as well as insertion and deletion of branches anywhere
 345 +in the fan-out.  To maintain Unix semantics, Unionfs handles elimination of
 346 +duplicates, partial-error conditions, and more.
 347 +
 348 +GENERAL SYNTAX
 349 +==============
 350 +
 351 +# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT
 352 +
 353 +OPTIONS can be any legal combination of:
 354 +
 355 +- ro           # mount file system read-only
 356 +- rw           # mount file system read-write
 357 +- remount      # remount the file system (see Branch Management below)
 358 +- incgen       # increment generation no. (see Cache Consistency below)
 359 +
 360 +BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs="
 361 +option, or (2) a list of individual branch manipulation commands, combined
 362 +with the "remount" option, and is further described in the "Branch
 363 +Management" section below.
 364 +
 365 +The syntax for the "dirs=" mount option is:
 366 +
 367 +       dirs=branch[=ro|=rw][:...]
 368 +
 369 +The "dirs=" option takes a colon-delimited list of directories to compose
 370 +the union, with an optional branch mode for each of those directories.
 371 +Directories that come earlier (specified first, on the left) in the list
 372 +have a higher precedence than those which come later.  Additionally,
 373 +read-only or read-write permissions of the branch can be specified by
 374 +appending =ro or =rw (default) to each directory.  See the Copyup section in
 375 +concepts.txt, for a description of Unionfs's behavior when mixing read-only
 376 +and read-write branches and mounts.
 377 +
 378 +Syntax:
 379 +
 380 +       dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
 381 +
 382 +Example:
 383 +
 384 +       dirs=/writable_branch=rw:/read-only_branch=ro
 385 +
 386 +
 387 +BRANCH MANAGEMENT
 388 +=================
 389 +
 390 +Once you mount your union for the first time, using the "dirs=" option, you
 391 +can then change the union's overall mode or reconfigure the branches, using
 392 +the remount option, as follows.
 393 +
 394 +To downgrade a union from read-write to read-only:
 395 +
 396 +# mount -t unionfs -o remount,ro none MOUNTPOINT
 397 +
 398 +To upgrade a union from read-only to read-write:
 399 +
 400 +# mount -t unionfs -o remount,rw none MOUNTPOINT
 401 +
 402 +To delete a branch /foo, regardless where it is in the current union:
 403 +
 404 +# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
 405 +
 406 +To insert (add) a branch /foo before /bar:
 407 +
 408 +# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
 409 +
 410 +To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
 411 +
 412 +# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
 413 +
 414 +To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
 415 +new highest-priority branch), you can use the above syntax, or use a
 416 +shorthand version as follows:
 417 +
 418 +# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
 419 +
 420 +To append a branch to the very end (new lowest-priority branch):
 421 +
 422 +# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
 423 +
 424 +To append a branch to the very end (new lowest-priority branch), in
 425 +read-only mode:
 426 +
 427 +# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
 428 +
 429 +Finally, to change the mode of one existing branch, say /foo, from read-only
 430 +to read-write, and change /bar from read-write to read-only:
 431 +
 432 +# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
 433 +
 434 +Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because
 435 +then Unionfs won't have any writable place for copyups to take place.
 436 +Moreover, the VFS can get confused when it tries to modify something in a
 437 +file system mounted read-write, but isn't permitted to write to it.
 438 +Instead, you should set the whole union as readonly, as described above.
 439 +If, however, you must set the leftmost branch as readonly, perhaps so you
 440 +can get a snapshot of it at a point in time, then you should insert a new
 441 +writable top-level branch, and mark the one you want as readonly.  This can
 442 +be accomplished as follows, assuming that /foo is your current leftmost
 443 +branch:
 444 +
 445 +# mount -t tmpfs -o size=NNN /new
 446 +# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT
 447 +<do what you want safely in /foo>
 448 +# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT
 449 +<check if there's anything in /new you want to preserve>
 450 +# umount /new
 451 +
 452 +CACHE CONSISTENCY
 453 +=================
 454 +
 455 +If you modify any file on any of the lower branches directly, while there is
 456 +a Unionfs 2.x mounted above any of those branches, you should tell Unionfs
 457 +to purge its caches and re-get the objects.  To do that, you have to
 458 +increment the generation number of the superblock using the following
 459 +command:
 460 +
 461 +# mount -t unionfs -o remount,incgen none MOUNTPOINT
 462 +
 463 +Note that the older way of incrementing the generation number using an
 464 +ioctl, is no longer supported in Unionfs 2.0 and newer.  Ioctls in general
 465 +are not encouraged.  Plus, an ioctl is a per-file concept, whereas the
 466 +generation number is a per-file-system concept.  Worse, such an ioctl
 467 +requires an open file, which then has to be invalidated by the very nature
 468 +of the generation number increase (read: the old generation increase ioctl
 469 +was pretty racy).
 470 +
 471 +
 472 +For more information, see <http://unionfs.filesystems.org/>.
 473 diff --git a/MAINTAINERS b/MAINTAINERS
 474 index 1d2edb4..e58f41f 100644
 475 --- a/MAINTAINERS
 476 +++ b/MAINTAINERS
 477 @@ -3923,6 +3923,15 @@ L:       linux-kernel@vger.kernel.org
 478  W:     http://www.kernel.dk
 479  S:     Maintained
 480
 481 +UNIONFS
 482 +P:     Erez Zadok
 483 +M:     ezk@cs.sunysb.edu
 484 +P:     Josef "Jeff" Sipek
 485 +M:     jsipek@cs.sunysb.edu
 486 +L:     unionfs@filesystems.org
 487 +W:     http://unionfs.filesystems.org
 488 +S:     Maintained
 489 +
 490  USB ACM DRIVER
 491  P:     Oliver Neukum
 492  M:     oliver@neukum.name
 493 diff --git a/fs/Kconfig b/fs/Kconfig
 494 index d731282..9adebb3 100644
 495 --- a/fs/Kconfig
 496 +++ b/fs/Kconfig
 497 @@ -1003,6 +1003,47 @@ config CONFIGFS_FS
 498
 499  endmenu
 500
 501 +menu "Layered filesystems"
 502 +
 503 +config ECRYPT_FS
 504 +       tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
 505 +       depends on EXPERIMENTAL && KEYS && CRYPTO && NET
 506 +       help
 507 +         Encrypted filesystem that operates on the VFS layer.  See
 508 +         <file:Documentation/filesystems/ecryptfs.txt> to learn more about
 509 +         eCryptfs.  Userspace components are required and can be
 510 +         obtained from <http://ecryptfs.sf.net>.
 511 +
 512 +         To compile this file system support as a module, choose M here: the
 513 +         module will be called ecryptfs.
 514 +
 515 +config UNION_FS
 516 +       tristate "Union file system (EXPERIMENTAL)"
 517 +       depends on EXPERIMENTAL
 518 +       help
 519 +         Unionfs is a stackable unification file system, which appears to
 520 +         merge the contents of several directories (branches), while keeping
 521 +         their physical content separate.
 522 +
 523 +         See <http://unionfs.filesystems.org> for details
 524 +
 525 +config UNION_FS_XATTR
 526 +       bool "Unionfs extended attributes"
 527 +       depends on UNION_FS
 528 +       help
 529 +         Extended attributes are name:value pairs associated with inodes by
 530 +         the kernel or by users (see the attr(5) manual page).
 531 +
 532 +         If unsure, say N.
 533 +
 534 +config UNION_FS_DEBUG
 535 +       bool "Debug Unionfs"
 536 +       depends on UNION_FS
 537 +       help
 538 +         If you say Y here, you can turn on debugging output from Unionfs.
 539 +
 540 +endmenu
 541 +
 542  menu "Miscellaneous filesystems"
 543
 544  config ADFS_FS
 545 @@ -1055,18 +1096,6 @@ config AFFS_FS
 546           To compile this file system support as a module, choose M here: the
 547           module will be called affs.  If unsure, say N.
 548
 549 -config ECRYPT_FS
 550 -       tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
 551 -       depends on EXPERIMENTAL && KEYS && CRYPTO && NET
 552 -       help
 553 -         Encrypted filesystem that operates on the VFS layer.  See
 554 -         <file:Documentation/filesystems/ecryptfs.txt> to learn more about
 555 -         eCryptfs.  Userspace components are required and can be
 556 -         obtained from <http://ecryptfs.sf.net>.
 557 -
 558 -         To compile this file system support as a module, choose M here: the
 559 -         module will be called ecryptfs.
 560 -
 561  config HFS_FS
 562         tristate "Apple Macintosh file system support (EXPERIMENTAL)"
 563         depends on BLOCK && EXPERIMENTAL
 564 diff --git a/fs/Makefile b/fs/Makefile
 565 index 1e7a11b..22ebb61 100644
 566 --- a/fs/Makefile
 567 +++ b/fs/Makefile
 568 @@ -119,3 +119,4 @@ obj-$(CONFIG_HPPFS)         += hppfs/
 569  obj-$(CONFIG_DEBUG_FS)         += debugfs/
 570  obj-$(CONFIG_OCFS2_FS)         += ocfs2/
 571  obj-$(CONFIG_GFS2_FS)           += gfs2/
 572 +obj-$(CONFIG_UNION_FS)         += unionfs/
 573 diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
 574 index 841a032..e5c4343 100644
 575 --- a/fs/ecryptfs/dentry.c
 576 +++ b/fs/ecryptfs/dentry.c
 577 @@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 578                 struct inode *lower_inode =
 579                         ecryptfs_inode_to_lower(dentry->d_inode);
 580
 581 -               fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
 582 +               fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 583         }
 584  out:
 585         return rc;
 586 diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
 587 index e238611..687819d 100644
 588 --- a/fs/ecryptfs/inode.c
 589 +++ b/fs/ecryptfs/inode.c
 590 @@ -575,9 +575,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 591                         lower_new_dir_dentry->d_inode, lower_new_dentry);
 592         if (rc)
 593                 goto out_lock;
 594 -       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
 595 +       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
 596         if (new_dir != old_dir)
 597 -               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
 598 +               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 599  out_lock:
 600         unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 601         dput(lower_new_dentry->d_parent);
 602 @@ -910,7 +910,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 603
 604         rc = notify_change(lower_dentry, ia);
 605  out:
 606 -       fsstack_copy_attr_all(inode, lower_inode, NULL);
 607 +       fsstack_copy_attr_all(inode, lower_inode);
 608         return rc;
 609  }
 610
 611 diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
 612 index d25ac95..7eac062 100644
 613 --- a/fs/ecryptfs/main.c
 614 +++ b/fs/ecryptfs/main.c
 615 @@ -211,7 +211,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 616                 d_add(dentry, inode);
 617         else
 618                 d_instantiate(dentry, inode);
 619 -       fsstack_copy_attr_all(inode, lower_inode, NULL);
 620 +       fsstack_copy_attr_all(inode, lower_inode);
 621         /* This size will be overwritten for real files w/ headers and
 622          * other metadata */
 623         fsstack_copy_inode_size(inode, lower_inode);
 624 diff --git a/fs/namei.c b/fs/namei.c
 625 index 941c8e8..60762d8 100644
 626 --- a/fs/namei.c
 627 +++ b/fs/namei.c
 628 @@ -399,6 +399,7 @@ void release_open_intent(struct nameidata *nd)
 629         else
 630                 fput(nd->intent.open.file);
 631  }
 632 +EXPORT_SYMBOL(release_open_intent);
 633
 634  static inline struct dentry *
 635  do_revalidate(struct dentry *dentry, struct nameidata *nd)
 636 diff --git a/fs/stack.c b/fs/stack.c
 637 index 67716f6..4336f2b 100644
 638 --- a/fs/stack.c
 639 +++ b/fs/stack.c
 640 @@ -1,24 +1,42 @@
 641 +/*
 642 + * Copyright (c) 2006-2007 Erez Zadok
 643 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
 644 + * Copyright (c) 2006-2007 Stony Brook University
 645 + * Copyright (c) 2006-2007 The Research Foundation of SUNY
 646 + *
 647 + * This program is free software; you can redistribute it and/or modify
 648 + * it under the terms of the GNU General Public License version 2 as
 649 + * published by the Free Software Foundation.
 650 + */
 651 +
 652  #include <linux/module.h>
 653  #include <linux/fs.h>
 654  #include <linux/fs_stack.h>
 655
 656 -/* does _NOT_ require i_mutex to be held.
 657 +/*
 658 + * does _NOT_ require i_mutex to be held.
 659   *
 660   * This function cannot be inlined since i_size_{read,write} is rather
 661   * heavy-weight on 32-bit systems
 662   */
 663  void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
 664  {
 665 -       i_size_write(dst, i_size_read((struct inode *)src));
 666 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
 667 +       spin_lock(&dst->i_lock);
 668 +#endif
 669 +       i_size_write(dst, i_size_read(src));
 670         dst->i_blocks = src->i_blocks;
 671 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
 672 +       spin_unlock(&dst->i_lock);
 673 +#endif
 674  }
 675  EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 676
 677 -/* copy all attributes; get_nlinks is optional way to override the i_nlink
 678 +/*
 679 + * copy all attributes, i_nlink included, by plain field-to-field
 680   * copying
 681   */
 682 -void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 683 -                               int (*get_nlinks)(struct inode *))
 684 +void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 685  {
 686         dest->i_mode = src->i_mode;
 687         dest->i_uid = src->i_uid;
 688 @@ -29,14 +47,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 689         dest->i_ctime = src->i_ctime;
 690         dest->i_blkbits = src->i_blkbits;
 691         dest->i_flags = src->i_flags;
 692 -
 693 -       /*
 694 -        * Update the nlinks AFTER updating the above fields, because the
 695 -        * get_links callback may depend on them.
 696 -        */
 697 -       if (!get_nlinks)
 698 -               dest->i_nlink = src->i_nlink;
 699 -       else
 700 -               dest->i_nlink = (*get_nlinks)(dest);
 701 +       dest->i_nlink = src->i_nlink;
 702  }
 703  EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
 704 diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile
 705 new file mode 100644
 706 index 0000000..917bc7f
 707 --- /dev/null
 708 +++ b/fs/unionfs/Makefile
 709 @@ -0,0 +1,17 @@
 710 +UNIONFS_VERSION="2.2.4 (for 2.6.25-rc2)"
 711 +
 712 +EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\"
 713 +
 714 +obj-$(CONFIG_UNION_FS) += unionfs.o
 715 +
 716 +unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
 717 +       rdstate.o copyup.o dirhelper.o rename.o unlink.o \
 718 +       lookup.o commonfops.o dirfops.o sioq.o mmap.o
 719 +
 720 +unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
 721 +
 722 +unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o
 723 +
 724 +ifeq ($(CONFIG_UNION_FS_DEBUG),y)
 725 +EXTRA_CFLAGS += -DDEBUG
 726 +endif
 727 diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
 728 new file mode 100644
 729 index 0000000..2add167
 730 --- /dev/null
 731 +++ b/fs/unionfs/commonfops.c
 732 @@ -0,0 +1,849 @@
 733 +/*
 734 + * Copyright (c) 2003-2007 Erez Zadok
 735 + * Copyright (c) 2003-2006 Charles P. Wright
 736 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
 737 + * Copyright (c) 2005-2006 Junjiro Okajima
 738 + * Copyright (c) 2005      Arun M. Krishnakumar
 739 + * Copyright (c) 2004-2006 David P. Quigley
 740 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
 741 + * Copyright (c) 2003      Puja Gupta
 742 + * Copyright (c) 2003      Harikesavan Krishnan
 743 + * Copyright (c) 2003-2007 Stony Brook University
 744 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
 745 + *
 746 + * This program is free software; you can redistribute it and/or modify
 747 + * it under the terms of the GNU General Public License version 2 as
 748 + * published by the Free Software Foundation.
 749 + */
 750 +
 751 +#include "union.h"
 752 +
 753 +/*
 754 + * Copyup a deleted-but-open file from branch bstart to branch bindex:
 755 + * 1) Copyup the file as '.unionfs<lower inode#><counter>' (hex digits) -
 756 + * obviously stolen from NFS's silly rename; 2) vfs_unlink the lower dentry
 757 + */
 758 +static int copyup_deleted_file(struct file *file, struct dentry *dentry,
 759 +                              int bstart, int bindex)
 760 +{
 761 +       static unsigned int counter;
 762 +       const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
 763 +       const int countersize = sizeof(counter) * 2;
 764 +       const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
 765 +       char name[nlen + 1];
 766 +       int err;
 767 +       struct dentry *tmp_dentry = NULL;
 768 +       struct dentry *lower_dentry;
 769 +       struct dentry *lower_dir_dentry = NULL;
 770 +
 771 +       lower_dentry = unionfs_lower_dentry_idx(dentry, bstart);
 772 +
 773 +       sprintf(name, ".unionfs%*.*lx",
 774 +               i_inosize, i_inosize, lower_dentry->d_inode->i_ino);
 775 +
 776 +       /*
 777 +        * Loop, looking for an unused temp name to copyup to.
 778 +        *
 779 +        * It's somewhat silly that we look for a free temp name in the
 780 +        * source branch (bstart) instead of the dest branch (bindex), where
 781 +        * the final name will be created.  We _will_ catch it if somehow
 782 +        * the name exists in the dest branch, but it'd be nice to catch it
 783 +        * sooner than later.
 784 +        */
 785 +retry:
 786 +       tmp_dentry = NULL;
 787 +       do {
 788 +               char *suffix = name + nlen - countersize;
 789 +
 790 +               dput(tmp_dentry);
 791 +               counter++;
 792 +               sprintf(suffix, "%*.*x", countersize, countersize, counter);
 793 +
 794 +               pr_debug("unionfs: trying to rename %s to %s\n",
 795 +                        dentry->d_name.name, name);
 796 +
 797 +               tmp_dentry = lookup_one_len(name, lower_dentry->d_parent,
 798 +                                           nlen);
 799 +               if (IS_ERR(tmp_dentry)) {
 800 +                       err = PTR_ERR(tmp_dentry);
 801 +                       goto out;
 802 +               }
 803 +       } while (tmp_dentry->d_inode != NULL);  /* need negative dentry */
 804 +       dput(tmp_dentry);
 805 +
 806 +       err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
 807 +                               bindex,
 808 +                               i_size_read(file->f_path.dentry->d_inode));
 809 +       if (err) {
 810 +               if (unlikely(err == -EEXIST))
 811 +                       goto retry;
 812 +               goto out;
 813 +       }
 814 +
 815 +       /* bring it to the same state as an unlinked file */
 816 +       lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
 817 +       if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) {
 818 +               atomic_inc(&lower_dentry->d_inode->i_count);
 819 +               unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
 820 +                                           lower_dentry->d_inode);
 821 +       }
 822 +       lower_dir_dentry = lock_parent(lower_dentry);
 823 +       err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
 824 +       unlock_dir(lower_dir_dentry);
 825 +
 826 +out:
 827 +       if (!err)
 828 +               unionfs_check_dentry(dentry);
 829 +       return err;
 830 +}
 831 +
 832 +/*
 833 + * put all references held by upper struct file (lower files and branch
 834 + * open counts), then free and NULL its lower_files/saved_branch_ids arrays
 835 + */
 836 +static void cleanup_file(struct file *file)
 837 +{
 838 +       int bindex, bstart, bend;
 839 +       struct file **lower_files;
 840 +       struct file *lower_file;
 841 +       struct super_block *sb = file->f_path.dentry->d_sb;
 842 +
 843 +       lower_files = UNIONFS_F(file)->lower_files;
 844 +       bstart = fbstart(file);
 845 +       bend = fbend(file);
 846 +
 847 +       for (bindex = bstart; bindex <= bend; bindex++) {
 848 +               int i;  /* holds (possibly) updated branch index */
 849 +               int old_bid;
 850 +
 851 +               lower_file = unionfs_lower_file_idx(file, bindex);
 852 +               if (!lower_file)
 853 +                       continue;
 854 +
 855 +               /*
 856 +                * Find new index of matching branch with an open
 857 +                * file, since branches could have been added or
 858 +                * deleted causing the one with open files to shift.
 859 +                */
 860 +               old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
 861 +               i = branch_id_to_idx(sb, old_bid);
 862 +               if (unlikely(i < 0)) {
 863 +                       printk(KERN_ERR "unionfs: no superblock for "
 864 +                              "file %p\n", file);
 865 +                       continue;
 866 +               }
 867 +
 868 +               /* decrement count of open files */
 869 +               branchput(sb, i);
 870 +               /*
 871 +                * fput will perform an mntput for us on the correct branch.
 872 +                * Although we're using the file's old branch configuration,
 873 +                * bindex, which is the old index, correctly points to the
 874 +                * right branch in the file's branch list.  In other words,
 875 +                * we're going to mntput the correct branch even if branches
 876 +                * have been added/removed.
 877 +                */
 878 +               fput(lower_file);
 879 +               UNIONFS_F(file)->lower_files[bindex] = NULL;
 880 +               UNIONFS_F(file)->saved_branch_ids[bindex] = -1;
 881 +       }
 882 +
 883 +       UNIONFS_F(file)->lower_files = NULL;
 884 +       kfree(lower_files);
 885 +       kfree(UNIONFS_F(file)->saved_branch_ids);
 886 +       /* set to NULL because caller needs to know if to kfree on error */
 887 +       UNIONFS_F(file)->saved_branch_ids = NULL;
 888 +}
 889 +
 890 +/* open all lower files for a given file; returns 0 or a negative errno */
 891 +static int open_all_files(struct file *file)
 892 +{
 893 +       int bindex, bstart, bend, err = 0;
 894 +       struct file *lower_file;
 895 +       struct dentry *lower_dentry;
 896 +       struct dentry *dentry = file->f_path.dentry;
 897 +       struct super_block *sb = dentry->d_sb;
 898 +
 899 +       bstart = dbstart(dentry);
 900 +       bend = dbend(dentry);
 901 +
 902 +       for (bindex = bstart; bindex <= bend; bindex++) {
 903 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
 904 +               if (!lower_dentry)
 905 +                       continue;
 906 +
 907 +               dget(lower_dentry);
 908 +               unionfs_mntget(dentry, bindex);
 909 +
 910 +               lower_file =
 911 +                       dentry_open(lower_dentry,
 912 +                                   unionfs_lower_mnt_idx(dentry, bindex),
 913 +                                   file->f_flags);
 914 +               if (IS_ERR(lower_file)) {
 915 +                       err = PTR_ERR(lower_file);
 916 +                       goto out;
 917 +               }
 918 +               /* branchget after the open, so a failed open leaks no count */
 919 +               branchget(sb, bindex);
 920 +               unionfs_set_lower_file_idx(file, bindex, lower_file);
 921 +       }
 922 +out:
 923 +       return err;
 924 +}
 925 +
 926 +/* open the highest priority lower file for an upper file, or copy it up */
 927 +static int open_highest_file(struct file *file, bool willwrite)
 928 +{
 929 +       int bindex, bstart, bend, err = 0;
 930 +       struct file *lower_file;
 931 +       struct dentry *lower_dentry;
 932 +       struct dentry *dentry = file->f_path.dentry;
 933 +       struct inode *parent_inode = dentry->d_parent->d_inode;
 934 +       struct super_block *sb = dentry->d_sb;
 935 +
 936 +       bstart = dbstart(dentry);
 937 +       bend = dbend(dentry);
 938 +
 939 +       lower_dentry = unionfs_lower_dentry(dentry);
 940 +       if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
 941 +               for (bindex = bstart - 1; bindex >= 0; bindex--) {
 942 +                       err = copyup_file(parent_inode, file, bstart, bindex,
 943 +                                         i_size_read(dentry->d_inode));
 944 +                       if (!err)
 945 +                               break;
 946 +               }
 947 +               atomic_set(&UNIONFS_F(file)->generation,
 948 +                          atomic_read(&UNIONFS_I(dentry->d_inode)->
 949 +                                      generation));
 950 +               goto out;       /* skip the plain open below */
 951 +       }
 952 +
 953 +       dget(lower_dentry);
 954 +       unionfs_mntget(dentry, bstart);
 955 +       lower_file = dentry_open(lower_dentry,
 956 +                                unionfs_lower_mnt_idx(dentry, bstart),
 957 +                                file->f_flags);
 958 +       if (IS_ERR(lower_file)) {
 959 +               err = PTR_ERR(lower_file);
 960 +               goto out;
 961 +       }
 962 +       branchget(sb, bstart);
 963 +       unionfs_set_lower_file(file, lower_file);
 964 +       /* Fix up the position. */
 965 +       lower_file->f_pos = file->f_pos;
 966 +
 967 +       memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
 968 +out:
 969 +       return err;
 970 +}
 971 +
 972 +/* perform a delayed copyup of a read-write file on a read-only branch */
 973 +static int do_delayed_copyup(struct file *file)
 974 +{
 975 +       int bindex, bstart, bend, err = 0;
 976 +       struct dentry *dentry = file->f_path.dentry;
 977 +       struct inode *parent_inode = dentry->d_parent->d_inode;
 978 +
 979 +       bstart = fbstart(file);
 980 +       bend = fbend(file);
 981 +
 982 +       BUG_ON(!S_ISREG(dentry->d_inode->i_mode));
 983 +
 984 +       unionfs_check_file(file);
 985 +       unionfs_check_dentry(dentry);
 986 +       for (bindex = bstart - 1; bindex >= 0; bindex--) {
 987 +               if (!d_deleted(dentry))
 988 +                       err = copyup_file(parent_inode, file, bstart,
 989 +                                         bindex,
 990 +                                         i_size_read(dentry->d_inode));
 991 +               else
 992 +                       err = copyup_deleted_file(file, dentry, bstart,
 993 +                                                 bindex);
 994 +
 995 +               if (!err)
 996 +                       break;
 997 +       }
 998 +       if (err || (bstart <= fbstart(file)))
 999 +               goto out;       /* error, or fbstart(file) did not decrease */
1000 +       bend = fbend(file);
1001 +       for (bindex = bstart; bindex <= bend; bindex++) {
1002 +               if (unionfs_lower_file_idx(file, bindex)) {
1003 +                       branchput(dentry->d_sb, bindex);
1004 +                       fput(unionfs_lower_file_idx(file, bindex));
1005 +                       unionfs_set_lower_file_idx(file, bindex, NULL);
1006 +               }
1007 +               if (unionfs_lower_mnt_idx(dentry, bindex)) {
1008 +                       unionfs_mntput(dentry, bindex);
1009 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1010 +               }
1011 +               if (unionfs_lower_dentry_idx(dentry, bindex)) {
1012 +                       BUG_ON(!dentry->d_inode);
1013 +                       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1014 +                       unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1015 +                                                   NULL);
1016 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
1017 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1018 +               }
1019 +       }
1020 +       /* for reg file, we only open it "once" */
1021 +       fbend(file) = fbstart(file);
1022 +       set_dbend(dentry, dbstart(dentry));
1023 +       ibend(dentry->d_inode) = ibstart(dentry->d_inode);
1024 +
1025 +out:
1026 +       unionfs_check_file(file);
1027 +       unionfs_check_dentry(dentry);
1028 +       return err;
1029 +}
1030 +
1031 +/*
1032 + * Revalidate the struct file; returns 0 or a negative errno (e.g. -ESTALE)
1033 + * @file: file to revalidate
1034 + * @willwrite: true if caller may cause changes to the file; false otherwise.
1035 + * Caller must lock/unlock dentry's branch configuration.
1036 + */
1037 +int unionfs_file_revalidate_locked(struct file *file, bool willwrite)
1038 +{
1039 +       struct super_block *sb;
1040 +       struct dentry *dentry;
1041 +       int sbgen, fgen, dgen;
1042 +       int bstart, bend;
1043 +       int size;
1044 +       int err = 0;
1045 +
1046 +       dentry = file->f_path.dentry;
1047 +       sb = dentry->d_sb;
1048 +
1049 +       /*
1050 +        * First revalidate the dentry inside struct file,
1051 +        * but not unhashed dentries.
1052 +        */
1053 +reval_dentry:
1054 +       if (unlikely(!d_deleted(dentry) &&
1055 +                    !__unionfs_d_revalidate_chain(dentry, NULL, willwrite))) {
1056 +               err = -ESTALE;
1057 +               goto out_nofree;
1058 +       }
1059 +
1060 +       sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
1061 +       dgen = atomic_read(&UNIONFS_D(dentry)->generation);
1062 +       fgen = atomic_read(&UNIONFS_F(file)->generation);
1063 +
1064 +       if (unlikely(sbgen > dgen)) {
1065 +               pr_debug("unionfs: retry dentry revalidation\n");
1066 +               schedule();
1067 +               goto reval_dentry;
1068 +       }
1069 +       BUG_ON(sbgen > dgen);
1070 +
1071 +       /*
1072 +        * There are two cases we are interested in.  The first is if the
1073 +        * generation is lower than the super-block.  The second is if
1074 +        * someone has copied up this file from underneath us, we also need
1075 +        * to refresh things.
1076 +        */
1077 +       if (unlikely(!d_deleted(dentry) &&
1078 +                    (sbgen > fgen || dbstart(dentry) != fbstart(file)))) {
1079 +               /* save orig branch ID */
1080 +               int orig_brid =
1081 +                       UNIONFS_F(file)->saved_branch_ids[fbstart(file)];
1082 +
1083 +               /* First we throw out the existing files. */
1084 +               cleanup_file(file);
1085 +
1086 +               /* Now we reopen the file(s) as in unionfs_open. */
1087 +               bstart = fbstart(file) = dbstart(dentry);
1088 +               bend = fbend(file) = dbend(dentry);
1089 +
1090 +               size = sizeof(struct file *) * sbmax(sb);
1091 +               UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1092 +               if (unlikely(!UNIONFS_F(file)->lower_files)) {
1093 +                       err = -ENOMEM;
1094 +                       goto out;
1095 +               }
1096 +               size = sizeof(int) * sbmax(sb);
1097 +               UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1098 +               if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1099 +                       err = -ENOMEM;
1100 +                       goto out;
1101 +               }
1102 +
1103 +               if (S_ISDIR(dentry->d_inode->i_mode)) {
1104 +                       /* We need to open all the files. */
1105 +                       err = open_all_files(file);
1106 +                       if (err)
1107 +                               goto out;
1108 +               } else {
1109 +                       int new_brid;
1110 +                       /* We only open the highest priority branch. */
1111 +                       err = open_highest_file(file, willwrite);
1112 +                       if (err)
1113 +                               goto out;
1114 +                       new_brid = UNIONFS_F(file)->
1115 +                         saved_branch_ids[fbstart(file)];
1116 +                       if (unlikely(new_brid != orig_brid && sbgen > fgen)) {
1117 +                               /*
1118 +                                * If we re-opened the file on a different
1119 +                                * branch than the original one, and this
1120 +                                * was due to a new branch inserted, then
1121 +                                * update the mnt counts of the old and new
1122 +                                * branches accordingly.
1123 +                                */
1124 +                               unionfs_mntget(dentry, bstart);
1125 +                               unionfs_mntput(sb->s_root,
1126 +                                              branch_id_to_idx(sb, orig_brid));
1127 +                       }
1128 +               }
1129 +               atomic_set(&UNIONFS_F(file)->generation,
1130 +                          atomic_read(
1131 +                                  &UNIONFS_I(dentry->d_inode)->generation));
1132 +       }
1133 +
1134 +       /* Copyup on the first write to a file on a readonly branch. */
1135 +       if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
1136 +           !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
1137 +           is_robranch(dentry)) {
1138 +               pr_debug("unionfs: do delay copyup of \"%s\"\n",
1139 +                        dentry->d_name.name);
1140 +               err = do_delayed_copyup(file);
1141 +       }
1142 +
1143 +out:
1144 +       if (err) {      /* NOTE(review): freed pointers are not set to NULL */
1145 +               kfree(UNIONFS_F(file)->lower_files);
1146 +               kfree(UNIONFS_F(file)->saved_branch_ids);
1147 +       }
1148 +out_nofree:
1149 +       if (!err)
1150 +               unionfs_check_file(file);
1151 +       return err;
1152 +}
1153 +
1154 +int unionfs_file_revalidate(struct file *file, bool willwrite)
1155 +{
1156 +       int err;        /* result of the revalidation done under the dentry lock */
1157 +
1158 +       unionfs_lock_dentry(file->f_path.dentry, UNIONFS_DMUTEX_CHILD);
1159 +       err = unionfs_file_revalidate_locked(file, willwrite);
1160 +       unionfs_unlock_dentry(file->f_path.dentry);
1161 +
1162 +       return err;
1163 +}
1164 +
1165 +/* unionfs_open helper: open the lower dir in each branch that has one */
1166 +static int __open_dir(struct inode *inode, struct file *file)
1167 +{
1168 +       struct dentry *lower_dentry;
1169 +       struct file *lower_file;
1170 +       int bindex, bstart, bend;
1171 +       struct vfsmount *mnt;
1172 +
1173 +       bstart = fbstart(file) = dbstart(file->f_path.dentry);
1174 +       bend = fbend(file) = dbend(file->f_path.dentry);
1175 +
1176 +       for (bindex = bstart; bindex <= bend; bindex++) {
1177 +               lower_dentry =
1178 +                       unionfs_lower_dentry_idx(file->f_path.dentry, bindex);
1179 +               if (!lower_dentry)
1180 +                       continue;
1181 +
1182 +               dget(lower_dentry);
1183 +               unionfs_mntget(file->f_path.dentry, bindex);
1184 +               mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex);
1185 +               lower_file = dentry_open(lower_dentry, mnt, file->f_flags);
1186 +               if (IS_ERR(lower_file))
1187 +                       return PTR_ERR(lower_file);
1188 +
1189 +               unionfs_set_lower_file_idx(file, bindex, lower_file);
1190 +
1191 +               /*
1192 +                * The branchget goes after the open, because otherwise
1193 +                * we would miss the reference on release.
1194 +                */
1195 +               branchget(inode->i_sb, bindex);
1196 +       }
1197 +
1198 +       return 0;
1199 +}
1200 +
1201 +/* unionfs_open helper function: open a file */
1202 +static int __open_file(struct inode *inode, struct file *file)
1203 +{
1204 +       struct dentry *lower_dentry;
1205 +       struct file *lower_file;
1206 +       int lower_flags;
1207 +       int bindex, bstart, bend;
1208 +
1209 +       lower_dentry = unionfs_lower_dentry(file->f_path.dentry);
1210 +       lower_flags = file->f_flags;
1211 +
1212 +       bstart = fbstart(file) = dbstart(file->f_path.dentry);
1213 +       bend = fbend(file) = dbend(file->f_path.dentry);
1214 +
1215 +       /*
1216 +        * If the lower file exists on a read-only branch, either copy it up
1217 +        * now (O_TRUNC) or strip the write flags and defer the copyup.
1218 +        */
1219 +       if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) {
1220 +               /*
1221 +                * if the open will change the file, copy it up otherwise
1222 +                * defer it.
1223 +                */
1224 +               if (lower_flags & O_TRUNC) {
1225 +                       int size = 0;
1226 +                       int err = -EROFS;
1227 +
1228 +                       /* copyup the file */
1229 +                       for (bindex = bstart - 1; bindex >= 0; bindex--) {
1230 +                               err = copyup_file(
1231 +                                       file->f_path.dentry->d_parent->d_inode,
1232 +                                       file, bstart, bindex, size);
1233 +                               if (!err)
1234 +                                       break;
1235 +                       }
1236 +                       return err;
1237 +               } else {
1238 +                       lower_flags &= ~(OPEN_WRITE_FLAGS);
1239 +               }
1240 +       }
1241 +
1242 +       dget(lower_dentry);
1243 +
1244 +       /*
1245 +        * dentry_open will decrement mnt refcnt if err.
1246 +        * otherwise fput() will do an mntput() for us upon file close.
1247 +        */
1248 +       unionfs_mntget(file->f_path.dentry, bstart);
1249 +       lower_file =
1250 +               dentry_open(lower_dentry,
1251 +                           unionfs_lower_mnt_idx(file->f_path.dentry, bstart),
1252 +                           lower_flags);
1253 +       if (IS_ERR(lower_file))
1254 +               return PTR_ERR(lower_file);
1255 +
1256 +       unionfs_set_lower_file(file, lower_file);
1257 +       branchget(inode->i_sb, bstart);
1258 +
1259 +       return 0;
1260 +}
1261 +
1262 +int unionfs_open(struct inode *inode, struct file *file)
1263 +{
1264 +       int err = 0;
1265 +       struct file *lower_file = NULL;
1266 +       struct dentry *dentry = file->f_path.dentry;
1267 +       int bindex = 0, bstart = 0, bend = 0;
1268 +       int size;
1269 +       int valid = 0;
1270 +
1271 +       unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT);
1272 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1273 +       if (dentry != dentry->d_parent)
1274 +               unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
1275 +
1276 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false);
1277 +       if (unlikely(!valid)) {
1278 +               err = -ESTALE;
1279 +               goto out_nofree;
1280 +       }
1281 +
1282 +       file->private_data =
1283 +               kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
1284 +       if (unlikely(!UNIONFS_F(file))) {
1285 +               err = -ENOMEM;
1286 +               goto out_nofree;
1287 +       }
1288 +       fbstart(file) = -1;
1289 +       fbend(file) = -1;
1290 +       atomic_set(&UNIONFS_F(file)->generation,
1291 +                  atomic_read(&UNIONFS_I(inode)->generation));
1292 +
1293 +       size = sizeof(struct file *) * sbmax(inode->i_sb);
1294 +       UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
1295 +       if (unlikely(!UNIONFS_F(file)->lower_files)) {
1296 +               err = -ENOMEM;
1297 +               goto out;
1298 +       }
1299 +       size = sizeof(int) * sbmax(inode->i_sb);
1300 +       UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
1301 +       if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) {
1302 +               err = -ENOMEM;
1303 +               goto out;
1304 +       }
1305 +
1306 +       bstart = fbstart(file) = dbstart(dentry);
1307 +       bend = fbend(file) = dbend(dentry);
1308 +
1309 +       /*
1310 +        * open the lower directories (or the one lower file) and make the
1311 +        * unionfs file struct point to these lower file structs
1312 +        */
1313 +       if (S_ISDIR(inode->i_mode))
1314 +               err = __open_dir(inode, file);  /* open a dir */
1315 +       else
1316 +               err = __open_file(inode, file); /* open a file */
1317 +
1318 +       /* on error, fput the lower files opened so far and drop branch counts */
1319 +       if (err) {
1320 +               for (bindex = bstart; bindex <= bend; bindex++) {
1321 +                       lower_file = unionfs_lower_file_idx(file, bindex);
1322 +                       if (!lower_file)
1323 +                               continue;
1324 +
1325 +                       branchput(dentry->d_sb, bindex);
1326 +                       /* fput calls dput for lower_dentry */
1327 +                       fput(lower_file);
1328 +               }
1329 +       }
1330 +
1331 +out:
1332 +       if (err) {
1333 +               kfree(UNIONFS_F(file)->lower_files);
1334 +               kfree(UNIONFS_F(file)->saved_branch_ids);
1335 +               kfree(UNIONFS_F(file));
1336 +       }
1337 +out_nofree:
1338 +       if (!err) {
1339 +               unionfs_postcopyup_setmnt(dentry);
1340 +               unionfs_copy_attr_times(inode);
1341 +               unionfs_check_file(file);
1342 +               unionfs_check_inode(inode);
1343 +       }
1344 +       if (dentry != dentry->d_parent)
1345 +               unionfs_unlock_dentry(dentry->d_parent);
1346 +       unionfs_unlock_dentry(dentry);
1347 +       unionfs_read_unlock(inode->i_sb);
1348 +       return err;
1349 +}
1350 +
1351 +/*
1352 + * release all lower object references & free the file info structure
1353 + *
1354 + * No need to grab sb info's rwsem.
1355 + */
1356 +int unionfs_file_release(struct inode *inode, struct file *file)
1357 +{
1358 +       struct file *lower_file = NULL;
1359 +       struct unionfs_file_info *fileinfo;
1360 +       struct unionfs_inode_info *inodeinfo;
1361 +       struct super_block *sb = inode->i_sb;
1362 +       struct dentry *dentry = file->f_path.dentry;
1363 +       int bindex, bstart, bend;
1364 +       int fgen, err = 0;
1365 +
1366 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1367 +       /*
1368 +        * Yes, we have to revalidate this file even if it's being released.
1369 +        * This is important for open-but-unlinked files, as well as mmap
1370 +        * support.
1371 +        */
1372 +       err = unionfs_file_revalidate(file, true);
1373 +       if (unlikely(err))
1374 +               goto out;       /* NOTE(review): skips the frees below */
1375 +       unionfs_check_file(file);
1376 +       fileinfo = UNIONFS_F(file);
1377 +       BUG_ON(file->f_path.dentry->d_inode != inode);
1378 +       inodeinfo = UNIONFS_I(inode);
1379 +
1380 +       /* fput all the lower files */
1381 +       fgen = atomic_read(&fileinfo->generation);
1382 +       bstart = fbstart(file);
1383 +       bend = fbend(file);
1384 +
1385 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1386 +       for (bindex = bstart; bindex <= bend; bindex++) {
1387 +               lower_file = unionfs_lower_file_idx(file, bindex);
1388 +
1389 +               if (lower_file) {
1390 +                       fput(lower_file);
1391 +                       branchput(sb, bindex);
1392 +               }
1393 +
1394 +               /* if d_deleted() reports the dentry gone, dput its lower one */
1395 +               if (d_deleted(dentry)) {
1396 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
1397 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1398 +               }
1399 +       }
1400 +       unionfs_unlock_dentry(dentry);
1401 +
1402 +       kfree(fileinfo->lower_files);
1403 +       kfree(fileinfo->saved_branch_ids);
1404 +
1405 +       if (fileinfo->rdstate) {
1406 +               fileinfo->rdstate->access = jiffies;
1407 +               spin_lock(&inodeinfo->rdlock);
1408 +               inodeinfo->rdcount++;
1409 +               list_add_tail(&fileinfo->rdstate->cache,
1410 +                             &inodeinfo->readdircache);
1411 +               mark_inode_dirty(inode);
1412 +               spin_unlock(&inodeinfo->rdlock);
1413 +               fileinfo->rdstate = NULL;
1414 +       }
1415 +       kfree(fileinfo);
1416 +
1417 +out:
1418 +       unionfs_read_unlock(sb);
1419 +       return err;
1420 +}
1421 +
1422 +/* pass the ioctl to the lower fs; -ENOTTY if it has no ioctl method */
1423 +static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1424 +{
1425 +       struct file *lower_file;
1426 +       int err;
1427 +
1428 +       lower_file = unionfs_lower_file(file);
1429 +
1430 +       err = -ENOTTY;
1431 +       if (!lower_file || !lower_file->f_op)
1432 +               goto out;
1433 +       if (lower_file->f_op->unlocked_ioctl) {
1434 +               err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
1435 +       } else if (lower_file->f_op->ioctl) {
1436 +               lock_kernel();  /* legacy ->ioctl is called with the BKL held */
1437 +               err = lower_file->f_op->ioctl(
1438 +                       lower_file->f_path.dentry->d_inode,
1439 +                       lower_file, cmd, arg);
1440 +               unlock_kernel();
1441 +       }
1442 +
1443 +out:
1444 +       return err;
1445 +}
1446 +
1447 +/*
1448 + * Return to user-space the branch indices containing the file in question.
1449 + * Returns dbend() after the partial lookup on success, or a negative error.
1450 + * We use an fd_set, so the number of branches is limited to FD_SETSIZE,
1451 + * which is currently 1024 - plenty for most people.
1452 + */
1453 +static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
1454 +                                  unsigned long arg)
1455 +{
1456 +       int err = 0;
1457 +       fd_set branchlist;
1458 +       int bstart = 0, bend = 0, bindex = 0;
1459 +       int orig_bstart, orig_bend;
1460 +       struct dentry *dentry, *lower_dentry;
1461 +       struct vfsmount *mnt;
1462 +
1463 +       dentry = file->f_path.dentry;
1464 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1465 +       orig_bstart = dbstart(dentry);
1466 +       orig_bend = dbend(dentry);
1467 +       err = unionfs_partial_lookup(dentry);
1468 +       if (err)
1469 +               goto out;
1470 +       bstart = dbstart(dentry);
1471 +       bend = dbend(dentry);
1472 +
1473 +       FD_ZERO(&branchlist);
1474 +
1475 +       for (bindex = bstart; bindex <= bend; bindex++) {
1476 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
1477 +               if (!lower_dentry)
1478 +                       continue;
1479 +               if (likely(lower_dentry->d_inode))
1480 +                       FD_SET(bindex, &branchlist);
1481 +               /* purge any lower objects outside the pre-lookup range */
1482 +               if (bindex < orig_bstart || bindex > orig_bend) {
1483 +                       dput(lower_dentry);
1484 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
1485 +                       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
1486 +                       unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
1487 +                                                   NULL);
1488 +                       mnt = unionfs_lower_mnt_idx(dentry, bindex);
1489 +                       if (!mnt)
1490 +                               continue;
1491 +                       unionfs_mntput(dentry, bindex);
1492 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
1493 +               }
1494 +       }
1495 +       /* restore the dentry's and inode's original branch offsets */
1496 +       set_dbstart(dentry, orig_bstart);
1497 +       set_dbend(dentry, orig_bend);
1498 +       ibstart(dentry->d_inode) = orig_bstart;
1499 +       ibend(dentry->d_inode) = orig_bend;
1500 +
1501 +       err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
1502 +       if (unlikely(err))
1503 +               err = -EFAULT;
1504 +
1505 +out:
1506 +       unionfs_unlock_dentry(dentry);
1507 +       return err < 0 ? err : bend;
1508 +}
1509 +
1510 +/*
1511 + * ioctl entry point.  The unionfs commands are answered here; every
1512 + * other command is passed down to the lower file.  unionfs_read_lock()
1513 + * on the superblock is held for the duration of the call.
1514 + */
1515 +long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1516 +{
1517 +       struct super_block *sb = file->f_path.dentry->d_sb;
1518 +       long ret;
1519 +
1520 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
1521 +
1522 +       ret = unionfs_file_revalidate(file, true);
1523 +       if (unlikely(ret))
1524 +               goto out_unlock;
1525 +
1526 +       /* check if asked for local commands */
1527 +       if (cmd == UNIONFS_IOCTL_INCGEN) {
1528 +               /* deprecated: only point the user at the replacement */
1529 +               pr_info("unionfs: incgen ioctl deprecated; "
1530 +                       "use \"-o remount,incgen\"\n");
1531 +               ret = -ENOSYS;
1532 +       } else if (cmd == UNIONFS_IOCTL_QUERYFILE) {
1533 +               /* report the branches that contain the given file */
1534 +               ret = unionfs_ioctl_queryfile(file, cmd, arg);
1535 +       } else {
1536 +               /* not a unionfs command: hand it to the lower file */
1537 +               ret = do_ioctl(file, cmd, arg);
1538 +       }
1539 +
1540 +out_unlock:
1541 +       unionfs_check_file(file);
1542 +       unionfs_read_unlock(sb);
1543 +       return ret;
1544 +}
1545 +
1546 +/*
1547 + * Call ->flush on each lower file of @file, from fbstart to fbend, and
1548 + * return the first error encountered (0 if there was none).
1549 + */
1550 +int unionfs_flush(struct file *file, fl_owner_t id)
1551 +{
1552 +       struct dentry *dentry = file->f_path.dentry;
1553 +       struct file *lower;
1554 +       int idx, last;
1555 +       int err;
1556 +
1557 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT);
1558 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
1559 +
1560 +       err = unionfs_file_revalidate_locked(file, true);
1561 +       if (unlikely(err))
1562 +               goto out;
1563 +       unionfs_check_file(file);
1564 +
1565 +       last = fbend(file);
1566 +       for (idx = fbstart(file); idx <= last; idx++) {
1567 +               lower = unionfs_lower_file_idx(file, idx);
1568 +               if (!lower || !lower->f_op || !lower->f_op->flush)
1569 +                       continue;       /* nothing to flush on this branch */
1570 +               err = lower->f_op->flush(lower, id);
1571 +               if (err)
1572 +                       break;
1573 +       }
1574 +
1575 +out:
1576 +       if (!err)
1577 +               unionfs_check_file(file);
1578 +       unionfs_unlock_dentry(dentry);
1579 +       unionfs_read_unlock(dentry->d_sb);
1580 +       return err;
1581 +}
1582 diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c
1583 new file mode 100644
1584 index 0000000..f71bddf
1585 --- /dev/null
1586 +++ b/fs/unionfs/copyup.c
1587 @@ -0,0 +1,886 @@
1588 +/*
1589 + * Copyright (c) 2003-2007 Erez Zadok
1590 + * Copyright (c) 2003-2006 Charles P. Wright
1591 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
1592 + * Copyright (c) 2005-2006 Junjiro Okajima
1593 + * Copyright (c) 2005      Arun M. Krishnakumar
1594 + * Copyright (c) 2004-2006 David P. Quigley
1595 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
1596 + * Copyright (c) 2003      Puja Gupta
1597 + * Copyright (c) 2003      Harikesavan Krishnan
1598 + * Copyright (c) 2003-2007 Stony Brook University
1599 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
1600 + *
1601 + * This program is free software; you can redistribute it and/or modify
1602 + * it under the terms of the GNU General Public License version 2 as
1603 + * published by the Free Software Foundation.
1604 + */
1605 +
1606 +#include "union.h"
1607 +
1608 +/*
1609 + * For detailed explanation of copyup see:
1610 + * Documentation/filesystems/unionfs/concepts.txt
1611 + */
1612 +
1613 +#ifdef CONFIG_UNION_FS_XATTR
1614 +/* copy all extended attrs from old_lower_dentry to new_lower_dentry */
1615 +static int copyup_xattrs(struct dentry *old_lower_dentry,
1616 +                        struct dentry *new_lower_dentry)
1617 +{
1618 +       int err = 0;
1619 +       ssize_t list_size = -1;
1620 +       char *name_list = NULL;
1621 +       char *attr_value = NULL;
1622 +       char *name_list_buf = NULL;
1623 +
1624 +       /* query the actual size of the xattr list */
1625 +       list_size = vfs_listxattr(old_lower_dentry, NULL, 0);
1626 +       if (list_size <= 0) {
1627 +               err = list_size;
1628 +               goto out;
1629 +       }
1630 +
1631 +       /* allocate space for the actual list */
1632 +       name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
1633 +       if (unlikely(!name_list || IS_ERR(name_list))) {
1634 +               err = PTR_ERR(name_list);
1635 +               goto out;
1636 +       }
1637 +
1638 +       name_list_buf = name_list; /* save for kfree at end */
1639 +
1640 +       /* now get the actual xattr list of the source file */
1641 +       list_size = vfs_listxattr(old_lower_dentry, name_list, list_size);
1642 +       if (list_size <= 0) {
1643 +               err = list_size;
1644 +               goto out;
1645 +       }
1646 +
1647 +       /* allocate space to hold each xattr's value */
1648 +       attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
1649 +       if (unlikely(!attr_value || IS_ERR(attr_value))) {
1650 +               err = PTR_ERR(attr_value); /* not name_list: that one is valid */
1651 +               goto out;
1652 +       }
1653 +
1654 +       /* in a loop, get and set each xattr from src to dst file */
1655 +       while (*name_list) {
1656 +               ssize_t size;
1657 +
1658 +               /* Lock here since vfs_getxattr doesn't lock for us */
1659 +               mutex_lock(&old_lower_dentry->d_inode->i_mutex);
1660 +               size = vfs_getxattr(old_lower_dentry, name_list,
1661 +                                   attr_value, XATTR_SIZE_MAX);
1662 +               mutex_unlock(&old_lower_dentry->d_inode->i_mutex);
1663 +               if (size < 0) {
1664 +                       err = size;
1665 +                       goto out;
1666 +               }
1667 +               if (size > XATTR_SIZE_MAX) {
1668 +                       err = -E2BIG;
1669 +                       goto out;
1670 +               }
1671 +               /* Don't lock here since vfs_setxattr does it for us. */
1672 +               err = vfs_setxattr(new_lower_dentry, name_list, attr_value,
1673 +                                  size, 0);
1674 +               /*
1675 +                * Selinux depends on "security.*" xattrs, so to maintain
1676 +                * the security of copied-up files, if Selinux is active,
1677 +                * then we must copy these xattrs as well.  So we need to
1678 +                * temporarily get FOWNER privileges.
1679 +                * XXX: move entire copyup code to SIOQ.
1680 +                */
1681 +               if (err == -EPERM && !capable(CAP_FOWNER)) {
1682 +                       cap_raise(current->cap_effective, CAP_FOWNER);
1683 +                       err = vfs_setxattr(new_lower_dentry, name_list,
1684 +                                          attr_value, size, 0);
1685 +                       cap_lower(current->cap_effective, CAP_FOWNER);
1686 +               }
1687 +               if (err < 0)
1688 +                       goto out;
1689 +               name_list += strlen(name_list) + 1;
1690 +       }
1691 +out:
1692 +       unionfs_xattr_kfree(name_list_buf);
1693 +       unionfs_xattr_kfree(attr_value);
1694 +       /* Ignore if xattr isn't supported */
1695 +       if (err == -ENOTSUPP || err == -EOPNOTSUPP)
1696 +               err = 0;
1697 +       return err;
1698 +}
1699 +#endif /* CONFIG_UNION_FS_XATTR */
1700 +
1701 +/*
1702 + * Copy the times, ownership and mode of the old lower inode to the new
1703 + * lower dentry.
1704 + *
1705 + * Some file systems do not support every change; jffs2, for example,
1706 + * doesn't allow one to chmod a symlink.  Such a harmless -EOPNOTSUPP on
1707 + * a symlink is logged and dropped rather than failing the copyup.
1708 + */
1709 +static int copyup_permissions(struct super_block *sb,
1710 +                             struct dentry *old_lower_dentry,
1711 +                             struct dentry *new_lower_dentry)
1712 +{
1713 +       struct inode *src = old_lower_dentry->d_inode;
1714 +       struct iattr attrs;
1715 +       int ret;
1716 +
1717 +       /* first pass: timestamps and ownership */
1718 +       attrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
1719 +               ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
1720 +               ATTR_GID | ATTR_UID;
1721 +       attrs.ia_atime = src->i_atime;
1722 +       attrs.ia_mtime = src->i_mtime;
1723 +       attrs.ia_ctime = src->i_ctime;
1724 +       attrs.ia_uid = src->i_uid;
1725 +       attrs.ia_gid = src->i_gid;
1726 +       ret = notify_change(new_lower_dentry, &attrs);
1727 +       if (ret)
1728 +               return ret;
1729 +
1730 +       /* second pass: the mode, on its own so a failure can be ignored */
1731 +       attrs.ia_valid = ATTR_MODE | ATTR_FORCE;
1732 +       attrs.ia_mode = src->i_mode;
1733 +       ret = notify_change(new_lower_dentry, &attrs);
1734 +       if (ret != -EOPNOTSUPP ||
1735 +           !S_ISLNK(new_lower_dentry->d_inode->i_mode))
1736 +               return ret;
1737 +
1738 +       /* the mode of this symlink cannot be changed: warn and carry on */
1739 +       printk(KERN_WARNING
1740 +              "unionfs: changing \"%s\" symlink mode unsupported\n",
1741 +              new_lower_dentry->d_name.name);
1742 +       return 0;
1743 +}
1744 +
1745 +/*
1746 + * Create the new directory/symlink/special file/regular file as
1747 + * new_lower_dentry, using the mode of the old lower inode.  Each create
1748 + * is run through run_sioq() and its result is read back from args.err.
1749 + * If the object being copied up is a regular file, the file is only
1750 + * created; the contents have to be copied up separately.
1751 + */
1752 +static int __copyup_ndentry(struct dentry *old_lower_dentry,
1753 +                           struct dentry *new_lower_dentry,
1754 +                           struct dentry *new_lower_parent_dentry,
1755 +                           char *symbuf)
1756 +{
1757 +       int err = 0;
1758 +       umode_t old_mode = old_lower_dentry->d_inode->i_mode;
1759 +       struct sioq_args args;
1760 +
1761 +       if (S_ISDIR(old_mode)) {
1762 +               args.mkdir.parent = new_lower_parent_dentry->d_inode;
1763 +               args.mkdir.dentry = new_lower_dentry;
1764 +               args.mkdir.mode = old_mode;
1765 +
1766 +               run_sioq(__unionfs_mkdir, &args);
1767 +               err = args.err;
1768 +       } else if (S_ISLNK(old_mode)) {
1769 +               args.symlink.parent = new_lower_parent_dentry->d_inode;
1770 +               args.symlink.dentry = new_lower_dentry;
1771 +               args.symlink.symbuf = symbuf;
1772 +               args.symlink.mode = old_mode;
1773 +
1774 +               run_sioq(__unionfs_symlink, &args);
1775 +               err = args.err;
1776 +       } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
1777 +                  S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
1778 +               args.mknod.parent = new_lower_parent_dentry->d_inode;
1779 +               args.mknod.dentry = new_lower_dentry;
1780 +               args.mknod.mode = old_mode;
1781 +               args.mknod.dev = old_lower_dentry->d_inode->i_rdev;
1782 +
1783 +               run_sioq(__unionfs_mknod, &args);
1784 +               err = args.err;
1785 +       } else if (S_ISREG(old_mode)) {
1786 +               struct nameidata nd;
1787 +               err = init_lower_nd(&nd, LOOKUP_CREATE);
1788 +               if (unlikely(err < 0))
1789 +                       goto out;
1790 +               args.create.nd = &nd;
1791 +               args.create.parent = new_lower_parent_dentry->d_inode;
1792 +               args.create.dentry = new_lower_dentry;
1793 +               args.create.mode = old_mode;
1794 +
1795 +               run_sioq(__unionfs_create, &args);
1796 +               err = args.err;
1797 +               release_lower_nd(&nd, err);
1798 +       } else {
1799 +               printk(KERN_CRIT "unionfs: unknown inode type %d\n",
1800 +                      old_mode);
1801 +               BUG();
1802 +       }
1803 +
1804 +out:
1805 +       return err;
1806 +}
1807 +
1808 +/*
1809 + * Copy up to @len bytes of data from @old_lower_dentry to @new_lower_dentry.
1810 + * If @copyup_file is set, the open output file is handed back through it.
1811 + */
1812 +static int __copyup_reg_data(struct dentry *dentry,
1813 +                            struct dentry *new_lower_dentry, int new_bindex,
1814 +                            struct dentry *old_lower_dentry, int old_bindex,
1815 +                            struct file **copyup_file, loff_t len)
1816 +{
1817 +       struct super_block *sb = dentry->d_sb;
1818 +       struct file *input_file, *output_file;
1819 +       struct vfsmount *output_mnt;
1820 +       mm_segment_t old_fs;
1821 +       char *buf = NULL;
1822 +       ssize_t read_bytes, write_bytes;
1823 +       loff_t size;
1824 +       int err = 0;
1825 +
1826 +       /* open old file */
1827 +       unionfs_mntget(dentry, old_bindex);
1828 +       branchget(sb, old_bindex);
1829 +       /* dentry_open calls dput and mntput if it returns an error */
1830 +       input_file = dentry_open(old_lower_dentry,
1831 +                                unionfs_lower_mnt_idx(dentry, old_bindex),
1832 +                                O_RDONLY | O_LARGEFILE);
1833 +       if (IS_ERR(input_file)) {
1834 +               dput(old_lower_dentry);
1835 +               err = PTR_ERR(input_file);
1836 +               goto out;
1837 +       }
1838 +       if (unlikely(!input_file->f_op || !input_file->f_op->read)) {
1839 +               err = -EINVAL;
1840 +               goto out_close_in;
1841 +       }
1842 +
1843 +       /* open new file */
1844 +       dget(new_lower_dentry);
1845 +       output_mnt = unionfs_mntget(sb->s_root, new_bindex);
1846 +       branchget(sb, new_bindex);
1847 +       output_file = dentry_open(new_lower_dentry, output_mnt,
1848 +                                 O_RDWR | O_LARGEFILE);
1849 +       if (IS_ERR(output_file)) {
1850 +               err = PTR_ERR(output_file);
1851 +               goto out_close_in2;
1852 +       }
1853 +       if (unlikely(!output_file->f_op || !output_file->f_op->write)) {
1854 +               err = -EINVAL;
1855 +               goto out_close_out;
1856 +       }
1857 +
1858 +       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1859 +       if (unlikely(!buf)) {
1860 +               err = -ENOMEM;
1861 +               goto out_close_out;
1862 +       }
1863 +
1864 +       input_file->f_pos = 0;
1865 +       output_file->f_pos = 0;
1866 +
1867 +       old_fs = get_fs();
1868 +       set_fs(KERNEL_DS);
1869 +
1870 +       size = len;
1871 +       do {
1872 +               if (len >= PAGE_SIZE)
1873 +                       size = PAGE_SIZE;
1874 +               else if ((len < PAGE_SIZE) && (len > 0))
1875 +                       size = len;
1876 +
1877 +               len -= PAGE_SIZE;
1878 +
1879 +               read_bytes =
1880 +                       input_file->f_op->read(input_file,
1881 +                                              (char __user *)buf, size,
1882 +                                              &input_file->f_pos);
1883 +               if (read_bytes <= 0) {
1884 +                       err = read_bytes;
1885 +                       break;
1886 +               }
1887 +
1888 +               /* see Documentation/filesystems/unionfs/issues.txt */
1889 +               lockdep_off();
1890 +               write_bytes =
1891 +                       output_file->f_op->write(output_file,
1892 +                                                (char __user *)buf,
1893 +                                                read_bytes,
1894 +                                                &output_file->f_pos);
1895 +               lockdep_on();
1896 +               /* a short write is a failure too, not a byte count to return */
1897 +               if ((write_bytes < 0) || (write_bytes < read_bytes)) {
1898 +                       err = write_bytes < 0 ? write_bytes : -EIO;
1899 +                       break;
1900 +               }
1901 +       } while ((read_bytes > 0) && (len > 0));
1902 +
1903 +       set_fs(old_fs);
1904 +       kfree(buf);
1905 +
1906 +       if (!err && output_file->f_op->fsync)
1907 +               err = output_file->f_op->fsync(output_file,
1908 +                                              new_lower_dentry, 0);
1909 +       if (err)
1910 +               goto out_close_out;
1911 +
1912 +       if (copyup_file) {
1913 +               *copyup_file = output_file;
1914 +               goto out_close_in;
1915 +       }
1916 +
1917 +out_close_out:
1918 +       fput(output_file);
1919 +
1920 +out_close_in2:
1921 +       branchput(sb, new_bindex);
1922 +
1923 +out_close_in:
1924 +       fput(input_file);
1925 +
1926 +out:
1927 +       branchput(sb, old_bindex);
1928 +
1929 +       return err;
1930 +}
1931 +
1932 +/*
1933 + * Undo a failed copyup: clear the lower dentry pointer at @new_bindex,
1934 + * restore dbstart/dbend, and dput the new and old lower dentries.
1935 + */
1936 +static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry,
1937 +                   int old_bstart, int old_bend,
1938 +                   struct dentry *new_lower_dentry, int new_bindex)
1939 +{
1940 +       /* get rid of the lower dentry and all its traces */
1941 +       unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
1942 +       set_dbstart(dentry, old_bstart);
1943 +       set_dbend(dentry, old_bend);
1944 +
1945 +       dput(new_lower_dentry);
1946 +       dput(old_lower_dentry);
1947 +}
1948 +
1949 +/*
1950 + * Copy up a dentry to a file of specified name.
1951 + *
1952 + * @dir: used to pull the ->i_sb to access other branches
1953 + * @dentry: the non-negative dentry whose lower_inode we should copy
1954 + * @bstart: the branch of the lower_inode to copy from
1955 + * @new_bindex: the branch to create the new file in
1956 + * @name: the name of the file to create
1957 + * @namelen: length of @name
1958 + * @copyup_file: the "struct file" to return (optional)
1959 + * @len: how many bytes to copy-up?
1960 + */
1961 +int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
1962 +                 int new_bindex, const char *name, int namelen,
1963 +                 struct file **copyup_file, loff_t len)
1964 +{
1965 +       struct dentry *new_lower_dentry;
1966 +       struct dentry *old_lower_dentry = NULL;
1967 +       struct super_block *sb;
1968 +       int err = 0;
1969 +       int old_bindex;
1970 +       int old_bstart;
1971 +       int old_bend;
1972 +       struct dentry *new_lower_parent_dentry = NULL;
1973 +       mm_segment_t oldfs;
1974 +       char *symbuf = NULL;
1975 +
1976 +       verify_locked(dentry);
1977 +
1978 +       old_bindex = bstart;
1979 +       old_bstart = dbstart(dentry);
1980 +       old_bend = dbend(dentry);
1981 +
1982 +       BUG_ON(new_bindex < 0);
1983 +       BUG_ON(new_bindex >= old_bindex);
1984 +
1985 +       sb = dir->i_sb;
1986 +
1987 +       err = is_robranch_super(sb, new_bindex);
1988 +       if (err)
1989 +               goto out;
1990 +
1991 +       /* Create the directory structure above this dentry. */
1992 +       new_lower_dentry = create_parents(dir, dentry, name, new_bindex);
1993 +       if (IS_ERR(new_lower_dentry)) {
1994 +               err = PTR_ERR(new_lower_dentry);
1995 +               goto out;
1996 +       }
1997 +
1998 +       old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
1999 +       /* we conditionally dput this old_lower_dentry at end of function */
2000 +       dget(old_lower_dentry);
2001 +
2002 +       /* For symlinks, we must read the link before we lock the directory. */
2003 +       if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) {
2004 +
2005 +               symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
2006 +               if (unlikely(!symbuf)) {
2007 +                       __clear(dentry, old_lower_dentry,
2008 +                               old_bstart, old_bend,
2009 +                               new_lower_dentry, new_bindex);
2010 +                       err = -ENOMEM;
2011 +                       goto out_free;
2012 +               }
2013 +
2014 +               oldfs = get_fs();
2015 +               set_fs(KERNEL_DS);
2016 +               err = old_lower_dentry->d_inode->i_op->readlink(
2017 +                       old_lower_dentry,
2018 +                       (char __user *)symbuf,
2019 +                       PATH_MAX - 1); /* leave room for the NUL below */
2020 +               set_fs(oldfs);
2021 +               if (err < 0) {
2022 +                       __clear(dentry, old_lower_dentry,
2023 +                               old_bstart, old_bend,
2024 +                               new_lower_dentry, new_bindex);
2025 +                       goto out_free;
2026 +               }
2027 +               symbuf[err] = '\0';
2028 +       }
2029 +
2030 +       /* Now we lock the parent, and create the object in the new branch. */
2031 +       new_lower_parent_dentry = lock_parent(new_lower_dentry);
2032 +
2033 +       /* create the new inode */
2034 +       err = __copyup_ndentry(old_lower_dentry, new_lower_dentry,
2035 +                              new_lower_parent_dentry, symbuf);
2036 +
2037 +       if (err) {
2038 +               __clear(dentry, old_lower_dentry,
2039 +                       old_bstart, old_bend,
2040 +                       new_lower_dentry, new_bindex);
2041 +               goto out_unlock;
2042 +       }
2043 +
2044 +       /* We actually copyup the file here. */
2045 +       if (S_ISREG(old_lower_dentry->d_inode->i_mode))
2046 +               err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex,
2047 +                                       old_lower_dentry, old_bindex,
2048 +                                       copyup_file, len);
2049 +       if (err)
2050 +               goto out_unlink;
2051 +
2052 +       /* Set permissions. */
2053 +       err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry);
2054 +       if (err)
2055 +               goto out_unlink;
2056 +
2057 +#ifdef CONFIG_UNION_FS_XATTR
2058 +       /* Selinux uses extended attributes for permissions. */
2059 +       err = copyup_xattrs(old_lower_dentry, new_lower_dentry);
2060 +       if (err)
2061 +               goto out_unlink;
2062 +#endif /* CONFIG_UNION_FS_XATTR */
2063 +
2064 +       /* do not allow files getting deleted to be re-interposed */
2065 +       if (!d_deleted(dentry))
2066 +               unionfs_reinterpose(dentry);
2067 +
2068 +       goto out_unlock;
2069 +
2070 +out_unlink:
2071 +       /*
2072 +        * copyup failed, because we possibly ran out of space or
2073 +        * quota, or something else happened so let's unlink; we don't
2074 +        * really care about the return value of vfs_unlink
2075 +        */
2076 +       vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry);
2077 +
2078 +       if (copyup_file && *copyup_file) {
2079 +               /* close the file, but only if one was handed back to us */
2080 +
2081 +               fput(*copyup_file);
2082 +               branchput(sb, new_bindex);
2083 +       }
2084 +
2085 +       /*
2086 +        * TODO: should we reset the error to something like -EIO?
2087 +        *
2088 +        * If we don't reset, the user may get some nonsensical errors, but
2089 +        * on the other hand, if we reset to EIO, we guarantee that the user
2090 +        * will get a "confusing" error message.
2091 +        */
2092 +
2093 +out_unlock:
2094 +       unlock_dir(new_lower_parent_dentry);
2095 +
2096 +out_free:
2097 +       /*
2098 +        * If old_lower_dentry was not a file, then we need to dput it.  If
2099 +        * it was a file, then it was already dput indirectly by other
2100 +        * functions we call above which operate on regular files.
2101 +        */
2102 +       if (old_lower_dentry && old_lower_dentry->d_inode &&
2103 +           !S_ISREG(old_lower_dentry->d_inode->i_mode))
2104 +               dput(old_lower_dentry);
2105 +       kfree(symbuf);
2106 +
2107 +       if (err)
2108 +               goto out;
2109 +       if (!S_ISDIR(dentry->d_inode->i_mode)) {
2110 +               unionfs_postcopyup_release(dentry);
2111 +               if (!unionfs_lower_inode(dentry->d_inode)) {
2112 +                       /*
2113 +                        * If we got here, then we copied up to an
2114 +                        * unlinked-open file, whose name is .unionfsXXXXX.
2115 +                        */
2116 +                       struct inode *inode = new_lower_dentry->d_inode;
2117 +                       atomic_inc(&inode->i_count);
2118 +                       unionfs_set_lower_inode_idx(dentry->d_inode,
2119 +                                                   ibstart(dentry->d_inode),
2120 +                                                   inode);
2121 +               }
2122 +       }
2123 +       unionfs_postcopyup_setmnt(dentry);
2124 +       /* sync inode times from copied-up inode to our inode */
2125 +       unionfs_copy_attr_times(dentry->d_inode);
2126 +       unionfs_check_inode(dir);
2127 +       unionfs_check_dentry(dentry);
2128 +out:
2129 +       return err;
2130 +}
2131 +
2132 +/*
2133 + * Copy the file behind 'file' from branch 'bstart' up to branch
2134 + * 'new_bindex', giving the copy the name "name".  On success the new
2135 + * lower file becomes the lower file of 'file' at 'new_bindex'.
2136 + */
2137 +int copyup_named_file(struct inode *dir, struct file *file, char *name,
2138 +                     int bstart, int new_bindex, loff_t len)
2139 +{
2140 +       struct file *lower_copy = NULL;
2141 +       int ret;
2142 +
2143 +       ret = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex,
2144 +                           name, strlen(name), &lower_copy, len);
2145 +       if (ret)
2146 +               return ret;
2147 +
2148 +       fbstart(file) = new_bindex;
2149 +       unionfs_set_lower_file_idx(file, new_bindex, lower_copy);
2150 +       return 0;
2151 +}
2152 +
2153 +/*
2154 + * Copy the file behind 'file' from branch 'bstart' up to branch
2155 + * 'new_bindex' under its dentry's name, and install the copy there.
2156 + */
2157 +int copyup_file(struct inode *dir, struct file *file, int bstart,
2158 +               int new_bindex, loff_t len)
2159 +{
2160 +       struct dentry *dentry = file->f_path.dentry;
2161 +       struct file *lower_copy = NULL;
2162 +       int ret;
2163 +
2164 +       ret = copyup_dentry(dir, dentry, bstart, new_bindex,
2165 +                           dentry->d_name.name, dentry->d_name.len,
2166 +                           &lower_copy, len);
2167 +       if (ret)
2168 +               return ret;
2169 +
2170 +       fbstart(file) = new_bindex;
2171 +       unionfs_set_lower_file_idx(file, new_bindex, lower_copy);
2172 +       return 0;
2173 +}
2174 +
2175 +/* dput negative lower dentries except bindex's; reset dbstart/dbend */
2176 +static void __cleanup_dentry(struct dentry *dentry, int bindex,
2177 +                            int old_bstart, int old_bend)
2178 +{
2179 +       int loop_start;
2180 +       int loop_end;
2181 +       int new_bstart = -1;
2182 +       int new_bend = -1;
2183 +       int i;
2184 +
2185 +       loop_start = min(old_bstart, bindex);
2186 +       loop_end = max(old_bend, bindex);
2187 +
2188 +       /*
2189 +        * This loop computes the new bstart and bend by traversing the
2190 +        * branches from left to right.  It also dputs (and mntputs) all
2191 +        * negative lower dentries except the one at bindex.
2192 +        */
2193 +       for (i = loop_start; i <= loop_end; i++) {
2194 +               if (!unionfs_lower_dentry_idx(dentry, i))
2195 +                       continue;
2196 +
2197 +               if (i == bindex) {
2198 +                       new_bend = i;
2199 +                       if (new_bstart < 0)
2200 +                               new_bstart = i;
2201 +                       continue;
2202 +               }
2203 +
2204 +               if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
2205 +                       dput(unionfs_lower_dentry_idx(dentry, i));
2206 +                       unionfs_set_lower_dentry_idx(dentry, i, NULL);
2207 +
2208 +                       unionfs_mntput(dentry, i);
2209 +                       unionfs_set_lower_mnt_idx(dentry, i, NULL);
2210 +               } else {
2211 +                       if (new_bstart < 0)
2212 +                               new_bstart = i;
2213 +                       new_bend = i;
2214 +               }
2215 +       }
2216 +
2217 +       if (new_bstart < 0)
2218 +               new_bstart = bindex;
2219 +       if (new_bend < 0)
2220 +               new_bend = bindex;
2221 +       set_dbstart(dentry, new_bstart);
2222 +       set_dbend(dentry, new_bend);
2223 +
2224 +}
2225 +
2226 +/* record @lower's inode in @upper at @bindex, widening ibstart/ibend */
2227 +static void __set_inode(struct dentry *upper, struct dentry *lower,
2228 +                       int bindex)
2229 +{
2230 +       struct inode *inode = upper->d_inode;
2231 +
2232 +       unionfs_set_lower_inode_idx(inode, bindex, igrab(lower->d_inode));
2233 +       if (likely(bindex < ibstart(inode)))
2234 +               ibstart(inode) = bindex;
2235 +       if (likely(bindex > ibend(inode)))
2236 +               ibend(inode) = bindex;
2237 +}
2238 +
2239 +/* set lower dentry ptr at bindex; widen dbstart/dbend to include bindex */
2240 +static void __set_dentry(struct dentry *upper, struct dentry *lower,
2241 +                        int bindex)
2242 +{
2243 +       unionfs_set_lower_dentry_idx(upper, bindex, lower);
2244 +       if (likely(dbstart(upper) > bindex))
2245 +               set_dbstart(upper, bindex);
2246 +       if (likely(dbend(upper) < bindex))
2247 +               set_dbend(upper, bindex);
2248 +}
2249 +
2250 +/*
2251 + * This function replicates the directory structure up-to given dentry
2252 + * in the bindex branch.  Returns the lower dentry looked up for @name in
2253 + * that branch (also installed in @dentry at bindex), or an ERR_PTR: the
2254 + * is_robranch_super() error, -ENOMEM, or a lookup/mkdir/copyup failure.
2255 + */
2256 +struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
2257 +                             const char *name, int bindex)
2258 +{
2259 +       int err;
2260 +       struct dentry *child_dentry, *parent_dentry;
2261 +       struct dentry *lower_parent_dentry = NULL;
2262 +       struct dentry *lower_dentry = NULL;
2263 +       const char *childname;
2264 +       unsigned int childnamelen;
2265 +       int nr_dentry;
2266 +       int count = 0;
2267 +       int old_bstart, old_bend;
2268 +       struct dentry **path = NULL;
2269 +
2270 +       verify_locked(dentry);
2271 +
2272 +       err = is_robranch_super(dir->i_sb, bindex);
2273 +       if (err) {
2274 +               lower_dentry = ERR_PTR(err);
2275 +               goto out;
2276 +       }
2277 +
2278 +       old_bstart = dbstart(dentry);
2279 +       old_bend = dbend(dentry);
2280 +
2281 +       lower_dentry = ERR_PTR(-ENOMEM);
2282 +
2283 +       /* There is no sense allocating any less than the minimum. */
2284 +       nr_dentry = 1;
2285 +       path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
2286 +       if (unlikely(!path))
2287 +               goto out;
2288 +
2289 +       /* assume the negative dentry of unionfs as the parent dentry */
2290 +       parent_dentry = dentry;
2291 +
2292 +       /*
2293 +        * This loop finds the first parent that exists in the given branch.
2294 +        * We start building the directory structure from there.  At the end
2295 +        * of the loop, the following should hold:
2296 +        *  - child_dentry is the first nonexistent child
2297 +        *  - parent_dentry is the first existent parent
2298 +        *  - path[0] is the deepest child
2299 +        *  - path[count] is the first child to create
2300 +        */
2301 +       do {
2302 +               child_dentry = parent_dentry;
2303 +
2304 +               /* find the parent directory dentry in unionfs */
2305 +               parent_dentry = dget_parent(child_dentry);
2306 +
2307 +               /* find out the lower_parent_dentry in the given branch */
2308 +               lower_parent_dentry =
2309 +                       unionfs_lower_dentry_idx(parent_dentry, bindex);
2310 +
2311 +               /* grow path table */
2312 +               if (count == nr_dentry) {
2313 +                       void *p;
2314 +
2315 +                       nr_dentry *= 2;
2316 +                       p = krealloc(path, nr_dentry * sizeof(struct dentry *),
2317 +                                    GFP_KERNEL);
2318 +                       if (unlikely(!p)) {
2319 +                               /* child not in path[] yet; drop refs here */
2320 +                               dput(parent_dentry);
2321 +                               dput(child_dentry);
2322 +                               count--; /* path[count]: last stored child */
2323 +                               lower_dentry = ERR_PTR(-ENOMEM);
2324 +                               goto out;
2325 +                       }
2326 +                       path = p;
2327 +               }
2328 +
2329 +               /* store the child dentry */
2330 +               path[count++] = child_dentry;
2331 +       } while (!lower_parent_dentry);
2332 +       count--;
2333 +
2334 +       /*
2335 +        * This code goes between the begin/end labels and basically
2336 +        * emulates a while(child_dentry != dentry), only cleaner and
2337 +        * shorter than what would be a much longer while loop.
2338 +        */
2339 +begin:
2340 +       /* get lower parent dir in the current branch */
2341 +       lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex);
2342 +       dput(parent_dentry);
2343 +
2344 +       /* init the values to lookup */
2345 +       childname = child_dentry->d_name.name;
2346 +       childnamelen = child_dentry->d_name.len;
2347 +
2348 +       if (child_dentry != dentry) {
2349 +               /* lookup child in the underlying file system */
2350 +               lower_dentry = lookup_one_len(childname, lower_parent_dentry,
2351 +                                             childnamelen);
2352 +               if (IS_ERR(lower_dentry))
2353 +                       goto out;
2354 +       } else {
2355 +               /*
2356 +                * Is the name a whiteout of the child name ?  lookup the
2357 +                * whiteout child in the underlying file system
2358 +                */
2359 +               lower_dentry = lookup_one_len(name, lower_parent_dentry,
2360 +                                             strlen(name));
2361 +               if (IS_ERR(lower_dentry))
2362 +                       goto out;
2363 +
2364 +               /* Replace the current dentry (if any) with the new one */
2365 +               dput(unionfs_lower_dentry_idx(dentry, bindex));
2366 +               unionfs_set_lower_dentry_idx(dentry, bindex,
2367 +                                            lower_dentry);
2368 +
2369 +               __cleanup_dentry(dentry, bindex, old_bstart, old_bend);
2370 +               goto out;
2371 +       }
2372 +
2373 +       if (lower_dentry->d_inode) {
2374 +               /*
2375 +                * since this already exists we dput to avoid
2376 +                * multiple references on the same dentry
2377 +                */
2378 +               dput(lower_dentry);
2379 +       } else {
2380 +               struct sioq_args args;
2381 +
2382 +               /* it's a negative dentry, create a new dir */
2383 +               lower_parent_dentry = lock_parent(lower_dentry);
2384 +
2385 +               args.mkdir.parent = lower_parent_dentry->d_inode;
2386 +               args.mkdir.dentry = lower_dentry;
2387 +               args.mkdir.mode = child_dentry->d_inode->i_mode;
2388 +
2389 +               run_sioq(__unionfs_mkdir, &args);
2390 +               err = args.err;
2391 +
2392 +               if (!err)
2393 +                       err = copyup_permissions(dir->i_sb, child_dentry,
2394 +                                                lower_dentry);
2395 +               unlock_dir(lower_parent_dentry);
2396 +               if (err) {
2397 +                       dput(lower_dentry);
2398 +                       lower_dentry = ERR_PTR(err);
2399 +                       goto out;
2400 +               }
2401 +       }
2402 +
2403 +       __set_inode(child_dentry, lower_dentry, bindex);
2404 +       __set_dentry(child_dentry, lower_dentry, bindex);
2405 +       /*
2406 +        * update times of this dentry, but also the parent, because if
2407 +        * we changed, the parent may have changed too.
2408 +        */
2409 +       fsstack_copy_attr_times(parent_dentry->d_inode,
2410 +                               lower_parent_dentry->d_inode);
2411 +       unionfs_copy_attr_times(child_dentry->d_inode);
2412 +
2413 +       parent_dentry = child_dentry;
2414 +       child_dentry = path[--count];
2415 +       goto begin;
2416 +out:
2417 +       /* on error, drop the dget_parent() refs still held via path[count..1] */
2418 +       if (IS_ERR(lower_dentry))
2419 +               while (count)
2420 +                       dput(path[count--]);
2421 +       kfree(path);
2422 +       return lower_dentry;
2423 +}
2424 +
2425 +/*
2426 + * Post-copyup helper to ensure valid mnts at dbstart: give dentry and each
2427 + * mnt-less ancestor the lower mnt of the nearest ancestor that has one.
2428 + */
2429 +void unionfs_postcopyup_setmnt(struct dentry *dentry)
2430 +{
2431 +       struct dentry *parent, *hasone;
2432 +       int bindex = dbstart(dentry);
2433 +
2434 +       if (unionfs_lower_mnt_idx(dentry, bindex))
2435 +               return;
2436 +       hasone = dentry->d_parent;
2437 +       /* nearest ancestor with a mnt; loop should stop at the root dentry */
2438 +       while (!unionfs_lower_mnt_idx(hasone, bindex))
2439 +               hasone = hasone->d_parent;
2440 +       parent = dentry;
2441 +       while (!unionfs_lower_mnt_idx(parent, bindex)) {
2442 +               unionfs_set_lower_mnt_idx(parent, bindex,
2443 +                                         unionfs_mntget(hasone, bindex));
2444 +               parent = parent->d_parent;
2445 +       }
2446 +}
2447 +
2448 +/*
2449 + * Post-copyup helper for non-directories: drop the lower mnt, dentry and
2450 + * inode of every branch after dbstart, then set dbend/ibstart/ibend to it.
2451 + */
2452 +void unionfs_postcopyup_release(struct dentry *dentry)
2453 +{
2454 +       int bindex;
2455 +
2456 +       BUG_ON(S_ISDIR(dentry->d_inode->i_mode));
2457 +       for (bindex = dbstart(dentry)+1; bindex <= dbend(dentry); bindex++) {
2458 +               if (unionfs_lower_mnt_idx(dentry, bindex)) {
2459 +                       unionfs_mntput(dentry, bindex);
2460 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
2461 +               }
2462 +               if (unionfs_lower_dentry_idx(dentry, bindex)) {
2463 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
2464 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
2465 +                       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
2466 +                       unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
2467 +                                                   NULL);
2468 +               }
2469 +       }
2470 +       bindex = dbstart(dentry);
2471 +       set_dbend(dentry, bindex);
2472 +       ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bindex;
2473 +}
2474 diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
2475 new file mode 100644
2476 index 0000000..d154c32
2477 --- /dev/null
2478 +++ b/fs/unionfs/debug.c
2479 @@ -0,0 +1,533 @@
2480 +/*
2481 + * Copyright (c) 2003-2007 Erez Zadok
2482 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
2483 + * Copyright (c) 2003-2007 Stony Brook University
2484 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
2485 + *
2486 + * This program is free software; you can redistribute it and/or modify
2487 + * it under the terms of the GNU General Public License version 2 as
2488 + * published by the Free Software Foundation.
2489 + */
2490 +
2491 +#include "union.h"
2492 +
2493 +/*
2494 + * Helper debugging functions for maintainers (and for users to report
2495 + * useful information back to maintainers)
2496 + */
2497 +
2498 +/* print caller's file:function:line once; needs a local 'printed_caller' */
2499 +#define PRINT_CALLER(fname, fxn, line)                                 \
2500 +       do {                                                            \
2501 +               if (!printed_caller) {                                  \
2502 +                       pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \
2503 +                       printed_caller = 1;                             \
2504 +               }                                                       \
2505 +       } while (0)
2506 +
2507 +/*
2508 + * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on
2509 + * the fan-out of various Unionfs objects.  We check that no lower objects
2510 + * exist outside the start/end branch range; that all objects within are
2511 + * non-NULL (with some allowed exceptions); that for every lower file
2512 + * there's a lower dentry+inode; that the start/end ranges match for all
2513 + * corresponding lower objects; that open files/symlinks have only one lower
2514 + * object, but directories can have several; and more.
2515 + */
2516 +void __unionfs_check_inode(const struct inode *inode,
2517 +                          const char *fname, const char *fxn, int line)
2518 +{
2519 +       int bindex;
2520 +       int istart, iend;
2521 +       struct inode *lower_inode;
2522 +       struct super_block *sb;
2523 +       int printed_caller = 0;
2524 +       void *poison_ptr;
2525 +
2526 +       /* fetch the branch range recorded in the inode */
2527 +       BUG_ON(!inode);
2528 +       sb = inode->i_sb;
2529 +       istart = ibstart(inode);
2530 +       iend = ibend(inode);
2531 +       /* don't check inode if no lower branches */
2532 +       if (istart < 0 && iend < 0)
2533 +               return;
2534 +       if (unlikely(istart > iend)) {
2535 +               PRINT_CALLER(fname, fxn, line);
2536 +               pr_debug(" Ci0: inode=%p istart/end=%d:%d\n",
2537 +                        inode, istart, iend);
2538 +       }
2539 +       if (unlikely((istart == -1 && iend != -1) ||
2540 +                    (istart != -1 && iend == -1))) {
2541 +               PRINT_CALLER(fname, fxn, line);
2542 +               pr_debug(" Ci1: inode=%p istart/end=%d:%d\n",
2543 +                        inode, istart, iend);
2544 +       }
2545 +       if (!S_ISDIR(inode->i_mode)) {
2546 +               if (unlikely(iend != istart)) {
2547 +                       PRINT_CALLER(fname, fxn, line);
2548 +                       pr_debug(" Ci2: inode=%p istart=%d iend=%d\n",
2549 +                                inode, istart, iend);
2550 +               }
2551 +       }
2552 +
2553 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2554 +               if (unlikely(!UNIONFS_I(inode))) {
2555 +                       PRINT_CALLER(fname, fxn, line);
2556 +                       pr_debug(" Ci3: no inode_info %p\n", inode);
2557 +                       return;
2558 +               }
2559 +               if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
2560 +                       PRINT_CALLER(fname, fxn, line);
2561 +                       pr_debug(" Ci4: no lower_inodes %p\n", inode);
2562 +                       return;
2563 +               }
2564 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2565 +               if (lower_inode) {
2566 +                       memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2567 +                       if (unlikely(bindex < istart || bindex > iend)) {
2568 +                               PRINT_CALLER(fname, fxn, line);
2569 +                               pr_debug(" Ci5: inode/linode=%p:%p bindex=%d "
2570 +                                        "istart/end=%d:%d\n", inode,
2571 +                                        lower_inode, bindex, istart, iend);
2572 +                       } else if (unlikely(lower_inode == poison_ptr)) {
2573 +                               /* freed inode! */
2574 +                               PRINT_CALLER(fname, fxn, line);
2575 +                               pr_debug(" Ci6: inode/linode=%p:%p bindex=%d "
2576 +                                        "istart/end=%d:%d\n", inode,
2577 +                                        lower_inode, bindex, istart, iend);
2578 +                       }
2579 +                       continue;
2580 +               }
2581 +               /* if we get here, then lower_inode == NULL */
2582 +               if (bindex < istart || bindex > iend)
2583 +                       continue;
2584 +               /*
2585 +                * directories can have NULL lower inodes in b/t start/end,
2586 +                * but NOT if at the start/end range.
2587 +                */
2588 +               if (unlikely(S_ISDIR(inode->i_mode) &&
2589 +                            bindex > istart && bindex < iend))
2590 +                       continue;
2591 +               PRINT_CALLER(fname, fxn, line);
2592 +               pr_debug(" Ci7: inode/linode=%p:%p "
2593 +                        "bindex=%d istart/end=%d:%d\n",
2594 +                        inode, lower_inode, bindex, istart, iend);
2595 +       }
2596 +}
2597 +
2598 +void __unionfs_check_dentry(const struct dentry *dentry,
2599 +                           const char *fname, const char *fxn, int line)
2600 +{
2601 +       int bindex;
2602 +       int dstart, dend, istart, iend;
2603 +       struct dentry *lower_dentry;
2604 +       struct inode *inode, *lower_inode;
2605 +       struct super_block *sb;
2606 +       struct vfsmount *lower_mnt;
2607 +       int printed_caller = 0;
2608 +       void *poison_ptr;
2609 +
2610 +       BUG_ON(!dentry);
2611 +       sb = dentry->d_sb;
2612 +       inode = dentry->d_inode;
2613 +       dstart = dbstart(dentry);
2614 +       dend = dbend(dentry);
2615 +       /* don't check dentry/mnt if no lower branches */
2616 +       if (dstart < 0 && dend < 0)
2617 +               goto check_inode;
2618 +       BUG_ON(dstart > dend);
2619 +
2620 +       if (unlikely((dstart == -1 && dend != -1) ||
2621 +                    (dstart != -1 && dend == -1))) {
2622 +               PRINT_CALLER(fname, fxn, line);
2623 +               pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n",
2624 +                        dentry, dstart, dend);
2625 +       }
2626 +       /*
2627 +        * check for NULL dentries inside the start/end range, or
2628 +        * non-NULL dentries outside the start/end range.
2629 +        */
2630 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2631 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
2632 +               if (lower_dentry) {
2633 +                       if (unlikely(bindex < dstart || bindex > dend)) {
2634 +                               PRINT_CALLER(fname, fxn, line);
2635 +                               pr_debug(" CD1: dentry/lower=%p:%p(%p) "
2636 +                                        "bindex=%d dstart/end=%d:%d\n",
2637 +                                        dentry, lower_dentry,
2638 +                                        (lower_dentry ? lower_dentry->d_inode :
2639 +                                         (void *) -1L),
2640 +                                        bindex, dstart, dend);
2641 +                       }
2642 +               } else {        /* lower_dentry == NULL */
2643 +                       if (bindex < dstart || bindex > dend)
2644 +                               continue;
2645 +                       /*
2646 +                        * Directories can have NULL lower dentries in b/t
2647 +                        * start/end, but NOT if at the start/end range.
2648 +                        * Ignore this rule, however, if this is a NULL
2649 +                        * dentry or a deleted dentry.
2650 +                        */
2651 +                       if (unlikely(!d_deleted((struct dentry *) dentry) &&
2652 +                                    inode &&
2653 +                                    !(inode && S_ISDIR(inode->i_mode) &&
2654 +                                      bindex > dstart && bindex < dend))) {
2655 +                               PRINT_CALLER(fname, fxn, line);
2656 +                               pr_debug(" CD2: dentry/lower=%p:%p(%p) "
2657 +                                        "bindex=%d dstart/end=%d:%d\n",
2658 +                                        dentry, lower_dentry,
2659 +                                        (lower_dentry ?
2660 +                                         lower_dentry->d_inode :
2661 +                                         (void *) -1L),
2662 +                                        bindex, dstart, dend);
2663 +                       }
2664 +               }
2665 +       }
2666 +
2667 +       /* check for vfsmounts same as for dentries */
2668 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2669 +               lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2670 +               if (lower_mnt) {
2671 +                       if (unlikely(bindex < dstart || bindex > dend)) {
2672 +                               PRINT_CALLER(fname, fxn, line);
2673 +                               pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d "
2674 +                                        "dstart/end=%d:%d\n", dentry,
2675 +                                        lower_mnt, bindex, dstart, dend);
2676 +                       }
2677 +               } else {        /* lower_mnt == NULL */
2678 +                       if (bindex < dstart || bindex > dend)
2679 +                               continue;
2680 +                       /*
2681 +                        * Directories can have NULL lower mnts in b/t
2682 +                        * start/end, but NOT if at the start/end range.
2683 +                        * Ignore this rule, however, if this is a NULL
2684 +                        * dentry.
2685 +                        */
2686 +                       if (unlikely(inode &&
2687 +                                    !(inode && S_ISDIR(inode->i_mode) &&
2688 +                                      bindex > dstart && bindex < dend))) {
2689 +                               PRINT_CALLER(fname, fxn, line);
2690 +                               pr_debug(" CM1: dentry/lmnt=%p:%p "
2691 +                                        "bindex=%d dstart/end=%d:%d\n",
2692 +                                        dentry, lower_mnt, bindex,
2693 +                                        dstart, dend);
2694 +                       }
2695 +               }
2696 +       }
2697 +
2698 +check_inode:
2699 +       /* for inodes now */
2700 +       if (!inode)
2701 +               return;
2702 +       istart = ibstart(inode);
2703 +       iend = ibend(inode);
2704 +       /* don't check inode if no lower branches */
2705 +       if (istart < 0 && iend < 0)
2706 +               return;
2707 +       BUG_ON(istart > iend);
2708 +       if (unlikely((istart == -1 && iend != -1) ||
2709 +                    (istart != -1 && iend == -1))) {
2710 +               PRINT_CALLER(fname, fxn, line);
2711 +               pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n",
2712 +                        dentry, inode, istart, iend);
2713 +       }
2714 +       if (unlikely(istart != dstart)) {
2715 +               PRINT_CALLER(fname, fxn, line);
2716 +               pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n",
2717 +                        dentry, inode, istart, dstart);
2718 +       }
2719 +       if (unlikely(iend != dend)) {
2720 +               PRINT_CALLER(fname, fxn, line);
2721 +               pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n",
2722 +                        dentry, inode, iend, dend);
2723 +       }
2724 +
2725 +       if (!S_ISDIR(inode->i_mode)) {
2726 +               if (unlikely(dend != dstart)) {
2727 +                       PRINT_CALLER(fname, fxn, line);
2728 +                       pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n",
2729 +                                dentry, inode, dstart, dend);
2730 +               }
2731 +               if (unlikely(iend != istart)) {
2732 +                       PRINT_CALLER(fname, fxn, line);
2733 +                       pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n",
2734 +                                dentry, inode, istart, iend);
2735 +               }
2736 +       }
2737 +
2738 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2739 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2740 +               if (lower_inode) {
2741 +                       memset(&poison_ptr, POISON_INUSE, sizeof(void *));
2742 +                       if (unlikely(bindex < istart || bindex > iend)) {
2743 +                               PRINT_CALLER(fname, fxn, line);
2744 +                               pr_debug(" CI5: dentry/linode=%p:%p bindex=%d "
2745 +                                        "istart/end=%d:%d\n", dentry,
2746 +                                        lower_inode, bindex, istart, iend);
2747 +                       } else if (unlikely(lower_inode == poison_ptr)) {
2748 +                               /* freed inode! */
2749 +                               PRINT_CALLER(fname, fxn, line);
2750 +                               pr_debug(" CI6: dentry/linode=%p:%p bindex=%d "
2751 +                                        "istart/end=%d:%d\n", dentry,
2752 +                                        lower_inode, bindex, istart, iend);
2753 +                       }
2754 +                       continue;
2755 +               }
2756 +               /* if we get here, then lower_inode == NULL */
2757 +               if (bindex < istart || bindex > iend)
2758 +                       continue;
2759 +               /*
2760 +                * directories can have NULL lower inodes in b/t start/end,
2761 +                * but NOT if at the start/end range.
2762 +                */
2763 +               if (unlikely(S_ISDIR(inode->i_mode) &&
2764 +                            bindex > istart && bindex < iend))
2765 +                       continue;
2766 +               PRINT_CALLER(fname, fxn, line);
2767 +               pr_debug(" CI7: dentry/linode=%p:%p "
2768 +                        "bindex=%d istart/end=%d:%d\n",
2769 +                        dentry, lower_inode, bindex, istart, iend);
2770 +       }
2771 +
2772 +       /*
2773 +        * If it's a directory, then intermediate objects b/t start/end can
2774 +        * be NULL.  But check that lower dentry, mnt, and inode are either
2775 +        * all NULL or all non-NULL.
2776 +        */
2777 +       if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
2778 +               for (bindex = dstart+1; bindex < dend; bindex++) {
2779 +                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
2780 +                       lower_dentry = unionfs_lower_dentry_idx(dentry,
2781 +                                                               bindex);
2782 +                       lower_mnt = unionfs_lower_mnt_idx(dentry, bindex);
2783 +                       if (unlikely(!((lower_inode && lower_dentry &&
2784 +                                       lower_mnt) ||
2785 +                                      (!lower_inode &&
2786 +                                       !lower_dentry && !lower_mnt)))) {
2787 +                               PRINT_CALLER(fname, fxn, line);
2788 +                               pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p "
2789 +                                        "bindex=%d dstart/end=%d:%d\n",
2790 +                                        lower_mnt, lower_dentry, lower_inode,
2791 +                                        bindex, dstart, dend);
2792 +                       }
2793 +               }
2794 +       /* check if lower inode is newer than upper one (it shouldn't be) */
2795 +       if (unlikely(is_newer_lower(dentry))) {
2796 +               PRINT_CALLER(fname, fxn, line);
2797 +               for (bindex = ibstart(inode); bindex <= ibend(inode);
2798 +                    bindex++) {
2799 +                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
2800 +                       if (unlikely(!lower_inode))
2801 +                               continue;
2802 +                       pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu "
2803 +                                "ctime/lctime=%lu.%lu/%lu.%lu\n",
2804 +                                bindex,
2805 +                                inode->i_mtime.tv_sec,
2806 +                                inode->i_mtime.tv_nsec,
2807 +                                lower_inode->i_mtime.tv_sec,
2808 +                                lower_inode->i_mtime.tv_nsec,
2809 +                                inode->i_ctime.tv_sec,
2810 +                                inode->i_ctime.tv_nsec,
2811 +                                lower_inode->i_ctime.tv_sec,
2812 +                                lower_inode->i_ctime.tv_nsec);
2813 +               }
2814 +       }
2815 +}
2816 +
2817 +void __unionfs_check_file(const struct file *file,
2818 +                         const char *fname, const char *fxn, int line)
2819 +{
2820 +       int bindex;
2821 +       int dstart, dend, fstart, fend;
2822 +       struct dentry *dentry;
2823 +       struct file *lower_file;
2824 +       struct inode *inode;
2825 +       struct super_block *sb;
2826 +       int printed_caller = 0;
2827 +
2828 +       BUG_ON(!file);
2829 +       dentry = file->f_path.dentry;
2830 +       sb = dentry->d_sb;
2831 +       dstart = dbstart(dentry);
2832 +       dend = dbend(dentry);
2833 +       BUG_ON(dstart > dend);
2834 +       fstart = fbstart(file);
2835 +       fend = fbend(file);
2836 +       BUG_ON(fstart > fend);
2837 +
2838 +       if (unlikely((fstart == -1 && fend != -1) ||
2839 +                    (fstart != -1 && fend == -1))) {
2840 +               PRINT_CALLER(fname, fxn, line);
2841 +               pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n",
2842 +                        file, dentry, fstart, fend);
2843 +       }
2844 +       if (unlikely(fstart != dstart)) {
2845 +               PRINT_CALLER(fname, fxn, line);
2846 +               pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n",
2847 +                        file, dentry, fstart, dstart);
2848 +       }
2849 +       if (unlikely(fend != dend)) {
2850 +               PRINT_CALLER(fname, fxn, line);
2851 +               pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n",
2852 +                        file, dentry, fend, dend);
2853 +       }
2854 +       inode = dentry->d_inode;
2855 +       if (!S_ISDIR(inode->i_mode)) {
2856 +               if (unlikely(fend != fstart)) {
2857 +                       PRINT_CALLER(fname, fxn, line);
2858 +                       pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n",
2859 +                                file, inode, fstart, fend);
2860 +               }
2861 +               if (unlikely(dend != dstart)) {
2862 +                       PRINT_CALLER(fname, fxn, line);
2863 +                       pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n",
2864 +                                file, dentry, dstart, dend);
2865 +               }
2866 +       }
2867 +
2868 +       /*
2869 +        * check for NULL lower files inside the start/end range, or
2870 +        * non-NULL lower files outside the start/end range.
2871 +        */
2872 +       for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) {
2873 +               lower_file = unionfs_lower_file_idx(file, bindex);
2874 +               if (lower_file) {
2875 +                       if (unlikely(bindex < fstart || bindex > fend)) {
2876 +                               PRINT_CALLER(fname, fxn, line);
2877 +                               pr_debug(" CF5: file/lower=%p:%p bindex=%d "
2878 +                                        "fstart/end=%d:%d\n", file,
2879 +                                        lower_file, bindex, fstart, fend);
2880 +                       }
2881 +               } else {        /* lower_file == NULL */
2882 +                       if (bindex >= fstart && bindex <= fend) {
2883 +                               /*
2884 +                                * directories can have NULL lower files in
2885 +                                * b/t start/end, but NOT if at the
2886 +                                * start/end range.
2887 +                                */
2888 +                               if (unlikely(!(S_ISDIR(inode->i_mode) &&
2889 +                                              bindex > fstart &&
2890 +                                              bindex < fend))) {
2891 +                                       PRINT_CALLER(fname, fxn, line);
2892 +                                       pr_debug(" CF6: file/lower=%p:%p "
2893 +                                                "bindex=%d fstart/end=%d:%d\n",
2894 +                                                file, lower_file, bindex,
2895 +                                                fstart, fend);
2896 +                               }
2897 +                       }
2898 +               }
2899 +       }
2900 +
2901 +       __unionfs_check_dentry(dentry, fname, fxn, line);
2902 +}
2903 +/* BUG() if nd's open-intent file has a fs type name other than UNIONFS_NAME */
2904 +void __unionfs_check_nd(const struct nameidata *nd,
2905 +                       const char *fname, const char *fxn, int line)
2906 +{
2907 +       struct file *file;
2908 +       int printed_caller = 0; /* presumably read by PRINT_CALLER - verify */
2909 +
2910 +       if (unlikely(!nd))
2911 +               return;
2912 +       if (nd->flags & LOOKUP_OPEN) {
2913 +               file = nd->intent.open.file;
2914 +               if (unlikely(file->f_path.dentry &&
2915 +                            strcmp(file->f_path.dentry->d_sb->s_type->name,
2916 +                                   UNIONFS_NAME))) {
2917 +                       PRINT_CALLER(fname, fxn, line);
2918 +                       pr_debug(" CND1: lower_file of type %s\n",
2919 +                                file->f_path.dentry->d_sb->s_type->name);
2920 +                       BUG();
2921 +               }
2922 +       }
2923 +}
2924 +
2925 +/* dump per-branch vfsmount refcounts: tracks mnt leaks (EBUSY on unmount) */
2926 +void __show_branch_counts(const struct super_block *sb,
2927 +                         const char *file, const char *fxn, int line)
2928 +{
2929 +       int i;
2930 +       struct vfsmount *mnt;
2931 +
2932 +       pr_debug("BC:");
2933 +       for (i = 0; i < sbmax(sb); i++) {
2934 +               if (likely(sb->s_root))
2935 +                       mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
2936 +               else
2937 +                       mnt = NULL;     /* no root dentry: shown as -99 */
2938 +               printk(KERN_CONT "%d:",
2939 +                      (mnt ? atomic_read(&mnt->mnt_count) : -99));
2940 +       }
2941 +       printk(KERN_CONT "%s:%s:%d\n", file, fxn, line); /* call site */
2942 +}
2943 +/* log upper (um/uc) and lower (lm/lc) mtime/ctime for every lower inode */
2944 +void __show_inode_times(const struct inode *inode,
2945 +                       const char *file, const char *fxn, int line)
2946 +{
2947 +       struct inode *lower_inode;
2948 +       int bindex;
2949 +
2950 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
2951 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2952 +               if (unlikely(!lower_inode))
2953 +                       continue;       /* no lower inode at this index */
2954 +               pr_debug("IT(%lu:%d): %s:%s:%d "
2955 +                        "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
2956 +                        inode->i_ino, bindex,
2957 +                        file, fxn, line,
2958 +                        inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2959 +                        lower_inode->i_mtime.tv_sec,
2960 +                        lower_inode->i_mtime.tv_nsec,
2961 +                        inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2962 +                        lower_inode->i_ctime.tv_sec,
2963 +                        lower_inode->i_ctime.tv_nsec);
2964 +       }
2965 +}
2966 +/* like __show_inode_times(), but takes a dentry and also logs its name */
2967 +void __show_dinode_times(const struct dentry *dentry,
2968 +                       const char *file, const char *fxn, int line)
2969 +{
2970 +       struct inode *inode = dentry->d_inode; /* not NULL-checked here */
2971 +       struct inode *lower_inode;
2972 +       int bindex;
2973 +
2974 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
2975 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
2976 +               if (!lower_inode)
2977 +                       continue;       /* no lower inode at this index */
2978 +               pr_debug("DT(%s:%lu:%d): %s:%s:%d "
2979 +                        "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n",
2980 +                        dentry->d_name.name, inode->i_ino, bindex,
2981 +                        file, fxn, line,
2982 +                        inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
2983 +                        lower_inode->i_mtime.tv_sec,
2984 +                        lower_inode->i_mtime.tv_nsec,
2985 +                        inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
2986 +                        lower_inode->i_ctime.tv_sec,
2987 +                        lower_inode->i_ctime.tv_nsec);
2988 +       }
2989 +}
2990 +/* log i_count of the unionfs inode and (lc=) of each of its lower inodes */
2991 +void __show_inode_counts(const struct inode *inode,
2992 +                       const char *file, const char *fxn, int line)
2993 +{
2994 +       struct inode *lower_inode;
2995 +       int bindex;
2996 +
2997 +       if (unlikely(!inode)) {
2998 +               pr_debug("SiC: Null inode\n");
2999 +               return;
3000 +       }
3001 +       for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb);
3002 +            bindex++) {        /* walks the superblock's branch range */
3003 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
3004 +               if (unlikely(!lower_inode))
3005 +                       continue;       /* no lower inode at this index */
3006 +               pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n",
3007 +                        inode->i_ino, bindex,
3008 +                        atomic_read(&(inode)->i_count),
3009 +                        atomic_read(&(lower_inode)->i_count),
3010 +                        file, fxn, line);
3011 +       }
3012 +}
3013 diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
3014 new file mode 100644
3015 index 0000000..f8f65e1
3016 --- /dev/null
3017 +++ b/fs/unionfs/dentry.c
3018 @@ -0,0 +1,578 @@
3019 +/*
3020 + * Copyright (c) 2003-2007 Erez Zadok
3021 + * Copyright (c) 2003-2006 Charles P. Wright
3022 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3023 + * Copyright (c) 2005-2006 Junjiro Okajima
3024 + * Copyright (c) 2005      Arun M. Krishnakumar
3025 + * Copyright (c) 2004-2006 David P. Quigley
3026 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3027 + * Copyright (c) 2003      Puja Gupta
3028 + * Copyright (c) 2003      Harikesavan Krishnan
3029 + * Copyright (c) 2003-2007 Stony Brook University
3030 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3031 + *
3032 + * This program is free software; you can redistribute it and/or modify
3033 + * it under the terms of the GNU General Public License version 2 as
3034 + * published by the Free Software Foundation.
3035 + */
3036 +
3037 +#include "union.h"
3038 +
3039 +/* dput and forget every lower dentry of dentry in branches start..end */
3040 +static inline void __dput_lowers(struct dentry *dentry, int start, int end)
3041 +{
3042 +       struct dentry *lower;
3043 +       int idx;
3044 +
3045 +       /* a negative start means there is nothing to release */
3046 +       for (idx = start; start >= 0 && idx <= end; idx++) {
3047 +               lower = unionfs_lower_dentry_idx(dentry, idx);
3048 +               if (lower) {
3049 +                       /* clear our slot first, then drop the reference */
3050 +                       unionfs_set_lower_dentry_idx(dentry, idx, NULL);
3051 +                       dput(lower);
3052 +               }
3053 +       }
3054 +}
3055 +/* iput and forget every lower inode of inode in branches start..end */
3056 +static inline void __iput_lowers(struct inode *inode, int start, int end)
3057 +{
3058 +       struct inode *lower;
3059 +       int idx;
3060 +
3061 +       /* a negative start means there is nothing to release */
3062 +       for (idx = start; start >= 0 && idx <= end; idx++) {
3063 +               lower = unionfs_lower_inode_idx(inode, idx);
3064 +               if (lower) {
3065 +                       /* clear our slot first, then drop the reference */
3066 +                       unionfs_set_lower_inode_idx(inode, idx, NULL);
3067 +                       iput(lower);
3068 +               }
3069 +       }
3070 +}
3071 +
3072 +/*
3073 + * Revalidate a single dentry against the superblock generation and its
3074 + * lower dentries.  Assume that the info nodes of the dentry and of its
3075 + * parent are locked (both are verified below), and that the parent(s)
3076 + * are all valid already, though the child may not yet be valid.
3077 + * Returns true if valid, false otherwise.
3078 + */
3079 +static bool __unionfs_d_revalidate_one(struct dentry *dentry,
3080 +                                      struct nameidata *nd)
3081 +{
3082 +       bool valid = true;      /* default is valid */
3083 +       struct dentry *lower_dentry;
3084 +       int bindex, bstart, bend;
3085 +       int sbgen, dgen;
3086 +       int positive = 0;       /* set when dentry has an inode */
3087 +       int interpose_flag;
3088 +       struct nameidata lowernd; /* TODO: be gentler to the stack */
3089 +
3090 +       if (nd)
3091 +               memcpy(&lowernd, nd, sizeof(struct nameidata));
3092 +       else
3093 +               memset(&lowernd, 0, sizeof(struct nameidata));
3094 +
3095 +       verify_locked(dentry);
3096 +       verify_locked(dentry->d_parent);
3097 +
3098 +       /* if the dentry is unhashed, do NOT revalidate (report it as valid) */
3099 +       if (d_deleted(dentry))
3100 +               goto out;
3101 +
3102 +       BUG_ON(dbstart(dentry) == -1);
3103 +       if (dentry->d_inode)
3104 +               positive = 1;
3105 +       dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3106 +       sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3107 +       /*
3108 +        * A generation mismatch means this dentry is stale with respect to
3109 +        * the superblock: drop all lower dentries (and lower inodes, if
3110 +        * positive) and redo the lookup to repopulate them.
3111 +        */
3112 +       if (unlikely(sbgen != dgen)) {
3113 +               struct dentry *result;
3114 +               int pdgen;
3115 +
3116 +               /* The root entry should always be valid */
3117 +               BUG_ON(IS_ROOT(dentry));
3118 +
3119 +               /* We can't work correctly if our parent isn't valid. */
3120 +               pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
3121 +               BUG_ON(pdgen != sbgen); /* should never happen here */
3122 +
3123 +               /* Free the pointers for our inodes and this dentry. */
3124 +               bstart = dbstart(dentry);
3125 +               bend = dbend(dentry);
3126 +               __dput_lowers(dentry, bstart, bend);
3127 +               set_dbstart(dentry, -1);
3128 +               set_dbend(dentry, -1);
3129 +
3130 +               interpose_flag = INTERPOSE_REVAL_NEG;
3131 +               if (positive) {
3132 +                       interpose_flag = INTERPOSE_REVAL;
3133 +
3134 +                       bstart = ibstart(dentry->d_inode);
3135 +                       bend = ibend(dentry->d_inode);
3136 +                       __iput_lowers(dentry->d_inode, bstart, bend);
3137 +                       kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
3138 +                       UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
3139 +                       ibstart(dentry->d_inode) = -1;
3140 +                       ibend(dentry->d_inode) = -1;
3141 +               }
3142 +
3143 +               result = unionfs_lookup_backend(dentry, &lowernd,
3144 +                                               interpose_flag);
3145 +               if (result) {
3146 +                       if (IS_ERR(result)) {
3147 +                               valid = false;
3148 +                               goto out;
3149 +                       }
3150 +                       /*
3151 +                        * current unionfs_lookup_backend() doesn't return
3152 +                        * a valid dentry
3153 +                        */
3154 +                       dput(dentry);
3155 +                       dentry = result;
3156 +               }
3157 +
3158 +               if (unlikely(positive && UNIONFS_I(dentry->d_inode)->stale)) {
3159 +                       make_bad_inode(dentry->d_inode);
3160 +                       d_drop(dentry);
3161 +                       valid = false;
3162 +                       goto out;
3163 +               }
3164 +               goto out;       /* re-lookup done: skip the pass below */
3165 +       }
3166 +
3167 +       /* The revalidation must occur across all branches */
3168 +       bstart = dbstart(dentry);
3169 +       bend = dbend(dentry);
3170 +       BUG_ON(bstart == -1);
3171 +       for (bindex = bstart; bindex <= bend; bindex++) {
3172 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3173 +               if (!lower_dentry || !lower_dentry->d_op
3174 +                   || !lower_dentry->d_op->d_revalidate)
3175 +                       continue;
3176 +               /*
3177 +                * Don't pass nameidata to lower file system, because we
3178 +                * don't want an arbitrary lower file being opened or
3179 +                * returned to us: it may be useless to us because of the
3180 +                * fanout nature of unionfs (cf. file/directory open-file
3181 +                * invariants).  We will open lower files as and when needed
3182 +                * later on.
3183 +                */
3184 +               if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL))
3185 +                       valid = false;
3186 +       }
3187 +
3188 +       if (!dentry->d_inode ||
3189 +           ibstart(dentry->d_inode) < 0 ||
3190 +           ibend(dentry->d_inode) < 0) {
3191 +               valid = false;
3192 +               goto out;
3193 +       }
3194 +
3195 +       if (valid) {
3196 +               /*
3197 +                * If we get here, and we copy the meta-data from the lower
3198 +                * inode to our inode, then it is vital that we have already
3199 +                * purged all unionfs-level file data.  We do that in the
3200 +                * caller (__unionfs_d_revalidate_chain) by calling
3201 +                * purge_inode_data.
3202 +                */
3203 +               unionfs_copy_attr_all(dentry->d_inode,
3204 +                                     unionfs_lower_inode(dentry->d_inode));
3205 +               fsstack_copy_inode_size(dentry->d_inode,
3206 +                                       unionfs_lower_inode(dentry->d_inode));
3207 +       }
3208 +
3209 +out:
3210 +       return valid;
3211 +}
3212 +
3213 +/*
3214 + * Determine if the lower inode objects have changed from below the unionfs
3215 + * inode.  Return true if changed, false otherwise.
3216 + *
3217 + * We check if the mtime or ctime have changed.  However, the inode times
3218 + * can be changed by anyone without much protection, including
3219 + * asynchronously.  This can sometimes cause unionfs to find that the lower
3220 + * file system doesn't change its inode times quickly enough, resulting in a
3221 + * false positive indication (which is harmless, it just makes unionfs do
3222 + * extra work in re-validating the objects).  We still report such small
3223 + * time changes as changes (return true), but we only print a message when
3224 + * the lower time is ahead by more than UNIONFS_MIN_CC_TIME seconds (which
3225 + * defaults to 3 seconds, as with NFS's acregmin), because significant
3226 + * changes are more likely due to users manually touching lower files.
3227 + */
3228 +bool is_newer_lower(const struct dentry *dentry)
3229 +{
3230 +       int bindex;
3231 +       struct inode *inode;
3232 +       struct inode *lower_inode;
3233 +
3234 +       /* ignore if we're called on semi-initialized dentries/inodes */
3235 +       if (!dentry || !UNIONFS_D(dentry))
3236 +               return false;
3237 +       inode = dentry->d_inode;
3238 +       if (!inode || !UNIONFS_I(inode)->lower_inodes ||
3239 +           ibstart(inode) < 0 || ibend(inode) < 0)
3240 +               return false;
3241 +
3242 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
3243 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
3244 +               if (!lower_inode)
3245 +                       continue;
3246 +
3247 +               /* check if mtime/ctime have changed (lower newer than ours) */
3248 +               if (unlikely(timespec_compare(&inode->i_mtime,
3249 +                                             &lower_inode->i_mtime) < 0)) {
3250 +                       if ((lower_inode->i_mtime.tv_sec -
3251 +                            inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3252 +                               pr_info("unionfs: new lower inode mtime "
3253 +                                       "(bindex=%d, name=%s)\n", bindex,
3254 +                                       dentry->d_name.name);
3255 +                               show_dinode_times(dentry);
3256 +                       }
3257 +                       return true;
3258 +               }
3259 +               if (unlikely(timespec_compare(&inode->i_ctime,
3260 +                                             &lower_inode->i_ctime) < 0)) {
3261 +                       if ((lower_inode->i_ctime.tv_sec -
3262 +                            inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) {
3263 +                               pr_info("unionfs: new lower inode ctime "
3264 +                                       "(bindex=%d, name=%s)\n", bindex,
3265 +                                       dentry->d_name.name);
3266 +                               show_dinode_times(dentry);
3267 +                       }
3268 +                       return true;
3269 +               }
3270 +       }
3271 +       return false;           /* default: lower is not newer */
3272 +}
3274 +
3275 +/*
3276 + * Purge and invalidate as many data pages of a unionfs inode as possible.
3277 + * This is called when the lower inode has changed, and we want to force
3278 + * processes to re-get the new data.
3279 + */
3280 +static inline void purge_inode_data(struct inode *inode)
3281 +{
3282 +       /* remove all non-private mappings */
3283 +       unmap_mapping_range(inode->i_mapping, 0, 0, 0);
3284 +       /* invalidate as many pages as possible */
3285 +       invalidate_mapping_pages(inode->i_mapping, 0, -1);
3286 +       /*
3287 +        * Don't try to truncate_inode_pages here, because this could lead
3288 +        * to a deadlock between some of the address_space ops and dentry
3289 +        * revalidation: the address space op is invoked with a lock on our
3290 +        * own page, and truncate_inode_pages will block on locked pages.
3291 +        */
3292 +}
3293 +/* purge the cached data pages of every inode on sb that isn't being freed */
3294 +void purge_sb_data(struct super_block *sb)
3295 +{
3296 +       struct inode *cur;
3297 +
3298 +       list_for_each_entry(cur, &sb->s_inodes, i_sb_list) {
3299 +               /* leave alone inodes that are being torn down */
3300 +               if (!(cur->i_state & (I_FREEING|I_WILL_FREE)))
3301 +                       purge_inode_data(cur);
3302 +       }
3303 +}
3304 +
3305 +/*
3306 + * Revalidate a single file/symlink/special dentry.  Assume that info nodes
3307 + * of the dentry and its parent are locked.  Assume that parent(s) are all
3308 + * valid already, but the child may not yet be valid.  Returns true if
3309 + * valid, false otherwise.
3310 + */
3311 +bool __unionfs_d_revalidate_one_locked(struct dentry *dentry,
3312 +                                      struct nameidata *nd,
3313 +                                      bool willwrite)
3314 +{
3315 +       bool valid = false;     /* default is invalid */
3316 +       int sbgen, dgen, bindex;
3317 +
3318 +       verify_locked(dentry);
3319 +       verify_locked(dentry->d_parent);
3320 +
3321 +       sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3322 +       dgen = atomic_read(&UNIONFS_D(dentry)->generation);
3323 +
3324 +       if (unlikely(is_newer_lower(dentry))) {
3325 +               /* root: its generation must stay valid, so only sync times */
3326 +               if (IS_ROOT(dentry)) {
3327 +                       unionfs_copy_attr_times(dentry->d_inode);
3328 +               } else {
3329 +                       /*
3330 +                        * reset generation number to zero, guaranteed to be
3331 +                        * "old"
3332 +                        */
3333 +                       dgen = 0;
3334 +                       atomic_set(&UNIONFS_D(dentry)->generation, dgen);
3335 +               }
3336 +               if (!willwrite)
3337 +                       purge_inode_data(dentry->d_inode);
3338 +       }
3339 +       valid = __unionfs_d_revalidate_one(dentry, nd);
3340 +
3341 +       /*
3342 +        * If __unionfs_d_revalidate_one() succeeded above, then it will
3343 +        * have incremented the refcnt of the mnt's, but also the branch
3344 +        * indices of the dentry will have been updated (to take into
3345 +        * account any branch insertions/deletions).  So the current
3346 +        * dbstart/dbend match the current, and new, indices of the mnts
3347 +        * which __unionfs_d_revalidate_one has incremented.  Note: the "if"
3348 +        * test below does not depend on whether chain_len was 0 or greater.
3349 +        */
3350 +       if (!valid || sbgen == dgen)
3351 +               goto out;
3352 +       for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++)
3353 +               unionfs_mntput(dentry, bindex);
3354 +out:
3355 +       return valid;
3356 +}
3357 +
3358 +/*
3359 + * Revalidate a parent chain of dentries, then the actual node.
3360 + * Assumes that dentry is locked, but will lock all parents if/when needed.
3361 + *
3362 + * If 'willwrite' is true, and the lower inode times are not in sync, then
3363 + * *don't* purge_inode_data, as it could deadlock if ->write calls us and we
3364 + * try to truncate a locked page.  Besides, if unionfs is about to write
3365 + * data to a file, then the data unionfs is about to write is more
3366 + * authoritative than what's below, therefore we can safely overwrite the
3367 + * lower inode times and data.
3368 + */
3369 +bool __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd,
3370 +                                 bool willwrite)
3371 +{
3372 +       bool valid = false;     /* default is invalid */
3373 +       struct dentry **chain = NULL; /* chain of dentries to reval */
3374 +       int chain_len = 0;
3375 +       struct dentry *dtmp;
3376 +       int sbgen, dgen, i;
3377 +       int saved_bstart, saved_bend, bindex;
3378 +
3379 +       /* find length of chain needed to revalidate */
3380 +       /* XXX: should I grab some global (dcache?) lock? */
3381 +       chain_len = 0;
3382 +       sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3383 +       dtmp = dentry->d_parent;
3384 +       verify_locked(dentry);
3385 +       if (dentry != dtmp)
3386 +               unionfs_lock_dentry(dtmp, UNIONFS_DMUTEX_REVAL_PARENT);
3387 +       dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3388 +       /* XXX: should we check if is_newer_lower all the way up? */
3389 +       if (unlikely(is_newer_lower(dtmp))) {
3390 +               /*
3391 +                * Special case: the root dentry's generation number must
3392 +                * always be valid, but its lower inode times don't have to
3393 +                * be, so sync up the times only.
3394 +                */
3395 +               if (IS_ROOT(dtmp)) {
3396 +                       unionfs_copy_attr_times(dtmp->d_inode);
3397 +               } else {
3398 +                       /*
3399 +                        * reset generation number to zero, guaranteed to be
3400 +                        * "old"
3401 +                        */
3402 +                       dgen = 0;
3403 +                       atomic_set(&UNIONFS_D(dtmp)->generation, dgen);
3404 +               }
3405 +               purge_inode_data(dtmp->d_inode);
3406 +       }
3407 +       if (dentry != dtmp)
3408 +               unionfs_unlock_dentry(dtmp);
3409 +       while (sbgen != dgen) { /* count ancestors with a stale generation */
3410 +               /* The root entry should always be valid */
3411 +               BUG_ON(IS_ROOT(dtmp));
3412 +               chain_len++;
3413 +               dtmp = dtmp->d_parent;
3414 +               dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
3415 +       }
3416 +       if (chain_len == 0)
3417 +               goto out_this;  /* shortcut if parents are OK */
3418 +
3419 +       /*
3420 +        * Allocate array of dentries to reval.  We could use linked lists,
3421 +        * but the number of entries we need to alloc here is often small,
3422 +        * and short lived, so locality will be better.
3423 +        */
3424 +       chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
3425 +       if (unlikely(!chain)) {
3426 +               printk(KERN_CRIT "unionfs: no more memory in %s\n",
3427 +                      __FUNCTION__);
3428 +               goto out;       /* nothing grabbed yet; returns false */
3429 +       }
3430 +
3431 +       /* grab all dentries in chain, in child to parent order */
3432 +       dtmp = dentry;
3433 +       for (i = chain_len-1; i >= 0; i--)
3434 +               dtmp = chain[i] = dget_parent(dtmp);
3435 +
3436 +       /*
3437 +        * call __unionfs_d_revalidate_one() on each dentry, but in parent
3438 +        * to child order.
3439 +        */
3440 +       for (i = 0; i < chain_len; i++) {
3441 +               unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
3442 +               if (chain[i] != chain[i]->d_parent)
3443 +                       unionfs_lock_dentry(chain[i]->d_parent,
3444 +                                           UNIONFS_DMUTEX_REVAL_PARENT);
3445 +               saved_bstart = dbstart(chain[i]);
3446 +               saved_bend = dbend(chain[i]);
3447 +               sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
3448 +               dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
3449 +
3450 +               valid = __unionfs_d_revalidate_one(chain[i], nd);
3451 +               /* XXX: is this the correct mntput condition?! */
3452 +               if (valid && chain_len > 0 &&
3453 +                   sbgen != dgen && chain[i]->d_inode &&
3454 +                   S_ISDIR(chain[i]->d_inode->i_mode)) {
3455 +                       for (bindex = saved_bstart; bindex <= saved_bend;
3456 +                            bindex++)
3457 +                               unionfs_mntput(chain[i], bindex);
3458 +               }
3459 +               if (chain[i] != chain[i]->d_parent)
3460 +                       unionfs_unlock_dentry(chain[i]->d_parent);
3461 +               unionfs_unlock_dentry(chain[i]);
3462 +
3463 +               if (unlikely(!valid))
3464 +                       goto out_free;  /* an invalid ancestor ends the walk */
3465 +       }
3466 +
3467 +
3468 +out_this:
3469 +       /* finally, lock this dentry's parent and revalidate the dentry */
3470 +       verify_locked(dentry);  /* verify child is locked */
3471 +       if (dentry != dentry->d_parent)
3472 +               unionfs_lock_dentry(dentry->d_parent,
3473 +                                   UNIONFS_DMUTEX_REVAL_PARENT);
3474 +       valid = __unionfs_d_revalidate_one_locked(dentry, nd, willwrite);
3475 +       if (dentry != dentry->d_parent)
3476 +               unionfs_unlock_dentry(dentry->d_parent);
3477 +
3478 +out_free:
3479 +       /* dput all dentries in chain, free the array, and return status */
3480 +       if (chain_len > 0) {
3481 +               for (i = 0; i < chain_len; i++)
3482 +                       dput(chain[i]);
3483 +               kfree(chain);
3484 +       }
3485 +out:
3486 +       return valid;
3487 +}
3488 +/* ->d_revalidate: revalidate dentry and its parent chain; nonzero if valid */
3489 +static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
3490 +{
3491 +       int valid;
3492 +
3493 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3494 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3495 +
3496 +       valid = __unionfs_d_revalidate_chain(dentry, nd, false);
3497 +       if (likely(valid > 0)) { /* true==1: run the post-validation checks */
3498 +               unionfs_postcopyup_setmnt(dentry);
3499 +               unionfs_check_dentry(dentry);
3500 +               unionfs_check_nd(nd);
3501 +       }
3502 +
3503 +       unionfs_unlock_dentry(dentry);
3504 +       unionfs_read_unlock(dentry->d_sb);
3505 +
3506 +       return valid;
3507 +}
3508 +/* ->d_release: drop lower dentry/mnt refs, then free our private data */
3509 +static void unionfs_d_release(struct dentry *dentry)
3510 +{
3511 +       int bindex, bstart, bend;
3512 +
3513 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3514 +       /* no private data: nothing to drop, and presumably no lock to take */
3515 +       if (unlikely(!UNIONFS_D(dentry)))
3516 +               goto out;
3517 +       /* must lock our branch configuration here */
3518 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3519 +       unionfs_check_dentry(dentry);
3520 +       /* this could be a negative dentry, so check first */
3521 +       if (unlikely(dbstart(dentry) < 0)) {
3522 +               unionfs_unlock_dentry(dentry);
3523 +               goto out;       /* due to a (normal) failed lookup */
3524 +       }
3525 +
3526 +       /* Release all the lower dentries */
3527 +       bstart = dbstart(dentry);
3528 +       bend = dbend(dentry);
3529 +       for (bindex = bstart; bindex <= bend; bindex++) {
3530 +               dput(unionfs_lower_dentry_idx(dentry, bindex));
3531 +               unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3532 +               /* NULL lower mnt is ok if this is a negative dentry */
3533 +               if (!dentry->d_inode && !unionfs_lower_mnt_idx(dentry, bindex))
3534 +                       continue;
3535 +               unionfs_mntput(dentry, bindex);
3536 +               unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3537 +       }
3538 +       /* free the lower_paths array; the info struct itself goes below */
3539 +       kfree(UNIONFS_D(dentry)->lower_paths);
3540 +       UNIONFS_D(dentry)->lower_paths = NULL;
3541 +       unionfs_unlock_dentry(dentry);
3542 +
3543 +out:
3544 +       free_dentry_private_data(dentry);
3545 +       unionfs_read_unlock(dentry->d_sb);
3546 +}
3547 +
3548 +/*
3549 + * Called when we're removing the last reference to our dentry.  So we
3550 + * should drop all lower references (mnts, dentries, maybe the inode) too.
3551 + */
3552 +static void unionfs_d_iput(struct dentry *dentry, struct inode *inode)
3553 +{
3554 +       int bindex, rc;
3555 +
3556 +       BUG_ON(!dentry);
3557 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
3558 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
3559 +       /* NOTE(review): UNIONFS_D() is tested only after locking dentry */
3560 +       if (!UNIONFS_D(dentry) || dbstart(dentry) < 0)
3561 +               goto drop_lower_inodes;
3562 +       for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) {
3563 +               if (unionfs_lower_mnt_idx(dentry, bindex)) {
3564 +                       unionfs_mntput(dentry, bindex);
3565 +                       unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
3566 +               }
3567 +               if (unionfs_lower_dentry_idx(dentry, bindex)) {
3568 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
3569 +                       unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
3570 +               }
3571 +       }
3572 +       set_dbstart(dentry, -1);        /* mark: no lower objects left */
3573 +       set_dbend(dentry, -1);
3574 +
3575 +drop_lower_inodes:
3576 +       rc = atomic_read(&inode->i_count);
3577 +       if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) {
3578 +               /* see Documentation/filesystems/unionfs/issues.txt */
3579 +               lockdep_off();
3580 +               iput(unionfs_lower_inode(inode));
3581 +               lockdep_on();
3582 +               unionfs_set_lower_inode(inode, NULL);
3583 +               /* XXX: may need to set start/end to -1? */
3584 +       }
3585 +
3586 +       iput(inode);
3587 +
3588 +       unionfs_unlock_dentry(dentry);
3589 +       unionfs_read_unlock(dentry->d_sb);
3590 +}
3591 +/* unionfs dentry operations table */
3592 +struct dentry_operations unionfs_dops = {
3593 +       .d_revalidate   = unionfs_d_revalidate,
3594 +       .d_release      = unionfs_d_release,
3595 +       .d_iput         = unionfs_d_iput,
3596 +};
3597 diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c
3598 new file mode 100644
3599 index 0000000..a613862
3600 --- /dev/null
3601 +++ b/fs/unionfs/dirfops.c
3602 @@ -0,0 +1,290 @@
3603 +/*
3604 + * Copyright (c) 2003-2007 Erez Zadok
3605 + * Copyright (c) 2003-2006 Charles P. Wright
3606 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3607 + * Copyright (c) 2005-2006 Junjiro Okajima
3608 + * Copyright (c) 2005      Arun M. Krishnakumar
3609 + * Copyright (c) 2004-2006 David P. Quigley
3610 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3611 + * Copyright (c) 2003      Puja Gupta
3612 + * Copyright (c) 2003      Harikesavan Krishnan
3613 + * Copyright (c) 2003-2007 Stony Brook University
3614 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3615 + *
3616 + * This program is free software; you can redistribute it and/or modify
3617 + * it under the terms of the GNU General Public License version 2 as
3618 + * published by the Free Software Foundation.
3619 + */
3620 +
3621 +#include "union.h"
3622 +
3623 +/* Make sure our rdstate plays by the rules: offset and cookie in bounds. */
3624 +static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
3625 +{
3626 +       BUG_ON(rdstate->offset >= DIREOF);
3627 +       BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
3628 +}
3629 +
3630 +struct unionfs_getdents_callback {
3631 +       struct unionfs_dir_state *rdstate;
3632 +       void *dirent;
3633 +       int entries_written;
3634 +       int filldir_called;
3635 +       int filldir_error;
3636 +       filldir_t filldir;
3637 +       struct super_block *sb;
3638 +};
3639 +
3640 +/* based on generic filldir in fs/readdir.c */
3641 +static int unionfs_filldir(void *dirent, const char *name, int namelen,
3642 +                          loff_t offset, u64 ino, unsigned int d_type)
3643 +{
3644 +       struct unionfs_getdents_callback *buf = dirent;
3645 +       struct filldir_node *found = NULL;
3646 +       int err = 0;
3647 +       int is_wh_entry = 0;
3648 +
3649 +       buf->filldir_called++;
3650 +
3651 +       if ((namelen > UNIONFS_WHLEN) &&
3652 +           !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
3653 +               name += UNIONFS_WHLEN;
3654 +               namelen -= UNIONFS_WHLEN;
3655 +               is_wh_entry = 1;
3656 +       }
3657 +
3658 +       found = find_filldir_node(buf->rdstate, name, namelen, is_wh_entry);
3659 +
3660 +       if (found) {
3661 +               /*
3662 +                * If we had a non-whiteout entry in dir cache, then mark it
3663 +                * as a whiteout but leave it in the dir cache.
3664 +                */
3665 +               if (is_wh_entry && !found->whiteout)
3666 +                       found->whiteout = is_wh_entry;
3667 +               goto out;
3668 +       }
3669 +
3670 +       /* if 'name' isn't a whiteout, filldir it. */
3671 +       if (!is_wh_entry) {
3672 +               off_t pos = rdstate2offset(buf->rdstate);
3673 +               u64 unionfs_ino = ino;
3674 +
3675 +               err = buf->filldir(buf->dirent, name, namelen, pos,
3676 +                                  unionfs_ino, d_type);
3677 +               buf->rdstate->offset++;
3678 +               verify_rdstate_offset(buf->rdstate);
3679 +       }
3680 +       /*
3681 +        * If we did fill it, stuff it in our hash, otherwise return an
3682 +        * error.
3683 +        */
3684 +       if (err) {
3685 +               buf->filldir_error = err;
3686 +               goto out;
3687 +       }
3688 +       buf->entries_written++;
3689 +       err = add_filldir_node(buf->rdstate, name, namelen,
3690 +                              buf->rdstate->bindex, is_wh_entry);
3691 +       if (err)
3692 +               buf->filldir_error = err;
3693 +
3694 +out:
3695 +       return err;
3696 +}
3697 +
3698 +static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
3699 +{
3700 +       int err = 0, bend;
3701 +       struct file *lower_file = NULL;
3702 +       struct inode *inode = NULL;
3703 +       /* zeroed: the loop below may never fill buf before it is read */
3704 +       struct unionfs_getdents_callback buf = { .filldir_error = 0 };
3705 +       struct unionfs_dir_state *uds;
3706 +       loff_t offset;
3707 +
3708 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3709 +
3710 +       err = unionfs_file_revalidate(file, false);
3711 +       if (unlikely(err))
3712 +               goto out;
3713 +
3714 +       inode = file->f_path.dentry->d_inode;
3715 +
3716 +       uds = UNIONFS_F(file)->rdstate;
3717 +       if (!uds) {
3718 +               if (file->f_pos == DIREOF) {
3719 +                       goto out;
3720 +               } else if (file->f_pos > 0) {
3721 +                       uds = find_rdstate(inode, file->f_pos);
3722 +                       if (unlikely(!uds)) {
3723 +                               err = -ESTALE;
3724 +                               goto out;
3725 +                       }
3726 +                       UNIONFS_F(file)->rdstate = uds;
3727 +               } else {
3728 +                       init_rdstate(file);
3729 +                       uds = UNIONFS_F(file)->rdstate;
3730 +               }
3731 +       }
3732 +       bend = fbend(file);
3733 +
3734 +       while (uds->bindex <= bend) {
3735 +               lower_file = unionfs_lower_file_idx(file, uds->bindex);
3736 +               if (!lower_file) {
3737 +                       uds->bindex++;
3738 +                       uds->dirpos = 0;
3739 +                       continue;
3740 +               }
3741 +
3742 +               /* prepare callback buffer */
3743 +               buf.filldir_called = 0;
3744 +               buf.filldir_error = 0;
3745 +               buf.entries_written = 0;
3746 +               buf.dirent = dirent;
3747 +               buf.filldir = filldir;
3748 +               buf.rdstate = uds;
3749 +               buf.sb = inode->i_sb;
3750 +
3751 +               /* Read starting from where we last left off. */
3752 +               offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET);
3753 +               if (offset < 0) {
3754 +                       err = offset;
3755 +                       goto out;
3756 +               }
3757 +               err = vfs_readdir(lower_file, unionfs_filldir, &buf);
3758 +
3759 +               /* Save the position for when we continue. */
3760 +               offset = vfs_llseek(lower_file, 0, SEEK_CUR);
3761 +               if (offset < 0) {
3762 +                       err = offset;
3763 +                       goto out;
3764 +               }
3765 +               uds->dirpos = offset;
3766 +
3767 +               /* Copy the atime. */
3768 +               fsstack_copy_attr_atime(inode,
3769 +                                       lower_file->f_path.dentry->d_inode);
3770 +
3771 +               if (err < 0)
3772 +                       goto out;
3773 +
3774 +               if (buf.filldir_error)
3775 +                       break;
3776 +
3777 +               if (!buf.entries_written) {
3778 +                       uds->bindex++;
3779 +                       uds->dirpos = 0;
3780 +               }
3781 +       }
3782 +
3783 +       if (!buf.filldir_error && uds->bindex >= bend) {
3784 +               /* Save the number of hash entries for next time. */
3785 +               UNIONFS_I(inode)->hashsize = uds->hashentries;
3786 +               free_rdstate(uds);
3787 +               UNIONFS_F(file)->rdstate = NULL;
3788 +               file->f_pos = DIREOF;
3789 +       } else {
3790 +               file->f_pos = rdstate2offset(uds);
3791 +       }
3792 +
3793 +out:
3794 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
3795 +       return err;
3796 +}
3797 +
3798 +/*
3799 + * This is not meant to be a generic repositioning function.  If you do
3800 + * things that aren't supported, then we return EINVAL.
3801 + *
3802 + * What is allowed:
3803 + *  (1) seeking to the same position that you are currently at
3804 + *     This really has no effect, but returns where you are.
3805 + *  (2) seeking to the beginning of the file
3806 + *     This throws out all state, and lets you begin again.
3807 + */
3808 +static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
3809 +{
3810 +       struct unionfs_dir_state *rdstate;
3811 +       loff_t err;
3812 +
3813 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
3814 +
3815 +       err = unionfs_file_revalidate(file, false);
3816 +       if (unlikely(err))
3817 +               goto out;
3818 +
3819 +       rdstate = UNIONFS_F(file)->rdstate;
3820 +
3821 +       /*
3822 +        * we let users seek to their current position, but not anywhere
3823 +        * else.
3824 +        */
3825 +       if (!offset) {
3826 +               switch (origin) {
3827 +               case SEEK_SET:
3828 +                       if (rdstate) {
3829 +                               free_rdstate(rdstate);
3830 +                               UNIONFS_F(file)->rdstate = NULL;
3831 +                       }
3832 +                       init_rdstate(file);
3833 +                       err = 0;
3834 +                       break;
3835 +               case SEEK_CUR:
3836 +                       err = file->f_pos;
3837 +                       break;
3838 +               case SEEK_END:
3839 +                       /* Unsupported, because we would break everything.  */
3840 +                       err = -EINVAL;
3841 +                       break;
3842 +               }
3843 +       } else {
3844 +               switch (origin) {
3845 +               case SEEK_SET:
3846 +                       if (rdstate) {
3847 +                               if (offset == rdstate2offset(rdstate))
3848 +                                       err = offset;
3849 +                               else if (file->f_pos == DIREOF)
3850 +                                       err = DIREOF;
3851 +                               else
3852 +                                       err = -EINVAL;
3853 +                       } else {
3854 +                               struct inode *inode;
3855 +                               inode = file->f_path.dentry->d_inode;
3856 +                               rdstate = find_rdstate(inode, offset);
3857 +                               if (rdstate) {
3858 +                                       UNIONFS_F(file)->rdstate = rdstate;
3859 +                                       err = rdstate->offset;
3860 +                               } else {
3861 +                                       err = -EINVAL;
3862 +                               }
3863 +                       }
3864 +                       break;
3865 +               case SEEK_CUR:
3866 +               case SEEK_END:
3867 +                       /* Unsupported, because we would break everything.  */
3868 +                       err = -EINVAL;
3869 +                       break;
3870 +               }
3871 +       }
3872 +
3873 +out:
3874 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
3875 +       return err;
3876 +}
3877 +
3878 +/*
3879 + * Trimmed directory operations; we shouldn't pass everything down since
3880 + * we don't want to operate on partial directories.
3881 + */
3882 +struct file_operations unionfs_dir_fops = {
3883 +       .llseek         = unionfs_dir_llseek,
3884 +       .read           = generic_read_dir,
3885 +       .readdir        = unionfs_readdir,
3886 +       .unlocked_ioctl = unionfs_ioctl,
3887 +       .open           = unionfs_open,
3888 +       .release        = unionfs_file_release,
3889 +       .flush          = unionfs_flush,
3890 +       .fsync          = unionfs_fsync,
3891 +       .fasync         = unionfs_fasync,
3892 +};
3893 diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c
3894 new file mode 100644
3895 index 0000000..4b73bb6
3896 --- /dev/null
3897 +++ b/fs/unionfs/dirhelper.c
3898 @@ -0,0 +1,267 @@
3899 +/*
3900 + * Copyright (c) 2003-2007 Erez Zadok
3901 + * Copyright (c) 2003-2006 Charles P. Wright
3902 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
3903 + * Copyright (c) 2005-2006 Junjiro Okajima
3904 + * Copyright (c) 2005      Arun M. Krishnakumar
3905 + * Copyright (c) 2004-2006 David P. Quigley
3906 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
3907 + * Copyright (c) 2003      Puja Gupta
3908 + * Copyright (c) 2003      Harikesavan Krishnan
3909 + * Copyright (c) 2003-2007 Stony Brook University
3910 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
3911 + *
3912 + * This program is free software; you can redistribute it and/or modify
3913 + * it under the terms of the GNU General Public License version 2 as
3914 + * published by the Free Software Foundation.
3915 + */
3916 +
3917 +#include "union.h"
3918 +
3919 +/*
3920 + * Delete all of the whiteouts in a given directory for rmdir.
3921 + *
3922 + * lower directory inode should be locked
3923 + */
3924 +int do_delete_whiteouts(struct dentry *dentry, int bindex,
3925 +                       struct unionfs_dir_state *namelist)
3926 +{
3927 +       int err = 0;
3928 +       struct dentry *lower_dir_dentry = NULL;
3929 +       struct dentry *lower_dentry;
3930 +       char *name = NULL, *p;
3931 +       struct inode *lower_dir;
3932 +       int i;
3933 +       struct list_head *pos;
3934 +       struct filldir_node *cursor;
3935 +
3936 +       /* Find out lower parent dentry */
3937 +       lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
3938 +       BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
3939 +       lower_dir = lower_dir_dentry->d_inode;
3940 +       BUG_ON(!S_ISDIR(lower_dir->i_mode));
3941 +
3942 +       err = -ENOMEM;
3943 +       name = __getname();
3944 +       if (unlikely(!name))
3945 +               goto out;
3946 +       strcpy(name, UNIONFS_WHPFX);
3947 +       p = name + UNIONFS_WHLEN;
3948 +
3949 +       err = 0;
3950 +       for (i = 0; !err && i < namelist->size; i++) {
3951 +               list_for_each(pos, &namelist->list[i]) {
3952 +                       cursor =
3953 +                               list_entry(pos, struct filldir_node,
3954 +                                          file_list);
3955 +                       /* Only operate on whiteouts in this branch. */
3956 +                       if (cursor->bindex != bindex)
3957 +                               continue;
3958 +                       if (!cursor->whiteout)
3959 +                               continue;
3960 +
3961 +                       strcpy(p, cursor->name);
3962 +                       lower_dentry =
3963 +                               lookup_one_len(name, lower_dir_dentry,
3964 +                                              cursor->namelen +
3965 +                                              UNIONFS_WHLEN);
3966 +                       if (IS_ERR(lower_dentry)) {
3967 +                               err = PTR_ERR(lower_dentry);
3968 +                               break;
3969 +                       }
3970 +                       if (lower_dentry->d_inode)
3971 +                               err = vfs_unlink(lower_dir, lower_dentry);
3972 +                       dput(lower_dentry);
3973 +                       if (err)
3974 +                               break;
3975 +               }
3976 +       }
3977 +
3978 +       __putname(name);
3979 +
3980 +       /* After all of the removals, we should copy the attributes once. */
3981 +       fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode);
3982 +
3983 +out:
3984 +       return err;
3985 +}
3986 +
3987 +/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
3988 +int delete_whiteouts(struct dentry *dentry, int bindex,
3989 +                    struct unionfs_dir_state *namelist)
3990 +{
3991 +       int err;
3992 +       struct super_block *sb;
3993 +       struct dentry *lower_dir_dentry;
3994 +       struct inode *lower_dir;
3995 +       struct sioq_args args;
3996 +
3997 +       sb = dentry->d_sb;
3998 +
3999 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
4000 +       BUG_ON(bindex < dbstart(dentry));
4001 +       BUG_ON(bindex > dbend(dentry));
4002 +       err = is_robranch_super(sb, bindex);
4003 +       if (err)
4004 +               goto out;
4005 +
4006 +       lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4007 +       BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode));
4008 +       lower_dir = lower_dir_dentry->d_inode;
4009 +       BUG_ON(!S_ISDIR(lower_dir->i_mode));
4010 +
4011 +       if (!permission(lower_dir, MAY_WRITE | MAY_EXEC, NULL)) {
4012 +               err = do_delete_whiteouts(dentry, bindex, namelist);
4013 +       } else {
4014 +               args.deletewh.namelist = namelist;
4015 +               args.deletewh.dentry = dentry;
4016 +               args.deletewh.bindex = bindex;
4017 +               run_sioq(__delete_whiteouts, &args);
4018 +               err = args.err;
4019 +       }
4020 +
4021 +out:
4022 +       return err;
4023 +}
4024 +
4025 +#define RD_NONE 0
4026 +#define RD_CHECK_EMPTY 1
4027 +/* The callback structure for check_empty. */
4028 +struct unionfs_rdutil_callback {
4029 +       int err;
4030 +       int filldir_called;
4031 +       struct unionfs_dir_state *rdstate;
4032 +       int mode;
4033 +};
4034 +
4035 +/* This filldir function makes sure only whiteouts exist within a directory. */
4036 +static int readdir_util_callback(void *dirent, const char *name, int namelen,
4037 +                                loff_t offset, u64 ino, unsigned int d_type)
4038 +{
4039 +       int err = 0;
4040 +       struct unionfs_rdutil_callback *buf = dirent;
4041 +       int whiteout = 0;
4042 +       struct filldir_node *found;
4043 +
4044 +       buf->filldir_called = 1;
4045 +
4046 +       if (name[0] == '.' && (namelen == 1 ||
4047 +                              (name[1] == '.' && namelen == 2)))
4048 +               goto out;
4049 +
4050 +       if (namelen > UNIONFS_WHLEN &&
4051 +           !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
4052 +               namelen -= UNIONFS_WHLEN;
4053 +               name += UNIONFS_WHLEN;
4054 +               whiteout = 1;
4055 +       }
4056 +
4057 +       found = find_filldir_node(buf->rdstate, name, namelen, whiteout);
4058 +       /* If it was found in the table there was a previous whiteout. */
4059 +       if (found)
4060 +               goto out;
4061 +
4062 +       /*
4063 +        * if it wasn't found and isn't a whiteout, the directory isn't
4064 +        * empty.
4065 +        */
4066 +       err = -ENOTEMPTY;
4067 +       if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
4068 +               goto out;
4069 +
4070 +       err = add_filldir_node(buf->rdstate, name, namelen,
4071 +                              buf->rdstate->bindex, whiteout);
4072 +
4073 +out:
4074 +       buf->err = err;
4075 +       return err;
4076 +}
4077 +
4078 +/* Is a directory logically empty? */
4079 +int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
4080 +{
4081 +       int err = 0;
4082 +       struct dentry *lower_dentry = NULL;
4083 +       struct vfsmount *mnt;
4084 +       struct super_block *sb;
4085 +       struct file *lower_file;
4086 +       struct unionfs_rdutil_callback *buf = NULL;
4087 +       int bindex, bstart, bend, bopaque;
4088 +
4089 +       sb = dentry->d_sb;
4090 +
4091 +
4092 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
4093 +
4094 +       err = unionfs_partial_lookup(dentry);
4095 +       if (err)
4096 +               goto out;
4097 +
4098 +       bstart = dbstart(dentry);
4099 +       bend = dbend(dentry);
4100 +       bopaque = dbopaque(dentry);
4101 +       if (0 <= bopaque && bopaque < bend)
4102 +               bend = bopaque;
4103 +
4104 +       buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
4105 +       if (unlikely(!buf)) {
4106 +               err = -ENOMEM;
4107 +               goto out;
4108 +       }
4109 +       buf->err = 0;
4110 +       buf->mode = RD_CHECK_EMPTY;
4111 +       buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
4112 +       if (unlikely(!buf->rdstate)) {
4113 +               err = -ENOMEM;
4114 +               goto out;
4115 +       }
4116 +
4117 +       /* Process the lower directories with rdutil_callback as a filldir. */
4118 +       for (bindex = bstart; bindex <= bend; bindex++) {
4119 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4120 +               if (!lower_dentry)
4121 +                       continue;
4122 +               if (!lower_dentry->d_inode)
4123 +                       continue;
4124 +               if (!S_ISDIR(lower_dentry->d_inode->i_mode))
4125 +                       continue;
4126 +
4127 +               dget(lower_dentry);
4128 +               mnt = unionfs_mntget(dentry, bindex);
4129 +               branchget(sb, bindex);
4130 +               lower_file = dentry_open(lower_dentry, mnt, O_RDONLY);
4131 +               if (IS_ERR(lower_file)) {
4132 +                       err = PTR_ERR(lower_file);
4133 +                       branchput(sb, bindex);
4134 +                       goto out;
4135 +               }
4136 +
4137 +               do {
4138 +                       buf->filldir_called = 0;
4139 +                       buf->rdstate->bindex = bindex;
4140 +                       err = vfs_readdir(lower_file,
4141 +                                         readdir_util_callback, buf);
4142 +                       if (buf->err)
4143 +                               err = buf->err;
4144 +               } while ((err >= 0) && buf->filldir_called);
4145 +
4146 +               /* fput calls dput for lower_dentry */
4147 +               fput(lower_file);
4148 +               branchput(sb, bindex);
4149 +
4150 +               if (err < 0)
4151 +                       goto out;
4152 +       }
4153 +
4154 +out:
4155 +       if (buf) {
4156 +               if (namelist && !err)
4157 +                       *namelist = buf->rdstate;
4158 +               else if (buf->rdstate)
4159 +                       free_rdstate(buf->rdstate);
4160 +               kfree(buf);
4161 +       }
4162 +
4163 +
4164 +       return err;
4165 +}
4166 diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
4167 new file mode 100644
4168 index 0000000..29d42fb
4169 --- /dev/null
4170 +++ b/fs/unionfs/fanout.h
4171 @@ -0,0 +1,316 @@
4172 +/*
4173 + * Copyright (c) 2003-2007 Erez Zadok
4174 + * Copyright (c) 2003-2006 Charles P. Wright
4175 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4176 + * Copyright (c) 2005      Arun M. Krishnakumar
4177 + * Copyright (c) 2004-2006 David P. Quigley
4178 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4179 + * Copyright (c) 2003      Puja Gupta
4180 + * Copyright (c) 2003      Harikesavan Krishnan
4181 + * Copyright (c) 2003-2007 Stony Brook University
4182 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4183 + *
4184 + * This program is free software; you can redistribute it and/or modify
4185 + * it under the terms of the GNU General Public License version 2 as
4186 + * published by the Free Software Foundation.
4187 + */
4188 +
4189 +#ifndef _FANOUT_H_
4190 +#define _FANOUT_H_
4191 +
4192 +/*
4193 + * Inode to private data
4194 + *
4195 + * Since we use containers and the struct inode is _inside_ the
4196 + * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
4197 + * inode pointer), return a valid non-NULL pointer.
4198 + */
4199 +static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
4200 +{
4201 +       return container_of(inode, struct unionfs_inode_info, vfs_inode);
4202 +}
4203 +
4204 +#define ibstart(ino) (UNIONFS_I(ino)->bstart)
4205 +#define ibend(ino) (UNIONFS_I(ino)->bend)
4206 +
4207 +/* Superblock to private data */
4208 +#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
4209 +#define sbstart(sb) 0
4210 +#define sbend(sb) (UNIONFS_SB(sb)->bend)
4211 +#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
4212 +#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
4213 +
4214 +/* File to private Data */
4215 +#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
4216 +#define fbstart(file) (UNIONFS_F(file)->bstart)
4217 +#define fbend(file) (UNIONFS_F(file)->bend)
4218 +
4219 +/* helpers to manipulate branch IDs stored in our superblock */
4220 +static inline int branch_id(struct super_block *sb, int index)
4221 +{
4222 +       BUG_ON(!sb || index < 0);
4223 +       return UNIONFS_SB(sb)->data[index].branch_id;
4224 +}
4225 +
4226 +static inline void set_branch_id(struct super_block *sb, int index, int val)
4227 +{
4228 +       BUG_ON(!sb || index < 0);
4229 +       UNIONFS_SB(sb)->data[index].branch_id = val;
4230 +}
4231 +
4232 +static inline void new_branch_id(struct super_block *sb, int index)
4233 +{
4234 +       BUG_ON(!sb || index < 0);
4235 +       set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
4236 +}
4237 +
4238 +/*
4239 + * Find new index of matching branch with an existing superblock of a known
4240 + * (possibly old) id.  This is needed because branches could have been
4241 + * added/deleted causing the branches of any open files to shift.
4242 + *
4243 + * @sb: the new superblock which may have new/different branch IDs
4244 + * @id: the old/existing id we're looking for
4245 + * Returns index of newly found branch (0 or greater), -1 otherwise.
4246 + */
4247 +static inline int branch_id_to_idx(struct super_block *sb, int id)
4248 +{
4249 +       int i;
4250 +       for (i = 0; i < sbmax(sb); i++) {
4251 +               if (branch_id(sb, i) == id)
4252 +                       return i;
4253 +       }
4254 +       /* in the non-ODF code, this should really never happen */
4255 +       printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
4256 +       return -1;
4257 +}
4258 +
4259 +/* File to lower file. */
4260 +static inline struct file *unionfs_lower_file(const struct file *f)
4261 +{
4262 +       BUG_ON(!f);
4263 +       return UNIONFS_F(f)->lower_files[fbstart(f)];
4264 +}
4265 +
4266 +static inline struct file *unionfs_lower_file_idx(const struct file *f,
4267 +                                                 int index)
4268 +{
4269 +       BUG_ON(!f || index < 0);
4270 +       return UNIONFS_F(f)->lower_files[index];
4271 +}
4272 +
4273 +static inline void unionfs_set_lower_file_idx(struct file *f, int index,
4274 +                                             struct file *val)
4275 +{
4276 +       BUG_ON(!f || index < 0);
4277 +       UNIONFS_F(f)->lower_files[index] = val;
4278 +       /* save branch ID (may be redundant?) */
4279 +       UNIONFS_F(f)->saved_branch_ids[index] =
4280 +               branch_id((f)->f_path.dentry->d_sb, index);
4281 +}
4282 +
4283 +static inline void unionfs_set_lower_file(struct file *f, struct file *val)
4284 +{
4285 +       BUG_ON(!f);
4286 +       unionfs_set_lower_file_idx((f), fbstart(f), (val));
4287 +}
4288 +
4289 +/* Inode to lower inode. */
4290 +static inline struct inode *unionfs_lower_inode(const struct inode *i)
4291 +{
4292 +       BUG_ON(!i);
4293 +       return UNIONFS_I(i)->lower_inodes[ibstart(i)];
4294 +}
4295 +
4296 +static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
4297 +                                                   int index)
4298 +{
4299 +       BUG_ON(!i || index < 0);
4300 +       return UNIONFS_I(i)->lower_inodes[index];
4301 +}
4302 +
4303 +static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
4304 +                                              struct inode *val)
4305 +{
4306 +       BUG_ON(!i || index < 0);
4307 +       UNIONFS_I(i)->lower_inodes[index] = val;
4308 +}
4309 +
4310 +static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
4311 +{
4312 +       BUG_ON(!i);
4313 +       UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
4314 +}
4315 +
4316 +/* Superblock to lower superblock. */
4317 +static inline struct super_block *unionfs_lower_super(
4318 +                                       const struct super_block *sb)
4319 +{
4320 +       BUG_ON(!sb);
4321 +       return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
4322 +}
4323 +
4324 +static inline struct super_block *unionfs_lower_super_idx(
4325 +                                       const struct super_block *sb,
4326 +                                       int index)
4327 +{
4328 +       BUG_ON(!sb || index < 0);
4329 +       return UNIONFS_SB(sb)->data[index].sb;
4330 +}
4331 +
4332 +static inline void unionfs_set_lower_super_idx(struct super_block *sb,
4333 +                                              int index,
4334 +                                              struct super_block *val)
4335 +{
4336 +       BUG_ON(!sb || index < 0);
4337 +       UNIONFS_SB(sb)->data[index].sb = val;
4338 +}
4339 +
4340 +static inline void unionfs_set_lower_super(struct super_block *sb,
4341 +                                          struct super_block *val)
4342 +{
4343 +       BUG_ON(!sb);
4344 +       UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
4345 +}
4346 +
4347 +/* Branch count macros. */
4348 +static inline int branch_count(const struct super_block *sb, int index)
4349 +{
4350 +       BUG_ON(!sb || index < 0);
4351 +       return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
4352 +}
4353 +
4354 +static inline void set_branch_count(struct super_block *sb, int index, int val)
4355 +{
4356 +       BUG_ON(!sb || index < 0);
4357 +       atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
4358 +}
4359 +
4360 +static inline void branchget(struct super_block *sb, int index)
4361 +{
4362 +       BUG_ON(!sb || index < 0);
4363 +       atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
4364 +}
4365 +
4366 +static inline void branchput(struct super_block *sb, int index)
4367 +{
4368 +       BUG_ON(!sb || index < 0);
4369 +       atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
4370 +}
4371 +
4372 +/* Dentry macros */
4373 +static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
4374 +{
4375 +       BUG_ON(!dent);
4376 +       return dent->d_fsdata;
4377 +}
4378 +
4379 +static inline int dbstart(const struct dentry *dent)
4380 +{
4381 +       BUG_ON(!dent);
4382 +       return UNIONFS_D(dent)->bstart;
4383 +}
4384 +
4385 +static inline void set_dbstart(struct dentry *dent, int val)
4386 +{
4387 +       BUG_ON(!dent);
4388 +       UNIONFS_D(dent)->bstart = val;
4389 +}
4390 +
4391 +static inline int dbend(const struct dentry *dent)
4392 +{
4393 +       BUG_ON(!dent);
4394 +       return UNIONFS_D(dent)->bend;
4395 +}
4396 +
4397 +static inline void set_dbend(struct dentry *dent, int val)
4398 +{
4399 +       BUG_ON(!dent);
4400 +       UNIONFS_D(dent)->bend = val;
4401 +}
4402 +
4403 +static inline int dbopaque(const struct dentry *dent)
4404 +{
4405 +       BUG_ON(!dent);
4406 +       return UNIONFS_D(dent)->bopaque;
4407 +}
4408 +
4409 +static inline void set_dbopaque(struct dentry *dent, int val)
4410 +{
4411 +       BUG_ON(!dent);
4412 +       UNIONFS_D(dent)->bopaque = val;
4413 +}
4414 +
4415 +static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
4416 +                                               struct dentry *val)
4417 +{
4418 +       BUG_ON(!dent || index < 0);
4419 +       UNIONFS_D(dent)->lower_paths[index].dentry = val;
4420 +}
4421 +
4422 +static inline struct dentry *unionfs_lower_dentry_idx(
4423 +                               const struct dentry *dent,
4424 +                               int index)
4425 +{
4426 +       BUG_ON(!dent || index < 0);
4427 +       return UNIONFS_D(dent)->lower_paths[index].dentry;
4428 +}
4429 +
4430 +static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
4431 +{
4432 +       BUG_ON(!dent);
4433 +       return unionfs_lower_dentry_idx(dent, dbstart(dent));
4434 +}
4435 +
4436 +static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
4437 +                                            struct vfsmount *mnt)
4438 +{
4439 +       BUG_ON(!dent || index < 0);
4440 +       UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
4441 +}
4442 +
4443 +static inline struct vfsmount *unionfs_lower_mnt_idx(
4444 +                                       const struct dentry *dent,
4445 +                                       int index)
4446 +{
4447 +       BUG_ON(!dent || index < 0);
4448 +       return UNIONFS_D(dent)->lower_paths[index].mnt;
4449 +}
4450 +
4451 +static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
4452 +{
4453 +       BUG_ON(!dent);
4454 +       return unionfs_lower_mnt_idx(dent, dbstart(dent));
4455 +}
4456 +
4457 +/* Macros for locking a dentry. */
4458 +enum unionfs_dentry_lock_class {
4459 +       UNIONFS_DMUTEX_NORMAL,
4460 +       UNIONFS_DMUTEX_ROOT,
4461 +       UNIONFS_DMUTEX_PARENT,
4462 +       UNIONFS_DMUTEX_CHILD,
4463 +       UNIONFS_DMUTEX_WHITEOUT,
4464 +       UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
4465 +       UNIONFS_DMUTEX_REVAL_CHILD,   /* for file/dentry revalidate */
4466 +};
4467 +
4468 +static inline void unionfs_lock_dentry(struct dentry *d,
4469 +                                      unsigned int subclass)
4470 +{
4471 +       BUG_ON(!d);
4472 +       mutex_lock_nested(&UNIONFS_D(d)->lock, subclass);       /* callers pass a UNIONFS_DMUTEX_* value */
4473 +}
4474 +
4475 +static inline void unionfs_unlock_dentry(struct dentry *d)
4476 +{
4477 +       BUG_ON(!d);
4478 +       mutex_unlock(&UNIONFS_D(d)->lock);      /* releases the mutex taken by unionfs_lock_dentry() */
4479 +}
4480 +
4481 +static inline void verify_locked(struct dentry *d)
4482 +{
4483 +       BUG_ON(!d);
4484 +       BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));  /* checks "held", not "held by the caller" */
4485 +}
4486 +
4487 +#endif /* not _FANOUT_H */
4488 diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
4489 new file mode 100644
4490 index 0000000..0c424f6
4491 --- /dev/null
4492 +++ b/fs/unionfs/file.c
4493 @@ -0,0 +1,184 @@
4494 +/*
4495 + * Copyright (c) 2003-2007 Erez Zadok
4496 + * Copyright (c) 2003-2006 Charles P. Wright
4497 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4498 + * Copyright (c) 2005-2006 Junjiro Okajima
4499 + * Copyright (c) 2005      Arun M. Krishnakumar
4500 + * Copyright (c) 2004-2006 David P. Quigley
4501 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4502 + * Copyright (c) 2003      Puja Gupta
4503 + * Copyright (c) 2003      Harikesavan Krishnan
4504 + * Copyright (c) 2003-2007 Stony Brook University
4505 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4506 + *
4507 + * This program is free software; you can redistribute it and/or modify
4508 + * it under the terms of the GNU General Public License version 2 as
4509 + * published by the Free Software Foundation.
4510 + */
4511 +
4512 +#include "union.h"
4513 +
4514 +static int unionfs_file_readdir(struct file *file, void *dirent,
4515 +                               filldir_t filldir)
4516 +{
4517 +       return -ENOTDIR;        /* ->readdir of unionfs_main_fops: always refused */
4518 +}
4519 +
4520 +static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
4521 +{
4522 +       int err = 0;
4523 +       bool willwrite;
4524 +       struct file *lower_file;
4525 +
4526 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4527 +
4528 +       /* willwrite: VM_SHARED and VM_WRITE both set.  This might be deferred to mmap's writepage */
4529 +       willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
4530 +       err = unionfs_file_revalidate(file, willwrite);
4531 +       if (unlikely(err))
4532 +               goto out;
4533 +       unionfs_check_file(file);
4534 +
4535 +       /*
4536 +        * File systems which do not implement ->writepage may use
4537 +        * generic_file_readonly_mmap as their ->mmap op.  If you call
4538 +        * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
4539 +        * But we cannot call the lower ->mmap op, so we can't tell that
4540 +        * writeable mappings won't work.  Therefore, our only choice is to
4541 +        * check if the lower file system supports the ->writepage, and if
4542 +        * not, return EINVAL (the same error that
4543 +        * generic_file_readonly_mmap returns in that case).
4544 +        */
4545 +       lower_file = unionfs_lower_file(file);
4546 +       if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
4547 +               err = -EINVAL;
4548 +               printk(KERN_ERR "unionfs: branch %d file system does not "
4549 +                      "support writeable mmap\n", fbstart(file));
4550 +       } else {
4551 +               err = generic_file_mmap(file, vma);     /* called on the unionfs file, not on lower_file */
4552 +               if (err)
4553 +                       printk(KERN_ERR
4554 +                              "unionfs: generic_file_mmap failed %d\n", err);
4555 +       }
4556 +
4557 +out:
4558 +       if (!err) {
4559 +               /* copyup could cause parent dir times to change */
4560 +               unionfs_copy_attr_times(file->f_path.dentry->d_parent->d_inode);
4561 +               unionfs_check_file(file);
4562 +       }
4563 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
4564 +       return err;
4565 +}
4566 +
4567 +int unionfs_fsync(struct file *file, struct dentry *dentry, int datasync)
4568 +{
4569 +       int bindex, bstart, bend;
4570 +       struct file *lower_file;
4571 +       struct dentry *lower_dentry;
4572 +       struct inode *lower_inode, *inode;
4573 +       int err = -EINVAL;      /* NOTE(review): dead value, overwritten by the revalidate below */
4574 +
4575 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4576 +       err = unionfs_file_revalidate(file, true);
4577 +       if (unlikely(err))
4578 +               goto out;
4579 +       unionfs_check_file(file);
4580 +
4581 +       bstart = fbstart(file);
4582 +       bend = fbend(file);
4583 +       if (bstart < 0 || bend < 0)
4584 +               goto out;       /* NOTE(review): err is 0 here, so this returns success */
4585 +
4586 +       inode = dentry->d_inode;
4587 +       if (unlikely(!inode)) {
4588 +               printk(KERN_ERR
4589 +                      "unionfs: null lower inode in unionfs_fsync\n");
4590 +               goto out;       /* NOTE(review): err is still 0 on this path too */
4591 +       }
4592 +       for (bindex = bstart; bindex <= bend; bindex++) {       /* every branch in [bstart, bend] */
4593 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
4594 +               if (!lower_inode || !lower_inode->i_fop->fsync)
4595 +                       continue;       /* no lower inode, or it has no ->fsync */
4596 +               lower_file = unionfs_lower_file_idx(file, bindex);
4597 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4598 +               mutex_lock(&lower_inode->i_mutex);      /* lower ->fsync runs under the lower i_mutex */
4599 +               err = lower_inode->i_fop->fsync(lower_file,
4600 +                                               lower_dentry,
4601 +                                               datasync);
4602 +               mutex_unlock(&lower_inode->i_mutex);
4603 +               if (err)
4604 +                       goto out;       /* first failing branch ends the loop */
4605 +       }
4606 +
4607 +       unionfs_copy_attr_times(inode);
4608 +
4609 +out:
4610 +       unionfs_check_file(file);
4611 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
4612 +       return err;
4613 +}
4614 +
4615 +int unionfs_fasync(int fd, struct file *file, int flag)
4616 +{
4617 +       int bindex, bstart, bend;
4618 +       struct file *lower_file;
4619 +       struct dentry *dentry;
4620 +       struct inode *lower_inode, *inode;
4621 +       int err = 0;
4622 +
4623 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
4624 +       err = unionfs_file_revalidate(file, true);
4625 +       if (unlikely(err))
4626 +               goto out;
4627 +       unionfs_check_file(file);
4628 +
4629 +       bstart = fbstart(file);
4630 +       bend = fbend(file);
4631 +       if (bstart < 0 || bend < 0)
4632 +               goto out;       /* err is 0 here: returns success without calling any lower ->fasync */
4633 +
4634 +       dentry = file->f_path.dentry;
4635 +       inode = dentry->d_inode;
4636 +       if (unlikely(!inode)) {
4637 +               printk(KERN_ERR
4638 +                      "unionfs: null lower inode in unionfs_fasync\n");
4639 +               goto out;       /* err is still 0 on this path */
4640 +       }
4641 +       for (bindex = bstart; bindex <= bend; bindex++) {       /* every branch in [bstart, bend] */
4642 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
4643 +               if (!lower_inode || !lower_inode->i_fop->fasync)
4644 +                       continue;       /* no lower inode, or it has no ->fasync */
4645 +               lower_file = unionfs_lower_file_idx(file, bindex);
4646 +               mutex_lock(&lower_inode->i_mutex);      /* lower ->fasync runs under the lower i_mutex */
4647 +               err = lower_inode->i_fop->fasync(fd, lower_file, flag);
4648 +               mutex_unlock(&lower_inode->i_mutex);
4649 +               if (err)
4650 +                       goto out;       /* first failing branch ends the loop */
4651 +       }
4652 +
4653 +       unionfs_copy_attr_times(inode);
4654 +
4655 +out:
4656 +       unionfs_check_file(file);
4657 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
4658 +       return err;
4659 +}
4660 +
4661 +struct file_operations unionfs_main_fops = {
4662 +       .llseek         = generic_file_llseek,
4663 +       .read           = do_sync_read,
4664 +       .aio_read       = generic_file_aio_read,
4665 +       .write          = do_sync_write,
4666 +       .aio_write      = generic_file_aio_write,
4667 +       .readdir        = unionfs_file_readdir, /* always returns -ENOTDIR */
4668 +       .unlocked_ioctl = unionfs_ioctl,
4669 +       .mmap           = unionfs_mmap,
4670 +       .open           = unionfs_open,
4671 +       .flush          = unionfs_flush,
4672 +       .release        = unionfs_file_release,
4673 +       .fsync          = unionfs_fsync,        /* fans out to each lower ->fsync */
4674 +       .fasync         = unionfs_fasync,       /* fans out to each lower ->fasync */
4675 +       .splice_read    = generic_file_splice_read,
4676 +       .splice_write   = generic_file_splice_write,
4677 +};
4678 diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c
4679 new file mode 100644
4680 index 0000000..640969d
4681 --- /dev/null
4682 +++ b/fs/unionfs/inode.c
4683 @@ -0,0 +1,1115 @@
4684 +/*
4685 + * Copyright (c) 2003-2007 Erez Zadok
4686 + * Copyright (c) 2003-2006 Charles P. Wright
4687 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
4688 + * Copyright (c) 2005-2006 Junjiro Okajima
4689 + * Copyright (c) 2005      Arun M. Krishnakumar
4690 + * Copyright (c) 2004-2006 David P. Quigley
4691 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
4692 + * Copyright (c) 2003      Puja Gupta
4693 + * Copyright (c) 2003      Harikesavan Krishnan
4694 + * Copyright (c) 2003-2007 Stony Brook University
4695 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
4696 + *
4697 + * This program is free software; you can redistribute it and/or modify
4698 + * it under the terms of the GNU General Public License version 2 as
4699 + * published by the Free Software Foundation.
4700 + */
4701 +
4702 +#include "union.h"
4703 +
4704 +/*
4705 + * Helper function when creating new objects (create, symlink, and mknod).
4706 + * Checks to see if there's a whiteout in @lower_dentry's parent directory,
4707 + * whose name is taken from @dentry.  Then tries to remove that whiteout, if
4708 + * found.
4709 + *
4710 + * Return 0 if no whiteout was found, or if one was found and successfully
4711 + * removed (a zero tells the caller that @lower_dentry belongs to a good
4712 + * branch to create the new object in).  Return -ERRNO if an error occurred
4713 + * during whiteout lookup or in trying to unlink the whiteout.
4714 + */
4715 +static int check_for_whiteout(struct dentry *dentry,
4716 +                             struct dentry *lower_dentry)
4717 +{
4718 +       int err = 0;
4719 +       struct dentry *wh_dentry = NULL;
4720 +       struct dentry *lower_dir_dentry;
4721 +       char *name = NULL;
4722 +
4723 +       /*
4724 +        * check if whiteout exists in this branch, i.e. lookup .wh.foo
4725 +        * first.
4726 +        */
4727 +       name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
4728 +       if (unlikely(IS_ERR(name))) {
4729 +               err = PTR_ERR(name);
4730 +               goto out;
4731 +       }
4732 +
4733 +       wh_dentry = lookup_one_len(name, lower_dentry->d_parent,
4734 +                                  dentry->d_name.len + UNIONFS_WHLEN);
4735 +       if (IS_ERR(wh_dentry)) {
4736 +               err = PTR_ERR(wh_dentry);
4737 +               wh_dentry = NULL;       /* keep the dput() at out: away from an ERR_PTR */
4738 +               goto out;
4739 +       }
4740 +
4741 +       if (!wh_dentry->d_inode) /* no whiteout exists */
4742 +               goto out;
4743 +
4744 +       /* .wh.foo has been found, so let's unlink it */
4745 +       lower_dir_dentry = lock_parent_wh(wh_dentry);
4746 +       /* see Documentation/filesystems/unionfs/issues.txt */
4747 +       lockdep_off();
4748 +       err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry);
4749 +       lockdep_on();
4750 +       unlock_dir(lower_dir_dentry);
4751 +
4752 +       /*
4753 +        * Whiteouts are special files and should be deleted no matter what
4754 +        * (as if they never existed), in order to allow this create
4755 +        * operation to succeed.  This is especially important in sticky
4756 +        * directories: a whiteout may have been created by one user, but
4757 +        * the newly created file may be created by another user.
4758 +        * Therefore, in order to maintain Unix semantics, if the vfs_unlink
4759 +        * above failed, then we have to try to directly unlink the
4760 +        * whiteout.  Note: in the ODF version of unionfs, whiteouts are
4761 +        * handled much more cleanly.
4762 +        */
4763 +       if (err == -EPERM) {
4764 +               struct inode *inode = lower_dir_dentry->d_inode;
4765 +               err = inode->i_op->unlink(inode, wh_dentry);    /* NOTE(review): runs after unlock_dir() above */
4766 +       }
4767 +       if (err)
4768 +               printk(KERN_ERR "unionfs: could not "
4769 +                      "unlink whiteout, err = %d\n", err);
4770 +
4771 +out:
4772 +       dput(wh_dentry);        /* NULL when the lookup failed or never ran */
4773 +       kfree(name);
4774 +       return err;
4775 +}
4776 +
4777 +/*
4778 + * Find a writeable branch to create new object in.  Checks all writeable
4779 + * branches of the parent inode, from istart to iend order; if none are
4780 + * suitable, also tries branch 0 (which may require a copyup).
4781 + *
4782 + * Return a lower_dentry we can use to create object in, or ERR_PTR.
4783 + */
4784 +static struct dentry *find_writeable_branch(struct inode *parent,
4785 +                                           struct dentry *dentry)
4786 +{
4787 +       int err = -EINVAL;
4788 +       int bindex, istart, iend;
4789 +       struct dentry *lower_dentry = NULL;
4790 +
4791 +       istart = ibstart(parent);
4792 +       iend = ibend(parent);
4793 +       if (istart < 0)
4794 +               goto out;       /* err is still -EINVAL here */
4795 +
4796 +begin:
4797 +       for (bindex = istart; bindex <= iend; bindex++) {
4798 +               /* skip non-writeable branches */
4799 +               err = is_robranch_super(dentry->d_sb, bindex);
4800 +               if (err) {
4801 +                       err = -EROFS;
4802 +                       continue;
4803 +               }
4804 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
4805 +               if (!lower_dentry)
4806 +                       continue;
4807 +               /*
4808 +                * check for whiteouts in writeable branch, and remove them
4809 +                * if necessary.
4810 +                */
4811 +               err = check_for_whiteout(dentry, lower_dentry);
4812 +               if (err)
4813 +                       continue;       /* NOTE(review): last statement of the body; success does not break either */
4814 +       }
4815 +       /*
4816 +        * If istart wasn't already branch 0, and we got any error, then try
4817 +        * branch 0 (which may require copyup)
4818 +        */
4819 +       if (err && istart > 0) {
4820 +               istart = iend = 0;      /* second pass covers branch 0 only */
4821 +               goto begin;
4822 +       }
4823 +
4824 +       /*
4825 +        * If we tried even branch 0, and still got an error, abort.  But if
4826 +        * the error was an EROFS, then we should try to copyup.
4827 +        */
4828 +       if (err && err != -EROFS)
4829 +               goto out;
4830 +
4831 +       /*
4832 +        * If we get here, then check if copyup needed.  If lower_dentry is
4833 +        * NULL, create the entire dentry directory structure in branch 0.
4834 +        */
4835 +       if (!lower_dentry) {
4836 +               bindex = 0;
4837 +               lower_dentry = create_parents(parent, dentry,
4838 +                                             dentry->d_name.name, bindex);
4839 +               if (IS_ERR(lower_dentry)) {
4840 +                       err = PTR_ERR(lower_dentry);
4841 +                       goto out;
4842 +               }
4843 +       }
4844 +       err = 0;                /* all's well */
4845 +out:
4846 +       if (err)
4847 +               return ERR_PTR(err);
4848 +       return lower_dentry;
4849 +}
4850 +
4851 +static int unionfs_create(struct inode *parent, struct dentry *dentry,
4852 +                         int mode, struct nameidata *nd)
4853 +{      /* create @dentry in a writeable lower branch via vfs_create(); returns 0 or -errno */
4854 +       int err = 0;
4855 +       struct dentry *lower_dentry = NULL;
4856 +       struct dentry *lower_parent_dentry = NULL;
4857 +       int valid = 0;
4858 +       struct nameidata lower_nd;
4859 +
4860 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4861 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
4862 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
4863 +
4864 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd, false);
4865 +       if (unlikely(!valid)) {
4866 +               err = -ESTALE;  /* same as what real_lookup does */
4867 +               goto out;
4868 +       }
4869 +
4870 +       valid = __unionfs_d_revalidate_one_locked(dentry, nd, false);
4871 +       /*
4872 +        * It's only a bug if this dentry was not negative and couldn't be
4873 +        * revalidated (shouldn't happen).
4874 +        */
4875 +       BUG_ON(!valid && dentry->d_inode);
4876 +
4877 +       lower_dentry = find_writeable_branch(parent, dentry);
4878 +       if (IS_ERR(lower_dentry)) {
4879 +               err = PTR_ERR(lower_dentry);
4880 +               goto out;
4881 +       }
4882 +
4883 +       lower_parent_dentry = lock_parent(lower_dentry);
4884 +       if (IS_ERR(lower_parent_dentry)) {
4885 +               err = PTR_ERR(lower_parent_dentry);
4886 +               goto out;
4887 +       }
4888 +
4889 +       err = init_lower_nd(&lower_nd, LOOKUP_CREATE);
4890 +       if (unlikely(err < 0))
4891 +               goto out_unlock;        /* lock_parent() succeeded above: must not skip unlock_dir() */
4892 +       err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode,
4893 +                        &lower_nd);
4894 +       release_lower_nd(&lower_nd, err);
4895 +
4896 +       if (!err) {
4897 +               err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
4898 +               if (!err) {
4899 +                       unionfs_copy_attr_times(parent);
4900 +                       fsstack_copy_inode_size(parent,
4901 +                                               lower_parent_dentry->d_inode);
4902 +                       /* update no. of links on parent directory */
4903 +                       parent->i_nlink = unionfs_get_nlinks(parent);
4904 +               }
4905 +       }
4906 +out_unlock:
4907 +       unlock_dir(lower_parent_dentry);
4908 +
4909 +out:
4910 +       if (!err) {
4911 +               unionfs_postcopyup_setmnt(dentry);
4912 +               unionfs_check_inode(parent);
4913 +               unionfs_check_dentry(dentry);
4914 +               unionfs_check_nd(nd);
4915 +       }
4916 +       unionfs_unlock_dentry(dentry->d_parent);        /* released in reverse order of acquisition */
4917 +       unionfs_unlock_dentry(dentry);
4918 +       unionfs_read_unlock(dentry->d_sb);
4919 +       return err;
4920 +}
4921 +
4922 +/*
4923 + * unionfs_lookup is the only special function which takes a dentry, yet we
4924 + * do NOT want to call __unionfs_d_revalidate_chain because by definition,
4925 + * we don't have a valid dentry here yet.
4926 + */
4927 +static struct dentry *unionfs_lookup(struct inode *parent,
4928 +                                    struct dentry *dentry,
4929 +                                    struct nameidata *nd)
4930 +{
4931 +       struct path path_save = {NULL, NULL};
4932 +       struct dentry *ret;
4933 +
4934 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4935 +       if (dentry != dentry->d_parent)
4936 +               unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_ROOT);
4937 +
4938 +       /* save the dentry & vfsmnt from namei */
4939 +       if (nd) {
4940 +               path_save.dentry = nd->path.dentry;
4941 +               path_save.mnt = nd->path.mnt;
4942 +       }
4943 +
4944 +       /*
4945 +        * unionfs_lookup_backend returns a locked dentry upon success,
4946 +        * so we'll have to unlock it below.
4947 +        */
4948 +       ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
4949 +
4950 +       /* restore the dentry & vfsmnt in namei */
4951 +       if (nd) {
4952 +               nd->path.dentry = path_save.dentry;
4953 +               nd->path.mnt = path_save.mnt;
4954 +       }
4955 +       if (!IS_ERR(ret)) {
4956 +               if (ret)
4957 +                       dentry = ret;   /* backend handed back another dentry: use it from here on */
4958 +               unionfs_copy_attr_times(dentry->d_inode);
4959 +               /* parent times may have changed */
4960 +               unionfs_copy_attr_times(dentry->d_parent->d_inode);
4961 +       }
4962 +
4963 +       unionfs_check_inode(parent);
4964 +       if (!IS_ERR(ret)) {
4965 +               unionfs_check_dentry(dentry);
4966 +               unionfs_check_nd(nd);
4967 +               unionfs_unlock_dentry(dentry);  /* drops the lock the backend returned with */
4968 +       }
4969 +
4970 +       if (dentry != dentry->d_parent) {       /* mirrors the conditional parent lock at the top */
4971 +               unionfs_check_dentry(dentry->d_parent);
4972 +               unionfs_unlock_dentry(dentry->d_parent);
4973 +       }
4974 +       unionfs_read_unlock(dentry->d_sb);
4975 +
4976 +       return ret;
4977 +}
4978 +
4979 +static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
4980 +                       struct dentry *new_dentry)
4981 +{
4982 +       int err = 0;
4983 +       struct dentry *lower_old_dentry = NULL;
4984 +       struct dentry *lower_new_dentry = NULL;
4985 +       struct dentry *lower_dir_dentry = NULL;
4986 +       struct dentry *whiteout_dentry;
4987 +       char *name = NULL;
4988 +
4989 +       unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
4990 +       unionfs_double_lock_dentry(new_dentry, old_dentry);
4991 +
4992 +       if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) {
4993 +               err = -ESTALE;
4994 +               goto out;
4995 +       }
4996 +       if (unlikely(new_dentry->d_inode &&
4997 +                    !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) {
4998 +               err = -ESTALE;
4999 +               goto out;
5000 +       }
5001 +
5002 +       lower_new_dentry = unionfs_lower_dentry(new_dentry);
5003 +
5004 +       /*
5005 +        * check if whiteout exists in the branch of new dentry, i.e. lookup
5006 +        * .wh.foo first. If present, delete it
5007 +        */
5008 +       name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
5009 +       if (unlikely(IS_ERR(name))) {
5010 +               err = PTR_ERR(name);
5011 +               goto out;       /* NOTE(review): name is still an ERR_PTR when out: calls kfree(name) */
5012 +       }
5013 +
5014 +       whiteout_dentry = lookup_one_len(name, lower_new_dentry->d_parent,
5015 +                                        new_dentry->d_name.len +
5016 +                                        UNIONFS_WHLEN);
5017 +       if (IS_ERR(whiteout_dentry)) {
5018 +               err = PTR_ERR(whiteout_dentry);
5019 +               goto out;
5020 +       }
5021 +
5022 +       if (!whiteout_dentry->d_inode) {
5023 +               dput(whiteout_dentry);
5024 +               whiteout_dentry = NULL;
5025 +       } else {
5026 +               /* found a .wh.foo entry, unlink it and then call vfs_link() */
5027 +               lower_dir_dentry = lock_parent_wh(whiteout_dentry);
5028 +               err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
5029 +               if (!err) {
5030 +                       /* see Documentation/filesystems/unionfs/issues.txt */
5031 +                       lockdep_off();
5032 +                       err = vfs_unlink(lower_dir_dentry->d_inode,
5033 +                                        whiteout_dentry);
5034 +                       lockdep_on();
5035 +               }
5036 +
5037 +               fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
5038 +               dir->i_nlink = unionfs_get_nlinks(dir);
5039 +               unlock_dir(lower_dir_dentry);
5040 +               lower_dir_dentry = NULL;
5041 +               dput(whiteout_dentry);
5042 +               if (err)
5043 +                       goto out;
5044 +       }
5045 +
5046 +       if (dbstart(old_dentry) != dbstart(new_dentry)) {
5047 +               lower_new_dentry = create_parents(dir, new_dentry,
5048 +                                                 new_dentry->d_name.name,
5049 +                                                 dbstart(old_dentry));
5050 +               err = PTR_ERR(lower_new_dentry);        /* only meaningful when the result is an ERR_PTR */
5051 +               if (IS_COPYUP_ERR(err))
5052 +                       goto docopyup;
5053 +               if (!lower_new_dentry || IS_ERR(lower_new_dentry))
5054 +                       goto out;
5055 +       }
5056 +       lower_new_dentry = unionfs_lower_dentry(new_dentry);
5057 +       lower_old_dentry = unionfs_lower_dentry(old_dentry);
5058 +
5059 +       BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
5060 +       lower_dir_dentry = lock_parent(lower_new_dentry);
5061 +       err = is_robranch(old_dentry);
5062 +       if (!err) {
5063 +               /* see Documentation/filesystems/unionfs/issues.txt */
5064 +               lockdep_off();
5065 +               err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
5066 +                              lower_new_dentry);
5067 +               lockdep_on();
5068 +       }
5069 +       unlock_dir(lower_dir_dentry);
5070 +
5071 +docopyup:
5072 +       if (IS_COPYUP_ERR(err)) {
5073 +               int old_bstart = dbstart(old_dentry);
5074 +               int bindex;
5075 +
5076 +               for (bindex = old_bstart - 1; bindex >= 0; bindex--) {  /* walk toward branch 0 */
5077 +                       err = copyup_dentry(old_dentry->d_parent->d_inode,
5078 +                                           old_dentry, old_bstart,
5079 +                                           bindex, old_dentry->d_name.name,
5080 +                                           old_dentry->d_name.len, NULL,
5081 +                                           i_size_read(old_dentry->d_inode));
5082 +                       if (!err) {
5083 +                               lower_new_dentry =
5084 +                                       create_parents(dir, new_dentry,
5085 +                                                      new_dentry->d_name.name,
5086 +                                                      bindex); /* NOTE(review): no IS_ERR check before lock_parent() */
5087 +                               lower_old_dentry =
5088 +                                       unionfs_lower_dentry(old_dentry);
5089 +                               lower_dir_dentry =
5090 +                                       lock_parent(lower_new_dentry);
5091 +                               /*
5092 +                                * see
5093 +                                * Documentation/filesystems/unionfs/issues.txt
5094 +                                */
5095 +                               lockdep_off();
5096 +                               /* do vfs_link */
5097 +                               err = vfs_link(lower_old_dentry,
5098 +                                              lower_dir_dentry->d_inode,
5099 +                                              lower_new_dentry);
5100 +                               lockdep_on();
5101 +                               unlock_dir(lower_dir_dentry);
5102 +                               goto check_link;
5103 +                       }
5104 +               }
5105 +               goto out;       /* no branch accepted the copyup: err holds the last failure */
5106 +       }
5107 +
5108 +check_link:
5109 +       if (err || !lower_new_dentry->d_inode)
5110 +               goto out;
5111 +
5112 +       /* It's a hard link, so use the same inode */
5113 +       new_dentry->d_inode = igrab(old_dentry->d_inode);
5114 +       d_instantiate(new_dentry, new_dentry->d_inode);
5115 +       unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode);
5116 +       fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode);
5117 +
5118 +       /* propagate number of hard-links */
5119 +       old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
5120 +       /* new dentry's ctime may have changed due to hard-link counts */
5121 +       unionfs_copy_attr_times(new_dentry->d_inode);
5122 +
5123 +out:
5124 +       if (!new_dentry->d_inode)
5125 +               d_drop(new_dentry);     /* still negative: unhash it */
5126 +
5127 +       kfree(name);
5128 +       if (!err)
5129 +               unionfs_postcopyup_setmnt(new_dentry);
5130 +
5131 +       unionfs_check_inode(dir);
5132 +       unionfs_check_dentry(new_dentry);
5133 +       unionfs_check_dentry(old_dentry);
5134 +
5135 +       unionfs_unlock_dentry(new_dentry);
5136 +       unionfs_unlock_dentry(old_dentry);
5137 +       unionfs_read_unlock(old_dentry->d_sb);
5138 +
5139 +       return err;
5140 +}
5141 +
5142 +static int unionfs_symlink(struct inode *parent, struct dentry *dentry,
5143 +                          const char *symname)
5144 +{
5145 +       int err = 0;
5146 +       struct dentry *lower_dentry = NULL;
5147 +       struct dentry *wh_dentry = NULL;        /* never assigned in this function */
5148 +       struct dentry *lower_parent_dentry = NULL;
5149 +       char *name = NULL;                      /* never assigned in this function */
5150 +       int valid = 0;
5151 +       umode_t mode;
5152 +
5153 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5154 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5155 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
5156 +
5157 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false);
5158 +       if (unlikely(!valid)) {
5159 +               err = -ESTALE;
5160 +               goto out;
5161 +       }
5162 +       if (unlikely(dentry->d_inode &&
5163 +                    !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) {
5164 +               err = -ESTALE;
5165 +               goto out;
5166 +       }
5167 +
5168 +       /*
5169 +        * It's only a bug if this dentry was not negative and couldn't be
5170 +        * revalidated (shouldn't happen).
5171 +        */
5172 +       BUG_ON(!valid && dentry->d_inode);      /* NOTE(review): valid is non-zero here, so this cannot fire */
5173 +
5174 +       lower_dentry = find_writeable_branch(parent, dentry);
5175 +       if (IS_ERR(lower_dentry)) {
5176 +               err = PTR_ERR(lower_dentry);
5177 +               goto out;
5178 +       }
5179 +
5180 +       lower_parent_dentry = lock_parent(lower_dentry);
5181 +       if (IS_ERR(lower_parent_dentry)) {
5182 +               err = PTR_ERR(lower_parent_dentry);
5183 +               goto out;
5184 +       }
5185 +
5186 +       mode = S_IALLUGO;
5187 +       err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry,
5188 +                         symname, mode);
5189 +       if (!err) {
5190 +               err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5191 +               if (!err) {
5192 +                       unionfs_copy_attr_times(parent);
5193 +                       fsstack_copy_inode_size(parent,
5194 +                                               lower_parent_dentry->d_inode);
5195 +                       /* update no. of links on parent directory */
5196 +                       parent->i_nlink = unionfs_get_nlinks(parent);
5197 +               }
5198 +       }
5199 +
5200 +       unlock_dir(lower_parent_dentry);
5201 +
5202 +out:
5203 +       dput(wh_dentry);        /* always NULL here (see declaration) */
5204 +       kfree(name);            /* always NULL here (see declaration) */
5205 +
5206 +       if (!err) {
5207 +               unionfs_postcopyup_setmnt(dentry);
5208 +               unionfs_check_inode(parent);
5209 +               unionfs_check_dentry(dentry);
5210 +       }
5211 +       unionfs_unlock_dentry(dentry->d_parent);
5212 +       unionfs_unlock_dentry(dentry);
5213 +       unionfs_read_unlock(dentry->d_sb);
5214 +       return err;
5215 +}
5216 +
5217 +/*
5218 + * mkdir: drop a whiteout for the name if present, then create the directory
5219 + * in the first writable branch scanning left from dbstart; mark it opaque.
5220 + */
5221 +static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
5222 +{
5223 +       int err = 0;
5224 +       struct dentry *lower_dentry = NULL, *whiteout_dentry = NULL;
5225 +       struct dentry *lower_parent_dentry = NULL;
5226 +       int bindex = 0, bstart;
5227 +       char *name = NULL;
5228 +       struct sioq_args args;
5229 +       int valid;
5230 +
5231 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5232 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5233 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
5234 +
5235 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false);
5236 +       if (unlikely(!valid)) {
5237 +               err = -ESTALE;  /* same as what real_lookup does */
5238 +               goto out;
5239 +       }
5240 +       if (unlikely(dentry->d_inode &&
5241 +                    !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) {
5242 +               err = -ESTALE;
5243 +               goto out;
5244 +       }
5245 +
5246 +       bstart = dbstart(dentry);
5247 +       lower_dentry = unionfs_lower_dentry(dentry);
5248 +
5249 +       /*
5250 +        * check if whiteout exists in this branch, i.e. lookup .wh.foo
5251 +        * first.
5252 +        */
5253 +       name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
5254 +       if (unlikely(IS_ERR(name))) {
5255 +               err = PTR_ERR(name);
5256 +               name = NULL;    /* never kfree() an ERR_PTR at out: */
5257 +               goto out;
5258 +       }
5259 +
5260 +       whiteout_dentry = lookup_one_len(name, lower_dentry->d_parent,
5261 +                                        dentry->d_name.len + UNIONFS_WHLEN);
5262 +       if (IS_ERR(whiteout_dentry)) {
5263 +               err = PTR_ERR(whiteout_dentry);
5264 +               goto out;
5265 +       }
5266 +
5267 +       if (!whiteout_dentry->d_inode) {
5268 +               dput(whiteout_dentry);
5269 +               whiteout_dentry = NULL;
5270 +       } else {
5271 +               lower_parent_dentry = lock_parent_wh(whiteout_dentry);
5272 +
5273 +               /* found a .wh.foo entry, remove it then do vfs_mkdir */
5274 +               err = is_robranch_super(dentry->d_sb, bstart);
5275 +               if (!err) {
5276 +                       args.unlink.parent = lower_parent_dentry->d_inode;
5277 +                       args.unlink.dentry = whiteout_dentry;
5278 +                       run_sioq(__unionfs_unlink, &args);
5279 +                       err = args.err;
5280 +               }
5281 +               dput(whiteout_dentry);
5282 +               unlock_dir(lower_parent_dentry);
5283 +
5284 +               if (err) {
5285 +                       /* exit if the error returned was NOT -EROFS */
5286 +                       if (!IS_COPYUP_ERR(err))
5287 +                               goto out;
5288 +                       bstart--;       /* retry one branch to the left */
5289 +               }
5290 +       }
5291 +
5292 +       for (bindex = bstart; bindex >= 0; bindex--) {
5293 +               int i;
5294 +               int bend = dbend(dentry);
5295 +
5296 +               if (is_robranch_super(dentry->d_sb, bindex))
5297 +                       continue;
5298 +
5299 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5300 +               if (!lower_dentry) {
5301 +                       lower_dentry = create_parents(parent, dentry,
5302 +                                                     dentry->d_name.name,
5303 +                                                     bindex);
5304 +                       if (!lower_dentry || IS_ERR(lower_dentry)) {
5305 +                               printk(KERN_ERR "unionfs: lower dentry "
5306 +                                      " NULL for bindex = %d\n", bindex);
5307 +                               continue;
5308 +                       }
5309 +               }
5310 +
5311 +               lower_parent_dentry = lock_parent(lower_dentry);
5312 +
5313 +               if (IS_ERR(lower_parent_dentry)) {
5314 +                       err = PTR_ERR(lower_parent_dentry);
5315 +                       goto out;
5316 +               }
5317 +
5318 +               err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry,
5319 +                               mode);
5320 +
5321 +               unlock_dir(lower_parent_dentry);
5322 +
5323 +               /* did the mkdir succeed? */
5324 +               if (err)
5325 +                       break;
5326 +
5327 +               for (i = bindex + 1; i <= bend; i++) {  /* bend is inclusive */
5328 +                       if (unionfs_lower_dentry_idx(dentry, i)) {
5329 +                               dput(unionfs_lower_dentry_idx(dentry, i));
5330 +                               unionfs_set_lower_dentry_idx(dentry, i, NULL);
5331 +                       }
5332 +               }
5333 +               set_dbend(dentry, bindex);
5334 +
5335 +               /*
5336 +                * Only INTERPOSE_LOOKUP can return a value other than 0 on
5337 +                * err.
5338 +                */
5339 +               err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5340 +               if (err)
5341 +                       goto out;       /* don't let make_dir_opaque mask it */
5342 +
5343 +               unionfs_copy_attr_times(parent);
5344 +               fsstack_copy_inode_size(parent,
5345 +                                       lower_parent_dentry->d_inode);
5346 +               /* update number of links on parent directory */
5347 +               parent->i_nlink = unionfs_get_nlinks(parent);
5348 +
5349 +               err = make_dir_opaque(dentry, dbstart(dentry));
5350 +               if (err) {
5351 +                       printk(KERN_ERR "unionfs: mkdir: error creating "
5352 +                              ".wh.__dir_opaque: %d\n", err);
5353 +                       goto out;
5354 +               }
5355 +
5356 +               /* we are done! */
5357 +               break;
5358 +       }
5359 +
5360 +out:
5361 +       if (!dentry->d_inode)
5362 +               d_drop(dentry);
5363 +
5364 +       kfree(name);
5365 +
5366 +       if (!err) {
5367 +               unionfs_copy_attr_times(dentry->d_inode);
5368 +               unionfs_postcopyup_setmnt(dentry);
5369 +       }
5370 +       unionfs_check_inode(parent);
5371 +       unionfs_check_dentry(dentry);
5372 +       unionfs_unlock_dentry(dentry->d_parent);
5373 +       unionfs_unlock_dentry(dentry);
5374 +       unionfs_read_unlock(dentry->d_sb);
5375 +
5376 +       return err;
5377 +}
5378 +
5379 +static int unionfs_mknod(struct inode *parent, struct dentry *dentry, int mode,
5380 +                        dev_t dev)
5381 +{
5382 +       int err = 0;
5383 +       struct dentry *lower_dentry = NULL;
5384 +       struct dentry *wh_dentry = NULL;        /* never assigned below */
5385 +       struct dentry *lower_parent_dentry = NULL;
5386 +       char *name = NULL;                      /* never assigned below */
5387 +       int valid = 0;
5388 +
5389 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5390 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5391 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
5392 +
5393 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false);
5394 +       if (unlikely(!valid)) {
5395 +               err = -ESTALE;
5396 +               goto out;
5397 +       }
5398 +       if (unlikely(dentry->d_inode &&
5399 +                    !__unionfs_d_revalidate_one_locked(dentry, NULL, false))) {
5400 +               err = -ESTALE;
5401 +               goto out;
5402 +       }
5403 +
5404 +       /*
5405 +        * It's only a bug if this dentry was not negative and couldn't be
5406 +        * revalidated (shouldn't happen; !valid already jumped to out above).
5407 +        */
5408 +       BUG_ON(!valid && dentry->d_inode);
5409 +
5410 +       lower_dentry = find_writeable_branch(parent, dentry);
5411 +       if (IS_ERR(lower_dentry)) {
5412 +               err = PTR_ERR(lower_dentry);
5413 +               goto out;
5414 +       }
5415 +
5416 +       lower_parent_dentry = lock_parent(lower_dentry);
5417 +       if (IS_ERR(lower_parent_dentry)) {
5418 +               err = PTR_ERR(lower_parent_dentry);
5419 +               goto out;
5420 +       }
5421 +
5422 +       err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev);
5423 +       if (!err) {
5424 +               err = PTR_ERR(unionfs_interpose(dentry, parent->i_sb, 0));
5425 +               if (!err) {
5426 +                       unionfs_copy_attr_times(parent);
5427 +                       fsstack_copy_inode_size(parent,
5428 +                                               lower_parent_dentry->d_inode);
5429 +                       /* update no. of links on parent directory */
5430 +                       parent->i_nlink = unionfs_get_nlinks(parent);
5431 +               }
5432 +       }
5433 +
5434 +       unlock_dir(lower_parent_dentry);
5435 +
5436 +out:
5437 +       dput(wh_dentry);        /* wh_dentry is still NULL here */
5438 +       kfree(name);            /* name is still NULL here */
5439 +
5440 +       if (!err) {
5441 +               unionfs_postcopyup_setmnt(dentry);
5442 +               unionfs_check_inode(parent);
5443 +               unionfs_check_dentry(dentry);
5444 +       }
5445 +       unionfs_unlock_dentry(dentry->d_parent);        /* reverse lock order */
5446 +       unionfs_unlock_dentry(dentry);
5447 +       unionfs_read_unlock(dentry->d_sb);
5448 +       return err;
5449 +}
5450 +
5451 +static int unionfs_readlink(struct dentry *dentry, char __user *buf,
5452 +                           int bufsiz)
5453 +{
5454 +       int err;
5455 +       struct dentry *lower_dentry;
5456 +
5457 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5458 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5459 +
5460 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5461 +               err = -ESTALE;
5462 +               goto out;
5463 +       }
5464 +
5465 +       lower_dentry = unionfs_lower_dentry(dentry);
5466 +
5467 +       if (!lower_dentry->d_inode->i_op ||
5468 +           !lower_dentry->d_inode->i_op->readlink) {
5469 +               err = -EINVAL;  /* lower inode has no ->readlink method */
5470 +               goto out;
5471 +       }
5472 +
5473 +       err = lower_dentry->d_inode->i_op->readlink(lower_dentry,
5474 +                                                   buf, bufsiz);
5475 +       if (err > 0)    /* only when the lower readlink returned data */
5476 +               fsstack_copy_attr_atime(dentry->d_inode,
5477 +                                       lower_dentry->d_inode);
5478 +
5479 +out:
5480 +       unionfs_check_dentry(dentry);
5481 +       unionfs_unlock_dentry(dentry);
5482 +       unionfs_read_unlock(dentry->d_sb);
5483 +
5484 +       return err;     /* lower ->readlink result, or -ESTALE/-EINVAL */
5485 +}
5486 +
5487 +/*
5488 + * unionfs_follow_link takes a dentry, but it is simple.  It only needs to
5489 + * allocate some memory and then call our ->readlink method.  Our
5490 + * unionfs_readlink *does* lock our dentry and revalidate the dentry.
5491 + * Therefore, we do not have to lock our dentry here, to prevent a deadlock;
5492 + * nor do we need to revalidate it either.  It is safe to not lock our
5493 + * dentry here, nor revalidate it, because unionfs_follow_link does not do
5494 + * anything (prior to calling ->readlink) which could become inconsistent
5495 + * due to branch management.  We also don't need to lock our super because
5496 + * this function isn't affected by branch-management.
5497 + */
5498 +static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
5499 +{
5500 +       char *buf;
5501 +       int len = PAGE_SIZE - 1, err;   /* max link bytes; NUL needs one more */
5502 +       mm_segment_t old_fs;
5503 +
5504 +       /* This is freed by the put_link method assuming a successful call. */
5505 +       buf = kmalloc(len + 1, GFP_KERNEL);
5506 +       if (unlikely(!buf)) {
5507 +               err = -ENOMEM;
5508 +               goto out;
5509 +       }
5510 +
5511 +       /* read the symlink, and then we will follow it */
5512 +       old_fs = get_fs();
5513 +       set_fs(KERNEL_DS);      /* buf is a kernel pointer passed as __user */
5514 +       err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
5515 +       set_fs(old_fs);
5516 +       if (err < 0) {
5517 +               kfree(buf);
5518 +               buf = NULL;
5519 +               goto out;
5520 +       }
5521 +       buf[err] = 0;   /* readlink was capped at len, so this is in bounds */
5522 +       nd_set_link(nd, buf);
5523 +       err = 0;
5524 +
5525 +out:
5526 +       if (!err) {
5527 +               unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5528 +               unionfs_check_dentry(dentry);
5529 +               unionfs_unlock_dentry(dentry);
5530 +       }
5531 +       unionfs_check_nd(nd);
5532 +       return ERR_PTR(err);    /* ERR_PTR(0) is NULL on success */
5533 +}
5534 +
5535 +/* FIXME: We may not have to lock here */
5536 +static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
5537 +                            void *cookie)
5538 +{
5539 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5540 +
5541 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5542 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, nd, false)))
5543 +               printk(KERN_ERR /* only logged; cleanup below still runs */
5544 +                      "unionfs: put_link failed to revalidate dentry\n");
5545 +
5546 +       unionfs_check_dentry(dentry);
5547 +       unionfs_check_nd(nd);
5548 +       kfree(nd_get_link(nd)); /* release the link string stored in nd */
5549 +       unionfs_unlock_dentry(dentry);
5550 +       unionfs_read_unlock(dentry->d_sb);
5551 +}
5552 +
5553 +/*
5554 + * Don't grab the superblock read-lock in unionfs_permission, which prevents
5555 + * a deadlock with the branch-management "add branch" code (which grabbed
5556 + * the write lock).  It is safe to not grab the read lock here, because even
5557 + * with branch management taking place, there is no chance that
5558 + * unionfs_permission, or anything it calls, will use stale branch
5559 + * information.
5560 + */
5561 +static int unionfs_permission(struct inode *inode, int mask,
5562 +                             struct nameidata *nd)
5563 +{
5564 +       struct inode *lower_inode = NULL;
5565 +       int err = 0;
5566 +       int bindex, bstart, bend;
5567 +       const int is_file = !S_ISDIR(inode->i_mode);    /* any non-directory */
5568 +       const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
5569 +
5570 +       if (nd)
5571 +               unionfs_lock_dentry(nd->path.dentry, UNIONFS_DMUTEX_CHILD);
5572 +
5573 +       if (!UNIONFS_I(inode)->lower_inodes) {
5574 +               if (is_file)    /* dirs can be unlinked but chdir'ed to */
5575 +                       err = -ESTALE;  /* force revalidate */
5576 +               goto out;       /* err is still 0 for directories */
5577 +       }
5578 +       bstart = ibstart(inode);
5579 +       bend = ibend(inode);
5580 +       if (unlikely(bstart < 0 || bend < 0)) {
5581 +               /*
5582 +                * With branch-management, we can get a stale inode here.
5583 +                * If so, we return ESTALE back to link_path_walk, which
5584 +                * would discard the dcache entry and re-lookup the
5585 +                * dentry+inode.  This should be equivalent to issuing
5586 +                * __unionfs_d_revalidate_chain on nd.dentry here.
5587 +                */
5588 +               if (is_file)    /* dirs can be unlinked but chdir'ed to */
5589 +                       err = -ESTALE;  /* force revalidate */
5590 +               goto out;       /* err is still 0 for directories */
5591 +       }
5592 +
5593 +       for (bindex = bstart; bindex <= bend; bindex++) {
5594 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
5595 +               if (!lower_inode)
5596 +                       continue;       /* nothing in this branch */
5597 +
5598 +               /*
5599 +                * check the condition for D-F-D underlying files/directories,
5600 +                * we don't have to check for files, if we are checking for
5601 +                * directories.
5602 +                */
5603 +               if (!is_file && !S_ISDIR(lower_inode->i_mode))
5604 +                       continue;
5605 +
5606 +               /*
5607 +                * We check basic permissions, but we ignore any conditions
5608 +                * such as readonly file systems or branches marked as
5609 +                * readonly, because those conditions should lead to a
5610 +                * copyup taking place later on.
5611 +                */
5612 +               err = permission(lower_inode, mask, nd);
5613 +               if (err && bindex > 0) {        /* failure not on branch 0 */
5614 +                       umode_t mode = lower_inode->i_mode;
5615 +                       if (is_robranch_super(inode->i_sb, bindex) &&
5616 +                           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
5617 +                               err = 0;
5618 +                       if (IS_COPYUP_ERR(err))
5619 +                               err = 0;
5620 +               }
5621 +
5622 +               /*
5623 +                * The permissions are an intersection of the overall directory
5624 +                * permissions, so we fail if one fails.
5625 +                */
5626 +               if (err)
5627 +                       goto out;
5628 +
5629 +               /* only the leftmost file matters. */
5630 +               if (is_file || write_mask) {
5631 +                       if (is_file && write_mask) {
5632 +                               /* take and immediately drop write access */
5633 +                               err = get_write_access(lower_inode);
5634 +                               if (!err)
5635 +                                       put_write_access(lower_inode);
5636 +                       }
5637 +                       break;
5638 +               }
5639 +       }
5640 +       /* sync times which may have changed (asynchronously) below */
5641 +       unionfs_copy_attr_times(inode);
5642 +
5643 +out:
5644 +       unionfs_check_inode(inode);
5645 +       unionfs_check_nd(nd);
5646 +       if (nd)
5647 +               unionfs_unlock_dentry(nd->path.dentry);
5648 +       return err;
5649 +}
5649 +
5650 +static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
5651 +{
5652 +       int err = 0;
5653 +       struct dentry *lower_dentry;
5654 +       struct inode *inode;
5655 +       struct inode *lower_inode;
5656 +       int bstart, bend, bindex;
5657 +       loff_t size;
5658 +
5659 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
5660 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
5661 +
5662 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
5663 +               err = -ESTALE;
5664 +               goto out;
5665 +       }
5666 +
5667 +       bstart = dbstart(dentry);
5668 +       bend = dbend(dentry);   /* read here but not used again below */
5669 +       inode = dentry->d_inode;
5670 +
5671 +       /*
5672 +        * mode change is for clearing setuid/setgid. Allow lower filesystem
5673 +        * to reinterpret it in its own way.
5674 +        */
5675 +       if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
5676 +               ia->ia_valid &= ~ATTR_MODE;
5677 +
5678 +       lower_dentry = unionfs_lower_dentry(dentry);
5679 +       BUG_ON(!lower_dentry);  /* should never happen after above revalidate */
5680 +
5681 +       /* copyup if the file is on a read only branch */
5682 +       if (is_robranch_super(dentry->d_sb, bstart)
5683 +           || IS_RDONLY(lower_dentry->d_inode)) {
5684 +               /* check if we have a branch to copy up to */
5685 +               if (bstart <= 0) {
5686 +                       err = -EACCES;
5687 +                       goto out;
5688 +               }
5689 +
5690 +               if (ia->ia_valid & ATTR_SIZE)
5691 +                       size = ia->ia_size;     /* copy up at the new size */
5692 +               else
5693 +                       size = i_size_read(inode);
5694 +               /* copyup to next available branch */
5695 +               for (bindex = bstart - 1; bindex >= 0; bindex--) {
5696 +                       err = copyup_dentry(dentry->d_parent->d_inode,
5697 +                                           dentry, bstart, bindex,
5698 +                                           dentry->d_name.name,
5699 +                                           dentry->d_name.len,
5700 +                                           NULL, size);
5701 +                       if (!err)
5702 +                               break;  /* first successful copyup wins */
5703 +               }
5704 +               if (err)
5705 +                       goto out;       /* error from the last branch tried */
5706 +               /* get updated lower_dentry after copyup */
5707 +               lower_dentry = unionfs_lower_dentry(dentry);
5708 +       }
5709 +
5710 +       lower_inode = unionfs_lower_inode(inode);
5711 +
5712 +       /*
5713 +        * If shrinking, first truncate upper level to cancel writing dirty
5714 +        * pages beyond the new eof; and also if its maxbytes is more
5715 +        * limiting (fail with -EFBIG before making any change to the lower
5716 +        * level).  There is no need to vmtruncate the upper level
5717 +        * afterwards in the other cases: we fsstack_copy_inode_size from
5718 +        * the lower level.
5719 +        */
5720 +       if (ia->ia_valid & ATTR_SIZE) {
5721 +               size = i_size_read(inode);      /* current upper-level size */
5722 +               if (ia->ia_size < size || (ia->ia_size > size &&
5723 +                   inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) {
5724 +                       err = vmtruncate(inode, ia->ia_size);
5725 +                       if (err)
5726 +                               goto out;
5727 +               }
5728 +       }
5729 +
5730 +       /* notify the (possibly copied-up) lower inode */
5731 +       err = notify_change(lower_dentry, ia);
5732 +       if (err)
5733 +               goto out;
5734 +
5735 +       /* get attributes from the first lower inode */
5736 +       unionfs_copy_attr_all(inode, lower_inode);
5737 +       /*
5738 +        * unionfs_copy_attr_all will copy the lower times to our inode if
5739 +        * the lower ones are newer (useful for cache coherency).  However,
5740 +        * ->setattr is the only place in which we may have to copy the
5741 +        * lower inode times absolutely, to support utimes(2).
5742 +        */
5743 +       if (ia->ia_valid & ATTR_MTIME_SET)
5744 +               inode->i_mtime = lower_inode->i_mtime;
5745 +       if (ia->ia_valid & ATTR_CTIME)
5746 +               inode->i_ctime = lower_inode->i_ctime;
5747 +       if (ia->ia_valid & ATTR_ATIME_SET)
5748 +               inode->i_atime = lower_inode->i_atime;
5749 +       fsstack_copy_inode_size(inode, lower_inode);
5750 +       /* if setattr succeeded, then parent dir may have changed */
5751 +       unionfs_copy_attr_times(dentry->d_parent->d_inode);
5752 +out:
5753 +       if (!err)
5754 +               unionfs_check_dentry(dentry);
5755 +       unionfs_unlock_dentry(dentry);
5756 +       unionfs_read_unlock(dentry->d_sb);
5757 +
5758 +       return err;
5759 +}
5760 +
5761 +struct inode_operations unionfs_symlink_iops = {
5762 +       .readlink       = unionfs_readlink,
5763 +       .permission     = unionfs_permission,
5764 +       .follow_link    = unionfs_follow_link,  /* allocates the link buffer */
5765 +       .setattr        = unionfs_setattr,
5766 +       .put_link       = unionfs_put_link,     /* frees the link buffer */
5767 +};
5768 +
5769 +struct inode_operations unionfs_dir_iops = {
5770 +       .create         = unionfs_create,
5771 +       .lookup         = unionfs_lookup,
5772 +       .link           = unionfs_link,
5773 +       .unlink         = unionfs_unlink,
5774 +       .symlink        = unionfs_symlink,
5775 +       .mkdir          = unionfs_mkdir,
5776 +       .rmdir          = unionfs_rmdir,
5777 +       .mknod          = unionfs_mknod,
5778 +       .rename         = unionfs_rename,
5779 +       .permission     = unionfs_permission,   /* same handler in all tables */
5780 +       .setattr        = unionfs_setattr,      /* same handler in all tables */
5781 +#ifdef CONFIG_UNION_FS_XATTR
5782 +       .setxattr       = unionfs_setxattr,
5783 +       .getxattr       = unionfs_getxattr,
5784 +       .removexattr    = unionfs_removexattr,
5785 +       .listxattr      = unionfs_listxattr,
5786 +#endif /* CONFIG_UNION_FS_XATTR */
5787 +};
5788 +
5789 +struct inode_operations unionfs_main_iops = {
5790 +       .permission     = unionfs_permission,
5791 +       .setattr        = unionfs_setattr,
5792 +#ifdef CONFIG_UNION_FS_XATTR   /* xattr handlers only when configured in */
5793 +       .setxattr       = unionfs_setxattr,
5794 +       .getxattr       = unionfs_getxattr,
5795 +       .removexattr    = unionfs_removexattr,
5796 +       .listxattr      = unionfs_listxattr,
5797 +#endif /* CONFIG_UNION_FS_XATTR */
5798 +};
5799 diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c
5800 new file mode 100644
5801 index 0000000..755158e
5802 --- /dev/null
5803 +++ b/fs/unionfs/lookup.c
5804 @@ -0,0 +1,665 @@
5805 +/*
5806 + * Copyright (c) 2003-2007 Erez Zadok
5807 + * Copyright (c) 2003-2006 Charles P. Wright
5808 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
5809 + * Copyright (c) 2005-2006 Junjiro Okajima
5810 + * Copyright (c) 2005      Arun M. Krishnakumar
5811 + * Copyright (c) 2004-2006 David P. Quigley
5812 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
5813 + * Copyright (c) 2003      Puja Gupta
5814 + * Copyright (c) 2003      Harikesavan Krishnan
5815 + * Copyright (c) 2003-2007 Stony Brook University
5816 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
5817 + *
5818 + * This program is free software; you can redistribute it and/or modify
5819 + * it under the terms of the GNU General Public License version 2 as
5820 + * published by the Free Software Foundation.
5821 + */
5822 +
5823 +#include "union.h"
5824 +
5825 +static int realloc_dentry_private_data(struct dentry *dentry);
5826 +
5827 +/* a name is valid iff it has neither the whiteout nor opaque-dir prefix */
5828 +static int is_validname(const char *name)
5829 +{
5830 +       const size_t opaque_len = sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1;
5831 +
5832 +       if (strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN) == 0 ||
5833 +           strncmp(name, UNIONFS_DIR_OPAQUE_NAME, opaque_len) == 0)
5834 +               return 0;
5835 +       return 1;
5836 +}
5837 +
5838 +/*
5839 + * One of the utility functions for lookup: report whether the lower
5840 + * directory of @dentry in branch @bindex is opaque.
5841 + *
5842 + * Returns 1 when the UNIONFS_DIR_OPAQUE entry is positive in that lower
5843 + * directory, 0 when it is negative, or the PTR_ERR() value if looking it
5844 + * up failed.
5845 + */
5846 +static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
5847 +{
5848 +       struct dentry *lower_dir = unionfs_lower_dentry_idx(dentry, bindex);
5849 +       struct inode *lower_dir_inode = lower_dir->d_inode;
5850 +       struct dentry *marker;
5851 +       struct sioq_args args;
5852 +       int opaque;
5853 +
5854 +       BUG_ON(!S_ISDIR(lower_dir_inode->i_mode));
5855 +
5856 +       mutex_lock(&lower_dir_inode->i_mutex);
5857 +       if (permission(lower_dir_inode, MAY_EXEC, NULL)) {
5858 +               /* MAY_EXEC was refused: hand the lookup to run_sioq() */
5859 +               args.is_opaque.dentry = lower_dir;
5860 +               run_sioq(__is_opaque_dir, &args);
5861 +               marker = args.ret;
5862 +       } else {
5863 +               marker = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dir,
5864 +                                       sizeof(UNIONFS_DIR_OPAQUE) - 1);
5865 +       }
5866 +       mutex_unlock(&lower_dir_inode->i_mutex);
5867 +
5868 +       /* propagate a failed marker lookup to the caller */
5869 +       if (IS_ERR(marker))
5870 +               return PTR_ERR(marker);
5871 +
5872 +       /* the directory is opaque iff the marker dentry is positive */
5873 +       opaque = marker->d_inode ? 1 : 0;
5874 +       dput(marker);
5875 +
5876 +       return opaque;
5877 +}
5878 +
5879 +/*
5880 + * Main (and complex) driver function for Unionfs's lookup
5881 + *
5882 + * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error
5883 + * PTR if d_splice returned a different dentry.
5884 + *
5885 + * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's
5886 + * inode info must be locked.  If lookupmode is INTERPOSE_LOOKUP (i.e., a
5887 + * newly looked-up dentry), then unionfs_lookup_backend will return a locked
5888 + * dentry's info, which the caller must unlock.
5889 + */
5890 +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
5891 +                                     struct nameidata *nd, int lookupmode)
5892 +{
5893 +       int err = 0;
5894 +       struct dentry *lower_dentry = NULL;
5895 +       struct dentry *wh_lower_dentry = NULL;
5896 +       struct dentry *lower_dir_dentry = NULL;
5897 +       struct dentry *parent_dentry = NULL;
5898 +       struct dentry *d_interposed = NULL;
5899 +       int bindex, bstart = -1, bend, bopaque;
5900 +       int dentry_count = 0;   /* Number of positive dentries. */
5901 +       int first_dentry_offset = -1; /* -1 is uninitialized */
5902 +       struct dentry *first_dentry = NULL;
5903 +       struct dentry *first_lower_dentry = NULL;
5904 +       struct vfsmount *first_lower_mnt = NULL;
5905 +       int opaque;
5906 +       char *whname = NULL;
5907 +       const char *name;
5908 +       int namelen;
5909 +
5910 +       /*
5911 +        * We should already have a lock on this dentry in the case of a
5912 +        * partial lookup, or a revalidation. Otherwise it is returned from
5913 +        * new_dentry_private_data already locked.
5914 +        */
5915 +       if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
5916 +           lookupmode == INTERPOSE_REVAL_NEG)
5917 +               verify_locked(dentry);
5918 +       else                    /* this could only be INTERPOSE_LOOKUP */
5919 +               BUG_ON(UNIONFS_D(dentry) != NULL);
5920 +
5921 +       switch (lookupmode) {
5922 +       case INTERPOSE_PARTIAL:
5923 +               break;
5924 +       case INTERPOSE_LOOKUP:
5925 +               err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD);
5926 +               if (unlikely(err))
5927 +                       goto out;
5928 +               break;
5929 +       default:
5930 +               /* default: can only be INTERPOSE_REVAL/REVAL_NEG */
5931 +               err = realloc_dentry_private_data(dentry);
5932 +               if (unlikely(err))
5933 +                       goto out;
5934 +               break;
5935 +       }
5936 +
5937 +       /* must initialize dentry operations */
5938 +       dentry->d_op = &unionfs_dops;
5939 +
5940 +       parent_dentry = dget_parent(dentry);
5941 +       /* We never partial lookup the root directory. */
5942 +       if (parent_dentry == dentry) {
5943 +               dput(parent_dentry);
5944 +               parent_dentry = NULL;
5945 +               goto out;
5946 +       }
5947 +
5948 +       name = dentry->d_name.name;
5949 +       namelen = dentry->d_name.len;
5950 +
5951 +       /* No dentries should get created for possible whiteout names. */
5952 +       if (!is_validname(name)) {
5953 +               err = -EPERM;
5954 +               goto out_free;
5955 +       }
5956 +
5957 +       /* Now start the actual lookup procedure. */
5958 +       bstart = dbstart(parent_dentry);
5959 +       bend = dbend(parent_dentry);
5960 +       bopaque = dbopaque(parent_dentry);
5961 +       BUG_ON(bstart < 0);
5962 +
5963 +       /*
5964 +        * It would be ideal if we could convert partial lookups to only have
5965 +        * to do this work when they really need to.  It could probably improve
5966 +        * performance quite a bit, and maybe simplify the rest of the code.
5967 +        */
5968 +       if (lookupmode == INTERPOSE_PARTIAL) {
5969 +               bstart++;
5970 +               if ((bopaque != -1) && (bopaque < bend))
5971 +                       bend = bopaque;
5972 +       }
5973 +
5974 +       for (bindex = bstart; bindex <= bend; bindex++) {
5975 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
5976 +               if (lookupmode == INTERPOSE_PARTIAL && lower_dentry)
5977 +                       continue;
5978 +               BUG_ON(lower_dentry != NULL);
5979 +
5980 +               lower_dir_dentry =
5981 +                       unionfs_lower_dentry_idx(parent_dentry, bindex);
5982 +
5983 +               /* if the parent lower dentry does not exist skip this */
5984 +               if (!(lower_dir_dentry && lower_dir_dentry->d_inode))
5985 +                       continue;
5986 +
5987 +               /* also skip it if the parent isn't a directory. */
5988 +               if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode))
5989 +                       continue;
5990 +
5991 +               /* Reuse the whiteout name because its value doesn't change. */
5992 +               if (!whname) {
5993 +                       whname = alloc_whname(name, namelen);
5994 +                       if (unlikely(IS_ERR(whname))) {
5995 +                               err = PTR_ERR(whname);
5996 +                               goto out_free;
5997 +                       }
5998 +               }
5999 +
6000 +               /* check if whiteout exists in this branch: lookup .wh.foo */
6001 +               wh_lower_dentry = lookup_one_len(whname, lower_dir_dentry,
6002 +                                                namelen + UNIONFS_WHLEN);
6003 +               if (IS_ERR(wh_lower_dentry)) {
6004 +                       dput(first_lower_dentry);
6005 +                       unionfs_mntput(first_dentry, first_dentry_offset);
6006 +                       err = PTR_ERR(wh_lower_dentry);
6007 +                       goto out_free;
6008 +               }
6009 +
6010 +               if (wh_lower_dentry->d_inode) {
6011 +                       /* We found a whiteout so let's give up. */
6012 +                       if (S_ISREG(wh_lower_dentry->d_inode->i_mode)) {
6013 +                               set_dbend(dentry, bindex);
6014 +                               set_dbopaque(dentry, bindex);
6015 +                               dput(wh_lower_dentry);
6016 +                               break;
6017 +                       }
6018 +                       err = -EIO;
6019 +                       printk(KERN_ERR "unionfs: EIO: invalid whiteout "
6020 +                              "entry type %d\n",
6021 +                              wh_lower_dentry->d_inode->i_mode);
6022 +                       dput(wh_lower_dentry);
6023 +                       dput(first_lower_dentry);
6024 +                       unionfs_mntput(first_dentry, first_dentry_offset);
6025 +                       goto out_free;
6026 +               }
6027 +
6028 +               dput(wh_lower_dentry);
6029 +               wh_lower_dentry = NULL;
6030 +
6031 +               /* Now do regular lookup; lookup foo */
6032 +               BUG_ON(!lower_dir_dentry);
6033 +               lower_dentry = lookup_one_len(name, lower_dir_dentry, namelen);
6034 +               if (IS_ERR(lower_dentry)) {
6035 +                       dput(first_lower_dentry);
6036 +                       unionfs_mntput(first_dentry, first_dentry_offset);
6037 +                       err = PTR_ERR(lower_dentry);
6038 +                       goto out_free;
6039 +               }
6040 +
6041 +               /*
6042 +                * Store the first negative dentry specially, because if they
6043 +                * are all negative we need this for future creates.
6044 +                */
6045 +               if (!lower_dentry->d_inode) {
6046 +                       if (!first_lower_dentry && (dbstart(dentry) == -1)) {
6047 +                               first_lower_dentry = lower_dentry;
6048 +                               /*
6049 +                                * FIXME: following line needs to be changed
6050 +                                * to allow mount-point crossing
6051 +                                */
6052 +                               first_dentry = parent_dentry;
6053 +                               first_lower_mnt =
6054 +                                       unionfs_mntget(parent_dentry, bindex);
6055 +                               first_dentry_offset = bindex;
6056 +                       } else {
6057 +                               dput(lower_dentry);
6058 +                       }
6059 +
6060 +                       continue;
6061 +               }
6062 +
6063 +               /*
6064 +                * If we already found at least one positive dentry
6065 +                * (dentry_count is non-zero), then we skip all remaining
6066 +                * positive dentries if their type is a non-dir.  This is
6067 +                * because only directories are allowed to stack on multiple
6068 +                * branches, but we have to skip non-dirs (to avoid, say,
6069 +                * calling readdir on a regular file).
6070 +                */
6071 +               if (!S_ISDIR(lower_dentry->d_inode->i_mode) && dentry_count) {
6072 +                       dput(lower_dentry);
6073 +                       continue;
6074 +               }
6075 +
6076 +               /* number of positive dentries */
6077 +               dentry_count++;
6078 +
6079 +               /* store underlying dentry */
6080 +               if (dbstart(dentry) == -1)
6081 +                       set_dbstart(dentry, bindex);
6082 +               unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
6083 +               /*
6084 +                * FIXME: the following line needs to get fixed to allow
6085 +                * mount-point crossing
6086 +                */
6087 +               unionfs_set_lower_mnt_idx(dentry, bindex,
6088 +                                         unionfs_mntget(parent_dentry,
6089 +                                                        bindex));
6090 +               set_dbend(dentry, bindex);
6091 +
6092 +               /* update parent directory's atime with the bindex */
6093 +               fsstack_copy_attr_atime(parent_dentry->d_inode,
6094 +                                       lower_dir_dentry->d_inode);
6095 +
6096 +               /* We terminate file lookups here. */
6097 +               if (!S_ISDIR(lower_dentry->d_inode->i_mode)) {
6098 +                       if (lookupmode == INTERPOSE_PARTIAL)
6099 +                               continue;
6100 +                       if (dentry_count == 1)
6101 +                               goto out_positive;
6102 +                       /* This can only happen with mixed D-*-F-* */
6103 +                       BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
6104 +                                       d_inode->i_mode));
6105 +                       continue;
6106 +               }
6107 +
6108 +               opaque = is_opaque_dir(dentry, bindex);
6109 +               if (opaque < 0) {
6110 +                       dput(first_lower_dentry);
6111 +                       unionfs_mntput(first_dentry, first_dentry_offset);
6112 +                       err = opaque;
6113 +                       goto out_free;
6114 +               } else if (opaque) {
6115 +                       set_dbend(dentry, bindex);
6116 +                       set_dbopaque(dentry, bindex);
6117 +                       break;
6118 +               }
6119 +       }
6120 +
6121 +       if (dentry_count)
6122 +               goto out_positive;
6123 +       else
6124 +               goto out_negative;
6125 +
6126 +out_negative:
6127 +       if (lookupmode == INTERPOSE_PARTIAL)
6128 +               goto out;
6129 +
6130 +       /* If we've only got negative dentries, then use the leftmost one. */
6131 +       if (lookupmode == INTERPOSE_REVAL) {
6132 +               if (dentry->d_inode)
6133 +                       UNIONFS_I(dentry->d_inode)->stale = 1;
6134 +               goto out;
6135 +       }
6136 +       if (!lower_dir_dentry) {
6137 +               err = -ENOENT;
6138 +               goto out;
6139 +       }
6140 +       /* This should only happen if we found a whiteout. */
6141 +       if (first_dentry_offset == -1) {
6142 +               first_lower_dentry = lookup_one_len(name, lower_dir_dentry,
6143 +                                                   namelen);
6144 +               first_dentry_offset = bindex;
6145 +               if (IS_ERR(first_lower_dentry)) {
6146 +                       err = PTR_ERR(first_lower_dentry);
6147 +                       goto out;
6148 +               }
6149 +
6150 +               /*
6151 +                * FIXME: the following line needs to be changed to allow
6152 +                * mount-point crossing
6153 +                */
6154 +               first_dentry = dentry;
6155 +               first_lower_mnt = unionfs_mntget(dentry->d_sb->s_root,
6156 +                                                bindex);
6157 +       }
6158 +       unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
6159 +                                    first_lower_dentry);
6160 +       unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
6161 +                                 first_lower_mnt);
6162 +       set_dbstart(dentry, first_dentry_offset);
6163 +       set_dbend(dentry, first_dentry_offset);
6164 +
6165 +       if (lookupmode == INTERPOSE_REVAL_NEG)
6166 +               BUG_ON(dentry->d_inode != NULL);
6167 +       else
6168 +               d_add(dentry, NULL);
6169 +       goto out;
6170 +
6171 +/* This part of the code is for positive dentries. */
6172 +out_positive:
6173 +       BUG_ON(dentry_count <= 0);
6174 +
6175 +       /*
6176 +        * If we're holding onto the first negative dentry & corresponding
6177 +        * vfsmount - throw it out.
6178 +        */
6179 +       dput(first_lower_dentry);
6180 +       unionfs_mntput(first_dentry, first_dentry_offset);
6181 +
6182 +       /* Partial lookups need to re-interpose, or throw away older negs. */
6183 +       if (lookupmode == INTERPOSE_PARTIAL) {
6184 +               if (dentry->d_inode) {
6185 +                       unionfs_reinterpose(dentry);
6186 +                       goto out;
6187 +               }
6188 +
6189 +               /*
6190 +                * This somehow turned positive, so it is as if we had a
6191 +                * negative revalidation.
6192 +                */
6193 +               lookupmode = INTERPOSE_REVAL_NEG;
6194 +
6195 +               update_bstart(dentry);
6196 +               bstart = dbstart(dentry);
6197 +               bend = dbend(dentry);
6198 +       }
6199 +
6200 +       /*
6201 +        * Interpose can return a dentry if d_splice returned a different
6202 +        * dentry.
6203 +        */
6204 +       d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
6205 +       if (IS_ERR(d_interposed))
6206 +               err = PTR_ERR(d_interposed);
6207 +       else if (d_interposed)
6208 +               dentry = d_interposed;
6209 +
6210 +       if (err)
6211 +               goto out_drop;
6212 +
6213 +       goto out;
6214 +
6215 +out_drop:
6216 +       d_drop(dentry);
6217 +
6218 +out_free:
6219 +       /* should dput all the underlying dentries on error condition */
6220 +       bstart = dbstart(dentry);
6221 +       if (bstart >= 0) {
6222 +               bend = dbend(dentry);
6223 +               for (bindex = bstart; bindex <= bend; bindex++) {
6224 +                       dput(unionfs_lower_dentry_idx(dentry, bindex));
6225 +                       unionfs_mntput(dentry, bindex);
6226 +               }
6227 +       }
6228 +       kfree(UNIONFS_D(dentry)->lower_paths);
6229 +       UNIONFS_D(dentry)->lower_paths = NULL;
6230 +       set_dbstart(dentry, -1);
6231 +       set_dbend(dentry, -1);
6232 +
6233 +out:
6234 +       if (!err && UNIONFS_D(dentry)) {
6235 +               BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
6236 +               BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
6237 +               if (dbstart(dentry) < 0 &&
6238 +                   dentry->d_inode && bstart >= 0 &&
6239 +                   (!UNIONFS_I(dentry->d_inode) ||
6240 +                    !UNIONFS_I(dentry->d_inode)->lower_inodes)) {
6241 +                       unionfs_mntput(dentry->d_sb->s_root, bstart);
6242 +                       dput(first_lower_dentry);
6243 +                       UNIONFS_I(dentry->d_inode)->stale = 1;
6244 +               }
6245 +       }
6246 +       kfree(whname);
6247 +       dput(parent_dentry);
6248 +       if (err && (lookupmode == INTERPOSE_LOOKUP))
6249 +               unionfs_unlock_dentry(dentry);
6250 +       if (!err && d_interposed)
6251 +               return d_interposed;
6252 +       if (dentry->d_inode && UNIONFS_I(dentry->d_inode)->stale &&
6253 +           first_dentry_offset >= 0)
6254 +               unionfs_mntput(dentry->d_sb->s_root, first_dentry_offset);
6255 +       return ERR_PTR(err);
6256 +}
6257 +
6258 +/*
6259 + * Fill in a unionfs dentry by running unionfs_lookup_backend() on it in
6260 + * INTERPOSE_PARTIAL mode with a zeroed nameidata.  Caller must lock this
6261 + * dentry with unionfs_lock_dentry.
6262 + * Returns: 0 if the backend returned NULL, or -ERRNO if it returned an error.
6263 + */
6264 +int unionfs_partial_lookup(struct dentry *dentry)
6265 +{
6266 +       struct dentry *tmp;
6267 +       struct nameidata nd = { .flags = 0 };
6268 +       int err = -ENOSYS; /* stays -ENOSYS if a dentry comes back */
6269 +
6270 +       tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
6271 +       if (!tmp) {
6272 +               err = 0;
6273 +               goto out;
6274 +       }
6275 +       if (IS_ERR(tmp)) {
6276 +               err = PTR_ERR(tmp);
6277 +               goto out;
6278 +       }
6279 +       /* need to change the interface: we cannot hand back another dentry */
6280 +       BUG_ON(tmp != dentry);
6281 +out:
6282 +       return err;
6283 +}
6284 +
6285 +/* Slab cache of struct unionfs_dentry_info objects (dentry private data). */
6286 +static struct kmem_cache *unionfs_dentry_cachep;
6287 +int unionfs_init_dentry_cache(void)
6288 +{
6289 +       unionfs_dentry_cachep =
6290 +               kmem_cache_create("unionfs_dentry",
6291 +                                 sizeof(struct unionfs_dentry_info),
6292 +                                 0, SLAB_RECLAIM_ACCOUNT, NULL);
6293 +       /* 0 on success, -ENOMEM if the cache could not be created */
6294 +       return (unionfs_dentry_cachep ? 0 : -ENOMEM);
6295 +}
6296 +/* Destroy the dentry-info slab cache; a no-op if the pointer is NULL. */
6297 +void unionfs_destroy_dentry_cache(void)
6298 +{
6299 +       if (unionfs_dentry_cachep)
6300 +               kmem_cache_destroy(unionfs_dentry_cachep);
6301 +}
6302 +/* Return @dentry's private data to the slab cache and clear d_fsdata. */
6303 +void free_dentry_private_data(struct dentry *dentry)
6304 +{
6305 +       if (!dentry || !dentry->d_fsdata)
6306 +               return; /* nothing attached, nothing to free */
6307 +       kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
6308 +       dentry->d_fsdata = NULL; /* lower_paths is not freed here */
6309 +}
6310 +/* (Re)size lower_paths to sbmax() entries and reset @dentry's branch info. */
6311 +static inline int __realloc_dentry_private_data(struct dentry *dentry)
6312 +{
6313 +       struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6314 +       void *p;
6315 +       int size;
6316 +
6317 +       BUG_ON(!info);
6318 +
6319 +       size = sizeof(struct path) * sbmax(dentry->d_sb);
6320 +       p = krealloc(info->lower_paths, size, GFP_ATOMIC);
6321 +       if (unlikely(!p))
6322 +               return -ENOMEM; /* info->lower_paths was not replaced */
6323 +
6324 +       info->lower_paths = p;
6325 +       /* no branches are valid yet; resync with the superblock */
6326 +       info->bstart = -1;
6327 +       info->bend = -1;
6328 +       info->bopaque = -1;
6329 +       info->bcount = sbmax(dentry->d_sb);
6330 +       atomic_set(&info->generation,
6331 +                       atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
6332 +       /* every path slot starts out empty */
6333 +       memset(info->lower_paths, 0, size);
6334 +
6335 +       return 0;
6336 +}
6337 +
6338 +/* Reset dentry private data.  UNIONFS_D(dentry)->lock must be locked. */
6339 +static int realloc_dentry_private_data(struct dentry *dentry)
6340 +{
6341 +       if (!__realloc_dentry_private_data(dentry))
6342 +               return 0;
6343 +       /* resize failed: free the old paths and the private data itself */
6344 +       kfree(UNIONFS_D(dentry)->lower_paths);
6345 +       free_dentry_private_data(dentry);
6346 +       return -ENOMEM;
6347 +}
6348 +
6349 +/* Allocate dentry private data; on success (0) info->lock is left held. */
6350 +int new_dentry_private_data(struct dentry *dentry, int subclass)
6351 +{
6352 +       struct unionfs_dentry_info *info = UNIONFS_D(dentry);
6353 +
6354 +       BUG_ON(info); /* must not already have private data */
6355 +
6356 +       info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
6357 +       if (unlikely(!info))
6358 +               return -ENOMEM;
6359 +
6360 +       mutex_init(&info->lock);
6361 +       mutex_lock_nested(&info->lock, subclass);
6362 +
6363 +       info->lower_paths = NULL; /* nothing to resize yet */
6364 +
6365 +       dentry->d_fsdata = info;
6366 +
6367 +       if (!__realloc_dentry_private_data(dentry))
6368 +               return 0;
6369 +       /* could not set up lower_paths: unlock and free the new info */
6370 +       mutex_unlock(&info->lock);
6371 +       free_dentry_private_data(dentry);
6372 +       return -ENOMEM;
6373 +}
6374 +
6375 +/*
6376 + * Scan the lower dentries from bstart to bend and set bstart to the first
6377 + * branch whose lower dentry is positive, dropping negative ones on the way.
6378 + */
6379 +void update_bstart(struct dentry *dentry)
6380 +{
6381 +       int bindex;
6382 +       int bstart = dbstart(dentry);
6383 +       int bend = dbend(dentry);
6384 +       struct dentry *lower_dentry;
6385 +
6386 +       for (bindex = bstart; bindex <= bend; bindex++) {
6387 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6388 +               if (!lower_dentry)
6389 +                       continue;
6390 +               if (lower_dentry->d_inode) {
6391 +                       set_dbstart(dentry, bindex);
6392 +                       break;
6393 +               }
6394 +               dput(lower_dentry); /* negative: release and clear slot */
6395 +               unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
6396 +       }
6397 +}
6398 +
6399 +
6400 +/*
6401 + * Initialize a nameidata structure (the intent part) we can pass to a lower
6402 + * file system.  Returns 0 on success or -error (only -ENOMEM possible).
6403 + * When ALLOC_LOWER_ND_FILE is defined, a struct file is also allocated into
6404 + * the nd (for create/open intents).  The caller, when done with this nd,
6405 + * must release the intent file (using release_lower_nd).
6406 + * @flags must be 0, LOOKUP_CREATE, LOOKUP_OPEN or LOOKUP_ACCESS (else BUG).
6407 + * XXX: this code, and the callers of this code, should be redone using
6408 + * vfs_path_lookup() when (1) the nameidata structure is refactored into a
6409 + * separate intent-structure, and (2) open_namei() is broken into a VFS-only
6410 + * function and a method that other file systems can call.
6411 + */
6412 +int init_lower_nd(struct nameidata *nd, unsigned int flags)
6413 +{
6414 +       int err = 0;
6415 +#ifdef ALLOC_LOWER_ND_FILE
6416 +       /*
6417 +        * XXX: one day we may need to have the lower return an open file
6418 +        * for us.  It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may
6419 +        * very well be needed for nfs4.
6420 +        */
6421 +       struct file *file;
6422 +#endif /* ALLOC_LOWER_ND_FILE */
6423 +
6424 +       memset(nd, 0, sizeof(struct nameidata));
6425 +       if (!flags)
6426 +               return err; /* zeroed nd, no intent to set up */
6427 +
6428 +       switch (flags) {
6429 +       case LOOKUP_CREATE:
6430 +               nd->intent.open.flags |= O_CREAT;
6431 +               /* fall through: shared code for create/open cases */
6432 +       case LOOKUP_OPEN:
6433 +               nd->flags = flags;
6434 +               nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE);
6435 +#ifdef ALLOC_LOWER_ND_FILE
6436 +               file = kzalloc(sizeof(struct file), GFP_KERNEL);
6437 +               if (unlikely(!file)) {
6438 +                       err = -ENOMEM;
6439 +                       break; /* exit switch statement and thus return */
6440 +               }
6441 +               nd->intent.open.file = file;
6442 +#endif /* ALLOC_LOWER_ND_FILE */
6443 +               break;
6444 +       case LOOKUP_ACCESS:
6445 +               nd->flags = flags;
6446 +               break;
6447 +       default:
6448 +               /*
6449 +                * We should never get here, for now.
6450 +                * We can add new cases here later on.
6451 +                */
6452 +               pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags);
6453 +               BUG();
6454 +               break;
6455 +       }
6456 +
6457 +       return err;
6458 +}
6459 +/* Undo init_lower_nd(): release the intent file of @nd, if there is one. */
6460 +void release_lower_nd(struct nameidata *nd, int err)
6461 +{
6462 +       if (!nd->intent.open.file)
6463 +               return;
6464 +       else if (!err) /* only when the caller reports success */
6465 +               release_open_intent(nd);
6466 +#ifdef ALLOC_LOWER_ND_FILE
6467 +       kfree(nd->intent.open.file); /* NOTE(review): also after release? */
6468 +#endif /* ALLOC_LOWER_ND_FILE */
6469 +}
6470 diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c
6471 new file mode 100644
6472 index 0000000..8f59fb5
6473 --- /dev/null
6474 +++ b/fs/unionfs/main.c
6475 @@ -0,0 +1,796 @@
6476 +/*
6477 + * Copyright (c) 2003-2007 Erez Zadok
6478 + * Copyright (c) 2003-2006 Charles P. Wright
6479 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
6480 + * Copyright (c) 2005-2006 Junjiro Okajima
6481 + * Copyright (c) 2005      Arun M. Krishnakumar
6482 + * Copyright (c) 2004-2006 David P. Quigley
6483 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
6484 + * Copyright (c) 2003      Puja Gupta
6485 + * Copyright (c) 2003      Harikesavan Krishnan
6486 + * Copyright (c) 2003-2007 Stony Brook University
6487 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
6488 + *
6489 + * This program is free software; you can redistribute it and/or modify
6490 + * it under the terms of the GNU General Public License version 2 as
6491 + * published by the Free Software Foundation.
6492 + */
6493 +
6494 +#include "union.h"
6495 +#include <linux/module.h>
6496 +#include <linux/moduleparam.h>
6497 +/* Populate a unionfs @inode from the lower dentries/inodes of @dentry. */
6498 +static void unionfs_fill_inode(struct dentry *dentry,
6499 +                              struct inode *inode)
6500 +{
6501 +       struct inode *lower_inode;
6502 +       struct dentry *lower_dentry;
6503 +       int bindex, bstart, bend;
6504 +
6505 +       bstart = dbstart(dentry);
6506 +       bend = dbend(dentry);
6507 +
6508 +       for (bindex = bstart; bindex <= bend; bindex++) {
6509 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6510 +               if (!lower_dentry) {
6511 +                       unionfs_set_lower_inode_idx(inode, bindex, NULL);
6512 +                       continue;
6513 +               }
6514 +
6515 +               /* Skip negative lower dentries; otherwise grab the inode. */
6516 +               if (!lower_dentry->d_inode)
6517 +                       continue;
6518 +
6519 +               unionfs_set_lower_inode_idx(inode, bindex,
6520 +                                           igrab(lower_dentry->d_inode));
6521 +       }
6522 +       /* the inode covers the same branch range as the dentry */
6523 +       ibstart(inode) = dbstart(dentry);
6524 +       ibend(inode) = dbend(dentry);
6525 +
6526 +       /* Use attributes from the first branch. */
6527 +       lower_inode = unionfs_lower_inode(inode);
6528 +
6529 +       /* Use different set of inode ops for symlinks & directories */
6530 +       if (S_ISLNK(lower_inode->i_mode))
6531 +               inode->i_op = &unionfs_symlink_iops;
6532 +       else if (S_ISDIR(lower_inode->i_mode))
6533 +               inode->i_op = &unionfs_dir_iops;
6534 +
6535 +       /* Use different set of file ops for directories */
6536 +       if (S_ISDIR(lower_inode->i_mode))
6537 +               inode->i_fop = &unionfs_dir_fops;
6538 +
6539 +       /* properly initialize special inodes */
6540 +       if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
6541 +           S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode))
6542 +               init_special_inode(inode, lower_inode->i_mode,
6543 +                                  lower_inode->i_rdev);
6544 +
6545 +       /* all well, copy inode attributes */
6546 +       unionfs_copy_attr_all(inode, lower_inode);
6547 +       fsstack_copy_inode_size(inode, lower_inode);
6548 +}
6549 +
6550 +/*
6551 + * Connect a unionfs dentry/inode with several lower ones.  This is the
6552 + * classic stackable file system "vnode interposition" action.  @flag is one
6553 + * of the INTERPOSE_* modes; @sb is unionfs's super_block.  Returns NULL or
6554 + * the d_splice_alias() result on success, ERR_PTR(-errno) on failure.
6555 + */
6556 +struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb,
6557 +                                int flag)
6558 +{
6559 +       int err = 0;
6560 +       struct inode *inode;
6561 +       int is_negative_dentry = 1;
6562 +       int bindex, bstart, bend;
6563 +       int need_fill_inode = 1;
6564 +       struct dentry *spliced = NULL;
6565 +
6566 +       verify_locked(dentry);
6567 +
6568 +       bstart = dbstart(dentry);
6569 +       bend = dbend(dentry);
6570 +
6571 +       /* Make sure that we didn't get a negative dentry. */
6572 +       for (bindex = bstart; bindex <= bend; bindex++) {
6573 +               if (unionfs_lower_dentry_idx(dentry, bindex) &&
6574 +                   unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
6575 +                       is_negative_dentry = 0;
6576 +                       break;
6577 +               }
6578 +       }
6579 +       BUG_ON(is_negative_dentry);
6580 +
6581 +       /*
6582 +        * We allocate our new inode below by calling unionfs_iget,
6583 +        * which will initialize some of the new inode's fields
6584 +        */
6585 +
6586 +       /*
6587 +        * On revalidate we've already got our own inode and just need
6588 +        * to fix it up.
6589 +        */
6590 +       if (flag == INTERPOSE_REVAL) {
6591 +               inode = dentry->d_inode;
6592 +               UNIONFS_I(inode)->bstart = -1;
6593 +               UNIONFS_I(inode)->bend = -1;
6594 +               atomic_set(&UNIONFS_I(inode)->generation,
6595 +                          atomic_read(&UNIONFS_SB(sb)->generation));
6596 +
6597 +               UNIONFS_I(inode)->lower_inodes =
6598 +                       kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
6599 +               if (unlikely(!UNIONFS_I(inode)->lower_inodes)) {
6600 +                       err = -ENOMEM;
6601 +                       goto out;
6602 +               }
6603 +       } else {
6604 +               /* get unique inode number for unionfs */
6605 +               inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO));
6606 +               if (IS_ERR(inode)) {
6607 +                       err = PTR_ERR(inode);
6608 +                       goto out;
6609 +               }
6610 +               if (atomic_read(&inode->i_count) > 1)
6611 +                       goto skip; /* bypass the fill just below */
6612 +       }
6613 +
6614 +       need_fill_inode = 0;
6615 +       unionfs_fill_inode(dentry, inode);
6616 +
6617 +skip:
6618 +       /* only (our) lookup wants to do a d_add */
6619 +       switch (flag) {
6620 +       case INTERPOSE_DEFAULT:
6621 +       case INTERPOSE_REVAL_NEG:
6622 +               d_instantiate(dentry, inode);
6623 +               break;
6624 +       case INTERPOSE_LOOKUP:
6625 +               spliced = d_splice_alias(inode, dentry);
6626 +               if (spliced && spliced != dentry) {
6627 +                       /*
6628 +                        * d_splice can return a dentry if it was
6629 +                        * disconnected and had to be moved.  We must ensure
6630 +                        * that the private data of the new dentry is
6631 +                        * correct and that the inode info was filled
6632 +                        * properly.  Finally we must return this new
6633 +                        * dentry.
6634 +                        */
6635 +                       spliced->d_op = &unionfs_dops;
6636 +                       spliced->d_fsdata = dentry->d_fsdata;
6637 +                       dentry->d_fsdata = NULL;
6638 +                       dentry = spliced;
6639 +                       if (need_fill_inode) {
6640 +                               need_fill_inode = 0;
6641 +                               unionfs_fill_inode(dentry, inode);
6642 +                       }
6643 +                       goto out_spliced;
6644 +               } else if (!spliced) {
6645 +                       if (need_fill_inode) {
6646 +                               need_fill_inode = 0;
6647 +                               unionfs_fill_inode(dentry, inode);
6648 +                               goto out_spliced;
6649 +                       }
6650 +               }
6651 +               break;
6652 +       case INTERPOSE_REVAL:
6653 +               /* Do nothing. */
6654 +               break;
6655 +       default:
6656 +               printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n");
6657 +               BUG();
6658 +       }
6659 +       goto out;
6660 +
6661 +out_spliced:
6662 +       if (!err)
6663 +               return spliced;
6664 +out:
6665 +       return ERR_PTR(err); /* err == 0 here yields NULL */
6666 +}
6667 +
6668 +/* Like interpose above, but for a dentry that already has an inode. */
6669 +void unionfs_reinterpose(struct dentry *dentry)
6670 +{
6671 +       struct dentry *lower_dentry;
6672 +       struct inode *inode;
6673 +       int bindex, bstart, bend;
6674 +
6675 +       verify_locked(dentry);
6676 +
6677 +       /* This is the pre-allocated inode */
6678 +       inode = dentry->d_inode;
6679 +
6680 +       bstart = dbstart(dentry);
6681 +       bend = dbend(dentry);
6682 +       for (bindex = bstart; bindex <= bend; bindex++) {
6683 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
6684 +               if (!lower_dentry)
6685 +                       continue;
6686 +
6687 +               if (!lower_dentry->d_inode)
6688 +                       continue;
6689 +               if (unionfs_lower_inode_idx(inode, bindex))
6690 +                       continue; /* this branch already has a lower inode */
6691 +               unionfs_set_lower_inode_idx(inode, bindex,
6692 +                                           igrab(lower_dentry->d_inode));
6693 +       }
6694 +       ibstart(inode) = dbstart(dentry);
6695 +       ibend(inode) = dbend(dentry);
6696 +}
6697 +
6698 +/*
6699 + * Make sure the branch we just looked up (nd) makes sense; returns 0 if
6700 + * all of the following hold:
6701 + * 1) we're not trying to stack unionfs on top of unionfs (else -EINVAL)
6702 + * 2) it exists (else -ENOENT)
6703 + * 3) is a directory (else -ENOTDIR)
6704 + */
6705 +int check_branch(struct nameidata *nd)
6706 +{
6707 +       /* XXX: remove in ODF code -- stacking unions allowed there */
6708 +       if (!strcmp(nd->path.dentry->d_sb->s_type->name, UNIONFS_NAME))
6709 +               return -EINVAL;
6710 +       if (!nd->path.dentry->d_inode)
6711 +               return -ENOENT;
6712 +       if (!S_ISDIR(nd->path.dentry->d_inode->i_mode))
6713 +               return -ENOTDIR;
6714 +       return 0;
6715 +}
6716 +
6717 +/* Returns 1 if either dentry is an ancestor of (or equal to) the other. */
6718 +static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
6719 +{
6720 +       struct dentry *dent = NULL;
6721 +       /* walk up from dent1 until we hit dent2 or a self-parented dentry */
6722 +       dent = dent1;
6723 +       while ((dent != dent2) && (dent->d_parent != dent))
6724 +               dent = dent->d_parent;
6725 +
6726 +       if (dent == dent2)
6727 +               return 1;
6728 +       /* not found: repeat the walk in the other direction */
6729 +       dent = dent2;
6730 +       while ((dent != dent1) && (dent->d_parent != dent))
6731 +               dent = dent->d_parent;
6732 +
6733 +       return (dent == dent1);
6734 +}
6735 +
6736 +/*
6737 + * Parse "ro" or "rw" options, but default to "rw" if no mode option was
6738 + * specified (@name is NULL).  Fill the mode bits in @perms.  If we
6739 + * encounter an unknown string, return -EINVAL.  Otherwise return 0.
6740 + */
6741 +int parse_branch_mode(const char *name, int *perms)
6742 +{
6743 +       if (!name || !strcmp(name, "rw")) {
6744 +               *perms = MAY_READ | MAY_WRITE;
6745 +               return 0;
6746 +       }
6747 +       if (!strcmp(name, "ro")) {
6748 +               *perms = MAY_READ;
6749 +               return 0;
6750 +       }
6751 +       return -EINVAL; /* @perms is left untouched */
6752 +}
6753 +
6754 +/*
6755 + * parse the dirs= mount argument
6756 + *
6757 + * We don't need to lock the superblock private data's rwsem, as we get
6758 + * called only by unionfs_read_super - it is still a long time before anyone
6759 + * can even get a reference to us.
6760 + */
6761 +static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
6762 +                            *lower_root_info, char *options)
6763 +{
6764 +       struct nameidata nd;
6765 +       char *name;
6766 +       int err = 0;
6767 +       int branches = 1;
6768 +       int bindex = 0;
6769 +       int i = 0;
6770 +       int j = 0;
6771 +       struct dentry *dent1;
6772 +       struct dentry *dent2;
6773 +
6774 +       if (options[0] == '\0') {
6775 +               printk(KERN_ERR "unionfs: no branches specified\n");
6776 +               err = -EINVAL;
6777 +               goto out;
6778 +       }
6779 +
6780 +       /*
6781 +        * Each colon means we have a separator, this is really just a rough
6782 +        * guess, since strsep will handle empty fields for us.
6783 +        */
6784 +       for (i = 0; options[i]; i++)
6785 +               if (options[i] == ':')
6786 +                       branches++;
6787 +
6788 +       /* allocate one struct unionfs_data per (estimated) branch */
6789 +       UNIONFS_SB(sb)->data =
6790 +               kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
6791 +       if (unlikely(!UNIONFS_SB(sb)->data)) {
6792 +               err = -ENOMEM;
6793 +               goto out;
6794 +       }
6795 +
6796 +       lower_root_info->lower_paths =
6797 +               kcalloc(branches, sizeof(struct path), GFP_KERNEL);
6798 +       if (unlikely(!lower_root_info->lower_paths)) {
6799 +               err = -ENOMEM;
6800 +               goto out;
6801 +       }
6802 +
6803 +       /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
6804 +       branches = 0;
6805 +       while ((name = strsep(&options, ":")) != NULL) {
6806 +               int perms;
6807 +               char *mode = strchr(name, '=');
6808 +
6809 +               if (!name)
6810 +                       continue;
6811 +               if (!*name) {   /* bad use of ':' (extra colons) */
6812 +                       err = -EINVAL;
6813 +                       goto out;
6814 +               }
6815 +
6816 +               branches++;
6817 +
6818 +               /* strip off '=' if any */
6819 +               if (mode)
6820 +                       *mode++ = '\0';
6821 +
6822 +               err = parse_branch_mode(mode, &perms);
6823 +               if (err) {
6824 +                       printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
6825 +                              "branch %d\n", mode, bindex);
6826 +                       goto out;
6827 +               }
6828 +               /* ensure that leftmost branch is writeable */
6829 +               if (!bindex && !(perms & MAY_WRITE)) {
6830 +                       printk(KERN_ERR "unionfs: leftmost branch cannot be "
6831 +                              "read-only (use \"-o ro\" to create a "
6832 +                              "read-only union)\n");
6833 +                       err = -EINVAL;
6834 +                       goto out;
6835 +               }
6836 +
6837 +               err = path_lookup(name, LOOKUP_FOLLOW, &nd);
6838 +               if (err) {
6839 +                       printk(KERN_ERR "unionfs: error accessing "
6840 +                              "lower directory '%s' (error %d)\n",
6841 +                              name, err);
6842 +                       goto out;
6843 +               }
6844 +
6845 +               err = check_branch(&nd);
6846 +               if (err) {
6847 +                       printk(KERN_ERR "unionfs: lower directory "
6848 +                              "'%s' is not a valid branch\n", name);
6849 +                       path_put(&nd.path);
6850 +                       goto out;
6851 +               }
6852 +
6853 +               lower_root_info->lower_paths[bindex].dentry = nd.path.dentry;
6854 +               lower_root_info->lower_paths[bindex].mnt = nd.path.mnt;
6855 +
6856 +               set_branchperms(sb, bindex, perms);
6857 +               set_branch_count(sb, bindex, 0);
6858 +               new_branch_id(sb, bindex);
6859 +
6860 +               if (lower_root_info->bstart < 0)
6861 +                       lower_root_info->bstart = bindex;
6862 +               lower_root_info->bend = bindex;
6863 +               bindex++;
6864 +       }
6865 +
6866 +       if (branches == 0) {
6867 +               printk(KERN_ERR "unionfs: no branches specified\n");
6868 +               err = -EINVAL;
6869 +               goto out;
6870 +       }
6871 +
6872 +       BUG_ON(branches != (lower_root_info->bend + 1));
6873 +
6874 +       /*
6875 +        * Ensure that no overlaps exist in the branches.
6876 +        *
6877 +        * This test is required because the Linux kernel has no support
6878 +        * currently for ensuring coherency between stackable layers and
6879 +        * branches.  If we were to allow overlapping branches, it would be
6880 +        * possible, for example, to delete a file via one branch, which
6881 +        * would not be reflected in another branch.  Such incoherency could
6882 +        * lead to inconsistencies and even kernel oopses.  Rather than
6883 +        * implement hacks to work around some of these cache-coherency
6884 +        * problems, we prevent branch overlapping, for now.  A complete
6885 +        * solution will involve proper kernel/VFS support for cache
6886 +        * coherency, at which time we could safely remove this
6887 +        * branch-overlapping test.
6888 +        */
6889 +       for (i = 0; i < branches; i++) {
6890 +               dent1 = lower_root_info->lower_paths[i].dentry;
6891 +               for (j = i + 1; j < branches; j++) {
6892 +                       dent2 = lower_root_info->lower_paths[j].dentry;
6893 +                       if (is_branch_overlap(dent1, dent2)) {
6894 +                               printk(KERN_ERR "unionfs: branches %d and "
6895 +                                      "%d overlap\n", i, j);
6896 +                               err = -EINVAL;
6897 +                               goto out;
6898 +                       }
6899 +               }
6900 +       }
6901 +
6902 +out:
6903 +       if (err) {
6904 +               /* lower_paths is NULL if we bailed out before allocating it */
6905 +               for (i = 0; lower_root_info->lower_paths && i < branches; i++)
6906 +                       if (lower_root_info->lower_paths[i].dentry) {
6907 +                               dput(lower_root_info->lower_paths[i].dentry);
6908 +                               /* initialize: can't use unionfs_mntput here */
6909 +                               mntput(lower_root_info->lower_paths[i].mnt);
6910 +                       }
6911 +               kfree(lower_root_info->lower_paths);
6912 +               kfree(UNIONFS_SB(sb)->data);
6913 +
6914 +               /*
6915 +                * MUST clear the pointers to prevent potential double free if
6916 +                * the caller dies later on
6917 +                */
6918 +               lower_root_info->lower_paths = NULL;
6919 +               UNIONFS_SB(sb)->data = NULL;
6920 +       }
6921 +       return err;
6922 +}
6923 +
6924 +/*
6925 + * Parse the comma-separated mount options; "dirs=" is required exactly once.
6926 + *
6927 + * Returns a newly allocated unionfs_dentry_info describing the lower
6928 + * branches (the caller frees it), or an ERR_PTR() value on failure.
6929 + */
6930 +static struct unionfs_dentry_info *unionfs_parse_options(
6931 +                                        struct super_block *sb,
6932 +                                        char *options)
6933 +{
6934 +       struct unionfs_dentry_info *lower_root_info;
6935 +       char *optname;
6936 +       int err = 0;
6937 +       int bindex;
6938 +       int dirsfound = 0;
6939 +
6940 +       /* allocate private data area */
6941 +       err = -ENOMEM;
6942 +       lower_root_info =
6943 +               kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
6944 +       if (unlikely(!lower_root_info))
6945 +               goto out_error;
6946 +       lower_root_info->bstart = -1;
6947 +       lower_root_info->bend = -1;
6948 +       lower_root_info->bopaque = -1;
6949 +
6950 +       while ((optname = strsep(&options, ",")) != NULL) {
6951 +               char *optarg;
6952 +               char *endptr;
6953 +               int intval;
6954 +
6955 +               if (!optname || !*optname)
6956 +                       continue;
6957 +
6958 +               optarg = strchr(optname, '=');
6959 +               if (optarg)
6960 +                       *optarg++ = '\0';
6961 +
6962 +               /*
6963 +                * All of our options take an argument now. Insert ones that
6964 +                * don't, above this check.
6965 +                */
6966 +               if (!optarg) {
6967 +                       printk(KERN_ERR "unionfs: %s requires an argument\n",
6968 +                              optname);
6969 +                       err = -EINVAL;
6970 +                       goto out_error;
6971 +               }
6972 +
6973 +               if (!strcmp("dirs", optname)) {
6974 +                       if (++dirsfound > 1) {
6975 +                               printk(KERN_ERR
6976 +                                      "unionfs: multiple dirs specified\n");
6977 +                               err = -EINVAL;
6978 +                               goto out_error;
6979 +                       }
6980 +                       err = parse_dirs_option(sb, lower_root_info, optarg);
6981 +                       if (err)
6982 +                               goto out_error;
6983 +                       continue;
6984 +               }
6985 +
6986 +               /* All of these options require an integer argument. */
6987 +               intval = simple_strtoul(optarg, &endptr, 0);
6988 +               if (*endptr) {
6989 +                       printk(KERN_ERR
6990 +                              "unionfs: invalid %s option '%s'\n",
6991 +                              optname, optarg);
6992 +                       err = -EINVAL;
6993 +                       goto out_error;
6994 +               }
6995 +
6996 +               err = -EINVAL;
6997 +               printk(KERN_ERR
6998 +                      "unionfs: unrecognized option '%s'\n", optname);
6999 +               goto out_error;
7000 +       }
7001 +       if (dirsfound != 1) {
7002 +               printk(KERN_ERR "unionfs: dirs option required\n");
7003 +               err = -EINVAL;
7004 +               goto out_error;
7005 +       }
7006 +       goto out;
7007 +
7008 +out_error:
7009 +       if (lower_root_info && lower_root_info->lower_paths) {
7010 +               for (bindex = lower_root_info->bstart;
7011 +                    bindex >= 0 && bindex <= lower_root_info->bend;
7012 +                    bindex++) {
7013 +                       struct dentry *d;
7014 +                       struct vfsmount *m;
7015 +
7016 +                       d = lower_root_info->lower_paths[bindex].dentry;
7017 +                       m = lower_root_info->lower_paths[bindex].mnt;
7018 +
7019 +                       dput(d);
7020 +                       /* initializing: can't use unionfs_mntput here */
7021 +                       mntput(m);
7022 +               }
7023 +       }
7024 +       if (lower_root_info)    /* NULL when the kzalloc above failed */
7025 +               kfree(lower_root_info->lower_paths);
7026 +       kfree(lower_root_info);
7027 +
7028 +       kfree(UNIONFS_SB(sb)->data);
7029 +       UNIONFS_SB(sb)->data = NULL;
7030 +
7031 +       lower_root_info = ERR_PTR(err);
7032 +out:
7033 +       return lower_root_info;
7034 +}
7035 +
7036 +/*
7037 + * Our own work-alike of d_alloc_root.
7038 + *
7039 + * d_alloc_root cannot be used if our interpose function is to stay
7040 + * unchanged, so we build the "/" root dentry by hand instead.
7041 + */
7042 +static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
7043 +{
7044 +       static const struct qstr root_name = {
7045 +               .name = "/",
7046 +               .len = 1
7047 +       };
7048 +       struct dentry *root;
7049 +
7050 +       if (!sb)
7051 +               return NULL;
7052 +       root = d_alloc(NULL, &root_name);
7053 +       if (unlikely(!root))
7054 +               return NULL;
7055 +       /* the new dentry is made its own parent */
7056 +       root->d_op = &unionfs_dops;
7057 +       root->d_sb = sb;
7058 +       root->d_parent = root;
7059 +       return root;
7060 +}
7061 +
7062 +/*
7063 + * Set up a new unionfs superblock from the mount options in raw_data.  The
7064 + * sb info's rwsem need not be taken: nobody else can reference the sb yet.
7065 + */
7066 +static int unionfs_read_super(struct super_block *sb, void *raw_data,
7067 +                             int silent)
7068 +{
7069 +       int err = 0;
7070 +       struct unionfs_dentry_info *lower_root_info = NULL;
7071 +       int bindex, bstart, bend;
7072 +
7073 +       if (!raw_data) {
7074 +               printk(KERN_ERR
7075 +                      "unionfs: read_super: missing data argument\n");
7076 +               err = -EINVAL;
7077 +               goto out;
7078 +       }
7079 +
7080 +       /* Allocate superblock private data */
7081 +       sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
7082 +       if (unlikely(!UNIONFS_SB(sb))) {
7083 +               printk(KERN_CRIT "unionfs: read_super: out of memory\n");
7084 +               err = -ENOMEM;
7085 +               goto out;
7086 +       }
7087 +
7088 +       UNIONFS_SB(sb)->bend = -1;
7089 +       atomic_set(&UNIONFS_SB(sb)->generation, 1);
7090 +       init_rwsem(&UNIONFS_SB(sb)->rwsem);
7091 +       UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
7092 +
7093 +       lower_root_info = unionfs_parse_options(sb, raw_data);
7094 +       if (IS_ERR(lower_root_info)) {
7095 +               printk(KERN_ERR
7096 +                      "unionfs: read_super: error while parsing options "
7097 +                      "(err = %ld)\n", PTR_ERR(lower_root_info));
7098 +               err = PTR_ERR(lower_root_info);
7099 +               lower_root_info = NULL;
7100 +               goto out_free;
7101 +       }
7102 +       if (lower_root_info->bstart == -1) {
7103 +               err = -ENOENT;
7104 +               goto out_free;
7105 +       }
7106 +
7107 +       /* set the lower superblock field of upper superblock */
7108 +       bstart = lower_root_info->bstart;
7109 +       BUG_ON(bstart != 0);
7110 +       sbend(sb) = bend = lower_root_info->bend;
7111 +       for (bindex = bstart; bindex <= bend; bindex++) {
7112 +               struct dentry *d = lower_root_info->lower_paths[bindex].dentry;
7113 +               atomic_inc(&d->d_sb->s_active);
7114 +               unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
7115 +       }
7116 +
7117 +       /* max Bytes is the maximum bytes from highest priority branch */
7118 +       sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
7119 +
7120 +       /*
7121 +        * Our c/m/atime granularity is 1 ns because we may stack on file
7122 +        * systems whose granularity is as good.  This is important for our
7123 +        * time-based cache coherency.
7124 +        */
7125 +       sb->s_time_gran = 1;
7126 +
7127 +       sb->s_op = &unionfs_sops;
7128 +
7129 +       /* See comment next to the definition of unionfs_d_alloc_root */
7130 +       sb->s_root = unionfs_d_alloc_root(sb);
7131 +       if (unlikely(!sb->s_root)) {
7132 +               err = -ENOMEM;
7133 +               goto out_dput;
7134 +       }
7135 +
7136 +       /* link the upper and lower dentries */
7137 +       sb->s_root->d_fsdata = NULL;
7138 +       err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT);
7139 +       if (unlikely(err))
7140 +               goto out_freedpd;
7141 +
7142 +       /* Set the lower dentries for s_root */
7143 +       for (bindex = bstart; bindex <= bend; bindex++) {
7144 +               struct dentry *d;
7145 +               struct vfsmount *m;
7146 +
7147 +               d = lower_root_info->lower_paths[bindex].dentry;
7148 +               m = lower_root_info->lower_paths[bindex].mnt;
7149 +
7150 +               unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
7151 +               unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
7152 +       }
7153 +       set_dbstart(sb->s_root, bstart);
7154 +       set_dbend(sb->s_root, bend);
7155 +
7156 +       /* Set the generation number to one, since this is for the mount. */
7157 +       atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
7158 +
7159 +       /*
7160 +        * Call interpose to create the upper level inode.  Only
7161 +        * INTERPOSE_LOOKUP can return a value other than 0 on err.
7162 +        */
7163 +       err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0));
7164 +       unionfs_unlock_dentry(sb->s_root);
7165 +       if (!err)
7166 +               goto out;
7167 +       /* else fall through */
7168 +
7169 +out_freedpd:
7170 +       if (UNIONFS_D(sb->s_root)) {
7171 +               kfree(UNIONFS_D(sb->s_root)->lower_paths);
7172 +               free_dentry_private_data(sb->s_root);
7173 +       }
7174 +       dput(sb->s_root);
7175 +
7176 +out_dput:
7177 +       if (lower_root_info && !IS_ERR(lower_root_info)) {
7178 +               for (bindex = lower_root_info->bstart;
7179 +                    bindex <= lower_root_info->bend; bindex++) {
7180 +                       struct dentry *d;
7181 +                       struct vfsmount *m;
7182 +
7183 +                       d = lower_root_info->lower_paths[bindex].dentry;
7184 +                       m = lower_root_info->lower_paths[bindex].mnt;
7185 +
7186 +                       /* drop s_active ref while d is still pinned */
7187 +                       atomic_dec(&d->d_sb->s_active);
7188 +                       dput(d);
7189 +                       /* initializing: can't use unionfs_mntput here */
7190 +                       mntput(m);
7191 +               }
7192 +               kfree(lower_root_info->lower_paths);
7193 +               kfree(lower_root_info);
7194 +               lower_root_info = NULL;
7195 +       }
7196 +
7197 +out_free:
7198 +       kfree(UNIONFS_SB(sb)->data);
7199 +       kfree(UNIONFS_SB(sb));
7200 +       sb->s_fs_info = NULL;
7201 +
7202 +out:
7203 +       if (lower_root_info && !IS_ERR(lower_root_info)) {
7204 +               kfree(lower_root_info->lower_paths);
7205 +               kfree(lower_root_info);
7206 +       }
7207 +       return err;
7208 +}
7209 +/* Mount entry point: dev_name is unused; unionfs_read_super fills the sb. */
7210 +static int unionfs_get_sb(struct file_system_type *fs_type,
7211 +                         int flags, const char *dev_name,
7212 +                         void *raw_data, struct vfsmount *mnt)
7213 +{
7214 +       return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
7215 +}
7216 +
7217 +static struct file_system_type unionfs_fs_type = {
7218 +       .owner          = THIS_MODULE,
7219 +       .name           = UNIONFS_NAME,
7220 +       .get_sb         = unionfs_get_sb,       /* wraps get_sb_nodev() */
7221 +       .kill_sb        = generic_shutdown_super,
7222 +       .fs_flags       = FS_REVAL_DOT,
7223 +};
7224 +
7225 +/*
7226 + * Module load: set up our caches and the sioq, then register unionfs.
7227 + * On any failure everything is torn down again and the error is returned.
7228 + */
7229 +static int __init init_unionfs_fs(void)
7230 +{
7231 +       int err;
7232 +
7233 +       pr_info("Registering unionfs " UNIONFS_VERSION "\n");
7234 +
7235 +       err = unionfs_init_filldir_cache();
7236 +       if (likely(!err))
7237 +               err = unionfs_init_inode_cache();
7238 +       if (likely(!err))
7239 +               err = unionfs_init_dentry_cache();
7240 +       if (likely(!err))
7241 +               err = init_sioq();
7242 +       if (likely(!err))
7243 +               err = register_filesystem(&unionfs_fs_type);
7244 +       if (likely(!err))
7245 +               return 0;
7246 +
7247 +       stop_sioq();
7248 +       unionfs_destroy_filldir_cache();
7249 +       unionfs_destroy_inode_cache();
7250 +       unionfs_destroy_dentry_cache();
7251 +       return err;
7252 +}
7253 +/* Module unload: tear down what init_unionfs_fs() set up and unregister. */
7254 +static void __exit exit_unionfs_fs(void)
7255 +{
7256 +       stop_sioq();
7257 +       unionfs_destroy_filldir_cache();
7258 +       unionfs_destroy_inode_cache();
7259 +       unionfs_destroy_dentry_cache();
7260 +       unregister_filesystem(&unionfs_fs_type);
7261 +       pr_info("Completed unionfs module unload\n");
7262 +}
7263 +/* Module metadata, and the init/exit functions defined above. */
7264 +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
7265 +             " (http://www.fsl.cs.sunysb.edu)");
7266 +MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
7267 +                  " (http://unionfs.filesystems.org)");
7268 +MODULE_LICENSE("GPL");
7269 +
7270 +module_init(init_unionfs_fs);
7271 +module_exit(exit_unionfs_fs);
7272 diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
7273 new file mode 100644
7274 index 0000000..d6ac61e
7275 --- /dev/null
7276 +++ b/fs/unionfs/mmap.c
7277 @@ -0,0 +1,334 @@
7278 +/*
7279 + * Copyright (c) 2003-2007 Erez Zadok
7280 + * Copyright (c) 2003-2006 Charles P. Wright
7281 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7282 + * Copyright (c) 2005-2006 Junjiro Okajima
7283 + * Copyright (c) 2006      Shaya Potter
7284 + * Copyright (c) 2005      Arun M. Krishnakumar
7285 + * Copyright (c) 2004-2006 David P. Quigley
7286 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7287 + * Copyright (c) 2003      Puja Gupta
7288 + * Copyright (c) 2003      Harikesavan Krishnan
7289 + * Copyright (c) 2003-2007 Stony Brook University
7290 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7291 + *
7292 + * This program is free software; you can redistribute it and/or modify
7293 + * it under the terms of the GNU General Public License version 2 as
7294 + * published by the Free Software Foundation.
7295 + */
7296 +
7297 +#include "union.h"
7298 +
7299 +/* Write a dirty upper page to the lower file system via a lower page. */
7300 +static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
7301 +{
7302 +       int err = -EIO;
7303 +       struct inode *inode;
7304 +       struct inode *lower_inode;
7305 +       struct page *lower_page;
7306 +       struct address_space *lower_mapping; /* lower inode mapping */
7307 +       gfp_t mask;
7308 +
7309 +       BUG_ON(!PageUptodate(page));
7310 +       inode = page->mapping->host;
7311 +       /* if no lower inode, nothing to do */
7312 +       if (!inode || !UNIONFS_I(inode) || !UNIONFS_I(inode)->lower_inodes) {
7313 +               err = 0;
7314 +               goto out;
7315 +       }
7316 +       lower_inode = unionfs_lower_inode(inode);
7317 +       lower_mapping = lower_inode->i_mapping;
7318 +
7319 +       /*
7320 +        * find lower page (returns a locked page)
7321 +        *
7322 +        * We turn off __GFP_FS while we look for or create a new lower
7323 +        * page.  This prevents a recursion into the file system code, which
7324 +        * under memory pressure conditions could lead to a deadlock.  This
7325 +        * is similar to how the loop driver behaves (see loop_set_fd in
7326 +        * drivers/block/loop.c).  If we can't find the lower page, we
7327 +        * redirty our page and return "success" so that the VM will call us
7328 +        * again in the (hopefully near) future.
7329 +        */
7330 +       mask = mapping_gfp_mask(lower_mapping) & ~(__GFP_FS);
7331 +       lower_page = find_or_create_page(lower_mapping, page->index, mask);
7332 +       if (!lower_page) {
7333 +               err = 0;
7334 +               set_page_dirty(page);
7335 +               goto out;
7336 +       }
7337 +
7338 +       /* copy page data from our upper page to the lower page */
7339 +       copy_highpage(lower_page, page);
7340 +       flush_dcache_page(lower_page);
7341 +       SetPageUptodate(lower_page);
7342 +       set_page_dirty(lower_page);
7343 +
7344 +       /*
7345 +        * Call lower writepage (expects locked page).  However, if we are
7346 +        * called with wbc->for_reclaim, then the VFS/VM just wants to
7347 +        * reclaim our page.  Therefore, we don't need to call the lower
7348 +        * ->writepage: just copy our data to the lower page (already done
7349 +        * above), then mark the lower page dirty and unlock it, and return
7350 +        * success.
7351 +        */
7352 +       if (wbc->for_reclaim) {
7353 +               err = 0;        /* data is in the lower page: report success */
7354 +               unlock_page(lower_page);
7355 +               goto out_release;
7356 +       }
7357 +
7358 +       BUG_ON(!lower_mapping->a_ops->writepage);
7359 +       wait_on_page_writeback(lower_page); /* prevent multiple writers */
7360 +       clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
7361 +       err = lower_mapping->a_ops->writepage(lower_page, wbc);
7362 +       if (err < 0)
7363 +               goto out_release;
7364 +
7365 +       /*
7366 +        * Lower file systems such as ramfs and tmpfs, may return
7367 +        * AOP_WRITEPAGE_ACTIVATE so that the VM won't try to (pointlessly)
7368 +        * write the page again for a while.  But those lower file systems
7369 +        * also set the page dirty bit back again.  Since we successfully
7370 +        * copied our page data to the lower page, then the VM will come
7371 +        * back to the lower page (directly) and try to flush it.  So we can
7372 +        * save the VM the hassle of coming back to our page and trying to
7373 +        * flush too.  Therefore, we don't re-dirty our own page, and we
7374 +        * never return AOP_WRITEPAGE_ACTIVATE back to the VM (we consider
7375 +        * this a success).
7376 +        *
7377 +        * We also unlock the lower page if the lower ->writepage returned
7378 +        * AOP_WRITEPAGE_ACTIVATE.  (This "anomalous" behaviour may be
7379 +        * addressed in future shmem/VM code.)
7380 +        */
7381 +       if (err == AOP_WRITEPAGE_ACTIVATE) {
7382 +               err = 0;
7383 +               unlock_page(lower_page);
7384 +       }
7385 +
7386 +       /* lower mtimes have changed: update ours */
7387 +       unionfs_copy_attr_times(inode);
7388 +
7389 +out_release:
7390 +       /* b/c find_or_create_page increased refcnt */
7391 +       page_cache_release(lower_page);
7392 +out:
7393 +       /*
7394 +        * We unlock our page unconditionally, because we never return
7395 +        * AOP_WRITEPAGE_ACTIVATE.
7396 +        */
7397 +       unlock_page(page);
7398 +       return err;
7399 +}
7400 +
7401 +/* Write back dirty pages through generic_writepages, then sync our times. */
7402 +static int unionfs_writepages(struct address_space *mapping,
7403 +                             struct writeback_control *wbc)
7404 +{
7405 +       struct inode *inode = mapping->host;
7406 +       int err;
7407 +
7408 +       /* nothing to do if both branch indices are negative ... */
7409 +       if (ibstart(inode) < 0 && ibend(inode) < 0)
7410 +               return 0;
7411 +       /* ... or if there is no lower inode */
7412 +       if (!unionfs_lower_inode(inode))
7413 +               return 0;
7414 +
7415 +       err = generic_writepages(mapping, wbc);
7416 +       if (err)
7417 +               return err;
7418 +       unionfs_copy_attr_times(inode);
7419 +       return 0;
7420 +}
7421 +
7422 +/* Fill a locked page from the lower file via vfs_read; always unlocks it. */
7423 +static int unionfs_readpage(struct file *file, struct page *page)
7424 +{
7425 +       int err;
7426 +       struct file *lower_file;
7427 +       struct inode *inode;
7428 +       mm_segment_t old_fs;
7429 +       char *page_data = NULL;
7430 +       mode_t orig_mode;
7431 +
7432 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
7433 +       err = unionfs_file_revalidate(file, false);
7434 +       if (unlikely(err))
7435 +               goto out;
7436 +       unionfs_check_file(file);
7437 +
7438 +       if (!UNIONFS_F(file)) {
7439 +               err = -ENOENT;
7440 +               goto out;
7441 +       }
7442 +
7443 +       lower_file = unionfs_lower_file(file);
7444 +       /* FIXME: is this assertion right here? */
7445 +       BUG_ON(lower_file == NULL);
7446 +
7447 +       inode = file->f_path.dentry->d_inode;
7448 +
7449 +       page_data = (char *) kmap(page);
7450 +       /*
7451 +        * Use vfs_read because some lower file systems don't have a
7452 +        * readpage method, and some file systems (esp. distributed ones)
7453 +        * don't like their pages to be accessed directly.  Using vfs_read
7454 +        * may be a little slower, but a lot safer, as the VFS does a lot of
7455 +        * the necessary magic for us.
7456 +        */
7457 +       lower_file->f_pos = page_offset(page);
7458 +       old_fs = get_fs();
7459 +       set_fs(KERNEL_DS);
7460 +       /*
7461 +        * generic_file_splice_write may call us on a file not opened for
7462 +        * reading, so temporarily allow reading.
7463 +        */
7464 +       orig_mode = lower_file->f_mode;
7465 +       lower_file->f_mode |= FMODE_READ;
7466 +       err = vfs_read(lower_file, page_data, PAGE_CACHE_SIZE,
7467 +                      &lower_file->f_pos);
7468 +       lower_file->f_mode = orig_mode;
7469 +       set_fs(old_fs);
7470 +       if (err >= 0 && err < PAGE_CACHE_SIZE) /* short read: zero the rest */
7471 +               memset(page_data + err, 0, PAGE_CACHE_SIZE - err);
7472 +       kunmap(page);
7473 +
7474 +       if (err < 0)
7475 +               goto out;
7476 +       err = 0;
7477 +
7478 +       /* if vfs_read succeeded above, sync up our times */
7479 +       unionfs_copy_attr_times(inode);
7480 +
7481 +       flush_dcache_page(page);
7482 +
7483 +       /*
7484 +        * The page came to us locked and we must always unlock it, on
7485 +        * success and on error alike.  unlock_page also wakes up anyone
7486 +        * waiting on the page, so no explicit wakeup is needed here.
7487 +        */
7488 +out:
7489 +       if (err == 0)
7490 +               SetPageUptodate(page);
7491 +       else
7492 +               ClearPageUptodate(page);
7493 +
7494 +       unlock_page(page);
7495 +       unionfs_check_file(file);
7496 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
7497 +
7498 +       return err;
7499 +}
7500 +
7501 +static int unionfs_prepare_write(struct file *file, struct page *page,
7502 +                                unsigned from, unsigned to)
7503 +{
7504 +       struct super_block *sb = file->f_path.dentry->d_sb;
7505 +       int err;
7506 +
7507 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT);
7508 +       err = unionfs_file_revalidate(file, true);
7509 +       if (!err) {     /* revalidated: sync our times, then check file */
7510 +               unionfs_copy_attr_times(file->f_path.dentry->d_inode);
7511 +               unionfs_check_file(file);
7512 +       }
7513 +       unionfs_read_unlock(sb);
7514 +       return err;
7515 +}
7516 +
7517 +/* Write bytes [from, to) of this page to the lower file using vfs_write. */
7518 +static int unionfs_commit_write(struct file *file, struct page *page,
7519 +                               unsigned from, unsigned to)
7520 +{
7521 +       int err = -ENOMEM;
7522 +       struct inode *inode, *lower_inode;
7523 +       struct file *lower_file = NULL;
7524 +       unsigned bytes = to - from;
7525 +       char *page_data = NULL;
7526 +       mm_segment_t old_fs;
7527 +
7528 +       BUG_ON(file == NULL);
7529 +
7530 +       unionfs_read_lock(file->f_path.dentry->d_sb, UNIONFS_SMUTEX_PARENT);
7531 +       err = unionfs_file_revalidate(file, true);
7532 +       if (unlikely(err))
7533 +               goto out;
7534 +       unionfs_check_file(file);
7535 +
7536 +       inode = page->mapping->host;
7537 +
7538 +       if (UNIONFS_F(file) != NULL)
7539 +               lower_file = unionfs_lower_file(file);
7540 +
7541 +       /* FIXME: is this assertion right here? */
7542 +       BUG_ON(lower_file == NULL);
7543 +
7544 +       page_data = (char *)kmap(page);
7545 +       lower_file->f_pos = page_offset(page) + from;
7546 +
7547 +       /*
7548 +        * We use vfs_write instead of copying page data and the
7549 +        * prepare_write/commit_write combo because file systems like
7550 +        * GFS/OCFS2 don't like things touching those directly.
7551 +        * Calling the underlying write op, while a little bit slower,
7552 +        * also runs all of the lower file system's own code.
7553 +        */
7554 +       old_fs = get_fs();
7555 +       set_fs(KERNEL_DS);
7556 +       err = vfs_write(lower_file, page_data + from, bytes,
7557 +                       &lower_file->f_pos);
7558 +       set_fs(old_fs);
7559 +       kunmap(page);
7560 +
7561 +       if (err < 0)
7562 +               goto out;
7563 +
7564 +       /* if vfs_write succeeded above, sync up our times/sizes */
7565 +       lower_inode = lower_file->f_path.dentry->d_inode;
7566 +       if (!lower_inode)
7567 +               lower_inode = unionfs_lower_inode(inode);
7568 +       BUG_ON(!lower_inode);
7569 +       fsstack_copy_inode_size(inode, lower_inode);
7570 +       unionfs_copy_attr_times(inode);
7571 +       mark_inode_dirty_sync(inode);
7572 +
7573 +out:
7574 +       if (err < 0)
7575 +               ClearPageUptodate(page);
7576 +
7577 +       unionfs_check_file(file);
7578 +       unionfs_read_unlock(file->f_path.dentry->d_sb);
7579 +       return err;             /* <0 on error, else vfs_write's result */
7580 +}
7581 +
7582 +/*
7583 + * Although unionfs isn't a block-based file system, it may stack on one.
7584 + * ->bmap is needed, for example, to swapon(2) files.
7585 + *
7586 + * Returns the lower file system's ->bmap result, or 0 (no mapping) if
7587 + * there is no lower inode or the lower file system has no ->bmap.
7588 + */
7589 +sector_t unionfs_bmap(struct address_space *mapping, sector_t block)
7590 +{
7591 +       struct inode *inode, *lower_inode;
7592 +       sector_t (*bmap)(struct address_space *, sector_t);
7593 +
7594 +       inode = (struct inode *)mapping->host;
7595 +       lower_inode = unionfs_lower_inode(inode);
7596 +       if (!lower_inode)
7597 +               return 0;
7598 +       bmap = lower_inode->i_mapping->a_ops->bmap;
7599 +       /* never hand back a negative errno: sector_t is an unsigned type */
7600 +       return bmap ? bmap(lower_inode->i_mapping, block) : 0;
7601 +}
7602 +
7603 +/* unionfs address_space operations; all handlers are defined above. */
7604 +struct address_space_operations unionfs_aops = {
7605 +       .writepage      = unionfs_writepage,
7606 +       .writepages     = unionfs_writepages,
7607 +       .readpage       = unionfs_readpage,
7608 +       .prepare_write  = unionfs_prepare_write,
7609 +       .commit_write   = unionfs_commit_write,
7610 +       .bmap           = unionfs_bmap,
7611 +};
7612 diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c
7613 new file mode 100644
7614 index 0000000..7ba1e1a
7615 --- /dev/null
7616 +++ b/fs/unionfs/rdstate.c
7617 @@ -0,0 +1,285 @@
7618 +/*
7619 + * Copyright (c) 2003-2007 Erez Zadok
7620 + * Copyright (c) 2003-2006 Charles P. Wright
7621 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7622 + * Copyright (c) 2005-2006 Junjiro Okajima
7623 + * Copyright (c) 2005      Arun M. Krishnakumar
7624 + * Copyright (c) 2004-2006 David P. Quigley
7625 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7626 + * Copyright (c) 2003      Puja Gupta
7627 + * Copyright (c) 2003      Harikesavan Krishnan
7628 + * Copyright (c) 2003-2007 Stony Brook University
7629 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7630 + *
7631 + * This program is free software; you can redistribute it and/or modify
7632 + * it under the terms of the GNU General Public License version 2 as
7633 + * published by the Free Software Foundation.
7634 + */
7635 +
7636 +#include "union.h"
7637 +
7638 +/* This file contains the routines for maintaining readdir state. */
7639 +
7640 +/*
7641 + * There are two structures here, rdstate which is a hash table
7642 + * of the second structure which is a filldir_node.
7643 + */
7644 +
7645 +/*
7646 + * This is a struct kmem_cache for filldir nodes, because we allocate a lot
7647 + * of them and they shouldn't waste memory.  If the node has a small name
7648 + * (as defined by the dentry structure), then we use an inline name to
7649 + * preserve kmalloc space.
7650 + */
7651 +static struct kmem_cache *unionfs_filldir_cachep;
7652 +
7653 +int unionfs_init_filldir_cache(void)
7654 +{
7655 +       unionfs_filldir_cachep =
7656 +               kmem_cache_create("unionfs_filldir",
7657 +                                 sizeof(struct filldir_node), 0,
7658 +                                 SLAB_RECLAIM_ACCOUNT, NULL);
7659 +
7660 +       return (unionfs_filldir_cachep ? 0 : -ENOMEM);
7661 +}
7662 +
7663 +void unionfs_destroy_filldir_cache(void)
7664 +{
7665 +       if (unionfs_filldir_cachep)
7666 +               kmem_cache_destroy(unionfs_filldir_cachep);
7667 +}
7668 +
7669 +/*
7670 + * This is a tuning parameter that tells us roughly how big to make the
7671 + * hash table in directory entries per page.  This isn't perfect, but
7672 + * at least we get a hash table size that shouldn't be too overloaded.
7673 + * The following averages are based on my home directory.
7674 + * 14.44693    Overall
7675 + * 12.29       Single Page Directories
7676 + * 117.93      Multi-page directories
7677 + */
7678 +#define DENTPAGE 4096
7679 +#define DENTPERONEPAGE 12
7680 +#define DENTPERPAGE 118
7681 +#define MINHASHSIZE 1
7682 +static int guesstimate_hash_size(struct inode *inode)
7683 +{
7684 +       struct inode *lower_inode;
7685 +       int bindex;
7686 +       int hashsize = MINHASHSIZE;
7687 +
7688 +       if (UNIONFS_I(inode)->hashsize > 0)
7689 +               return UNIONFS_I(inode)->hashsize;
7690 +
7691 +       for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
7692 +               lower_inode = unionfs_lower_inode_idx(inode, bindex);
7693 +               if (!lower_inode)
7694 +                       continue;
7695 +
7696 +               if (i_size_read(lower_inode) == DENTPAGE)
7697 +                       hashsize += DENTPERONEPAGE;
7698 +               else
7699 +                       hashsize += (i_size_read(lower_inode) / DENTPAGE) *
7700 +                               DENTPERPAGE;
7701 +       }
7702 +
7703 +       return hashsize;
7704 +}
7705 +
7706 +int init_rdstate(struct file *file)
7707 +{
7708 +       BUG_ON(sizeof(loff_t) !=
7709 +              (sizeof(unsigned int) + sizeof(unsigned int)));
7710 +       BUG_ON(UNIONFS_F(file)->rdstate != NULL);
7711 +
7712 +       UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode,
7713 +                                                fbstart(file));
7714 +
7715 +       return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
7716 +}
7717 +
7718 +struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
7719 +{
7720 +       struct unionfs_dir_state *rdstate = NULL;
7721 +       struct list_head *pos;
7722 +
7723 +       spin_lock(&UNIONFS_I(inode)->rdlock);
7724 +       list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
7725 +               struct unionfs_dir_state *r =
7726 +                       list_entry(pos, struct unionfs_dir_state, cache);
7727 +               if (fpos == rdstate2offset(r)) {
7728 +                       UNIONFS_I(inode)->rdcount--;
7729 +                       list_del(&r->cache);
7730 +                       rdstate = r;
7731 +                       break;
7732 +               }
7733 +       }
7734 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
7735 +       return rdstate;
7736 +}
7737 +
7738 +struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
7739 +{
7740 +       int i = 0;
7741 +       int hashsize;
7742 +       unsigned long mallocsize = sizeof(struct unionfs_dir_state);
7743 +       struct unionfs_dir_state *rdstate;
7744 +
7745 +       hashsize = guesstimate_hash_size(inode);
7746 +       mallocsize += hashsize * sizeof(struct list_head);
7747 +       mallocsize = __roundup_pow_of_two(mallocsize);
7748 +
7749 +       /* This should give us about 500 entries anyway. */
7750 +       if (mallocsize > PAGE_SIZE)
7751 +               mallocsize = PAGE_SIZE;
7752 +
7753 +       hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
7754 +               sizeof(struct list_head);
7755 +
7756 +       rdstate = kmalloc(mallocsize, GFP_KERNEL);
7757 +       if (unlikely(!rdstate))
7758 +               return NULL;
7759 +
7760 +       spin_lock(&UNIONFS_I(inode)->rdlock);
7761 +       if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
7762 +               UNIONFS_I(inode)->cookie = 1;
7763 +       else
7764 +               UNIONFS_I(inode)->cookie++;
7765 +
7766 +       rdstate->cookie = UNIONFS_I(inode)->cookie;
7767 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
7768 +       rdstate->offset = 1;
7769 +       rdstate->access = jiffies;
7770 +       rdstate->bindex = bindex;
7771 +       rdstate->dirpos = 0;
7772 +       rdstate->hashentries = 0;
7773 +       rdstate->size = hashsize;
7774 +       for (i = 0; i < rdstate->size; i++)
7775 +               INIT_LIST_HEAD(&rdstate->list[i]);
7776 +
7777 +       return rdstate;
7778 +}
7779 +
7780 +static void free_filldir_node(struct filldir_node *node)
7781 +{
7782 +       if (node->namelen >= DNAME_INLINE_LEN_MIN)
7783 +               kfree(node->name);
7784 +       kmem_cache_free(unionfs_filldir_cachep, node);
7785 +}
7786 +
7787 +void free_rdstate(struct unionfs_dir_state *state)
7788 +{
7789 +       struct filldir_node *tmp;
7790 +       int i;
7791 +
7792 +       for (i = 0; i < state->size; i++) {
7793 +               struct list_head *head = &(state->list[i]);
7794 +               struct list_head *pos, *n;
7795 +
7796 +               /* traverse the list and deallocate space */
7797 +               list_for_each_safe(pos, n, head) {
7798 +                       tmp = list_entry(pos, struct filldir_node, file_list);
7799 +                       list_del(&tmp->file_list);
7800 +                       free_filldir_node(tmp);
7801 +               }
7802 +       }
7803 +
7804 +       kfree(state);
7805 +}
7806 +
7807 +struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
7808 +                                      const char *name, int namelen,
7809 +                                      int is_whiteout)
7810 +{
7811 +       int index;
7812 +       unsigned int hash;
7813 +       struct list_head *head;
7814 +       struct list_head *pos;
7815 +       struct filldir_node *cursor = NULL;
7816 +       int found = 0;
7817 +
7818 +       BUG_ON(namelen <= 0);
7819 +
7820 +       hash = full_name_hash(name, namelen);
7821 +       index = hash % rdstate->size;
7822 +
7823 +       head = &(rdstate->list[index]);
7824 +       list_for_each(pos, head) {
7825 +               cursor = list_entry(pos, struct filldir_node, file_list);
7826 +
7827 +               if (cursor->namelen == namelen && cursor->hash == hash &&
7828 +                   !strncmp(cursor->name, name, namelen)) {
7829 +                       /*
7830 +                        * a duplicate exists, and hence no need to create
7831 +                        * entry to the list
7832 +                        */
7833 +                       found = 1;
7834 +
7835 +                       /*
7836 +                        * if a duplicate is found in this branch, and is
7837 +                        * not due to the caller looking for an entry to
7838 +                        * whiteout, then the file system may be corrupted.
7839 +                        */
7840 +                       if (unlikely(!is_whiteout &&
7841 +                                    cursor->bindex == rdstate->bindex))
7842 +                               printk(KERN_ERR "unionfs: filldir: possible "
7843 +                                      "I/O error: a file is duplicated "
7844 +                                      "in the same branch %d: %s\n",
7845 +                                      rdstate->bindex, cursor->name);
7846 +                       break;
7847 +               }
7848 +       }
7849 +
7850 +       if (!found)
7851 +               cursor = NULL;
7852 +
7853 +       return cursor;
7854 +}
7855 +
7856 +int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
7857 +                    int namelen, int bindex, int whiteout)
7858 +{
7859 +       struct filldir_node *new;
7860 +       unsigned int hash;
7861 +       int index;
7862 +       int err = 0;            /* 0 on success, -ENOMEM on allocation failure */
7863 +       struct list_head *head;
7864 +
7865 +       BUG_ON(namelen <= 0);
7866 +
7867 +       hash = full_name_hash(name, namelen);
7868 +       index = hash % rdstate->size;
7869 +       head = &(rdstate->list[index]);
7870 +
7871 +       new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
7872 +       if (unlikely(!new)) {
7873 +               err = -ENOMEM;
7874 +               goto out;
7875 +       }
7876 +
7877 +       INIT_LIST_HEAD(&new->file_list);
7878 +       new->namelen = namelen;
7879 +       new->hash = hash;
7880 +       new->bindex = bindex;
7881 +       new->whiteout = whiteout;
7882 +
7883 +       if (namelen < DNAME_INLINE_LEN_MIN) {
7884 +               new->name = new->iname; /* short name: use inline storage */
7885 +       } else {
7886 +               new->name = kmalloc(namelen + 1, GFP_KERNEL);
7887 +               if (unlikely(!new->name)) {
7888 +                       kmem_cache_free(unionfs_filldir_cachep, new);
7889 +                       err = -ENOMEM;  /* must not report success here */
7890 +                       goto out;
7891 +               }
7892 +       }
7893 +
7894 +       memcpy(new->name, name, namelen);
7895 +       new->name[namelen] = '\0';
7896 +
7897 +       rdstate->hashentries++;
7898 +
7899 +       list_add(&(new->file_list), head);
7900 +out:
7901 +       return err;
7902 +}
7903 diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c
7904 new file mode 100644
7905 index 0000000..cc16eb2
7906 --- /dev/null
7907 +++ b/fs/unionfs/rename.c
7908 @@ -0,0 +1,548 @@
7909 +/*
7910 + * Copyright (c) 2003-2007 Erez Zadok
7911 + * Copyright (c) 2003-2006 Charles P. Wright
7912 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
7913 + * Copyright (c) 2005-2006 Junjiro Okajima
7914 + * Copyright (c) 2005      Arun M. Krishnakumar
7915 + * Copyright (c) 2004-2006 David P. Quigley
7916 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
7917 + * Copyright (c) 2003      Puja Gupta
7918 + * Copyright (c) 2003      Harikesavan Krishnan
7919 + * Copyright (c) 2003-2007 Stony Brook University
7920 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
7921 + *
7922 + * This program is free software; you can redistribute it and/or modify
7923 + * it under the terms of the GNU General Public License version 2 as
7924 + * published by the Free Software Foundation.
7925 + */
7926 +
7927 +#include "union.h"
7928 +
7929 +static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7930 +                           struct inode *new_dir, struct dentry *new_dentry,
7931 +                           int bindex, struct dentry **wh_old)
7932 +{
7933 +       int err = 0;
7934 +       struct dentry *lower_old_dentry;
7935 +       struct dentry *lower_new_dentry;
7936 +       struct dentry *lower_old_dir_dentry;
7937 +       struct dentry *lower_new_dir_dentry;
7938 +       struct dentry *lower_wh_dentry;
7939 +       struct dentry *lower_wh_dir_dentry;
7940 +       struct dentry *trap;
7941 +       char *wh_name = NULL;
7942 +
7943 +       lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
7944 +       lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
7945 +
7946 +       if (!lower_new_dentry) {
7947 +               lower_new_dentry =
7948 +                       create_parents(new_dentry->d_parent->d_inode,
7949 +                                      new_dentry, new_dentry->d_name.name,
7950 +                                      bindex);
7951 +               if (IS_ERR(lower_new_dentry)) {
7952 +                       err = PTR_ERR(lower_new_dentry);
7953 +                       if (IS_COPYUP_ERR(err))
7954 +                               goto out;
7955 +                       printk(KERN_ERR "unionfs: error creating directory "
7956 +                              "tree for rename, bindex=%d err=%d\n",
7957 +                              bindex, err);
7958 +                       goto out;
7959 +               }
7960 +       }
7961 +
7962 +       wh_name = alloc_whname(new_dentry->d_name.name,
7963 +                              new_dentry->d_name.len);
7964 +       if (unlikely(IS_ERR(wh_name))) {
7965 +               err = PTR_ERR(wh_name);
7966 +               return err;     /* wh_name is an ERR_PTR: keep it away from kfree() */
7967 +       }
7968 +
7969 +       lower_wh_dentry = lookup_one_len(wh_name, lower_new_dentry->d_parent,
7970 +                                        new_dentry->d_name.len +
7971 +                                        UNIONFS_WHLEN);
7972 +       if (IS_ERR(lower_wh_dentry)) {
7973 +               err = PTR_ERR(lower_wh_dentry);
7974 +               goto out;
7975 +       }
7976 +
7977 +       if (lower_wh_dentry->d_inode) {
7978 +               /* get rid of the whiteout that is existing */
7979 +               if (lower_new_dentry->d_inode) {
7980 +                       printk(KERN_ERR "unionfs: both a whiteout and a "
7981 +                              "dentry exist when doing a rename!\n");
7982 +                       err = -EIO;
7983 +
7984 +                       dput(lower_wh_dentry);
7985 +                       goto out;
7986 +               }
7987 +
7988 +               lower_wh_dir_dentry = lock_parent_wh(lower_wh_dentry);
7989 +               err = is_robranch_super(old_dentry->d_sb, bindex);
7990 +               if (!err)
7991 +                       err = vfs_unlink(lower_wh_dir_dentry->d_inode,
7992 +                                        lower_wh_dentry);
7993 +
7994 +               dput(lower_wh_dentry);
7995 +               unlock_dir(lower_wh_dir_dentry);
7996 +               if (err)
7997 +                       goto out;
7998 +       } else {
7999 +               dput(lower_wh_dentry);
8000 +       }
8001 +
8002 +       err = is_robranch_super(old_dentry->d_sb, bindex);
8003 +       if (err)
8004 +               goto out;
8005 +
8006 +       dget(lower_old_dentry);
8007 +       dget(lower_new_dentry);
8008 +       lower_old_dir_dentry = dget_parent(lower_old_dentry);
8009 +       lower_new_dir_dentry = dget_parent(lower_new_dentry);
8010 +
8011 +       /*
8012 +        * ready to whiteout for old_dentry. caller will create the actual
8013 +        * whiteout, and must dput(*wh_old)
8014 +        */
8015 +       if (wh_old) {
8016 +               char *whname;
8017 +               whname = alloc_whname(old_dentry->d_name.name,
8018 +                                     old_dentry->d_name.len);
8019 +               err = PTR_ERR(whname);
8020 +               if (unlikely(IS_ERR(whname)))
8021 +                       goto out_dput;
8022 +               *wh_old = lookup_one_len(whname, lower_old_dir_dentry,
8023 +                                        old_dentry->d_name.len +
8024 +                                        UNIONFS_WHLEN);
8025 +               kfree(whname);
8026 +               err = PTR_ERR(*wh_old);
8027 +               if (IS_ERR(*wh_old)) {
8028 +                       *wh_old = NULL;
8029 +                       goto out_dput;
8030 +               }
8031 +       }
8032 +
8033 +       /* see Documentation/filesystems/unionfs/issues.txt */
8034 +       lockdep_off();
8035 +       trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
8036 +       /* source should not be ancestor of target */
8037 +       if (trap == lower_old_dentry) {
8038 +               err = -EINVAL;
8039 +               goto out_err_unlock;
8040 +       }
8041 +       /* target should not be ancestor of source */
8042 +       if (trap == lower_new_dentry) {
8043 +               err = -ENOTEMPTY;
8044 +               goto out_err_unlock;
8045 +       }
8046 +       err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
8047 +                        lower_new_dir_dentry->d_inode, lower_new_dentry);
8048 +out_err_unlock:
8049 +       if (!err) {
8050 +               /* update parent dir times */
8051 +               fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode);
8052 +               fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode);
8053 +       }
8054 +       unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
8055 +       lockdep_on();
8056 +
8057 +out_dput:
8058 +       dput(lower_old_dir_dentry);
8059 +       dput(lower_new_dir_dentry);
8060 +       dput(lower_old_dentry);
8061 +       dput(lower_new_dentry);
8062 +
8063 +out:
8064 +       if (!err) {
8065 +               /* Fixup the new_dentry. */
8066 +               if (bindex < dbstart(new_dentry))
8067 +                       set_dbstart(new_dentry, bindex);
8068 +               else if (bindex > dbend(new_dentry))
8069 +                       set_dbend(new_dentry, bindex);
8070 +       }
8071 +
8072 +       kfree(wh_name);
8073 +
8074 +       return err;
8075 +}
8076 +
8077 +/*
8078 + * Main rename code.  This is sufficiently complex, that it's documented in
8079 + * Documentation/filesystems/unionfs/rename.txt.  This routine calls
8080 + * __unionfs_rename() above to perform some of the work.
8081 + */
8082 +static int do_unionfs_rename(struct inode *old_dir,
8083 +                            struct dentry *old_dentry,
8084 +                            struct inode *new_dir,
8085 +                            struct dentry *new_dentry)
8086 +{
8087 +       int err = 0;
8088 +       int bindex, bwh_old;
8089 +       int old_bstart, old_bend;
8090 +       int new_bstart, new_bend;
8091 +       int do_copyup = -1;
8092 +       struct dentry *parent_dentry;
8093 +       int local_err = 0;
8094 +       int eio = 0;
8095 +       int revert = 0;
8096 +       struct dentry *wh_old = NULL;
8097 +
8098 +       old_bstart = dbstart(old_dentry);
8099 +       bwh_old = old_bstart;
8100 +       old_bend = dbend(old_dentry);
8101 +       parent_dentry = old_dentry->d_parent;
8102 +
8103 +       new_bstart = dbstart(new_dentry);
8104 +       new_bend = dbend(new_dentry);
8105 +
8106 +       /* Rename source to destination. */
8107 +       err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
8108 +                              old_bstart, &wh_old);
8109 +       if (err) {
8110 +               if (!IS_COPYUP_ERR(err))
8111 +                       goto out;
8112 +               do_copyup = old_bstart - 1;
8113 +       } else {
8114 +               revert = 1;
8115 +       }
8116 +
8117 +       /*
8118 +        * Unlink all instances of destination that exist to the left of
8119 +        * bstart of source. On error, revert back, goto out.
8120 +        */
8121 +       for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
8122 +               struct dentry *unlink_dentry;
8123 +               struct dentry *unlink_dir_dentry;
8124 +
8125 +               unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
8126 +               if (!unlink_dentry)
8127 +                       continue;
8128 +
8129 +               unlink_dir_dentry = lock_parent(unlink_dentry);
8130 +               err = is_robranch_super(old_dir->i_sb, bindex);
8131 +               if (!err)
8132 +                       err = vfs_unlink(unlink_dir_dentry->d_inode,
8133 +                                        unlink_dentry);
8134 +
8135 +               fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
8136 +                                       unlink_dir_dentry->d_inode);
8137 +               /* propagate number of hard-links */
8138 +               new_dentry->d_parent->d_inode->i_nlink =
8139 +                       unionfs_get_nlinks(new_dentry->d_parent->d_inode);
8140 +
8141 +               unlock_dir(unlink_dir_dentry);
8142 +               if (!err) {
8143 +                       if (bindex != new_bstart) {
8144 +                               dput(unlink_dentry);
8145 +                               unionfs_set_lower_dentry_idx(new_dentry,
8146 +                                                            bindex, NULL);
8147 +                       }
8148 +               } else if (IS_COPYUP_ERR(err)) {
8149 +                       do_copyup = bindex - 1;
8150 +               } else if (revert) {
8151 +                       dput(wh_old);
8152 +                       goto revert;
8153 +               }
8154 +       }
8155 +
8156 +       if (do_copyup != -1) {
8157 +               for (bindex = do_copyup; bindex >= 0; bindex--) {
8158 +                       /*
8159 +                        * copyup the file into some left directory, so that
8160 +                        * you can rename it
8161 +                        */
8162 +                       err = copyup_dentry(old_dentry->d_parent->d_inode,
8163 +                                           old_dentry, old_bstart, bindex,
8164 +                                           old_dentry->d_name.name,
8165 +                                           old_dentry->d_name.len, NULL,
8166 +                                           i_size_read(old_dentry->d_inode));
8167 +                       /* if copyup failed, try next branch to the left */
8168 +                       if (err)
8169 +                               continue;
8170 +                       dput(wh_old);
8171 +                       bwh_old = bindex;
8172 +                       err = __unionfs_rename(old_dir, old_dentry,
8173 +                                              new_dir, new_dentry,
8174 +                                              bindex, &wh_old);
8175 +                       break;
8176 +               }
8177 +       }
8178 +
8179 +       /* make it opaque */
8180 +       if (S_ISDIR(old_dentry->d_inode->i_mode)) {
8181 +               err = make_dir_opaque(old_dentry, dbstart(old_dentry));
8182 +               if (err)
8183 +                       goto revert;
8184 +       }
8185 +
8186 +       /*
8187 +        * Create whiteout for source, only if:
8188 +        * (1) There is more than one underlying instance of source.
8189 +        * (2) We did a copy_up
8190 +        */
8191 +       if ((old_bstart != old_bend) || (do_copyup != -1)) {
8192 +               struct dentry *lower_parent;
8193 +               struct nameidata nd;
8194 +               if (!wh_old || wh_old->d_inode || bwh_old < 0) {
8195 +                       printk(KERN_ERR "unionfs: rename error "
8196 +                              "(wh_old=%p/%p bwh_old=%d)\n", wh_old,
8197 +                              (wh_old ? wh_old->d_inode : NULL), bwh_old);
8198 +                       err = -EIO;
8199 +                       goto out;
8200 +               }
8201 +               err = init_lower_nd(&nd, LOOKUP_CREATE);
8202 +               if (unlikely(err < 0))
8203 +                       goto out;
8204 +               lower_parent = lock_parent_wh(wh_old);
8205 +               local_err = vfs_create(lower_parent->d_inode, wh_old, S_IRUGO,
8206 +                                      &nd);
8207 +               unlock_dir(lower_parent);
8208 +               if (!local_err) {
8209 +                       set_dbopaque(old_dentry, bwh_old);
8210 +               } else {
8211 +                       /*
8212 +                        * we can't fix anything now, so we cop-out and use
8213 +                        * -EIO.
8214 +                        */
8215 +                       printk(KERN_ERR "unionfs: can't create a whiteout for "
8216 +                              "the source in rename!\n");
8217 +                       err = -EIO;
8218 +               }
8219 +               release_lower_nd(&nd, local_err);
8220 +       }
8221 +
8222 +out:
8223 +       dput(wh_old);
8224 +       return err;
8225 +
8226 +revert:
8227 +       /* Do revert here. */
8228 +       local_err = unionfs_refresh_lower_dentry(new_dentry, old_bstart);
8229 +       if (local_err) {
8230 +               printk(KERN_ERR "unionfs: revert failed in rename: "
8231 +                      "the new refresh failed\n");
8232 +               eio = -EIO;
8233 +       }
8234 +
8235 +       local_err = unionfs_refresh_lower_dentry(old_dentry, old_bstart);
8236 +       if (local_err) {
8237 +               printk(KERN_ERR "unionfs: revert failed in rename: "
8238 +                      "the old refresh failed\n");
8239 +               eio = -EIO;
8240 +               goto revert_out;
8241 +       }
8242 +
8243 +       if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
8244 +           !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
8245 +               printk(KERN_ERR "unionfs: revert failed in rename: "
8246 +                      "the object disappeared from under us!\n");
8247 +               eio = -EIO;
8248 +               goto revert_out;
8249 +       }
8250 +
8251 +       if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
8252 +           unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
8253 +               printk(KERN_ERR "unionfs: revert failed in rename: "
8254 +                      "the object was created underneath us!\n");
8255 +               eio = -EIO;
8256 +               goto revert_out;
8257 +       }
8258 +
8259 +       local_err = __unionfs_rename(new_dir, new_dentry,
8260 +                                    old_dir, old_dentry, old_bstart, NULL);
8261 +
8262 +       /* If we can't fix it, then we cop-out with -EIO. */
8263 +       if (local_err) {
8264 +               printk(KERN_ERR "unionfs: revert failed in rename!\n");
8265 +               eio = -EIO;
8266 +       }
8267 +
8268 +       local_err = unionfs_refresh_lower_dentry(new_dentry, bindex);
8269 +       if (local_err)
8270 +               eio = -EIO;
8271 +       local_err = unionfs_refresh_lower_dentry(old_dentry, bindex);
8272 +       if (local_err)
8273 +               eio = -EIO;
8274 +
8275 +revert_out:
8276 +       if (eio)
8277 +               err = eio;
8278 +       return err;
8279 +}
8280 +
8281 +static struct dentry *lookup_whiteout(struct dentry *dentry)
8282 +{
8283 +       char *whname;
8284 +       int bindex = -1, bstart = -1, bend = -1;
8285 +       struct dentry *parent, *lower_parent, *wh_dentry;
8286 +
8287 +       whname = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8288 +       if (unlikely(IS_ERR(whname)))
8289 +               return (void *)whname;
8290 +
8291 +       parent = dget_parent(dentry);
8292 +       unionfs_lock_dentry(parent, UNIONFS_DMUTEX_WHITEOUT);
8293 +       bstart = dbstart(parent);
8294 +       bend = dbend(parent);
8295 +       wh_dentry = ERR_PTR(-ENOENT);
8296 +       for (bindex = bstart; bindex <= bend; bindex++) {
8297 +               lower_parent = unionfs_lower_dentry_idx(parent, bindex);
8298 +               if (!lower_parent)
8299 +                       continue;
8300 +               wh_dentry = lookup_one_len(whname, lower_parent,
8301 +                                          dentry->d_name.len + UNIONFS_WHLEN);
8302 +               if (IS_ERR(wh_dentry))
8303 +                       continue;
8304 +               if (wh_dentry->d_inode)
8305 +                       break;
8306 +               dput(wh_dentry);
8307 +               wh_dentry = ERR_PTR(-ENOENT);
8308 +       }
8309 +       unionfs_unlock_dentry(parent);
8310 +       dput(parent);
8311 +       kfree(whname);
8312 +       return wh_dentry;
8313 +}
8314 +
8315 +/*
8316 + * We can't copyup a directory, because it may involve huge numbers of
8317 + * children, etc.  Doing that in the kernel would be bad, so instead we
8318 + * return EXDEV to the user-space utility that caused this, and let the
8319 + * user-space recurse and ask us to copy up each file separately.
8320 + */
8321 +static int may_rename_dir(struct dentry *dentry)
8322 +{
8323 +       int err, bstart;
8324 +
8325 +       err = check_empty(dentry, NULL);
8326 +       if (err == -ENOTEMPTY) {
8327 +               if (is_robranch(dentry))
8328 +                       return -EXDEV;
8329 +       } else if (err) {
8330 +               return err;
8331 +       }
8332 +
8333 +       bstart = dbstart(dentry);
8334 +       if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
8335 +               return 0;
8336 +
8337 +       set_dbstart(dentry, bstart + 1);
8338 +       err = check_empty(dentry, NULL);
8339 +       set_dbstart(dentry, bstart);
8340 +       if (err == -ENOTEMPTY)
8341 +               err = -EXDEV;
8342 +       return err;
8343 +}
8344 +
8345 +int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8346 +                  struct inode *new_dir, struct dentry *new_dentry)
8347 +{
8348 +       int err = 0;
8349 +       struct dentry *wh_dentry;
8350 +
8351 +       unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD);
8352 +       unionfs_double_lock_dentry(old_dentry, new_dentry);
8353 +
8354 +       if (unlikely(!__unionfs_d_revalidate_chain(old_dentry, NULL, false))) {
8355 +               err = -ESTALE;
8356 +               goto out;
8357 +       }
8358 +       if (unlikely(!d_deleted(new_dentry) && new_dentry->d_inode &&
8359 +                    !__unionfs_d_revalidate_chain(new_dentry, NULL, false))) {
8360 +               err = -ESTALE;
8361 +               goto out;
8362 +       }
8363 +
8364 +       if (!S_ISDIR(old_dentry->d_inode->i_mode))
8365 +               err = unionfs_partial_lookup(old_dentry);
8366 +       else
8367 +               err = may_rename_dir(old_dentry);
8368 +
8369 +       if (err)
8370 +               goto out;
8371 +
8372 +       err = unionfs_partial_lookup(new_dentry);
8373 +       if (err)
8374 +               goto out;
8375 +
8376 +       /*
8377 +        * if new_dentry is already lower because of whiteout,
8378 +        * simply override it even if the whited-out dir is not empty.
8379 +        */
8380 +       wh_dentry = lookup_whiteout(new_dentry);
8381 +       if (!IS_ERR(wh_dentry)) {
8382 +               dput(wh_dentry);
8383 +       } else if (new_dentry->d_inode) {
8384 +               if (S_ISDIR(old_dentry->d_inode->i_mode) !=
8385 +                   S_ISDIR(new_dentry->d_inode->i_mode)) {
8386 +                       err = S_ISDIR(old_dentry->d_inode->i_mode) ?
8387 +                               -ENOTDIR : -EISDIR;
8388 +                       goto out;
8389 +               }
8390 +
8391 +               if (S_ISDIR(new_dentry->d_inode->i_mode)) {
8392 +                       struct unionfs_dir_state *namelist = NULL;
8393 +                       /* check if this unionfs directory is empty or not */
8394 +                       err = check_empty(new_dentry, &namelist);
8395 +                       if (err)
8396 +                               goto out;
8397 +
8398 +                       if (!is_robranch(new_dentry))
8399 +                               err = delete_whiteouts(new_dentry,
8400 +                                                      dbstart(new_dentry),
8401 +                                                      namelist);
8402 +
8403 +                       free_rdstate(namelist);
8404 +
8405 +                       if (err)
8406 +                               goto out;
8407 +               }
8408 +       }
8409 +
8410 +       err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8411 +       if (err)
8412 +               goto out;
8413 +
8414 +       /*
8415 +        * force re-lookup since the dir on ro branch is not renamed, and
8416 +        * lower dentries still indicate the un-renamed ones.
8417 +        */
8418 +       if (S_ISDIR(old_dentry->d_inode->i_mode))
8419 +               atomic_dec(&UNIONFS_D(old_dentry)->generation);
8420 +       else
8421 +               unionfs_postcopyup_release(old_dentry);
8422 +       if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) {
8423 +               unionfs_postcopyup_release(new_dentry);
8424 +               unionfs_postcopyup_setmnt(new_dentry);
8425 +               if (!unionfs_lower_inode(new_dentry->d_inode)) {
8426 +                       /*
8427 +                        * If we get here, it means that no copyup was
8428 +                        * needed, and that a file by the old name already
8429 +                        * exists on the destination branch; that file got
8430 +                        * renamed earlier in this function, so all we need
8431 +                        * to do here is set the lower inode.
8432 +                        */
8433 +                       struct inode *inode;
8434 +                       inode = unionfs_lower_inode(old_dentry->d_inode);
8435 +                       igrab(inode);
8436 +                       unionfs_set_lower_inode_idx(new_dentry->d_inode,
8437 +                                                   dbstart(new_dentry),
8438 +                                                   inode);
8439 +               }
8440 +       }
8441 +       /* if all of this renaming succeeded, update our times */
8442 +       unionfs_copy_attr_times(old_dentry->d_inode);
8443 +       unionfs_copy_attr_times(new_dentry->d_inode);
8444 +       unionfs_check_inode(old_dir);
8445 +       unionfs_check_inode(new_dir);
8446 +       unionfs_check_dentry(old_dentry);
8447 +       unionfs_check_dentry(new_dentry);
8448 +
8449 +out:
8450 +       if (err)                /* clear the new_dentry stuff created */
8451 +               d_drop(new_dentry);
8452 +       unionfs_unlock_dentry(new_dentry);
8453 +       unionfs_unlock_dentry(old_dentry);
8454 +       unionfs_read_unlock(old_dentry->d_sb);
8455 +       return err;
8456 +}
8457 diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c
8458 new file mode 100644
8459 index 0000000..2a8c88e
8460 --- /dev/null
8461 +++ b/fs/unionfs/sioq.c
8462 @@ -0,0 +1,119 @@
8463 +/*
8464 + * Copyright (c) 2006-2007 Erez Zadok
8465 + * Copyright (c) 2006      Charles P. Wright
8466 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8467 + * Copyright (c) 2006      Junjiro Okajima
8468 + * Copyright (c) 2006      David P. Quigley
8469 + * Copyright (c) 2006-2007 Stony Brook University
8470 + * Copyright (c) 2006-2007 The Research Foundation of SUNY
8471 + *
8472 + * This program is free software; you can redistribute it and/or modify
8473 + * it under the terms of the GNU General Public License version 2 as
8474 + * published by the Free Software Foundation.
8475 + */
8476 +
8477 +#include "union.h"
8478 +
8479 +/*
8480 + * Super-user IO work Queue - sometimes we need to perform actions which
8481 + * would fail due to the unix permissions on the parent directory (e.g.,
8482 + * rmdir a directory which appears empty, but in reality contains
8483 + * whiteouts).
8484 + */
8485 +
8486 +static struct workqueue_struct *superio_workqueue;
8487 +
8488 +int __init init_sioq(void)
8489 +{
8490 +       int err = -ENOMEM;
8491 +
8492 +       /* create_workqueue() returns NULL (never an ERR_PTR) on failure */
8493 +       superio_workqueue = create_workqueue("unionfs_siod");
8494 +       if (superio_workqueue)
8495 +               return 0;
8496 +
8497 +       /* superio_workqueue is NULL here, so stop_sioq() stays a no-op */
8498 +       printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
8499 +       return err;
8500 +}
8501 +
8502 +void stop_sioq(void)
8503 +{
8504 +       if (superio_workqueue)
8505 +               destroy_workqueue(superio_workqueue);
8506 +}
8507 +
8508 +void run_sioq(work_func_t func, struct sioq_args *args)
8509 +{
8510 +       INIT_WORK(&args->work, func);
8511 +
8512 +       init_completion(&args->comp);
8513 +       while (!queue_work(superio_workqueue, &args->work)) {
8514 +               /* TODO: do accounting if needed */
8515 +               schedule();
8516 +       }
8517 +       wait_for_completion(&args->comp);
8518 +}
8519 +
8520 +void __unionfs_create(struct work_struct *work)
8521 +{
8522 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8523 +       struct create_args *c = &args->create;
8524 +
8525 +       args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
8526 +       complete(&args->comp);
8527 +}
8528 +
8529 +void __unionfs_mkdir(struct work_struct *work)
8530 +{
8531 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8532 +       struct mkdir_args *m = &args->mkdir;
8533 +
8534 +       args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
8535 +       complete(&args->comp);
8536 +}
8537 +
8538 +void __unionfs_mknod(struct work_struct *work)
8539 +{
8540 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8541 +       struct mknod_args *m = &args->mknod;
8542 +
8543 +       args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
8544 +       complete(&args->comp);
8545 +}
8546 +
8547 +void __unionfs_symlink(struct work_struct *work)
8548 +{
8549 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8550 +       struct symlink_args *s = &args->symlink;
8551 +
8552 +       args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode);
8553 +       complete(&args->comp);
8554 +}
8555 +
8556 +void __unionfs_unlink(struct work_struct *work)
8557 +{
8558 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8559 +       struct unlink_args *u = &args->unlink;
8560 +
8561 +       args->err = vfs_unlink(u->parent, u->dentry);
8562 +       complete(&args->comp);
8563 +}
8564 +
8565 +void __delete_whiteouts(struct work_struct *work)
8566 +{
8567 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8568 +       struct deletewh_args *d = &args->deletewh;
8569 +
8570 +       args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist);
8571 +       complete(&args->comp);
8572 +}
8573 +
8574 +void __is_opaque_dir(struct work_struct *work)
8575 +{
8576 +       struct sioq_args *args = container_of(work, struct sioq_args, work);
8577 +
8578 +       args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry,
8579 +                                  sizeof(UNIONFS_DIR_OPAQUE) - 1);
8580 +       complete(&args->comp);
8581 +}
8582 diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h
8583 new file mode 100644
8584 index 0000000..afb71ee
8585 --- /dev/null
8586 +++ b/fs/unionfs/sioq.h
8587 @@ -0,0 +1,92 @@
8588 +/*
8589 + * Copyright (c) 2006-2007 Erez Zadok
8590 + * Copyright (c) 2006      Charles P. Wright
8591 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
8592 + * Copyright (c) 2006      Junjiro Okajima
8593 + * Copyright (c) 2006      David P. Quigley
8594 + * Copyright (c) 2006-2007 Stony Brook University
8595 + * Copyright (c) 2006-2007 The Research Foundation of SUNY
8596 + *
8597 + * This program is free software; you can redistribute it and/or modify
8598 + * it under the terms of the GNU General Public License version 2 as
8599 + * published by the Free Software Foundation.
8600 + */
8601 +
8602 +#ifndef _SIOQ_H
8603 +#define _SIOQ_H
8604 +
8605 +struct deletewh_args {
8606 +       struct unionfs_dir_state *namelist;
8607 +       struct dentry *dentry;
8608 +       int bindex;
8609 +};
8610 +
8611 +struct is_opaque_args {
8612 +       struct dentry *dentry;
8613 +};
8614 +
8615 +struct create_args {
8616 +       struct inode *parent;
8617 +       struct dentry *dentry;
8618 +       umode_t mode;
8619 +       struct nameidata *nd;
8620 +};
8621 +
8622 +struct mkdir_args {
8623 +       struct inode *parent;
8624 +       struct dentry *dentry;
8625 +       umode_t mode;
8626 +};
8627 +
8628 +struct mknod_args {
8629 +       struct inode *parent;
8630 +       struct dentry *dentry;
8631 +       umode_t mode;
8632 +       dev_t dev;
8633 +};
8634 +
8635 +struct symlink_args {
8636 +       struct inode *parent;
8637 +       struct dentry *dentry;
8638 +       char *symbuf;
8639 +       umode_t mode;
8640 +};
8641 +
8642 +struct unlink_args {
8643 +       struct inode *parent;
8644 +       struct dentry *dentry;
8645 +};
8646 +
8647 +
8648 +struct sioq_args {
8649 +       struct completion comp;
8650 +       struct work_struct work;
8651 +       int err;
8652 +       void *ret;
8653 +
8654 +       union {
8655 +               struct deletewh_args deletewh;
8656 +               struct is_opaque_args is_opaque;
8657 +               struct create_args create;
8658 +               struct mkdir_args mkdir;
8659 +               struct mknod_args mknod;
8660 +               struct symlink_args symlink;
8661 +               struct unlink_args unlink;
8662 +       };
8663 +};
8664 +
8665 +/* Extern definitions for SIOQ functions */
8666 +extern int __init init_sioq(void);
8667 +extern void stop_sioq(void);
8668 +extern void run_sioq(work_func_t func, struct sioq_args *args);
8669 +
8670 +/* Extern definitions for our privilege escalation helpers */
8671 +extern void __unionfs_create(struct work_struct *work);
8672 +extern void __unionfs_mkdir(struct work_struct *work);
8673 +extern void __unionfs_mknod(struct work_struct *work);
8674 +extern void __unionfs_symlink(struct work_struct *work);
8675 +extern void __unionfs_unlink(struct work_struct *work);
8676 +extern void __delete_whiteouts(struct work_struct *work);
8677 +extern void __is_opaque_dir(struct work_struct *work);
8678 +
8679 +#endif /* not _SIOQ_H */
8680 diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c
8681 new file mode 100644
8682 index 0000000..1a40f63
8683 --- /dev/null
8684 +++ b/fs/unionfs/subr.c
8685 @@ -0,0 +1,298 @@
8686 +/*
8687 + * Copyright (c) 2003-2007 Erez Zadok
8688 + * Copyright (c) 2003-2006 Charles P. Wright
8689 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8690 + * Copyright (c) 2005-2006 Junjiro Okajima
8691 + * Copyright (c) 2005      Arun M. Krishnakumar
8692 + * Copyright (c) 2004-2006 David P. Quigley
8693 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8694 + * Copyright (c) 2003      Puja Gupta
8695 + * Copyright (c) 2003      Harikesavan Krishnan
8696 + * Copyright (c) 2003-2007 Stony Brook University
8697 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
8698 + *
8699 + * This program is free software; you can redistribute it and/or modify
8700 + * it under the terms of the GNU General Public License version 2 as
8701 + * published by the Free Software Foundation.
8702 + */
8703 +
8704 +#include "union.h"
8705 +
8706 +/*
8707 + * Pass an unionfs dentry and an index.  It will try to create a whiteout
8708 + * for the filename in dentry, and will try in branch 'start'.  On error,
8709 + * it will proceed to a branch to the left.
8710 + *
8711 + * Returns 0 if the whiteout was created or already exists, else -errno.
8712 + */
8713 +int create_whiteout(struct dentry *dentry, int start)
8714 +{
8715 +       int bindex;
8716 +       struct dentry *lower_dir_dentry;
8717 +       struct dentry *lower_dentry;
8718 +       struct dentry *lower_wh_dentry;
8719 +       struct nameidata nd;
8720 +       char *name = NULL;
8721 +       int err = -EINVAL;
8722 +
8723 +       verify_locked(dentry);
8724 +
8725 +       /* create dentry's whiteout equivalent */
8726 +       name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
8727 +       if (unlikely(IS_ERR(name)))
8728 +               return PTR_ERR(name);   /* never kfree() an ERR_PTR */
8729 +
8730 +       for (bindex = start; bindex >= 0; bindex--) {
8731 +               lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8732 +
8733 +               if (!lower_dentry) {
8734 +                       /*
8735 +                        * if lower dentry is not present, create the
8736 +                        * entire lower dentry directory structure and go
8737 +                        * ahead.  Since we want to just create whiteout, we
8738 +                        * only want the parent dentry, and hence get rid of
8739 +                        * this dentry.
8740 +                        */
8741 +                       lower_dentry = create_parents(dentry->d_inode,
8742 +                                                     dentry,
8743 +                                                     dentry->d_name.name,
8744 +                                                     bindex);
8745 +                       if (!lower_dentry || IS_ERR(lower_dentry)) {
8746 +                               int ret = PTR_ERR(lower_dentry);
8747 +                               if (!IS_COPYUP_ERR(ret))
8748 +                                       printk(KERN_ERR
8749 +                                              "unionfs: create_parents for "
8750 +                                              "whiteout failed: bindex=%d "
8751 +                                              "err=%d\n", bindex, ret);
8752 +                               continue;
8753 +                       }
8754 +               }
8755 +
8756 +               lower_wh_dentry =
8757 +                       lookup_one_len(name, lower_dentry->d_parent,
8758 +                                      dentry->d_name.len + UNIONFS_WHLEN);
8759 +               if (IS_ERR(lower_wh_dentry))
8760 +                       continue;
8761 +
8762 +               /*
8763 +                * The whiteout already exists. This used to be impossible,
8764 +                * but now is possible because of opaqueness.
8765 +                */
8766 +               if (lower_wh_dentry->d_inode) {
8767 +                       dput(lower_wh_dentry);
8768 +                       err = 0;
8769 +                       goto out;
8770 +               }
8771 +
8772 +               err = init_lower_nd(&nd, LOOKUP_CREATE);
8773 +               if (unlikely(err < 0)) {
8774 +                       /* drop the whiteout dentry reference taken above */
8775 +                       dput(lower_wh_dentry);
8776 +                       goto out;
8777 +               }
8778 +               lower_dir_dentry = lock_parent_wh(lower_wh_dentry);
8779 +               err = is_robranch_super(dentry->d_sb, bindex);
8780 +               if (!err)
8781 +                       err = vfs_create(lower_dir_dentry->d_inode,
8782 +                                        lower_wh_dentry,
8783 +                                        ~current->fs->umask & S_IRWXUGO,
8784 +                                        &nd);
8785 +               unlock_dir(lower_dir_dentry);
8786 +               dput(lower_wh_dentry);
8787 +               release_lower_nd(&nd, err);
8788 +
8789 +               if (!err || !IS_COPYUP_ERR(err))
8790 +                       break;
8791 +       }
8792 +
8793 +       /* set dbopaque so that lookup will not proceed after this branch */
8794 +       if (!err)
8795 +               set_dbopaque(dentry, bindex);
8796 +
8797 +out:
8798 +       kfree(name);
8799 +       return err;
8800 +}
8801 +
8802 +/*
8803 + * This is a helper function for rename, which ends up with hosed over
8804 + * dentries when it needs to revert.
8805 + */
8806 +int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex)
8807 +{
8808 +       struct dentry *lower_dentry;
8809 +       struct dentry *lower_parent;
8810 +       int err = 0;
8811 +
8812 +       verify_locked(dentry);
8813 +
8814 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_CHILD);
8815 +       lower_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
8816 +       unionfs_unlock_dentry(dentry->d_parent);
8817 +
8818 +       BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode));
8819 +
8820 +       lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent,
8821 +                                     dentry->d_name.len);
8822 +       if (IS_ERR(lower_dentry)) {
8823 +               err = PTR_ERR(lower_dentry);
8824 +               goto out;
8825 +       }
8826 +
8827 +       dput(unionfs_lower_dentry_idx(dentry, bindex));
8828 +       iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
8829 +       unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
8830 +
8831 +       if (!lower_dentry->d_inode) {
8832 +               dput(lower_dentry);
8833 +               unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
8834 +       } else {
8835 +               unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry);
8836 +               unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
8837 +                                           igrab(lower_dentry->d_inode));
8838 +       }
8839 +
8840 +out:
8841 +       return err;
8842 +}
8843 +
8844 +int make_dir_opaque(struct dentry *dentry, int bindex)
8845 +{
8846 +       int err = 0;
8847 +       struct dentry *lower_dentry, *diropq;
8848 +       struct inode *lower_dir;
8849 +       struct nameidata nd;
8850 +       kernel_cap_t orig_cap;
8851 +
8852 +       /*
8853 +        * Create the UNIONFS_DIR_OPAQUE marker in the lower directory of
8854 +        * 'dentry' at branch 'bindex', unless it already exists.  Such
8855 +        * markers are special files (like regular whiteouts) and should be
8856 +        * created regardless of directory search/create permissions, so
8857 +        * DAC capabilities are raised only for the duration of this call.
8858 +        * Note, this does not circumvent normal ->permission().
8859 +        */
8860 +       orig_cap = current->cap_effective;
8861 +       cap_raise(current->cap_effective, CAP_DAC_READ_SEARCH);
8862 +       cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
8863 +
8864 +       lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
8865 +       lower_dir = lower_dentry->d_inode;
8866 +       BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
8867 +              !S_ISDIR(lower_dir->i_mode));
8868 +
8869 +       mutex_lock(&lower_dir->i_mutex);
8870 +       diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry,
8871 +                               sizeof(UNIONFS_DIR_OPAQUE) - 1);
8872 +       if (IS_ERR(diropq)) {
8873 +               err = PTR_ERR(diropq);
8874 +               goto out;
8875 +       }
8876 +
8877 +       err = init_lower_nd(&nd, LOOKUP_CREATE);
8878 +       if (unlikely(err < 0))
8879 +               goto out_dput;
8880 +       if (!diropq->d_inode)
8881 +               err = vfs_create(lower_dir, diropq, S_IRUGO, &nd);
8882 +       if (!err)
8883 +               set_dbopaque(dentry, bindex);
8884 +       release_lower_nd(&nd, err);
8885 +
8886 +out_dput:
8887 +       dput(diropq);
8888 +out:
8889 +       mutex_unlock(&lower_dir->i_mutex);
8890 +       current->cap_effective = orig_cap;
8891 +       return err;
8892 +}
8893 +
8894 +/*
8895 + * Returns the i_nlink value to report for a unionfs inode, by inode type.
8896 + */
8897 +int unionfs_get_nlinks(const struct inode *inode)
8898 +{
8899 +       /* don't bother to do all the work since we're unlinked */
8900 +       if (inode->i_nlink == 0)
8901 +               return 0;
8902 +
8903 +       if (!S_ISDIR(inode->i_mode))
8904 +               return unionfs_lower_inode(inode)->i_nlink;
8905 +
8906 +       /*
8907 +        * For directories, we return 1. The only place that could care
8908 +        * about links is readdir, and there's d_type there so even that
8909 +        * doesn't matter.
8910 +        */
8911 +       return 1;
8912 +}
8913 +
8914 +/* construct whiteout filename */
8915 +char *alloc_whname(const char *name, int len)
8916 +{
8917 +       char *buf;
8918 +
8919 +       buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
8920 +       if (unlikely(!buf))
8921 +               return ERR_PTR(-ENOMEM);
8922 +
8923 +       strcpy(buf, UNIONFS_WHPFX);
8924 +       strlcat(buf, name, len + UNIONFS_WHLEN + 1);
8925 +
8926 +       return buf;
8927 +}
8928 +
8929 +/* copy a/m/ctime from the lower branch with the newest times */
8930 +void unionfs_copy_attr_times(struct inode *upper)
8931 +{
8932 +       int bindex;
8933 +       struct inode *lower;
8934 +
8935 +       if (!upper)
8936 +               return;
8937 +       if (ibstart(upper) < 0) {
8938 +#ifdef CONFIG_UNION_FS_DEBUG
8939 +               WARN_ON(ibstart(upper) < 0);
8940 +#endif /* CONFIG_UNION_FS_DEBUG */
8941 +               return;
8942 +       }
8943 +       for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) {
8944 +               lower = unionfs_lower_inode_idx(upper, bindex);
8945 +               if (!lower)
8946 +                       continue; /* not all lower dir objects may exist */
8947 +               if (unlikely(timespec_compare(&upper->i_mtime,
8948 +                                             &lower->i_mtime) < 0))
8949 +                       upper->i_mtime = lower->i_mtime;
8950 +               if (unlikely(timespec_compare(&upper->i_ctime,
8951 +                                             &lower->i_ctime) < 0))
8952 +                       upper->i_ctime = lower->i_ctime;
8953 +               if (unlikely(timespec_compare(&upper->i_atime,
8954 +                                             &lower->i_atime) < 0))
8955 +                       upper->i_atime = lower->i_atime;
8956 +       }
8957 +}
8958 +
8959 +/*
8960 + * A unionfs/fanout version of fsstack_copy_attr_all.  Uses
8961 + * unionfs_get_nlinks to properly calculate the number of links to a file.
8962 + * Also, copies the max() of all a/m/ctimes for all lower inodes (which is
8963 + * important if the lower inode is a directory type)
8964 + */
8965 +void unionfs_copy_attr_all(struct inode *dest,
8966 +                          const struct inode *src)
8967 +{
8968 +       dest->i_mode = src->i_mode;
8969 +       dest->i_uid = src->i_uid;
8970 +       dest->i_gid = src->i_gid;
8971 +       dest->i_rdev = src->i_rdev;
8972 +
8973 +       unionfs_copy_attr_times(dest);
8974 +
8975 +       dest->i_blkbits = src->i_blkbits;
8976 +       dest->i_flags = src->i_flags;
8977 +
8978 +       /*
8979 +        * Update the nlinks AFTER updating the above fields, because
8980 +        * unionfs_get_nlinks() inspects dest's i_mode.
8981 +        */
8982 +       dest->i_nlink = unionfs_get_nlinks(dest);
8983 +}
8984 diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c
8985 new file mode 100644
8986 index 0000000..fba1598
8987 --- /dev/null
8988 +++ b/fs/unionfs/super.c
8989 @@ -0,0 +1,1049 @@
8990 +/*
8991 + * Copyright (c) 2003-2007 Erez Zadok
8992 + * Copyright (c) 2003-2006 Charles P. Wright
8993 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
8994 + * Copyright (c) 2005-2006 Junjiro Okajima
8995 + * Copyright (c) 2005      Arun M. Krishnakumar
8996 + * Copyright (c) 2004-2006 David P. Quigley
8997 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
8998 + * Copyright (c) 2003      Puja Gupta
8999 + * Copyright (c) 2003      Harikesavan Krishnan
9000 + * Copyright (c) 2003-2007 Stony Brook University
9001 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
9002 + *
9003 + * This program is free software; you can redistribute it and/or modify
9004 + * it under the terms of the GNU General Public License version 2 as
9005 + * published by the Free Software Foundation.
9006 + */
9007 +
9008 +#include "union.h"
9009 +
9010 +/*
9011 + * The inode cache is used with alloc_inode for both our inode info and the
9012 + * vfs inode.
9013 + */
9014 +static struct kmem_cache *unionfs_inode_cachep;
9015 +
9016 +struct inode *unionfs_iget(struct super_block *sb, unsigned long ino)
9017 +{
9018 +       int size;
9019 +       struct unionfs_inode_info *info;
9020 +       struct inode *inode;
9021 +
9022 +       inode = iget_locked(sb, ino);
9023 +       if (!inode)
9024 +               return ERR_PTR(-ENOMEM);
9025 +       if (!(inode->i_state & I_NEW))
9026 +               return inode;
9027 +
9028 +       info = UNIONFS_I(inode);
9029 +       memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9030 +       info->bstart = -1;
9031 +       info->bend = -1;
9032 +       atomic_set(&info->generation,
9033 +                  atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
9034 +       spin_lock_init(&info->rdlock);
9035 +       info->rdcount = 1;
9036 +       info->hashsize = -1;
9037 +       INIT_LIST_HEAD(&info->readdircache);
9038 +
9039 +       size = sbmax(inode->i_sb) * sizeof(struct inode *);
9040 +       info->lower_inodes = kzalloc(size, GFP_KERNEL);
9041 +       if (unlikely(!info->lower_inodes)) {
9042 +               printk(KERN_CRIT "unionfs: no kernel memory when allocating "
9043 +                      "lower-pointer array!\n");
9044 +               iget_failed(inode);
9045 +               return ERR_PTR(-ENOMEM);
9046 +       }
9047 +
9048 +       inode->i_version++;
9049 +       inode->i_op = &unionfs_main_iops;
9050 +       inode->i_fop = &unionfs_main_fops;
9051 +
9052 +       inode->i_mapping->a_ops = &unionfs_aops;
9053 +
9054 +       /*
9055 +        * reset times so unionfs_copy_attr_all can keep our time invariants
9056 +        * right (upper inode time being the max of all lower ones).
9057 +        */
9058 +       inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0;
9059 +       inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0;
9060 +       inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0;
9061 +       unlock_new_inode(inode);
9062 +       return inode;
9063 +}
9064 +
9065 +/*
9066 + * we now define delete_inode, because there are two VFS paths that may
9067 + * destroy an inode: one of them calls clear inode before doing everything
9068 + * else that's needed, and the other is fine.  This way we truncate the inode
9069 + * size (and its pages) and then clear our own inode, which will do an iput
9070 + * on our and the lower inode.
9071 + *
9072 + * No need to lock sb info's rwsem.
9073 + */
9074 +static void unionfs_delete_inode(struct inode *inode)
9075 +{
9076 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
9077 +       spin_lock(&inode->i_lock);
9078 +#endif
9079 +       i_size_write(inode, 0); /* every f/s seems to do that */
9080 +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
9081 +       spin_unlock(&inode->i_lock);
9082 +#endif
9083 +
9084 +       if (inode->i_data.nrpages)
9085 +               truncate_inode_pages(&inode->i_data, 0);
9086 +
9087 +       clear_inode(inode);
9088 +}
9089 +
9090 +/*
9091 + * final actions when unmounting a file system
9092 + *
9093 + * No need to lock rwsem.
9094 + */
9095 +static void unionfs_put_super(struct super_block *sb)
9096 +{
9097 +       int bindex, bstart, bend;
9098 +       struct unionfs_sb_info *spd;
9099 +       int leaks = 0;
9100 +
9101 +       spd = UNIONFS_SB(sb);
9102 +       if (!spd)
9103 +               return;
9104 +
9105 +       bstart = sbstart(sb);
9106 +       bend = sbend(sb);
9107 +
9108 +       /* Make sure we have no leaks of branchget/branchput. */
9109 +       for (bindex = bstart; bindex <= bend; bindex++)
9110 +               if (unlikely(branch_count(sb, bindex) != 0)) {
9111 +                       printk(KERN_CRIT
9112 +                              "unionfs: branch %d has %d references left!\n",
9113 +                              bindex, branch_count(sb, bindex));
9114 +                       leaks = 1;
9115 +               }
9116 +       BUG_ON(leaks != 0);
9117 +
9118 +       /* decrement lower super references */
9119 +       for (bindex = bstart; bindex <= bend; bindex++) {
9120 +               struct super_block *s;
9121 +               s = unionfs_lower_super_idx(sb, bindex);
9122 +               unionfs_set_lower_super_idx(sb, bindex, NULL);
9123 +               atomic_dec(&s->s_active);
9124 +       }
9125 +
9126 +       kfree(spd->data);
9127 +       kfree(spd);
9128 +       sb->s_fs_info = NULL;
9129 +}
9130 +
9131 +/*
9132 + * Since people use this to answer the "How big of a file can I write?"
9133 + * question, we report the size of the highest priority branch as the size of
9134 + * the union.
9135 + */
9136 +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
9137 +{
9138 +       int err = 0;
9139 +       struct super_block *sb;
9140 +       struct dentry *lower_dentry;
9141 +
9142 +       sb = dentry->d_sb;
9143 +
9144 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9145 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
9146 +
9147 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
9148 +               err = -ESTALE;
9149 +               goto out;
9150 +       }
9151 +       unionfs_check_dentry(dentry);
9152 +
9153 +       lower_dentry = unionfs_lower_dentry(sb->s_root);
9154 +       err = vfs_statfs(lower_dentry, buf);
9155 +
9156 +       /* set return buf to our f/s to avoid confusing user-level utils */
9157 +       buf->f_type = UNIONFS_SUPER_MAGIC;
9158 +       /*
9159 +        * Our maximum file name is shorter by a few bytes because every
9160 +        * file name could potentially be whited-out.
9161 +        *
9162 +        * XXX: this restriction goes away with ODF.
9163 +        */
9164 +       buf->f_namelen -= UNIONFS_WHLEN;
9165 +
9166 +       /*
9167 +        * reset two fields to avoid confusing user-land.
9168 +        * XXX: is this still necessary?
9169 +        */
9170 +       memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
9171 +       memset(&buf->f_spare, 0, sizeof(buf->f_spare));
9172 +
9173 +out:
9174 +       unionfs_check_dentry(dentry);
9175 +       unionfs_unlock_dentry(dentry);
9176 +       unionfs_read_unlock(sb);
9177 +       return err;
9178 +}
9179 +
9180 +/* remount "mode=BRANCH=MODE": change permissions of an existing branch */
9181 +static noinline int do_remount_mode_option(char *optarg, int cur_branches,
9182 +                                          struct unionfs_data *new_data,
9183 +                                          struct path *new_lower_paths)
9184 +{
9185 +       int err = -EINVAL;
9186 +       int perms, idx;
9187 +       char *modename = strchr(optarg, '=');
9188 +       struct nameidata nd;
9189 +
9190 +       /* optarg is "BRANCH=MODE"; it gets split at the '=' further down */
9191 +       if (!*optarg) {
9192 +               printk(KERN_ERR
9193 +                      "unionfs: no branch specified for mode change\n");
9194 +               goto out;
9195 +       }
9196 +       if (!modename) {
9197 +               printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n",
9198 +                      optarg);
9199 +               goto out;
9200 +       }
9201 +       *modename++ = '\0';
9202 +       err = parse_branch_mode(modename, &perms);
9203 +       if (err) {
9204 +               printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n",
9205 +                      modename, optarg);
9206 +               goto out;
9207 +       }
9208 +
9209 +       /*
9210 +        * Find matching branch index by comparing the looked-up (mnt,
9211 +        * dentry) pair against every branch.  For now, this assumes that
9212 +        * nothing has been mounted on top of this Unionfs stack.  Once we
9213 +        * have /odf and cache-coherency resolved, we'll address the
9214 +        * branch-path uniqueness.
9215 +        */
9215 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9216 +       if (err) {
9217 +               printk(KERN_ERR "unionfs: error accessing "
9218 +                      "lower directory \"%s\" (error %d)\n",
9219 +                      optarg, err);
9220 +               goto out;
9221 +       }
9222 +       for (idx = 0; idx < cur_branches; idx++)
9223 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
9224 +                   nd.path.dentry == new_lower_paths[idx].dentry)
9225 +                       break;
9226 +       path_put(&nd.path);     /* only needed for the comparison above */
9227 +       if (idx == cur_branches) {
9228 +               err = -ENOENT;  /* path_lookup() success left err at 0 */
9229 +               printk(KERN_ERR "unionfs: branch \"%s\" "
9230 +                      "not found\n", optarg);
9231 +               goto out;
9232 +       }
9233 +       /* record the new mode for the matched branch */
9234 +       /* we don't warn if perms==branchperms */
9235 +       new_data[idx].branchperms = perms;
9236 +       err = 0;
9237 +out:
9238 +       return err;
9239 +}
9240 +
9241 +/* remount "del=BRANCH": drop one branch from the branch arrays passed in */
9242 +static noinline int do_remount_del_option(char *optarg, int cur_branches,
9243 +                                         struct unionfs_data *new_data,
9244 +                                         struct path *new_lower_paths)
9245 +{
9246 +       int err = -EINVAL;
9247 +       int idx;
9248 +       struct nameidata nd;
9249 +
9250 +       /* optarg is the path of the branch to delete */
9251 +
9252 +       /*
9253 +        * Find matching branch index.  For now, this assumes that nothing
9254 +        * has been mounted on top of this Unionfs stack.  Once we have /odf
9255 +        * and cache-coherency resolved, we'll address the branch-path
9256 +        * uniqueness.
9257 +        */
9258 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9259 +       if (err) {
9260 +               printk(KERN_ERR "unionfs: error accessing "
9261 +                      "lower directory \"%s\" (error %d)\n",
9262 +                      optarg, err);
9263 +               goto out;
9264 +       }
9265 +       for (idx = 0; idx < cur_branches; idx++)
9266 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
9267 +                   nd.path.dentry == new_lower_paths[idx].dentry)
9268 +                       break;
9269 +       path_put(&nd.path);     /* only needed for the comparison above */
9270 +       if (idx == cur_branches) {
9271 +               printk(KERN_ERR "unionfs: branch \"%s\" "
9272 +                      "not found\n", optarg);
9273 +               err = -ENOENT;
9274 +               goto out;
9275 +       }
9276 +       /* refuse with -EBUSY while files are still open on that branch */
9277 +       if (atomic_read(&new_data[idx].open_files) > 0) {
9278 +               err = -EBUSY;
9279 +               goto out;
9280 +       }
9281 +
9282 +       /*
9283 +        * Now we have to delete the branch.  First, release any handles it
9284 +        * has.  Then, move the remaining array indexes past "idx" in
9285 +        * new_data and new_lower_paths one to the left.  cur_branches is a
9286 +        * by-value copy: the caller lowers its own count on success.
9287 +        */
9288 +       path_put(&new_lower_paths[idx]);
9289 +
9290 +       if (idx < cur_branches - 1) {
9291 +               /* if idx==cur_branches-1, we delete last branch: easy */
9292 +               memmove(&new_data[idx], &new_data[idx+1],
9293 +                       (cur_branches - 1 - idx) *
9294 +                       sizeof(struct unionfs_data));
9295 +               memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
9296 +                       (cur_branches - 1 - idx) * sizeof(struct path));
9297 +       }
9298 +
9299 +       err = 0;
9300 +out:
9301 +       return err;
9302 +}
9303 +
9304 +/* remount "add=...": insert one new branch into the branch arrays passed in */
9305 +static noinline int do_remount_add_option(char *optarg, int cur_branches,
9306 +                                         struct unionfs_data *new_data,
9307 +                                         struct path *new_lower_paths,
9308 +                                         int *high_branch_id)
9309 +{
9310 +       int err = -EINVAL;
9311 +       int perms;
9312 +       int idx = 0;            /* default: insert at beginning */
9313 +       char *new_branch, *modename = NULL;
9314 +       struct nameidata nd;
9315 +
9316 +       /*
9317 +        * optarg can be of several forms:
9318 +        *
9319 +        * /bar:/foo            insert /foo before /bar
9320 +        * /bar:/foo=ro         insert /foo in ro mode before /bar
9321 +        * /foo                 insert /foo in the beginning (prepend)
9322 +        * :/foo                insert /foo at the end (append)
9323 +        */
9324 +       if (*optarg == ':') {   /* append? */
9325 +               new_branch = optarg + 1; /* skip ':' */
9326 +               idx = cur_branches;
9327 +               goto found_insertion_point;
9328 +       }
9329 +       new_branch = strchr(optarg, ':');
9330 +       if (!new_branch) {      /* prepend? */
9331 +               new_branch = optarg;
9332 +               goto found_insertion_point;
9333 +       }
9334 +       *new_branch++ = '\0';   /* holds path+mode of new branch */
9335 +
9336 +       /*
9337 +        * Find matching branch index.  For now, this assumes that nothing
9338 +        * has been mounted on top of this Unionfs stack.  Once we have /odf
9339 +        * and cache-coherency resolved, we'll address the branch-path
9340 +        * uniqueness.
9341 +        */
9342 +       err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
9343 +       if (err) {
9344 +               printk(KERN_ERR "unionfs: error accessing "
9345 +                      "lower directory \"%s\" (error %d)\n",
9346 +                      optarg, err);
9347 +               goto out;
9348 +       }
9349 +       for (idx = 0; idx < cur_branches; idx++)
9350 +               if (nd.path.mnt == new_lower_paths[idx].mnt &&
9351 +                   nd.path.dentry == new_lower_paths[idx].dentry)
9352 +                       break;
9353 +       path_put(&nd.path);     /* only needed for the comparison above */
9354 +       if (idx == cur_branches) {
9355 +               printk(KERN_ERR "unionfs: branch \"%s\" "
9356 +                      "not found\n", optarg);
9357 +               err = -ENOENT;
9358 +               goto out;
9359 +       }
9360 +
9361 +       /*
9362 +        * At this point idx will hold the index where the new branch should
9363 +        * be inserted before.
9364 +        */
9365 +found_insertion_point:
9366 +       /* find the mode for the new branch */
9367 +       if (new_branch)
9368 +               modename = strchr(new_branch, '=');
9369 +       if (modename)
9370 +               *modename++ = '\0';
9371 +       if (!new_branch || !*new_branch) {
9372 +               printk(KERN_ERR "unionfs: null new branch\n");
9373 +               err = -EINVAL;
9374 +               goto out;
9375 +       }
9376 +       err = parse_branch_mode(modename, &perms);
9377 +       if (err) {
9378 +               printk(KERN_ERR "unionfs: invalid mode \"%s\" for "
9379 +                      "branch \"%s\"\n", modename, new_branch);
9380 +               goto out;
9381 +       }
9382 +       err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
9383 +       if (err) {
9384 +               printk(KERN_ERR "unionfs: error accessing "
9385 +                      "lower directory \"%s\" (error %d)\n",
9386 +                      new_branch, err);
9387 +               goto out;
9388 +       }
9389 +       /*
9390 +        * It's probably safe to check_mode the new branch to insert.  Note:
9391 +        * we don't allow inserting branches which are unionfs's by
9392 +        * themselves (check_branch returns EINVAL in that case).  This is
9393 +        * because this code base doesn't support stacking unionfs: the ODF
9394 +        * code base supports that correctly.
9395 +        */
9396 +       err = check_branch(&nd);
9397 +       if (err) {
9398 +               printk(KERN_ERR "unionfs: lower directory "
9399 +                      "\"%s\" is not a valid branch\n", new_branch);
9400 +               path_put(&nd.path);
9401 +               goto out;
9402 +       }
9403 +
9404 +       /*
9405 +        * Now we have to insert the new branch.  But first, move the bits
9406 +        * to make space for the new branch, if needed.  cur_branches is a
9407 +        * by-value copy, so the caller bumps its own count on success.
9408 +        * We don't release nd here; it's kept until umount/remount.
9409 +        */
9410 +       if (idx < cur_branches) {
9411 +               /* if idx==cur_branches, we append: easy */
9412 +               memmove(&new_data[idx+1], &new_data[idx],
9413 +                       (cur_branches - idx) * sizeof(struct unionfs_data));
9414 +               memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
9415 +                       (cur_branches - idx) * sizeof(struct path));
9416 +       }
9417 +       new_lower_paths[idx].dentry = nd.path.dentry;
9418 +       new_lower_paths[idx].mnt = nd.path.mnt;
9419 +
9420 +       new_data[idx].sb = nd.path.dentry->d_sb;
9421 +       atomic_set(&new_data[idx].open_files, 0);
9422 +       new_data[idx].branchperms = perms;
9423 +       new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
9424 +
9425 +       err = 0;
9426 +out:
9427 +       return err;
9428 +}
9429 +
9430 +
9431 +/*
9432 + * Support branch management options on remount.
9433 + *
9434 + * See Documentation/filesystems/unionfs/ for details.
9435 + * @sb: unionfs superblock being remounted
9436 + * @flags: numeric mount options
9437 + * @options: mount options string
9438 + *
9439 + * This function can rearrange a mounted union dynamically, adding and
9440 + * removing branches, including changing branch modes.  Clearly this has to
9441 + * be done safely and atomically.  Luckily, the VFS already calls this
9442 + * function with lock_super(sb) and lock_kernel() held, preventing
9443 + * concurrent mixing of new mounts, remounts, and unmounts.  Moreover,
9444 + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
9445 + * to purge dentries/inodes from our superblock, and also called
9446 + * fsync_super(sb) to purge any dirty pages.  So we're good.
9447 + *
9448 + * XXX: however, our remount code may also need to invalidate mapped pages
9449 + * so as to force them to be re-gotten from the (newly reconfigured) lower
9450 + * branches.  This has to wait for proper mmap and cache coherency support
9451 + * in the VFS.
9452 + * Returns 0 on success, or a negative errno (branch set left untouched).
9453 + */
9454 +static int unionfs_remount_fs(struct super_block *sb, int *flags,
9455 +                             char *options)
9456 +{
9457 +       int err = 0;
9458 +       int i;
9459 +       char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */
9460 +       char *optname;
9461 +       int cur_branches = 0;   /* no. of current branches */
9462 +       int new_branches = 0;   /* no. of branches actually left in the end */
9463 +       int add_branches;       /* est. no. of branches to add */
9464 +       int del_branches;       /* est. no. of branches to del */
9465 +       int max_branches;       /* max possible no. of branches */
9466 +       struct unionfs_data *new_data = NULL, *tmp_data = NULL;
9467 +       struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
9468 +       struct inode **new_lower_inodes = NULL;
9469 +       int new_high_branch_id; /* new high branch ID */
9470 +       int size;               /* memory allocation size, temp var */
9471 +       int old_ibstart, old_ibend;
9472 +
9473 +       unionfs_write_lock(sb);
9474 +
9475 +       /*
9476 +        * The VFS will take care of "ro" and "rw" flags, and we can safely
9477 +        * ignore MS_SILENT, but anything else left over is an error.  So we
9478 +        * need to check if any other flags may have been passed (none are
9479 +        * allowed/supported as of now).
9480 +        */
9481 +       if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
9482 +               printk(KERN_ERR
9483 +                      "unionfs: remount flags 0x%x unsupported\n", *flags);
9484 +               err = -EINVAL;
9485 +               goto out_error;
9486 +       }
9487 +
9488 +       /*
9489 +        * If 'options' is NULL, it's probably because the user just changed
9490 +        * the union to a "ro" or "rw" and the VFS took care of it.  So
9491 +        * nothing to do and we're done.
9492 +        */
9493 +       if (!options || options[0] == '\0')
9494 +               goto out_error;
9495 +
9496 +       /*
9497 +        * Find out how many branches we will have in the end, counting
9498 +        * "add" and "del" commands.  Copy the "options" string because
9499 +        * strsep modifies the string and we need it later.
9500 +        */
9501 +       tmp_to_free = kstrdup(options, GFP_KERNEL);
9502 +       optionstmp = tmp_to_free;
9503 +       if (unlikely(!optionstmp)) {
9504 +               err = -ENOMEM;
9505 +               goto out_free;
9506 +       }
9507 +       cur_branches = sbmax(sb); /* current no. branches */
9508 +       new_branches = sbmax(sb);
9509 +       del_branches = 0;
9510 +       add_branches = 0;
9511 +       new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
9512 +       while ((optname = strsep(&optionstmp, ",")) != NULL) {
9513 +               char *optarg;
9514 +
9515 +               if (!optname || !*optname)
9516 +                       continue;
9517 +
9518 +               optarg = strchr(optname, '=');
9519 +               if (optarg)
9520 +                       *optarg++ = '\0';
9521 +
9522 +               if (!strcmp("add", optname))
9523 +                       add_branches++;
9524 +               else if (!strcmp("del", optname))
9525 +                       del_branches++;
9526 +       }
9527 +       kfree(tmp_to_free);
9528 +       /* after all changes, will we have at least one branch left? */
9529 +       if ((new_branches + add_branches - del_branches) < 1) {
9530 +               printk(KERN_ERR
9531 +                      "unionfs: no branches left after remount\n");
9532 +               err = -EINVAL;
9533 +               goto out_free;
9534 +       }
9535 +
9536 +       /*
9537 +        * Since we haven't actually parsed all the add/del options, nor
9538 +        * have we checked them for errors, we don't know for sure how many
9539 +        * branches we will have after all changes have taken place.  In
9540 +        * fact, the total number of branches left could be less than what
9541 +        * we have now.  So we need to allocate space for a temporary
9542 +        * placeholder that is at least as large as the maximum number of
9543 +        * branches we *could* have, which is the current number plus all
9544 +        * the additions.  Once we're done with these temp placeholders, we
9545 +        * may have to re-allocate the final size, copy over from the temp,
9546 +        * and then free the temps (done near the end of this function).
9547 +        */
9548 +       max_branches = cur_branches + add_branches;
9549 +       /* allocate space for new pointers to lower dentry */
9550 +       tmp_data = kcalloc(max_branches,
9551 +                          sizeof(struct unionfs_data), GFP_KERNEL);
9552 +       if (unlikely(!tmp_data)) {
9553 +               err = -ENOMEM;
9554 +               goto out_free;
9555 +       }
9556 +       /* allocate space for new pointers to lower paths */
9557 +       tmp_lower_paths = kcalloc(max_branches,
9558 +                                 sizeof(struct path), GFP_KERNEL);
9559 +       if (unlikely(!tmp_lower_paths)) {
9560 +               err = -ENOMEM;
9561 +               goto out_free;
9562 +       }
9563 +       /* copy current info into new placeholders, incrementing refcnts */
9564 +       memcpy(tmp_data, UNIONFS_SB(sb)->data,
9565 +              cur_branches * sizeof(struct unionfs_data));
9566 +       memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
9567 +              cur_branches * sizeof(struct path));
9568 +       for (i = 0; i < cur_branches; i++)
9569 +               path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */
9570 +
9571 +       /*******************************************************************
9572 +        * For each branch command, do path_lookup on the requested branch,
9573 +        * and apply the change to a temp branch list.  To handle errors, we
9574 +        * already dup'ed the old arrays (above), and increased the refcnts
9575 +        * on various f/s objects.  So now we can do all the path_lookups
9576 +        * and branch-management commands on the new arrays.  If it fails
9577 +        * midway, we free the tmp arrays and *put all objects.  If we succeed,
9578 +        * then we free old arrays and *put its objects, and then replace
9579 +        * the arrays with the new tmp list (we may have to re-allocate the
9580 +        * memory because the temp lists could have been larger than what we
9581 +        * actually needed).
9582 +        *******************************************************************/
9583 +
9584 +       while ((optname = strsep(&options, ",")) != NULL) {
9585 +               char *optarg;
9586 +
9587 +               if (!optname || !*optname)
9588 +                       continue;
9589 +               /*
9590 +                * At this stage optname holds a comma-delimited option, but
9591 +                * without the commas.  Next, we need to break the string on
9592 +                * the '=' symbol to separate CMD=ARG, where ARG itself can
9593 +                * be KEY=VAL.  For example, in mode=/foo=rw, CMD is "mode",
9594 +                * KEY is "/foo", and VAL is "rw".
9595 +                */
9596 +               optarg = strchr(optname, '=');
9597 +               if (optarg)
9598 +                       *optarg++ = '\0';
9599 +               /* incgen remount option (instead of old ioctl) */
9600 +               if (!strcmp("incgen", optname)) {
9601 +                       err = 0;
9602 +                       goto out_no_change;
9603 +               }
9604 +
9605 +               /*
9606 +                * All of our options take an argument now.  (Insert ones
9607 +                * that don't above this check.)  So at this stage optname
9608 +                * contains the CMD part and optarg contains the ARG part.
9609 +                */
9610 +               if (!optarg || !*optarg) {
9611 +                       printk(KERN_ERR "unionfs: all remount options require "
9612 +                              "an argument (%s)\n", optname);
9613 +                       err = -EINVAL;
9614 +                       goto out_release;
9615 +               }
9616 +
9617 +               if (!strcmp("add", optname)) {
9618 +                       err = do_remount_add_option(optarg, new_branches,
9619 +                                                   tmp_data,
9620 +                                                   tmp_lower_paths,
9621 +                                                   &new_high_branch_id);
9622 +                       if (err)
9623 +                               goto out_release;
9624 +                       new_branches++;
9625 +                       if (new_branches > UNIONFS_MAX_BRANCHES) {
9626 +                               printk(KERN_ERR "unionfs: command exceeds "
9627 +                                      "%d branches\n", UNIONFS_MAX_BRANCHES);
9628 +                               err = -E2BIG;
9629 +                               goto out_release;
9630 +                       }
9631 +                       continue;
9632 +               }
9633 +               if (!strcmp("del", optname)) {
9634 +                       err = do_remount_del_option(optarg, new_branches,
9635 +                                                   tmp_data,
9636 +                                                   tmp_lower_paths);
9637 +                       if (err)
9638 +                               goto out_release;
9639 +                       new_branches--;
9640 +                       continue;
9641 +               }
9642 +               if (!strcmp("mode", optname)) {
9643 +                       err = do_remount_mode_option(optarg, new_branches,
9644 +                                                    tmp_data,
9645 +                                                    tmp_lower_paths);
9646 +                       if (err)
9647 +                               goto out_release;
9648 +                       continue;
9649 +               }
9650 +
9651 +               /*
9652 +                * When you use "mount -o remount,ro", mount(8) will
9653 +                * reportedly pass the original dirs= string from
9654 +                * /proc/mounts.  So for now, we have to ignore dirs= and
9655 +                * not consider it an error, unless we want to allow users
9656 +                * to pass dirs= in remount.  Note that to allow the VFS to
9657 +                * actually process the ro/rw remount options, we have to
9658 +                * return 0 from this function.
9659 +                */
9660 +               if (!strcmp("dirs", optname)) {
9661 +                       printk(KERN_WARNING
9662 +                              "unionfs: remount ignoring option \"%s\"\n",
9663 +                              optname);
9664 +                       continue;
9665 +               }
9666 +
9667 +               err = -EINVAL;
9668 +               printk(KERN_ERR
9669 +                      "unionfs: unrecognized option \"%s\"\n", optname);
9670 +               goto out_release;
9671 +       }
9672 +
9673 +out_no_change:
9674 +
9675 +       /******************************************************************
9676 +        * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
9677 +        * see if we need to allocate a small-sized new vector, copy the
9678 +        * vectors to their correct place, release the refcnt of the older
9679 +        * ones, and return.  Also handle invalidating any pages that will
9680 +        * have to be re-read.
9681 +        *******************************************************************/
9682 +
9683 +       if (!(tmp_data[0].branchperms & MAY_WRITE)) {
9684 +               printk(KERN_ERR "unionfs: leftmost branch cannot be read-only "
9685 +                      "(use \"remount,ro\" to create a read-only union)\n");
9686 +               err = -EINVAL;
9687 +               goto out_release;
9688 +       }
9689 +
9690 +       /* allocate the lower-inode array first: nothing to undo if it fails */
9691 +       new_lower_inodes = kcalloc(new_branches,
9692 +                                  sizeof(struct inode *), GFP_KERNEL);
9693 +       if (unlikely(!new_lower_inodes)) {
9694 +               err = -ENOMEM;
9695 +               goto out_release;
9696 +       }
9697 +       /* resize branch data; on success tmp_data is stale, so forget it */
9698 +       size = new_branches * sizeof(struct unionfs_data);
9699 +       new_data = krealloc(tmp_data, size, GFP_KERNEL);
9700 +       if (unlikely(!new_data)) {
9701 +               err = -ENOMEM;
9702 +               goto out_release;
9703 +       }
9704 +       tmp_data = NULL;        /* owned by new_data: avoid double kfree */
9705 +       /* resize lower paths; same ownership hand-over as above */
9706 +       size = new_branches * sizeof(struct path);
9707 +       new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
9708 +       if (unlikely(!new_lower_paths)) {
9709 +               err = -ENOMEM;
9710 +               goto out_release;
9711 +       }
9712 +       tmp_lower_paths = NULL; /* owned by new_lower_paths from here on */
9713 +
9714 +       /*
9715 +        * OK, just before we actually put the new set of branches in place,
9716 +        * we need to ensure that our own f/s has no dirty objects left.
9717 +        * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
9718 +        * fsync_super(sb), taking care of dentries, inodes, and dirty
9719 +        * pages.  So all that's left is for us to invalidate any leftover
9720 +        * (non-dirty) pages to ensure that they will be re-read from the
9721 +        * new lower branches (and to support mmap).
9722 +        */
9723 +
9724 +       /*
9725 +        * Once we finish the remounting successfully, our superblock
9726 +        * generation number will have increased.  This will be detected by
9727 +        * our dentry-revalidation code upon subsequent f/s operations
9728 +        * through unionfs.  The revalidation code will rebuild the union of
9729 +        * lower inodes for a given unionfs inode and invalidate any pages
9730 +        * of such "stale" inodes (by calling our purge_inode_data
9731 +        * function).  This revalidation will happen lazily and
9732 +        * incrementally, as users perform operations on cached inodes.  We
9733 +        * would like to encourage this revalidation to happen sooner if
9734 +        * possible, so we try to invalidate as many other pages in our
9735 +        * superblock as we can.
9736 +        */
9737 +       purge_sb_data(sb);
9738 +
9739 +       /* grab new lower super refs; drop one per *old* branch (cur_branches) */
9740 +       for (i = 0; i < new_branches; i++)
9741 +               atomic_inc(&new_data[i].sb->s_active);
9742 +       for (i = 0; i < cur_branches; i++)
9743 +               atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active);
9744 +
9745 +       /* copy new vectors into their correct place */
9746 +       tmp_data = UNIONFS_SB(sb)->data;
9747 +       UNIONFS_SB(sb)->data = new_data;
9748 +       new_data = NULL;        /* so don't free good pointers below */
9749 +       tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
9750 +       UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
9751 +       new_lower_paths = NULL; /* so don't free good pointers below */
9752 +
9753 +       /* update our unionfs_sb_info and root dentry index of last branch */
9754 +       i = sbmax(sb);          /* save no. of branches to release at end */
9755 +       sbend(sb) = new_branches - 1;
9756 +       set_dbend(sb->s_root, new_branches - 1);
9757 +       old_ibstart = ibstart(sb->s_root->d_inode);
9758 +       old_ibend = ibend(sb->s_root->d_inode);
9759 +       ibend(sb->s_root->d_inode) = new_branches - 1;
9760 +       UNIONFS_D(sb->s_root)->bcount = new_branches;
9761 +       new_branches = i; /* no. of branches to release below */
9762 +
9763 +       /*
9764 +        * Update lower inodes: 3 steps
9765 +        * 1. grab ref on all new lower inodes
9766 +        */
9767 +       for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
9768 +               struct dentry *lower_dentry =
9769 +                       unionfs_lower_dentry_idx(sb->s_root, i);
9770 +               igrab(lower_dentry->d_inode);
9771 +               new_lower_inodes[i] = lower_dentry->d_inode;
9772 +       }
9773 +       /* 2. release reference on all older lower inodes */
9774 +       for (i = old_ibstart; i <= old_ibend; i++) {
9775 +               iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
9776 +               unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
9777 +       }
9778 +       kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
9779 +       /* 3. update root dentry's inode to new lower_inodes array */
9780 +       UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
9781 +       new_lower_inodes = NULL;
9782 +
9783 +       /* maxbytes may have changed */
9784 +       sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
9785 +       /* update high branch ID */
9786 +       sbhbid(sb) = new_high_branch_id;
9787 +
9788 +       /* update our sb->generation for revalidating objects */
9789 +       i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
9790 +       atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
9791 +       atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
9792 +       if (!(*flags & MS_SILENT))
9793 +               pr_info("unionfs: new generation number %d\n", i);
9794 +       /* finally, update the root dentry's times */
9795 +       unionfs_copy_attr_times(sb->s_root->d_inode);
9796 +       err = 0;                /* reset to success */
9797 +
9798 +       /*
9799 +        * The code above falls through to the next label, and releases the
9800 +        * refcnts of the older ones (stored in tmp_*): if we fell through
9801 +        * here, it means success.  However, if we jump directly to this
9802 +        * label from any error above, then an error occurred after we
9803 +        * grabbed various refcnts, and so we have to release the
9804 +        * temporarily constructed structures.
9805 +        */
9806 +out_release:
9807 +       /* no need to cleanup/release anything in tmp_data */
9808 +       if (tmp_lower_paths)
9809 +               for (i = 0; i < new_branches; i++)
9810 +                       path_put(&tmp_lower_paths[i]);
9811 +out_free:
9812 +       kfree(tmp_lower_paths);
9813 +       kfree(tmp_data);
9814 +       kfree(new_lower_paths);
9815 +       kfree(new_data);
9816 +       kfree(new_lower_inodes);
9817 +out_error:
9818 +       unionfs_check_dentry(sb->s_root);
9819 +       unionfs_write_unlock(sb);
9820 +       return err;
9821 +}
9822 +
9823 +/*
9824 + * Called by iput() when the inode reference count reached zero
9825 + * and the inode is not hashed anywhere.  Frees the cached readdir
9826 + * states, drops the reference on every lower inode, and frees the
9827 + * lower_inodes array before the inode is destroyed.
9828 + *
9829 + * No need to lock sb info's rwsem.
9830 + */
9831 +static void unionfs_clear_inode(struct inode *inode)
9832 +{
9833 +       int bindex, bstart, bend;
9834 +       struct inode *lower_inode;
9835 +       struct list_head *pos, *n;
9836 +       struct unionfs_dir_state *rdstate;
9837 +
9838 +       list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9839 +               rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9840 +               list_del(&rdstate->cache);
9841 +               free_rdstate(rdstate);
9842 +       }
9843 +
9844 +       /*
9845 +        * Drop our reference on each lower inode that is still set,
9846 +        * clearing its slot first; empty slots are skipped.
9847 +        */
9848 +       bstart = ibstart(inode);
9849 +       bend = ibend(inode);
9850 +       if (bstart >= 0) {
9851 +               for (bindex = bstart; bindex <= bend; bindex++) {
9852 +                       lower_inode = unionfs_lower_inode_idx(inode, bindex);
9853 +                       if (!lower_inode)
9854 +                               continue;
9855 +                       unionfs_set_lower_inode_idx(inode, bindex, NULL);
9856 +                       /* see Documentation/filesystems/unionfs/issues.txt */
9857 +                       lockdep_off();
9858 +                       iput(lower_inode);
9859 +                       lockdep_on();
9860 +               }
9861 +       }
9862 +
9863 +       kfree(UNIONFS_I(inode)->lower_inodes);
9864 +       UNIONFS_I(inode)->lower_inodes = NULL;
9865 +}
9866 +/* allocate a unionfs inode from the slab cache; NULL if out of memory */
9867 +static struct inode *unionfs_alloc_inode(struct super_block *sb)
9868 +{
9869 +       struct unionfs_inode_info *i;
9870 +
9871 +       i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
9872 +       if (unlikely(!i))
9873 +               return NULL;
9874 +
9875 +       /* zero only the fields that precede the embedded vfs_inode */
9876 +       memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
9877 +
9878 +       i->vfs_inode.i_version = 1;
9879 +       return &i->vfs_inode;
9880 +}
9881 +/* hand the inode's unionfs_inode_info back to the inode slab cache */
9882 +static void unionfs_destroy_inode(struct inode *inode)
9883 +{
9884 +       kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
9885 +}
9886 +
9887 +/* slab constructor for the inode cache: one-time init of the VFS inode */
9888 +static void init_once(struct kmem_cache *cachep, void *obj)
9889 +{
9890 +       struct unionfs_inode_info *i = obj;
9891 +
9892 +       inode_init_once(&i->vfs_inode);
9893 +}
9894 +/* create the slab cache for unionfs inodes; returns 0 or -ENOMEM */
9895 +int unionfs_init_inode_cache(void)
9896 +{
9897 +       int err = 0;
9898 +
9899 +       unionfs_inode_cachep =
9900 +               kmem_cache_create("unionfs_inode_cache",
9901 +                                 sizeof(struct unionfs_inode_info), 0,
9902 +                                 SLAB_RECLAIM_ACCOUNT, init_once);
9903 +       if (unlikely(!unionfs_inode_cachep))
9904 +               err = -ENOMEM;
9905 +       return err;
9906 +}
9907 +
9908 +/* destroy the unionfs inode slab cache, if it was ever created */
9909 +void unionfs_destroy_inode_cache(void)
9910 +{
9911 +       if (unionfs_inode_cachep)
9912 +               kmem_cache_destroy(unionfs_inode_cachep);
9913 +}
9914 +
9915 +/*
9916 + * Called when we have a dirty inode; right here we only throw out
9917 + * readdir cache entries older than RDCACHE_JIFFIES.  Always returns 0.
9918 + *
9919 + * No need to grab sb info's rwsem.
9920 + */
9921 +static int unionfs_write_inode(struct inode *inode, int sync)
9922 +{
9923 +       struct list_head *pos, *n;
9924 +       struct unionfs_dir_state *rdstate;
9925 +
9926 +       spin_lock(&UNIONFS_I(inode)->rdlock);
9927 +       list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
9928 +               rdstate = list_entry(pos, struct unionfs_dir_state, cache);
9929 +               /* LRU order; time_after() keeps the expiry test wrap-safe */
9930 +               if (time_after(rdstate->access + RDCACHE_JIFFIES, jiffies))
9931 +                       break;
9932 +               UNIONFS_I(inode)->rdcount--;
9933 +               list_del(&rdstate->cache);
9934 +               free_rdstate(rdstate);
9935 +       }
9936 +       spin_unlock(&UNIONFS_I(inode)->rdlock);
9937 +
9938 +       return 0;
9939 +}
9940 +
9941 +/*
9942 + * Used only in nfs, to kill any pending RPC tasks, so that subsequent
9943 + * code can actually succeed and won't leave tasks that need handling.
9944 + */
9945 +static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
9946 +{
9947 +       struct super_block *sb, *lower_sb;
9948 +       struct vfsmount *lower_mnt;
9949 +       int idx, first, last;
9950 +
9951 +       /* not MNT_FORCE'd: emulate the old behavior and do nothing */
9952 +       if (!(flags & MNT_FORCE))
9953 +               return;
9954 +
9955 +       sb = mnt->mnt_sb;
9956 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9957 +
9958 +       first = sbstart(sb);
9959 +       last = sbend(sb);
9960 +       for (idx = first; idx <= last; idx++) {
9961 +               lower_mnt = unionfs_lower_mnt_idx(sb->s_root, idx);
9962 +               lower_sb = unionfs_lower_super_idx(sb, idx);
9963 +
9964 +               /* skip branches that cannot take an umount_begin call */
9965 +               if (!lower_mnt || !lower_sb || !lower_sb->s_op)
9966 +                       continue;
9967 +               if (!lower_sb->s_op->umount_begin)
9968 +                       continue;
9969 +
9970 +               lower_sb->s_op->umount_begin(lower_mnt, flags);
9971 +       }
9972 +
9973 +       unionfs_read_unlock(sb);
9974 +}
9975 +
9976 +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
9977 +{
9978 +       struct super_block *sb = mnt->mnt_sb;
9979 +       const char *mode;
9980 +       char *page_buf;
9981 +       char *name;
9982 +       int bindex, bstart, bend;
9983 +       int ret = 0;
9984 +
9985 +       unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD);
9986 +
9987 +       unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD);
9988 +
9989 +       page_buf = (char *) __get_free_page(GFP_KERNEL);
9990 +       if (unlikely(!page_buf)) {
9991 +               ret = -ENOMEM;
9992 +               goto out;
9993 +       }
9994 +
9995 +       bstart = sbstart(sb);
9996 +       bend = sbend(sb);
9997 +
9998 +       /* emitted as ",dirs=<path>=<rw|ro>" with ':' between branches */
9999 +       seq_puts(m, ",dirs=");
10000 +       for (bindex = bstart; bindex <= bend; bindex++) {
10001 +               struct path lower;
10002 +
10003 +               lower.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex);
10004 +               lower.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
10005 +               name = d_path(&lower, page_buf, PAGE_SIZE);
10006 +               if (IS_ERR(name)) {
10007 +                       ret = PTR_ERR(name);
10008 +                       goto out;
10009 +               }
10010 +
10011 +               mode = (branchperms(sb, bindex) & MAY_WRITE) ? "rw" : "ro";
10012 +               seq_printf(m, "%s=%s", name, mode);
10013 +               if (bindex < bend)
10014 +                       seq_puts(m, ":");
10015 +       }
10016 +
10017 +out:
10018 +       free_page((unsigned long) page_buf);
10019 +
10020 +       /* release in the reverse order of acquisition */
10021 +       unionfs_unlock_dentry(sb->s_root);
10022 +       unionfs_read_unlock(sb);
10023 +
10024 +       return ret;
10025 +}
10026 +
10027 +struct super_operations unionfs_sops = {
10028 +       .delete_inode   = unionfs_delete_inode,
10029 +       .put_super      = unionfs_put_super,
10030 +       .statfs         = unionfs_statfs,
10031 +       .remount_fs     = unionfs_remount_fs,
10032 +       .clear_inode    = unionfs_clear_inode,
10033 +       .umount_begin   = unionfs_umount_begin,
10034 +       .show_options   = unionfs_show_options,
10035 +       .write_inode    = unionfs_write_inode,
10036 +       .alloc_inode    = unionfs_alloc_inode,
10037 +       .destroy_inode  = unionfs_destroy_inode,
10038 +};
10039 diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h
10040 new file mode 100644
10041 index 0000000..a7f0538
10042 --- /dev/null
10043 +++ b/fs/unionfs/union.h
10044 @@ -0,0 +1,616 @@
10045 +/*
10046 + * Copyright (c) 2003-2007 Erez Zadok
10047 + * Copyright (c) 2003-2006 Charles P. Wright
10048 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10049 + * Copyright (c) 2005      Arun M. Krishnakumar
10050 + * Copyright (c) 2004-2006 David P. Quigley
10051 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10052 + * Copyright (c) 2003      Puja Gupta
10053 + * Copyright (c) 2003      Harikesavan Krishnan
10054 + * Copyright (c) 2003-2007 Stony Brook University
10055 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10056 + *
10057 + * This program is free software; you can redistribute it and/or modify
10058 + * it under the terms of the GNU General Public License version 2 as
10059 + * published by the Free Software Foundation.
10060 + */
10061 +
10062 +#ifndef _UNION_H_
10063 +#define _UNION_H_
10064 +
10065 +#include <linux/dcache.h>
10066 +#include <linux/file.h>
10067 +#include <linux/list.h>
10068 +#include <linux/fs.h>
10069 +#include <linux/mm.h>
10070 +#include <linux/module.h>
10071 +#include <linux/mount.h>
10072 +#include <linux/namei.h>
10073 +#include <linux/page-flags.h>
10074 +#include <linux/pagemap.h>
10075 +#include <linux/poll.h>
10076 +#include <linux/security.h>
10077 +#include <linux/seq_file.h>
10078 +#include <linux/slab.h>
10079 +#include <linux/spinlock.h>
10080 +#include <linux/smp_lock.h>
10081 +#include <linux/statfs.h>
10082 +#include <linux/string.h>
10083 +#include <linux/vmalloc.h>
10084 +#include <linux/writeback.h>
10085 +#include <linux/buffer_head.h>
10086 +#include <linux/xattr.h>
10087 +#include <linux/fs_stack.h>
10088 +#include <linux/magic.h>
10089 +#include <linux/log2.h>
10090 +#include <linux/poison.h>
10091 +#include <linux/mman.h>
10092 +#include <linux/backing-dev.h>
10093 +
10094 +#include <asm/system.h>
10095 +
10096 +#include <linux/union_fs.h>
10097 +
10098 +/* the file system name */
10099 +#define UNIONFS_NAME "unionfs"
10100 +
10101 +/* unionfs root inode number */
10102 +#define UNIONFS_ROOT_INO     1
10103 +
10104 +/* number of times we try to get a unique temporary file name */
10105 +#define GET_TMPNAM_MAX_RETRY   5
10106 +
10107 +/* maximum number of branches we support, to avoid memory blowup */
10108 +#define UNIONFS_MAX_BRANCHES   128
10109 +
10110 +/* minimum time (seconds) required for time-based cache-coherency */
10111 +#define UNIONFS_MIN_CC_TIME    3
10112 +
10113 +/* Operations vectors defined in specific files. */
10114 +extern struct file_operations unionfs_main_fops;
10115 +extern struct file_operations unionfs_dir_fops;
10116 +extern struct inode_operations unionfs_main_iops;
10117 +extern struct inode_operations unionfs_dir_iops;
10118 +extern struct inode_operations unionfs_symlink_iops;
10119 +extern struct super_operations unionfs_sops;
10120 +extern struct dentry_operations unionfs_dops;
10121 +extern struct address_space_operations unionfs_aops;
10122 +
10123 +/* How long should an entry be allowed to persist */
10124 +#define RDCACHE_JIFFIES        (5*HZ)
10125 +
10126 +/* compatibility with Real-Time patches */
10127 +#ifdef CONFIG_PREEMPT_RT
10128 +# define unionfs_rw_semaphore  compat_rw_semaphore
10129 +#else /* not CONFIG_PREEMPT_RT */
10130 +# define unionfs_rw_semaphore  rw_semaphore
10131 +#endif /* not CONFIG_PREEMPT_RT */
10132 +
10133 +/* file private data. */
10134 +struct unionfs_file_info {
10135 +       int bstart;
10136 +       int bend;
10137 +       atomic_t generation;
10138 +
10139 +       struct unionfs_dir_state *rdstate;
10140 +       struct file **lower_files;
10141 +       int *saved_branch_ids; /* IDs of branches when file was opened */
10142 +};
10143 +
10144 +/* unionfs inode data in memory */
10145 +struct unionfs_inode_info {
10146 +       int bstart;
10147 +       int bend;
10148 +       atomic_t generation;
10149 +       int stale;
10150 +       /* Stuff for readdir over NFS. */
10151 +       spinlock_t rdlock;
10152 +       struct list_head readdircache;
10153 +       int rdcount;
10154 +       int hashsize;
10155 +       int cookie;
10156 +
10157 +       /* The lower inodes */
10158 +       struct inode **lower_inodes;
10159 +
10160 +       struct inode vfs_inode;
10161 +};
10162 +
10163 +/* unionfs dentry data in memory */
10164 +struct unionfs_dentry_info {
10165 +       /*
10166 +        * This mutex is used to lock the dentry as soon as we get into a
10167 +        * unionfs function from the VFS.  Our lock ordering is that children
10168 +        * go before their parents.
10169 +        */
10170 +       struct mutex lock;
10171 +       int bstart;
10172 +       int bend;
10173 +       int bopaque;
10174 +       int bcount;
10175 +       atomic_t generation;
10176 +       struct path *lower_paths;
10177 +};
10178 +
10179 +/* These are the pointers to our various objects. */
10180 +struct unionfs_data {
10181 +       struct super_block *sb; /* lower super_block */
10182 +       atomic_t open_files;    /* number of open files on branch */
10183 +       int branchperms;
10184 +       int branch_id;          /* unique branch ID at re/mount time */
10185 +};
10186 +
10187 +/* unionfs super-block data in memory */
10188 +struct unionfs_sb_info {
10189 +       int bend;
10190 +
10191 +       atomic_t generation;
10192 +
10193 +       /*
10194 +        * This rwsem is used to make sure that a branch management
10195 +        * operation...
10196 +        *   1) will not begin before all currently in-flight operations
10197 +        *      complete.
10198 +        *   2) any new operations do not execute until the currently
10199 +        *      running branch management operation completes.
10200 +        *
10201 +        * The write_lock_owner records the PID of the task which grabbed
10202 +        * the rw_sem for writing.  If the same task also tries to grab the
10203 +        * read lock, we allow it.  This prevents a self-deadlock when
10204 +        * branch-management is used on a pivot_root'ed union, because we
10205 +        * have to ->lookup paths which belong to the same union.
10206 +        */
10207 +       struct unionfs_rw_semaphore rwsem;
10208 +       pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */
10209 +       int high_branch_id;     /* last unique branch ID given */
10210 +       struct unionfs_data *data;
10211 +};
10212 +
10213 +/*
10214 + * structure for making the linked list of entries by readdir on left branch
10215 + * to compare with entries on right branch
10216 + */
10217 +struct filldir_node {
10218 +       struct list_head file_list;     /* list for directory entries */
10219 +       char *name;             /* name entry */
10220 +       int hash;               /* name hash */
10221 +       int namelen;            /* name len since name is not 0 terminated */
10222 +
10223 +       /*
10224 +        * we can check for duplicate whiteouts and files in the same branch
10225 +        * in order to return -EIO.
10226 +        */
10227 +       int bindex;
10228 +
10229 +       /* is this a whiteout entry? */
10230 +       int whiteout;
10231 +
10232 +       /* Inline name, so we don't need to separately kmalloc small ones */
10233 +       char iname[DNAME_INLINE_LEN_MIN];
10234 +};
10235 +
10236 +/* Directory hash table. */
10237 +struct unionfs_dir_state {
10238 +       unsigned int cookie;    /* the cookie, based off of rdversion */
10239 +       unsigned int offset;    /* The entry we have returned. */
10240 +       int bindex;
10241 +       loff_t dirpos;          /* offset within the lower level directory */
10242 +       int size;               /* How big is the hash table? */
10243 +       int hashentries;        /* How many entries have been inserted? */
10244 +       unsigned long access;
10245 +
10246 +       /* This cache list is used when the inode keeps us around. */
10247 +       struct list_head cache;
10248 +       struct list_head list[0];
10249 +};
10250 +
10251 +/* externs needed for fanout.h or sioq.h */
10252 +extern int unionfs_get_nlinks(const struct inode *inode);
10253 +extern void unionfs_copy_attr_times(struct inode *upper);
10254 +extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src);
10255 +
10256 +/* include miscellaneous macros */
10257 +#include "fanout.h"
10258 +#include "sioq.h"
10259 +
10260 +/* externs for cache creation/deletion routines */
10261 +extern void unionfs_destroy_filldir_cache(void);
10262 +extern int unionfs_init_filldir_cache(void);
10263 +extern int unionfs_init_inode_cache(void);
10264 +extern void unionfs_destroy_inode_cache(void);
10265 +extern int unionfs_init_dentry_cache(void);
10266 +extern void unionfs_destroy_dentry_cache(void);
10267 +
10268 +/* Initialize and free readdir-specific state. */
10269 +extern int init_rdstate(struct file *file);
10270 +extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode,
10271 +                                              int bindex);
10272 +extern struct unionfs_dir_state *find_rdstate(struct inode *inode,
10273 +                                             loff_t fpos);
10274 +extern void free_rdstate(struct unionfs_dir_state *state);
10275 +extern int add_filldir_node(struct unionfs_dir_state *rdstate,
10276 +                           const char *name, int namelen, int bindex,
10277 +                           int whiteout);
10278 +extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
10279 +                                             const char *name, int namelen,
10280 +                                             int is_whiteout);
10281 +
10282 +extern struct dentry **alloc_new_dentries(int objs);
10283 +extern struct unionfs_data *alloc_new_data(int objs);
10284 +
10285 +/* We can only use 32-bits of offset for rdstate --- blech! */
10286 +#define DIREOF (0xfffff)
10287 +#define RDOFFBITS 20           /* This is the number of bits in DIREOF. */
10288 +#define MAXRDCOOKIE (0xfff)
10289 +/* Turn an rdstate into an offset. */
10290 +static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
10291 +{
10292 +       /* cookie sits above the low RDOFFBITS bits, entry offset below */
10293 +       unsigned int hi = (buf->cookie & MAXRDCOOKIE) << RDOFFBITS;
10294 +       unsigned int lo = buf->offset & DIREOF;
10295 +
10296 +       return hi | lo;
10297 +}
10298 +
10299 +/* Macros for locking a super_block. */
10300 +enum unionfs_super_lock_class {
10301 +       UNIONFS_SMUTEX_NORMAL,
10302 +       UNIONFS_SMUTEX_PARENT,  /* when locking on behalf of file */
10303 +       UNIONFS_SMUTEX_CHILD,   /* when locking on behalf of dentry */
10304 +};
10305 +static inline void unionfs_read_lock(struct super_block *sb, int subclass)
10306 +{
10307 +       if (UNIONFS_SB(sb)->write_lock_owner &&
10308 +           UNIONFS_SB(sb)->write_lock_owner == current->pid)
10309 +               return;
10310 +       down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass);
10311 +}
10312 +static inline void unionfs_read_unlock(struct super_block *sb)
10313 +{
10314 +       if (UNIONFS_SB(sb)->write_lock_owner &&
10315 +           UNIONFS_SB(sb)->write_lock_owner == current->pid)
10316 +               return;
10317 +       up_read(&UNIONFS_SB(sb)->rwsem);
10318 +}
10319 +static inline void unionfs_write_lock(struct super_block *sb)
10320 +{
10321 +       down_write(&UNIONFS_SB(sb)->rwsem);
10322 +       UNIONFS_SB(sb)->write_lock_owner = current->pid;
10323 +}
10324 +static inline void unionfs_write_unlock(struct super_block *sb)
10325 +{
10326 +       UNIONFS_SB(sb)->write_lock_owner = 0; /* clear while still held */
10327 +       up_write(&UNIONFS_SB(sb)->rwsem);
10328 +}
10329 +
10330 +static inline void unionfs_double_lock_dentry(struct dentry *d1,
10331 +                                             struct dentry *d2)
10332 +{
10333 +       struct dentry *first = (d1 < d2) ? d2 : d1;
10334 +       struct dentry *second = (d1 < d2) ? d1 : d2;
10335 +
10336 +       BUG_ON(d1 == d2);
10337 +
10338 +       /* the dentry at the higher address is always locked first */
10339 +       unionfs_lock_dentry(first, UNIONFS_DMUTEX_CHILD);
10340 +       unionfs_lock_dentry(second, UNIONFS_DMUTEX_PARENT);
10341 +}
10342 +
10343 +extern int new_dentry_private_data(struct dentry *dentry, int subclass);
10344 +extern void free_dentry_private_data(struct dentry *dentry);
10345 +extern void update_bstart(struct dentry *dentry);
10346 +extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
10347 +extern void release_lower_nd(struct nameidata *nd, int err);
10348 +
10349 +/*
10350 + * EXTERNALS:
10351 + */
10352 +
10353 +/* replicates the directory structure up to given dentry in given branch */
10354 +extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
10355 +                                    const char *name, int bindex);
10356 +extern int make_dir_opaque(struct dentry *dir, int bindex);
10357 +
10358 +/* partial lookup */
10359 +extern int unionfs_partial_lookup(struct dentry *dentry);
10360 +
10361 +/*
10362 + * Pass a unionfs dentry and a branch index ('start') and it will try to
10363 + * create a whiteout in that branch.
10364 + *
10365 + * On error, it will proceed to a branch to the left
10366 + */
10367 +extern int create_whiteout(struct dentry *dentry, int start);
10368 +/* copies a file from dbstart to newbindex branch */
10369 +extern int copyup_file(struct inode *dir, struct file *file, int bstart,
10370 +                      int newbindex, loff_t size);
10371 +extern int copyup_named_file(struct inode *dir, struct file *file,
10372 +                            char *name, int bstart, int new_bindex,
10373 +                            loff_t len);
10374 +/* copies a dentry from dbstart to newbindex branch */
10375 +extern int copyup_dentry(struct inode *dir, struct dentry *dentry,
10376 +                        int bstart, int new_bindex, const char *name,
10377 +                        int namelen, struct file **copyup_file, loff_t len);
10378 +/* helper functions for post-copyup actions */
10379 +extern void unionfs_postcopyup_setmnt(struct dentry *dentry);
10380 +extern void unionfs_postcopyup_release(struct dentry *dentry);
10381 +
10382 +extern int remove_whiteouts(struct dentry *dentry,
10383 +                           struct dentry *lower_dentry, int bindex);
10384 +
10385 +extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
10386 +                              struct unionfs_dir_state *namelist);
10387 +
10388 +/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
10389 +extern int check_empty(struct dentry *dentry,
10390 +                      struct unionfs_dir_state **namelist);
10391 +/* Delete whiteouts from this directory in branch bindex. */
10392 +extern int delete_whiteouts(struct dentry *dentry, int bindex,
10393 +                           struct unionfs_dir_state *namelist);
10394 +
10395 +/* Re-lookup a lower dentry. */
10396 +extern int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex);
10397 +
10398 +extern void unionfs_reinterpose(struct dentry *this_dentry);
10399 +extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
10400 +
10401 +/* Locking functions. */
10402 +extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
10403 +extern int unionfs_getlk(struct file *file, struct file_lock *fl);
10404 +
10405 +/* Common file operations. */
10406 +extern int unionfs_file_revalidate_locked(struct file *file, bool willwrite);
10407 +extern int unionfs_file_revalidate(struct file *file, bool willwrite);
10408 +extern int unionfs_open(struct inode *inode, struct file *file);
10409 +extern int unionfs_file_release(struct inode *inode, struct file *file);
10410 +extern int unionfs_flush(struct file *file, fl_owner_t id);
10411 +extern long unionfs_ioctl(struct file *file, unsigned int cmd,
10412 +                         unsigned long arg);
10413 +extern int unionfs_fsync(struct file *file, struct dentry *dentry,
10414 +                        int datasync);
10415 +extern int unionfs_fasync(int fd, struct file *file, int flag);
10416 +
10417 +/* Inode operations */
10418 +extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino);
10419 +extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
10420 +                         struct inode *new_dir, struct dentry *new_dentry);
10421 +extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
10422 +extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
10423 +
10424 +extern bool __unionfs_d_revalidate_one_locked(struct dentry *dentry,
10425 +                                             struct nameidata *nd,
10426 +                                             bool willwrite);
10427 +extern bool __unionfs_d_revalidate_chain(struct dentry *dentry,
10428 +                                        struct nameidata *nd, bool willwrite);
10429 +extern bool is_newer_lower(const struct dentry *dentry);
10430 +extern void purge_sb_data(struct super_block *sb);
10431 +
10432 +/* The values for unionfs_interpose's flag. */
10433 +#define INTERPOSE_DEFAULT      0
10434 +#define INTERPOSE_LOOKUP       1
10435 +#define INTERPOSE_REVAL                2
10436 +#define INTERPOSE_REVAL_NEG    3
10437 +#define INTERPOSE_PARTIAL      4
10438 +
10439 +extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
10440 +                                       struct super_block *sb, int flag);
10441 +
10442 +#ifdef CONFIG_UNION_FS_XATTR
10443 +/* Extended attribute functions. */
10444 +extern void *unionfs_xattr_alloc(size_t size, size_t limit);
10445 +static inline void unionfs_xattr_kfree(const void *p)
10446 +{
10447 +       kfree(p);
10448 +}
10449 +extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
10450 +                               void *value, size_t size);
10451 +extern int unionfs_removexattr(struct dentry *dentry, const char *name);
10452 +extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
10453 +                                size_t size);
10454 +extern int unionfs_setxattr(struct dentry *dentry, const char *name,
10455 +                           const void *value, size_t size, int flags);
10456 +#endif /* CONFIG_UNION_FS_XATTR */
10457 +
10458 +/* The root directory is unhashed, but isn't deleted. */
10459 +static inline int d_deleted(struct dentry *d)
10460 +{
10461 +       return d_unhashed(d) && (d != d->d_sb->s_root);
10462 +}
10463 +
10464 +struct dentry *unionfs_lookup_backend(struct dentry *dentry,
10465 +                                     struct nameidata *nd, int lookupmode);
10466 +
10467 +/* unionfs_permission, check if we should bypass error to facilitate copyup */
10468 +#define IS_COPYUP_ERR(err) ((err) == -EROFS)
10469 +
10470 +/* unionfs_open, check if we need to copyup the file */
10471 +#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
10472 +#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
10473 +
10474 +static inline int branchperms(const struct super_block *sb, int index)
10475 +{
10476 +       BUG_ON(index < 0);
10477 +       return UNIONFS_SB(sb)->data[index].branchperms;
10478 +}
10479 +
10480 +static inline int set_branchperms(struct super_block *sb, int index, int perms)
10481 +{
10482 +       BUG_ON(index < 0);
10483 +       UNIONFS_SB(sb)->data[index].branchperms = perms;
10484 +       return perms;
10485 +}
10486 +
10487 +/* Is branch 'index' read-only according to its unionfs branch perms? */
10488 +static inline int is_robranch_super(const struct super_block *sb, int index)
10489 +{
10490 +       /* 0 if the branch is marked writable, -EROFS otherwise */
10491 +       if (branchperms(sb, index) & MAY_WRITE)
10492 +               return 0;
10493 +       return -EROFS;
10494 +}
10495 +
10496 +/* Is this file on a read-only branch? */
10497 +static inline int is_robranch_idx(const struct dentry *dentry, int index)
10498 +{
10499 +       struct super_block *lower_sb;
10500 +
10501 +       BUG_ON(index < 0);
10502 +
10503 +       if (!(branchperms(dentry->d_sb, index) & MAY_WRITE))
10504 +               return -EROFS;
10505 +
10506 +       lower_sb = unionfs_lower_super_idx(dentry->d_sb, index);
10507 +       BUG_ON(lower_sb == NULL);
10508 +       /*
10509 +        * test sb flags directly, not IS_RDONLY(lower_inode) because the
10510 +        * lower_dentry could be a negative.
10511 +        */
10512 +       if (lower_sb->s_flags & MS_RDONLY)
10513 +               return -EROFS;
10514 +
10515 +       return 0;
10516 +}
10517 +
10518 +static inline int is_robranch(const struct dentry *dentry)
10519 +{
10520 +       int index;
10521 +
10522 +       index = UNIONFS_D(dentry)->bstart;
10523 +       BUG_ON(index < 0);
10524 +
10525 +       return is_robranch_idx(dentry, index);
10526 +}
10527 +
10528 +/* What do we use for whiteouts. */
10529 +#define UNIONFS_WHPFX ".wh."
10530 +#define UNIONFS_WHLEN 4
10531 +/*
10532 + * If a directory contains this file, then it is opaque.  We start with the
10533 + * .wh. flag so that it is blocked by lookup.
10534 + */
10535 +#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
10536 +#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
10537 +
10538 +/*
10539 + * EXTERNALS:
10540 + */
10541 +extern char *alloc_whname(const char *name, int len);
10542 +extern int check_branch(struct nameidata *nd);
10543 +extern int parse_branch_mode(const char *name, int *perms);
10544 +
10545 +/* locking helpers */
10546 +static inline struct dentry *lock_parent(struct dentry *dentry)
10547 +{
10548 +       struct dentry *dir = dget_parent(dentry);
10549 +       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
10550 +       return dir;
10551 +}
10552 +static inline struct dentry *lock_parent_wh(struct dentry *dentry)
10553 +{
10554 +       struct dentry *dir = dget_parent(dentry);
10555 +
10556 +       mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT);
10557 +       return dir;
10558 +}
10559 +
10560 +static inline void unlock_dir(struct dentry *dir)
10561 +{
10562 +       mutex_unlock(&dir->d_inode->i_mutex);
10563 +       dput(dir);
10564 +}
10565 +
10566 +static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
10567 +                                             int bindex)
10568 +{
10569 +       struct vfsmount *mnt;
10570 +
10571 +       BUG_ON(!dentry || bindex < 0);
10572 +
10573 +       mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
10574 +#ifdef CONFIG_UNION_FS_DEBUG
10575 +       if (!mnt)
10576 +               pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
10577 +                        mnt, bindex);
10578 +#endif /* CONFIG_UNION_FS_DEBUG */
10579 +
10580 +       return mnt;
10581 +}
10582 +
10583 +static inline void unionfs_mntput(struct dentry *dentry, int bindex)
10584 +{
10585 +       struct vfsmount *mnt;
10586 +
10587 +       if (!dentry && bindex < 0)
10588 +               return;
10589 +       BUG_ON(!dentry || bindex < 0);
10590 +
10591 +       mnt = unionfs_lower_mnt_idx(dentry, bindex);
10592 +#ifdef CONFIG_UNION_FS_DEBUG
10593 +       /*
10594 +        * Directories can have NULL lower objects in between start/end, but
10595 +        * NOT if at the start/end range.  We cannot verify that this dentry
10596 +        * is a type=DIR, because it may already be a negative dentry.  But
10597 +        * if dbstart is greater than dbend, we know that this couldn't have
10598 +        * been a regular file: it had to have been a directory.
10599 +        */
10600 +       if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
10601 +               pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
10602 +#endif /* CONFIG_UNION_FS_DEBUG */
10603 +       mntput(mnt);
10604 +}
10605 +
10606 +#ifdef CONFIG_UNION_FS_DEBUG
10607 +
10608 +/* useful for tracking code reachability */
10609 +#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__)
10610 +
10611 +#define unionfs_check_inode(i) __unionfs_check_inode((i),      \
10612 +       __FILE__, __FUNCTION__, __LINE__)
10613 +#define unionfs_check_dentry(d)        __unionfs_check_dentry((d),     \
10614 +       __FILE__, __FUNCTION__, __LINE__)
10615 +#define unionfs_check_file(f)  __unionfs_check_file((f),       \
10616 +       __FILE__, __FUNCTION__, __LINE__)
10617 +#define unionfs_check_nd(n)    __unionfs_check_nd((n),         \
10618 +       __FILE__, __FUNCTION__, __LINE__)
10619 +#define show_branch_counts(sb) __show_branch_counts((sb),      \
10620 +       __FILE__, __FUNCTION__, __LINE__)
10621 +#define show_inode_times(i)    __show_inode_times((i),         \
10622 +       __FILE__, __FUNCTION__, __LINE__)
10623 +#define show_dinode_times(d)   __show_dinode_times((d),        \
10624 +       __FILE__, __FUNCTION__, __LINE__)
10625 +#define show_inode_counts(i)   __show_inode_counts((i),        \
10626 +       __FILE__, __FUNCTION__, __LINE__)
10627 +
10628 +extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
10629 +                                 const char *fxn, int line);
10630 +extern void __unionfs_check_dentry(const struct dentry *dentry,
10631 +                                  const char *fname, const char *fxn,
10632 +                                  int line);
10633 +extern void __unionfs_check_file(const struct file *file,
10634 +                                const char *fname, const char *fxn, int line);
10635 +extern void __unionfs_check_nd(const struct nameidata *nd,
10636 +                              const char *fname, const char *fxn, int line);
10637 +extern void __show_branch_counts(const struct super_block *sb,
10638 +                                const char *file, const char *fxn, int line);
10639 +extern void __show_inode_times(const struct inode *inode,
10640 +                              const char *file, const char *fxn, int line);
10641 +extern void __show_dinode_times(const struct dentry *dentry,
10642 +                               const char *file, const char *fxn, int line);
10643 +extern void __show_inode_counts(const struct inode *inode,
10644 +                               const char *file, const char *fxn, int line);
10645 +
10646 +#else /* not CONFIG_UNION_FS_DEBUG */
10647 +
10648 +/* we leave useful hooks for these check functions throughout the code */
10649 +#define unionfs_check_inode(i)         do { } while (0)
10650 +#define unionfs_check_dentry(d)                do { } while (0)
10651 +#define unionfs_check_file(f)          do { } while (0)
10652 +#define unionfs_check_nd(n)            do { } while (0)
10653 +#define show_branch_counts(sb)         do { } while (0)
10654 +#define show_inode_times(i)            do { } while (0)
10655 +#define show_dinode_times(d)           do { } while (0)
10656 +#define show_inode_counts(i)           do { } while (0)
10657 +
10658 +#endif /* not CONFIG_UNION_FS_DEBUG */
10659 +
10660 +#endif /* not _UNION_H_ */
10661 diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
10662 new file mode 100644
10663 index 0000000..6e93da3
10664 --- /dev/null
10665 +++ b/fs/unionfs/unlink.c
10666 @@ -0,0 +1,260 @@
10667 +/*
10668 + * Copyright (c) 2003-2007 Erez Zadok
10669 + * Copyright (c) 2003-2006 Charles P. Wright
10670 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10671 + * Copyright (c) 2005-2006 Junjiro Okajima
10672 + * Copyright (c) 2005      Arun M. Krishnakumar
10673 + * Copyright (c) 2004-2006 David P. Quigley
10674 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10675 + * Copyright (c) 2003      Puja Gupta
10676 + * Copyright (c) 2003      Harikesavan Krishnan
10677 + * Copyright (c) 2003-2007 Stony Brook University
10678 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10679 + *
10680 + * This program is free software; you can redistribute it and/or modify
10681 + * it under the terms of the GNU General Public License version 2 as
10682 + * published by the Free Software Foundation.
10683 + */
10684 +
10685 +#include "union.h"
10686 +
10687 +/* unlink the lower file and, where needed, create a whiteout for it */
10688 +static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
10689 +{
10690 +       struct dentry *lower_dentry;
10691 +       struct dentry *lower_dir_dentry;
10692 +       int bindex;
10693 +       int err = 0;
10694 +
10695 +       err = unionfs_partial_lookup(dentry);
10696 +       if (err)
10697 +               goto out;
10698 +
10699 +       bindex = dbstart(dentry);
10700 +
10701 +       lower_dentry = unionfs_lower_dentry_idx(dentry, bindex);
10702 +       if (!lower_dentry)
10703 +               goto out;       /* err is still 0 here */
10704 +
10705 +       lower_dir_dentry = lock_parent(lower_dentry);
10706 +
10707 +       /* avoid destroying the lower inode if the file is in use */
10708 +       dget(lower_dentry);
10709 +       err = is_robranch_super(dentry->d_sb, bindex);
10710 +       if (!err) {
10711 +               /* see Documentation/filesystems/unionfs/issues.txt */
10712 +               lockdep_off();
10713 +               err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
10714 +               lockdep_on();
10715 +       }
10716 +       /* if vfs_unlink succeeded, update our inode's times */
10717 +       if (!err)
10718 +               unionfs_copy_attr_times(dentry->d_inode);
10719 +       dput(lower_dentry);
10720 +       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10721 +       unlock_dir(lower_dir_dentry);
10722 +
10723 +       if (err && !IS_COPYUP_ERR(err))
10724 +               goto out;
10725 +
10726 +       /*
10727 +        * We create whiteouts if (1) there was an error unlinking the main
10728 +        * file; (2) there is a lower priority file with the same name
10729 +        * (dbopaque); (3) the branch holding the file is not the last
10730 +        * (rightmost) branch.  The last rule is an optimization to avoid
10731 +        * creating all those whiteouts if there's no chance they'd be
10732 +        * masking any lower-priority branch, including when unionfs is
10733 +        * used with only one branch (using only one branch, while odd, is
10734 +        * still possible).
10735 +        */
10736 +       if (err) {
10737 +               if (dbstart(dentry) == 0)
10738 +                       goto out;
10739 +               err = create_whiteout(dentry, dbstart(dentry) - 1);
10740 +       } else if (dbopaque(dentry) != -1) {
10741 +               err = create_whiteout(dentry, dbopaque(dentry));
10742 +       } else if (dbstart(dentry) < sbend(dentry->d_sb)) {
10743 +               err = create_whiteout(dentry, dbstart(dentry));
10744 +       }
10745 +
10746 +out:
10747 +       if (!err)
10748 +               inode_dec_link_count(dentry->d_inode);
10749 +
10750 +       /* We don't want to leave negative leftover dentries for revalidate. */
10751 +       if (!err && (dbopaque(dentry) != -1))
10752 +               update_bstart(dentry);
10753 +
10754 +       return err;
10755 +}
10756 +
10757 +int unionfs_unlink(struct inode *dir, struct dentry *dentry)
10758 +{
10759 +       int err = 0;
10760 +       struct inode *inode = dentry->d_inode;
10761 +       int valid;
10762 +
10763 +       BUG_ON(S_ISDIR(inode->i_mode)); /* not for directories */
10764 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10765 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10766 +       unionfs_lock_dentry(dentry->d_parent, UNIONFS_DMUTEX_PARENT);
10767 +
10768 +       valid = __unionfs_d_revalidate_chain(dentry->d_parent, NULL, false);
10769 +       if (unlikely(!valid)) {
10770 +               err = -ESTALE;
10771 +               goto out;
10772 +       }
10773 +       valid = __unionfs_d_revalidate_one_locked(dentry, NULL, false);
10774 +       if (unlikely(!valid)) {
10775 +               err = -ESTALE;
10776 +               goto out;
10777 +       }
10778 +       unionfs_check_dentry(dentry);
10779 +
10780 +       err = unionfs_unlink_whiteout(dir, dentry);
10781 +       /* on success, call d_drop so the system "forgets" about us */
10782 +       if (!err) {
10783 +               unionfs_postcopyup_release(dentry);
10784 +               if (inode->i_nlink == 0) {
10785 +                       /* no links left: drop the lower inode */
10786 +                       iput(unionfs_lower_inode(inode));
10787 +                       unionfs_set_lower_inode(inode, NULL);
10788 +                       ibstart(inode) = ibend(inode) = -1;
10789 +               }
10790 +               d_drop(dentry);
10791 +               /*
10792 +                * if unlink/whiteout succeeded, parent dir mtime has
10793 +                * changed
10794 +                */
10795 +               unionfs_copy_attr_times(dir);
10796 +       }
10797 +
10798 +out:
10799 +       if (!err) {
10800 +               unionfs_check_dentry(dentry);
10801 +               unionfs_check_inode(dir);
10802 +       }
10803 +       unionfs_unlock_dentry(dentry->d_parent);
10804 +       unionfs_unlock_dentry(dentry);
10805 +       unionfs_read_unlock(dentry->d_sb);
10806 +       return err;
10807 +}
10808 +
10809 +static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
10810 +                              struct unionfs_dir_state *namelist)
10811 +{
10812 +       int err;
10813 +       struct dentry *lower_dentry;
10814 +       struct dentry *lower_dir_dentry = NULL;
10815 +
10816 +       /* Remove whiteout entries in the dbstart branch before the rmdir. */
10817 +       err = delete_whiteouts(dentry, dbstart(dentry), namelist);
10818 +       if (err)
10819 +               goto out;
10820 +
10821 +       lower_dentry = unionfs_lower_dentry(dentry);
10822 +
10823 +       lower_dir_dentry = lock_parent(lower_dentry);
10824 +
10825 +       /* hold a reference on the lower dentry across the vfs_rmdir call */
10826 +       dget(lower_dentry);
10827 +       err = is_robranch(dentry);
10828 +       if (!err) {
10829 +               /* see Documentation/filesystems/unionfs/issues.txt */
10830 +               lockdep_off();
10831 +               err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
10832 +               lockdep_on();
10833 +       }
10834 +       dput(lower_dentry);
10835 +
10836 +       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
10837 +       /* propagate number of hard-links */
10838 +       dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
10839 +
10840 +out:
10841 +       if (lower_dir_dentry)
10842 +               unlock_dir(lower_dir_dentry);
10843 +       return err;
10844 +}
10845 +
10846 +int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
10847 +{
10848 +       int err = 0;
10849 +       struct unionfs_dir_state *namelist = NULL;
10850 +       int dstart, dend;
10851 +
10852 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10853 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10854 +
10855 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10856 +               err = -ESTALE;
10857 +               goto out;
10858 +       }
10859 +       unionfs_check_dentry(dentry);
10860 +
10861 +       /* check if this unionfs directory is empty or not */
10862 +       err = check_empty(dentry, &namelist);
10863 +       if (err)
10864 +               goto out;
10865 +
10866 +       err = unionfs_rmdir_first(dir, dentry, namelist);
10867 +       dstart = dbstart(dentry);
10868 +       dend = dbend(dentry);
10869 +       /*
10870 +        * We create a whiteout for the directory if there was an error to
10871 +        * rmdir the first directory entry in the union.  Otherwise, we
10872 +        * create a whiteout only if there is a chance that a lower
10873 +        * priority branch might also have the same named directory.  IOW,
10874 +        * if there is not another same-named directory at a lower priority
10875 +        * branch, then we don't need to create a whiteout for it.
10876 +        */
10877 +       if (!err) {
10878 +               if (dstart < dend)
10879 +                       err = create_whiteout(dentry, dstart);
10880 +       } else {
10881 +               int new_err;
10882 +
10883 +               if (dstart == 0)
10884 +                       goto out;
10885 +
10886 +               /* exit if the error is not a copyup error (e.g. -EROFS) */
10887 +               if (!IS_COPYUP_ERR(err))
10888 +                       goto out;
10889 +
10890 +               new_err = create_whiteout(dentry, dstart - 1);
10891 +               if (new_err != -EEXIST)
10892 +                       err = new_err;
10893 +       }
10894 +
10895 +out:
10896 +       /*
10897 +        * Drop references to lower dentry/inode so storage space for them
10898 +        * can be reclaimed.  Then, call d_drop so the system "forgets"
10899 +        * about us.
10900 +        */
10901 +       if (!err) {
10902 +               struct inode *inode = dentry->d_inode;
10903 +               BUG_ON(!inode);
10904 +               iput(unionfs_lower_inode_idx(inode, dstart));
10905 +               unionfs_set_lower_inode_idx(inode, dstart, NULL);
10906 +               dput(unionfs_lower_dentry_idx(dentry, dstart));
10907 +               unionfs_set_lower_dentry_idx(dentry, dstart, NULL);
10908 +               /*
10909 +                * If the last directory is unlinked, then mark istart/end
10910 +                * as -1, (to maintain the invariant that if there are no
10911 +                * lower objects, then branch index start and end are set to
10912 +                * -1).
10913 +                */
10914 +               if (!unionfs_lower_inode_idx(inode, dstart) &&
10915 +                   !unionfs_lower_inode_idx(inode, dend))
10916 +                       ibstart(inode) = ibend(inode) = -1;
10917 +               d_drop(dentry);
10918 +       }
10919 +
10920 +       if (namelist)
10921 +               free_rdstate(namelist);
10922 +
10923 +       unionfs_unlock_dentry(dentry);
10924 +       unionfs_read_unlock(dentry->d_sb);
10925 +       return err;
10926 +}
10927 diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c
10928 new file mode 100644
10929 index 0000000..8001c65
10930 --- /dev/null
10931 +++ b/fs/unionfs/xattr.c
10932 @@ -0,0 +1,153 @@
10933 +/*
10934 + * Copyright (c) 2003-2007 Erez Zadok
10935 + * Copyright (c) 2003-2006 Charles P. Wright
10936 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
10937 + * Copyright (c) 2005-2006 Junjiro Okajima
10938 + * Copyright (c) 2005      Arun M. Krishnakumar
10939 + * Copyright (c) 2004-2006 David P. Quigley
10940 + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
10941 + * Copyright (c) 2003      Puja Gupta
10942 + * Copyright (c) 2003      Harikesavan Krishnan
10943 + * Copyright (c) 2003-2007 Stony Brook University
10944 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
10945 + *
10946 + * This program is free software; you can redistribute it and/or modify
10947 + * it under the terms of the GNU General Public License version 2 as
10948 + * published by the Free Software Foundation.
10949 + */
10950 +
10951 +#include "union.h"
10952 +
10953 +/* Bounded kmalloc helper for xattr buffers; lifted from fs/xattr.c */
10954 +void *unionfs_xattr_alloc(size_t size, size_t limit)
10955 +{
10956 +       void *ptr;
10957 +
10958 +       if (size > limit)
10959 +               return ERR_PTR(-E2BIG);
10960 +
10961 +       if (!size)              /* size request, no buffer is needed */
10962 +               return NULL;
10963 +
10964 +       ptr = kmalloc(size, GFP_KERNEL);
10965 +       if (unlikely(!ptr))
10966 +               return ERR_PTR(-ENOMEM);
10967 +       return ptr;
10968 +}
10969 +
10970 +/*
10971 + * Read an xattr via the lower dentry.  BKL held by caller.
10972 + * dentry->d_inode->i_mutex locked
10973 + */
10974 +ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
10975 +                        size_t size)
10976 +{
10977 +       struct dentry *lower_dentry = NULL;
10978 +       int err = -EOPNOTSUPP;
10979 +
10980 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
10981 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
10982 +
10983 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
10984 +               err = -ESTALE;
10985 +               goto out;
10986 +       }
10987 +
10988 +       lower_dentry = unionfs_lower_dentry(dentry);
10989 +
10990 +       err = vfs_getxattr(lower_dentry, (char *) name, value, size);
10991 +
10992 +out:
10993 +       unionfs_check_dentry(dentry);
10994 +       unionfs_unlock_dentry(dentry);
10995 +       unionfs_read_unlock(dentry->d_sb);
10996 +       return err;
10997 +}
10998 +
10999 +/*
11000 + * Set an xattr on the lower dentry.  BKL held by caller.
11001 + * dentry->d_inode->i_mutex locked
11002 + */
11003 +int unionfs_setxattr(struct dentry *dentry, const char *name,
11004 +                    const void *value, size_t size, int flags)
11005 +{
11006 +       struct dentry *lower_dentry = NULL;
11007 +       int err = -EOPNOTSUPP;
11008 +
11009 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11010 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11011 +
11012 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
11013 +               err = -ESTALE;
11014 +               goto out;
11015 +       }
11016 +
11017 +       lower_dentry = unionfs_lower_dentry(dentry);
11018 +
11019 +       err = vfs_setxattr(lower_dentry, (char *) name, (void *) value,
11020 +                          size, flags);
11021 +
11022 +out:
11023 +       unionfs_check_dentry(dentry);
11024 +       unionfs_unlock_dentry(dentry);
11025 +       unionfs_read_unlock(dentry->d_sb);
11026 +       return err;
11027 +}
11028 +
11029 +/*
11030 + * Remove an xattr from the lower dentry.  BKL held by caller.
11031 + * dentry->d_inode->i_mutex locked
11032 + */
11033 +int unionfs_removexattr(struct dentry *dentry, const char *name)
11034 +{
11035 +       struct dentry *lower_dentry = NULL;
11036 +       int err = -EOPNOTSUPP;
11037 +
11038 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11039 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11040 +
11041 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
11042 +               err = -ESTALE;
11043 +               goto out;
11044 +       }
11045 +
11046 +       lower_dentry = unionfs_lower_dentry(dentry);
11047 +
11048 +       err = vfs_removexattr(lower_dentry, (char *) name);
11049 +
11050 +out:
11051 +       unionfs_check_dentry(dentry);
11052 +       unionfs_unlock_dentry(dentry);
11053 +       unionfs_read_unlock(dentry->d_sb);
11054 +       return err;
11055 +}
11056 +
11057 +/*
11058 + * List the lower dentry's xattrs.  BKL held by caller.
11059 + * dentry->d_inode->i_mutex locked
11060 + */
11061 +ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
11062 +{
11063 +       struct dentry *lower_dentry = NULL;
11064 +       int err = -EOPNOTSUPP;
11065 +       char *encoded_list = NULL;
11066 +
11067 +       unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD);
11068 +       unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);
11069 +
11070 +       if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) {
11071 +               err = -ESTALE;
11072 +               goto out;
11073 +       }
11074 +
11075 +       lower_dentry = unionfs_lower_dentry(dentry);
11076 +
11077 +       encoded_list = list;    /* plain alias; nothing is encoded here */
11078 +       err = vfs_listxattr(lower_dentry, encoded_list, size);
11079 +
11080 +out:
11081 +       unionfs_check_dentry(dentry);
11082 +       unionfs_unlock_dentry(dentry);
11083 +       unionfs_read_unlock(dentry->d_sb);
11084 +       return err;
11085 +}
11086 diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
11087 index bb516ce..6b52faf 100644
11088 --- a/include/linux/fs_stack.h
11089 +++ b/include/linux/fs_stack.h
11090 @@ -1,17 +1,28 @@
11091 +/*
11092 + * Copyright (c) 2006-2007 Erez Zadok
11093 + * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
11094 + * Copyright (c) 2006-2007 Stony Brook University
11095 + * Copyright (c) 2006-2007 The Research Foundation of SUNY
11096 + *
11097 + * This program is free software; you can redistribute it and/or modify
11098 + * it under the terms of the GNU General Public License version 2 as
11099 + * published by the Free Software Foundation.
11100 + */
11101 +
11102  #ifndef _LINUX_FS_STACK_H
11103  #define _LINUX_FS_STACK_H
11104
11105 -/* This file defines generic functions used primarily by stackable
11106 +/*
11107 + * This file defines generic functions used primarily by stackable
11108   * filesystems; none of these functions require i_mutex to be held.
11109   */
11110
11111  #include <linux/fs.h>
11112
11113  /* externs for fs/stack.c */
11114 -extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
11115 -                               int (*get_nlinks)(struct inode *));
11116 -
11117 -extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
11118 +extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
11119 +extern void fsstack_copy_inode_size(struct inode *dst,
11120 +                                   const struct inode *src);
11121
11122  /* inlines */
11123  static inline void fsstack_copy_attr_atime(struct inode *dest,
11124 diff --git a/include/linux/magic.h b/include/linux/magic.h
11125 index 1fa0c2c..67043ed 100644
11126 --- a/include/linux/magic.h
11127 +++ b/include/linux/magic.h
11128 @@ -35,6 +35,8 @@
11129  #define REISER2FS_SUPER_MAGIC_STRING   "ReIsEr2Fs"
11130  #define REISER2FS_JR_SUPER_MAGIC_STRING        "ReIsEr3Fs"
11131
11132 +#define UNIONFS_SUPER_MAGIC 0xf15f083d
11133 +
11134  #define SMB_SUPER_MAGIC                0x517B
11135  #define USBDEVICE_SUPER_MAGIC  0x9fa2
11136  #define CGROUP_SUPER_MAGIC     0x27e0eb
11137 diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h
11138 new file mode 100644
11139 index 0000000..a467de0
11140 --- /dev/null
11141 +++ b/include/linux/union_fs.h
11142 @@ -0,0 +1,22 @@
11143 +/*
11144 + * Copyright (c) 2003-2007 Erez Zadok
11145 + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
11146 + * Copyright (c) 2003-2007 Stony Brook University
11147 + * Copyright (c) 2003-2007 The Research Foundation of SUNY
11148 + *
11149 + * This program is free software; you can redistribute it and/or modify
11150 + * it under the terms of the GNU General Public License version 2 as
11151 + * published by the Free Software Foundation.
11152 + */
11153 +
11154 +#ifndef _LINUX_UNION_FS_H
11155 +#define _LINUX_UNION_FS_H
11156 +
11157 +/*
11158 + * DEFINITIONS FOR USER AND KERNEL CODE:
11159 + */
11160 +# define UNIONFS_IOCTL_INCGEN          _IOR(0x15, 11, int)
11161 +# define UNIONFS_IOCTL_QUERYFILE       _IOR(0x15, 15, int)
11162 +
11163 +#endif /* _LINUX_UNION_FS_H */
11164 +